import contextlib
import json
import logging
import os
import sys
from typing import Any, Dict, List

# Put the parent of this file's directory on sys.path so that the absolute
# ``ingestion.*`` imports below resolve when the file is run as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from ingestion.providers.base_provider import BaseStorageProvider  # noqa: E402

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("IngestionSync")


class SyncEngine:
    """Provider-agnostic sync orchestrator.

    Accepts any ``BaseStorageProvider`` (SharePoint, Google Drive, NAS, ...),
    asks it for changes relative to the last saved sync state, merges those
    changes into a local JSON file and then persists the new sync state.
    """

    def __init__(
        self,
        provider: BaseStorageProvider,
        state_file: str = "delta_state.json",
        output_file: str = "ingestion_output.json",
    ):
        """Store the provider and the paths of the two local JSON files.

        Args:
            provider: Source of changes; must expose ``fetch_changes(state)``.
            state_file: Path of the JSON file holding the sync state.
            output_file: Path of the JSON file used as the local "database".
        """
        self.provider = provider
        self.state_file = state_file
        self.output_file = output_file

    @staticmethod
    def _write_json_atomic(path: str, payload: Any) -> None:
        """Serialize ``payload`` as JSON to ``path`` without partial writes.

        The data is written to a sibling temporary file which is then swapped
        into place with ``os.replace``, so an interrupted run leaves the
        previous file intact instead of a truncated one.
        """
        tmp_path = f"{path}.tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, path)
        except BaseException:
            # Do not leave a half-written temp file behind; the original
            # error is what the caller needs to see.
            with contextlib.suppress(OSError):
                os.remove(tmp_path)
            raise

    def _load_sync_state(self) -> Dict:
        """Load the sync state from the local state file.

        Returns:
            The decoded state, or an empty dict when the file does not exist
            or does not contain valid JSON (which makes ``run_sync`` perform
            a full sync).
        """
        if not os.path.exists(self.state_file):
            return {}
        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except json.JSONDecodeError:
            # Same best-effort policy as the local DB loader: an unreadable
            # state should trigger a full sync, not block every future run.
            logger.warning(
                "Sync state file %s is not valid JSON; ignoring it.",
                self.state_file,
            )
            return {}

    def _save_sync_state(self, state: Dict) -> None:
        """Save the sync state to the local state file."""
        self._write_json_atomic(self.state_file, state)

    def _load_local_db(self) -> Dict[Any, Dict[str, Any]]:
        """Read the output file into a dict keyed by each record's ``"id"``.

        Best-effort: a missing, corrupt or unexpectedly shaped file yields an
        empty dict (with a warning), and records without an ``"id"`` are
        skipped, so that a damaged file never aborts the sync.
        """
        if not os.path.exists(self.output_file):
            return {}
        try:
            with open(self.output_file, "r", encoding="utf-8") as f:
                existing = json.load(f)
        except json.JSONDecodeError:
            logger.warning(
                "Local database %s is not valid JSON; starting from empty.",
                self.output_file,
            )
            return {}
        if not isinstance(existing, list):
            logger.warning(
                "Local database %s does not contain a JSON list; "
                "starting from empty.",
                self.output_file,
            )
            return {}

        db: Dict[Any, Dict[str, Any]] = {}
        for item in existing:
            item_id = item.get("id") if isinstance(item, dict) else None
            if item_id is None:
                logger.warning("Skipping stored record without an id: %r", item)
                continue
            db[item_id] = item
        return db

    def _upsert_to_local_db(self, new_items: List[Dict[str, Any]]) -> None:
        """Merge ``new_items`` into the local JSON file (simulated DB).

        Records are keyed by their ``"id"``. A record flagged ``"is_deleted"``
        marks the already-stored record as deleted (keeping its other fields)
        or is stored as-is when the id is unknown; any other record replaces
        the stored one. Records without an ``"id"`` are skipped.
        """
        db = self._load_local_db()

        for item in new_items:
            item_id = item.get("id")
            if item_id is None:
                # Without an id the record cannot be addressed later, and
                # several such records would overwrite each other.
                logger.warning("Skipping incoming record without an id: %r", item)
                continue
            if item.get("is_deleted") and item_id in db:
                # Tombstone: keep the stored metadata, only flag it deleted.
                db[item_id]["is_deleted"] = True
            else:
                db[item_id] = item

        final_list = list(db.values())
        self._write_json_atomic(self.output_file, final_list)
        logger.info("Local database updated. Total items: %d", len(final_list))

    def run_sync(self) -> None:
        """Run a sync: fetch changes from the provider and store them locally.

        The new sync state is saved only after the fetched items have been
        written, so a failure while storing items does not advance the state.
        """
        logger.info("=== STARTING SYNC ===")

        sync_state = self._load_sync_state()
        if sync_state:
            logger.info("Found existing sync state. Performing INCREMENTAL sync.")
        else:
            logger.info("No sync state found. Performing FULL sync.")

        items, new_state = self.provider.fetch_changes(sync_state)
        logger.info("Provider returned %d change(s).", len(items))

        if items:
            self._upsert_to_local_db(items)

        self._save_sync_state(new_state)
        logger.info("Sync state saved.")


if __name__ == "__main__":
    from ingestion.providers.sharepoint_provider import SharePointProvider

    provider = SharePointProvider()
    engine = SyncEngine(provider)
    engine.run_sync()