91 lines
3.1 KiB
Python
91 lines
3.1 KiB
Python
# Standard-library imports.
import json
import logging
import os
import sys
from typing import Any, Dict, List

# Prepend the parent of this file's directory to sys.path before the project
# import below. NOTE(review): presumably this lets the absolute ``ingestion``
# import resolve when the file is run directly as a script -- confirm.
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)

from ingestion.providers.base_provider import BaseStorageProvider

# Configure the root logger at INFO level and create the logger shared by
# everything in this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("IngestionSync")
|
|
|
|
|
|
class SyncEngine:
    """
    Provider-agnostic synchronization orchestrator.

    Works with any BaseStorageProvider (SharePoint, Google Drive, NAS, ...):
    it asks the provider for changes relative to the last saved sync state,
    merges them into a local JSON file that simulates a database, and then
    persists the state returned by the provider.
    """

    def __init__(
        self,
        provider: "BaseStorageProvider",
        state_file: str = "delta_state.json",
        output_file: str = "ingestion_output.json",
    ):
        """
        Store the provider and the paths used for persistence.

        Args:
            provider: Storage provider; it must expose ``fetch_changes(state)``
                returning an ``(items, new_state)`` pair.
            state_file: Path of the JSON file that holds the sync state.
            output_file: Path of the JSON file used as the local database.
        """
        self.provider = provider
        self.state_file = state_file
        self.output_file = output_file

    @staticmethod
    def _write_json_atomic(path: str, payload: Any) -> None:
        """
        Serialize ``payload`` as JSON to ``path`` without ever exposing a
        partially written file.

        The data is written to a sibling ``<path>.tmp`` file first and then
        moved over the target with ``os.replace``. If serialization or the
        write fails, the previous content of ``path`` is left untouched and
        the temporary file is removed.

        Raises:
            OSError: If the temporary file cannot be written or moved.
            TypeError: If ``payload`` is not JSON serializable.
        """
        tmp_path = f"{path}.tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, path)
        finally:
            # After a successful replace the temp file no longer exists; this
            # only cleans up the leftovers of a failed write.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def _load_sync_state(self) -> Dict:
        """
        Load the sync state from the local state file.

        Returns:
            The parsed content of the state file, or an empty dict when the
            file does not exist or does not contain valid JSON. An empty
            state makes ``run_sync`` perform a full sync.
        """
        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {}
        except json.JSONDecodeError as exc:
            # A corrupt state file must not block syncing forever; starting
            # over with a full sync is the safe fallback.
            logger.warning(
                "Sync state file %s is not valid JSON (%s); ignoring it.",
                self.state_file,
                exc,
            )
            return {}

    def _save_sync_state(self, state: Dict):
        """Save the sync state to the local state file (atomic replace)."""
        self._write_json_atomic(self.state_file, state)

    def _load_local_db(self) -> Dict[Any, Dict[str, Any]]:
        """
        Read the output file and index its records by their ``"id"``.

        Loading is best-effort: a missing file, invalid JSON, a top-level
        value that is not a list, or records that are not dicts with an
        ``"id"`` are skipped (with a warning where something was discarded)
        instead of aborting the sync.

        Returns:
            Mapping of record id to record, in file order.
        """
        try:
            with open(self.output_file, "r", encoding="utf-8") as f:
                existing = json.load(f)
        except FileNotFoundError:
            return {}
        except json.JSONDecodeError as exc:
            logger.warning(
                "Output file %s is not valid JSON (%s); starting from an empty database.",
                self.output_file,
                exc,
            )
            return {}

        if not isinstance(existing, list):
            logger.warning(
                "Output file %s does not contain a JSON list; starting from an empty database.",
                self.output_file,
            )
            return {}

        db: Dict[Any, Dict[str, Any]] = {}
        for record in existing:
            if isinstance(record, dict) and record.get("id") is not None:
                db[record["id"]] = record
            else:
                logger.warning("Skipping stored record without an 'id': %r", record)
        return db

    def _upsert_to_local_db(self, new_items: List[Dict[str, Any]]):
        """
        Merge ``new_items`` into the local JSON file (simulated database).

        Rules, applied per item in order:
          * an item without an ``"id"`` is skipped with a warning, because it
            cannot be keyed and would make the stored file unreadable on the
            next run;
          * an item flagged ``"is_deleted"`` whose id is already stored only
            sets ``"is_deleted"`` on the stored record, keeping its other
            fields;
          * any other item replaces (or creates) the record with that id.

        The merged list is always written back, even when ``new_items`` is
        empty.

        Args:
            new_items: Change records, each expected to carry an ``"id"``.
        """
        db = self._load_local_db()

        for item in new_items:
            item_id = item.get("id")
            if item_id is None:
                logger.warning("Skipping incoming item without an 'id': %r", item)
                continue

            stored = db.get(item_id)
            if item.get("is_deleted") and stored is not None:
                # Soft delete: keep the known metadata, just flag the record.
                stored["is_deleted"] = True
            else:
                db[item_id] = item

        final_list = list(db.values())
        self._write_json_atomic(self.output_file, final_list)

        logger.info("Local database updated. Total items: %d", len(final_list))

    def run_sync(self):
        """
        Run one sync pass: fetch changes from the provider and store them
        locally.

        A non-empty saved state is reported as an incremental sync, an empty
        one as a full sync; in both cases the state is handed to
        ``provider.fetch_changes``. The new state is saved only after the
        returned items have been written, so a failure while storing items
        leaves the previous state in place for the next run.
        """
        logger.info("=== STARTING SYNC ===")

        sync_state = self._load_sync_state()
        if sync_state:
            logger.info("Found existing sync state. Performing INCREMENTAL sync.")
        else:
            logger.info("No sync state found. Performing FULL sync.")

        items, new_state = self.provider.fetch_changes(sync_state)
        logger.info("Provider returned %d change(s).", len(items))

        if items:
            self._upsert_to_local_db(items)

        self._save_sync_state(new_state)
        logger.info("Sync state saved.")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build a SharePoint-backed engine with the default
    # state/output paths and run a single sync pass. The provider is imported
    # here, so it is only loaded when this file is executed directly.
    from ingestion.providers.sharepoint_provider import SharePointProvider

    SyncEngine(SharePointProvider()).run_sync()
|