Phase 8 Complete: Sync Audit Log + Frontend Integration

- Thêm audit/sync_audit.py: Ghi lịch sử sync vào audit/sync_log.json
- Thêm API endpoints: /sync/history, /sync/history/{run_id}
- Frontend: Nút 'Lịch sử đồng bộ' + panel expand chi tiết từng file
- Sửa frontend served từ backend (http://localhost:8000)
- Cập nhật DEPLOYMENT_GUIDE với hướng dẫn chạy localhost
- Cập nhật ARCHITECTURE_MAP: Phase 8 hoàn thành
This commit is contained in:
2026-05-11 08:49:10 +00:00
parent f937d1a98e
commit 78372d18ee
8 changed files with 577 additions and 18 deletions

View File

@@ -5,7 +5,8 @@ import secrets
from enum import Enum
from typing import List, Optional, Dict, Any
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request, status
from fastapi.responses import RedirectResponse
from fastapi.responses import RedirectResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field, validator
import uvicorn
@@ -24,6 +25,7 @@ from extraction.ocr_service import OCRService
from extraction.text_extractor import TextExtractor
from chunking.markdown_chunker import MarkdownChunker
from indexing.vector_store import VectorStore
from audit import sync_audit
# --- Cấu hình Logging chuyên nghiệp ---
logging.basicConfig(
@@ -50,6 +52,15 @@ app.add_middleware(
allow_headers=["*"],
)
# Serve frontend static files
# FRONTEND_DIR resolves to the "frontend" folder located one level above the
# directory that contains this file (two dirname() hops from this file's
# absolute path).
FRONTEND_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "frontend")
# Expose everything under FRONTEND_DIR at the "/static" URL prefix.
app.mount("/static", StaticFiles(directory=FRONTEND_DIR), name="static")
@app.get("/", tags=["Frontend"])
async def serve_frontend():
    """Return the frontend entry page (``index.html`` inside ``FRONTEND_DIR``)."""
    index_path = os.path.join(FRONTEND_DIR, "index.html")
    return FileResponse(index_path)
# --- Singleton Engine Instance ---
rag_engine = None
sync_status = {"running": False, "last_run": None, "processed": 0, "skipped": 0, "errors": []}
@@ -270,6 +281,8 @@ def run_sync_background():
global sync_status
sync_status = {"running": True, "last_run": None, "processed": 0, "skipped": 0, "errors": []}
run_id = sync_audit.start_run()
try:
provider = SharePointProvider()
dce = DocumentClassificationEngine(provider=provider)
@@ -304,16 +317,22 @@ def run_sync_background():
if classification.processing_policy in (ProcessingPolicy.UNSUPPORTED, ProcessingPolicy.METADATA_ONLY, ProcessingPolicy.REQUIRES_REVIEW):
sync_status["skipped"] += 1
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
classification.doc_type.value, 0, "skipped", classification.reason)
continue
try:
file_bytes = provider.download_file(item)
except Exception as e:
sync_status["errors"].append(f"{name}: download failed")
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
classification.doc_type.value, 0, "error", str(e))
continue
if not file_bytes:
sync_status["errors"].append(f"{name}: empty file")
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
classification.doc_type.value, 0, "error", "empty file")
continue
pages = []
@@ -336,6 +355,8 @@ def run_sync_background():
if not pages:
sync_status["skipped"] += 1
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
classification.doc_type.value, 0, "skipped", "no content extracted")
continue
metadata = {
@@ -352,16 +373,22 @@ def run_sync_background():
vector_db.delete_by_file_id(item_id)
vector_db.embed_and_index(chunks)
sync_status["processed"] += 1
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
classification.doc_type.value, len(chunks), "indexed")
logger.info(f"Sync: Indexed {name}{len(chunks)} chunks")
else:
sync_status["skipped"] += 1
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
classification.doc_type.value, 0, "skipped", "no chunks generated")
sync_status["last_run"] = "completed"
sync_audit.finish_run(run_id, "completed")
logger.info(f"Sync completed: {sync_status['processed']} processed, {sync_status['skipped']} skipped")
except Exception as e:
sync_status["last_run"] = "failed"
sync_status["errors"].append(str(e))
sync_audit.finish_run(run_id, "failed")
logger.error(f"Sync failed: {e}")
finally:
sync_status["running"] = False
@@ -382,8 +409,30 @@ async def sync_endpoint(background_tasks: BackgroundTasks):
@app.get("/sync/status", tags=["Ingestion"])
async def sync_status_endpoint():
    """Report the current synchronization status.

    Returns:
        The module-level ``sync_status`` dict as it stands at call time.
    """
    # A single docstring only: a second consecutive string literal would be a
    # dead expression statement and would never become the function's __doc__.
    return sync_status
@app.get("/sync/history", tags=["Ingestion"])
async def sync_history_endpoint(limit: int = 10):
    """
    Fetch the sync history (audit log).

    Args:
        limit: Number of most recent sync runs to return (default 10).

    Returns:
        A dict with a single ``"runs"`` key holding whatever
        ``sync_audit.get_history(limit)`` yields.
    """
    recent_runs = sync_audit.get_history(limit)
    return {"runs": recent_runs}
@app.get("/sync/history/{run_id}", tags=["Ingestion"])
async def sync_run_detail_endpoint(run_id: str):
    """
    Fetch the details of a single sync run.

    Args:
        run_id: Identifier of the run, taken from the URL path.

    Raises:
        HTTPException: 404 when ``sync_audit.get_run_detail`` returns a
            falsy value for ``run_id``.
    """
    detail = sync_audit.get_run_detail(run_id)
    if detail:
        return detail
    raise HTTPException(status_code=404, detail="Run not found")
if __name__ == "__main__":
    # Script entry point: run the app under uvicorn, listening on every
    # interface at port 8000 with auto-reload switched on.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )