Phase 8 Complete: Sync Audit Log + Frontend Integration
- Thêm audit/sync_audit.py: Ghi lịch sử sync vào audit/sync_log.json
- Thêm API endpoints: /sync/history, /sync/history/{run_id}
- Frontend: Nút 'Lịch sử đồng bộ' + panel expand chi tiết từng file
- Sửa để frontend được phục vụ từ backend (http://localhost:8000)
- Cập nhật DEPLOYMENT_GUIDE với hướng dẫn chạy localhost
- Cập nhật ARCHITECTURE_MAP: Phase 8 hoàn thành
This commit is contained in:
53
api/main.py
53
api/main.py
@@ -5,7 +5,8 @@ import secrets
|
||||
from enum import Enum
|
||||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request, status
|
||||
from fastapi.responses import RedirectResponse
|
||||
from fastapi.responses import RedirectResponse, FileResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field, validator
|
||||
import uvicorn
|
||||
@@ -24,6 +25,7 @@ from extraction.ocr_service import OCRService
|
||||
from extraction.text_extractor import TextExtractor
|
||||
from chunking.markdown_chunker import MarkdownChunker
|
||||
from indexing.vector_store import VectorStore
|
||||
from audit import sync_audit
|
||||
|
||||
# --- Cấu hình Logging chuyên nghiệp ---
|
||||
logging.basicConfig(
|
||||
@@ -50,6 +52,15 @@ app.add_middleware(
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Serve frontend static files.
# FRONTEND_DIR is the "frontend" folder located one level above the directory
# that contains this module.
FRONTEND_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "frontend",
)
# Expose everything under FRONTEND_DIR at the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory=FRONTEND_DIR),
    name="static",
)
|
||||
|
||||
@app.get("/", tags=["Frontend"])
async def serve_frontend():
    """Return the frontend entry page.

    Responds with a ``FileResponse`` for the ``index.html`` file located
    directly inside ``FRONTEND_DIR``.
    """
    index_page = os.path.join(FRONTEND_DIR, "index.html")
    return FileResponse(index_page)
|
||||
|
||||
# --- Singleton Engine Instance ---
# Engine handle; starts out unset.
rag_engine = None
# Shared sync state: returned by the /sync/status endpoint and rebound by
# run_sync_background() at the start of every run.
sync_status = {
    "running": False,
    "last_run": None,
    "processed": 0,
    "skipped": 0,
    "errors": [],
}
|
||||
@@ -270,6 +281,8 @@ def run_sync_background():
|
||||
global sync_status
|
||||
sync_status = {"running": True, "last_run": None, "processed": 0, "skipped": 0, "errors": []}
|
||||
|
||||
run_id = sync_audit.start_run()
|
||||
|
||||
try:
|
||||
provider = SharePointProvider()
|
||||
dce = DocumentClassificationEngine(provider=provider)
|
||||
@@ -304,16 +317,22 @@ def run_sync_background():
|
||||
|
||||
if classification.processing_policy in (ProcessingPolicy.UNSUPPORTED, ProcessingPolicy.METADATA_ONLY, ProcessingPolicy.REQUIRES_REVIEW):
|
||||
sync_status["skipped"] += 1
|
||||
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
|
||||
classification.doc_type.value, 0, "skipped", classification.reason)
|
||||
continue
|
||||
|
||||
try:
|
||||
file_bytes = provider.download_file(item)
|
||||
except Exception as e:
|
||||
sync_status["errors"].append(f"{name}: download failed")
|
||||
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
|
||||
classification.doc_type.value, 0, "error", str(e))
|
||||
continue
|
||||
|
||||
if not file_bytes:
|
||||
sync_status["errors"].append(f"{name}: empty file")
|
||||
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
|
||||
classification.doc_type.value, 0, "error", "empty file")
|
||||
continue
|
||||
|
||||
pages = []
|
||||
@@ -336,6 +355,8 @@ def run_sync_background():
|
||||
|
||||
if not pages:
|
||||
sync_status["skipped"] += 1
|
||||
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
|
||||
classification.doc_type.value, 0, "skipped", "no content extracted")
|
||||
continue
|
||||
|
||||
metadata = {
|
||||
@@ -352,16 +373,22 @@ def run_sync_background():
|
||||
vector_db.delete_by_file_id(item_id)
|
||||
vector_db.embed_and_index(chunks)
|
||||
sync_status["processed"] += 1
|
||||
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
|
||||
classification.doc_type.value, len(chunks), "indexed")
|
||||
logger.info(f"Sync: Indexed {name} → {len(chunks)} chunks")
|
||||
else:
|
||||
sync_status["skipped"] += 1
|
||||
sync_audit.log_file(run_id, name, item_id, classification.processing_policy.value,
|
||||
classification.doc_type.value, 0, "skipped", "no chunks generated")
|
||||
|
||||
sync_status["last_run"] = "completed"
|
||||
sync_audit.finish_run(run_id, "completed")
|
||||
logger.info(f"Sync completed: {sync_status['processed']} processed, {sync_status['skipped']} skipped")
|
||||
|
||||
except Exception as e:
|
||||
sync_status["last_run"] = "failed"
|
||||
sync_status["errors"].append(str(e))
|
||||
sync_audit.finish_run(run_id, "failed")
|
||||
logger.error(f"Sync failed: {e}")
|
||||
finally:
|
||||
sync_status["running"] = False
|
||||
@@ -382,8 +409,30 @@ async def sync_endpoint(background_tasks: BackgroundTasks):
|
||||
|
||||
@app.get("/sync/status", tags=["Ingestion"])
async def sync_status_endpoint():
    """Report the current synchronization status.

    Returns:
        The module-level ``sync_status`` dict, unmodified. Its keys are
        ``running``, ``last_run``, ``processed``, ``skipped`` and ``errors``.
    """
    # A single docstring only: the previous body carried two consecutive
    # string literals, the second of which was a no-op expression statement.
    return sync_status
|
||||
|
||||
|
||||
@app.get("/sync/history", tags=["Ingestion"])
async def sync_history_endpoint(limit: int = 10):
    """Fetch the sync history (audit log).

    Args:
        limit: Number of most recent sync runs to return (default 10).

    Returns:
        A dict with a single ``"runs"`` key holding whatever
        ``sync_audit.get_history(limit)`` returns.
    """
    recent_runs = sync_audit.get_history(limit)
    return {"runs": recent_runs}
|
||||
|
||||
|
||||
@app.get("/sync/history/{run_id}", tags=["Ingestion"])
async def sync_run_detail_endpoint(run_id: str):
    """Fetch the details of a single sync run.

    Args:
        run_id: Identifier of the run, taken from the URL path.

    Returns:
        The value produced by ``sync_audit.get_run_detail(run_id)``.

    Raises:
        HTTPException: 404 ("Run not found") when the lookup result is falsy.
    """
    detail = sync_audit.get_run_detail(run_id)
    if detail:
        return detail
    raise HTTPException(status_code=404, detail="Run not found")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Start uvicorn with auto-reload when this file is executed as a script.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )
|
||||
|
||||
Reference in New Issue
Block a user