diff --git a/api/main.py b/api/main.py
index c44ed3e..8fadce4 100644
--- a/api/main.py
+++ b/api/main.py
@@ -1,93 +1,138 @@
 import logging
-from fastapi import FastAPI, HTTPException, BackgroundTasks
-from pydantic import BaseModel
-from typing import List, Optional, Dict, Any
-import uvicorn
 import sys
 import os
+from enum import Enum
+from typing import List, Optional, Dict, Any
+from fastapi import FastAPI, HTTPException, BackgroundTasks, status
+from pydantic import BaseModel, Field, validator
+import uvicorn
 
-# Thêm thư mục gốc vào PYTHONPATH để tìm thấy các module chat, search, ingestion...
+# Put the project root on sys.path so the chat/ and core/ packages can be imported
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from chat.rag_engine import RAGEngine
 from core.config import settings
 
-# Cấu hình Logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-logger = logging.getLogger("API")
+# --- Logging setup: timestamp, level, logger name, message ---
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S"
+)
+logger = logging.getLogger("RAG_API")
 
 app = FastAPI(
-    title="PoC SharePoint RAG API",
-    description="Hệ thống hỏi đáp nội bộ dựa trên SharePoint và Distributed VLM",
-    version="1.0.0"
+    title="Enterprise SharePoint RAG API",
+    description="Hệ thống hỏi đáp AI nội bộ sử dụng kiến trúc Modular Providers và Distributed VLM.",
+    version="1.1.0",
+    docs_url="/docs",  # Swagger UI path; the startup banner logs this URL
+    redoc_url="/redoc"  # ReDoc path
 )
 
-# Khởi tạo RAG Engine (Singleton)
-# Khi API khởi động, nó sẽ nạp sẵn Embedding Model vào RAM để phản hồi cực nhanh
-try:
-    if settings.opensearch_host == "opensearch":
-        settings.opensearch_host = "localhost" # Fallback cho Local Dev
-    rag_engine = RAGEngine()
-except Exception as e:
-    logger.error(f"Không thể khởi động RAG Engine: {e}")
-    rag_engine = None
+# Shared RAGEngine instance: None until startup_event() assigns it (stays None if RAGEngine() raises).
+rag_engine = None
+
+@app.on_event("startup")
+async def startup_event():
+    """Create the shared RAGEngine; if RAGEngine() raises, log it and leave rag_engine as None."""
+    global rag_engine
+    try:
+        logger.info(f"Đang khởi tạo RAG Engine với Provider: {settings.llm_provider}")
+        # Outside Docker (ENV != "docker"), replace the "opensearch" hostname with localhost
+        if settings.opensearch_host == "opensearch" and os.environ.get("ENV") != "docker":
+            settings.opensearch_host = "localhost"
+
+        rag_engine = RAGEngine()
+
+        # Readiness banner; the port is hard-coded to match the __main__ entry point (8000)
+        port = 8000
+        logger.info("="*60)
+        logger.info("🚀 RAG ENGINE ĐÃ SẴN SÀNG PHỤC VỤ!")
+        logger.info(f"🔗 API Endpoint: http://localhost:{port}")
+        logger.info(f"📖 Swagger UI:  http://localhost:{port}/docs")
+        logger.info(f"📊 Health Check: http://localhost:{port}/health")
+        logger.info("="*60)
+    except Exception as e:
+        logger.critical(f"❌ THẤT BẠI khi khởi động RAG Engine: {e}", exc_info=True)
+
+# --- SCHEMAS ---
+
+class ChatRole(str, Enum):  # roles accepted in ChatHistoryItem.role
+    user = "user"
+    assistant = "assistant"
+    system = "system"
+
+class ChatHistoryItem(BaseModel):  # one prior conversation turn
+    role: ChatRole = Field(..., description="Vai trò của người gửi (user/assistant)")  # required; ChatRole also accepts "system"
+    content: str = Field(..., min_length=1, description="Nội dung tin nhắn")  # required, at least 1 character
 
-# --- MODELS ---
 class ChatRequest(BaseModel):
-    query: str
-    history: Optional[List[Dict[str, str]]] = []
+    query: str = Field(  # required user question, 2 to 1000 characters
+        ..., 
+        min_length=2, 
+        max_length=1000, 
+        example="Quy trình bảo trì thiết bị là gì?",
+        description="Câu hỏi của người dùng"
+    )
+    history: List[ChatHistoryItem] = Field(  # prior turns; defaults to a fresh empty list
+        default_factory=list,
+        description="Lịch sử cuộc trò chuyện để duy trì ngữ cảnh"
+    )
+
+class SourceCitation(BaseModel):  # one cited document location returned with an answer
+    file_name: str
+    page: int
+    url: Optional[str] = None  # optional link; None when not provided
 
 class ChatResponse(BaseModel):
-    answer: str
-    sources: List[Dict[str, Any]] = []
+    answer: str = Field(..., description="Câu trả lời từ AI")  # required
+    sources: List[SourceCitation] = Field(default_factory=list, description="Danh sách các nguồn trích dẫn từ tài liệu")  # each entry is validated as a SourceCitation
+    context_used: Optional[str] = Field(None, description="Ngữ cảnh thực tế đã được trích xuất từ VectorDB (Dùng cho Debug/UI)")  # None unless the engine result carries "context_used"
 
 # --- ENDPOINTS ---
 
-@app.get("/health")
-def health_check():
-    """Kiểm tra sức khỏe hệ thống"""
+@app.get("/health", tags=["System"])
+async def health_check():
+    """Report whether the RAG engine is initialized plus the active config; no live OpenSearch/LLM probe is made."""
     return {
-        "status": "healthy",
-        "llm_provider": settings.llm_provider,
-        "opensearch_host": settings.opensearch_host
+        "status": "online" if rag_engine else "offline",
+        "engine_ready": rag_engine is not None,
+        "config": {
+            "provider": settings.llm_provider,
+            "opensearch_host": settings.opensearch_host
+        }
     }
 
-@app.post("/chat", response_model=ChatResponse)
+@app.post("/chat", response_model=ChatResponse, tags=["RAG"], status_code=status.HTTP_200_OK)
 async def chat_endpoint(request: ChatRequest):
     """
-    Điểm cuối để thực hiện hỏi đáp (RAG).
+    Answer request.query through the RAG engine, passing request.history along as context.
+    Responds 503 when the engine is not initialized and 500 (generic detail) when the engine call fails.
     """
     if not rag_engine:
-        raise HTTPException(status_code=503, detail="RAG Engine chưa được khởi tạo thành công.")
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE, 
+            detail="Hệ thống RAG đang khởi động hoặc gặp sự cố kết nối Database."
+        )
     
     try:
-        logger.info(f"Nhận câu hỏi: {request.query}")
-        result = rag_engine.chat(request.query, history=request.history)
+        # Hand the engine plain str dicts: role.value yields "user", not a ChatRole member that formats as "ChatRole.user"
+        history_data = [{"role": item.role.value, "content": item.content} for item in request.history]
+
+        logger.info(f"Xử lý truy vấn: {request.query[:50]}...")
+        result = rag_engine.chat(request.query, history=history_data)
         
         return ChatResponse(
             answer=result["answer"],
-            sources=result["sources"]
+            sources=result["sources"],
+            context_used=result.get("context_used")
         )
     except Exception as e:
-        logger.error(f"Lỗi xử lý chat: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/ingest")
-async def start_ingestion(background_tasks: BackgroundTasks):
-    """
-    Kích hoạt quá trình quét SharePoint và nạp dữ liệu vào OpenSearch.
-    Chạy dưới dạng Background Task để không làm treo API.
-    """
-    # TODO: Kết nối với script sync.py và quy trình extraction
-    # Ở đây chúng ta sẽ gọi một function xử lý bất đồng bộ
-    background_tasks.add_task(dummy_ingest_task)
-    return {"message": "Quá trình đồng bộ dữ liệu đã bắt đầu chạy ngầm."}
-
-async def dummy_ingest_task():
-    logger.info("Bắt đầu Ingestion task...")
-    # Sẽ tích hợp logic từ test_rag_pipeline.py vào đây
-    pass
+        logger.exception(f"Lỗi thực thi RAG: {str(e)}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Đã xảy ra lỗi nội bộ trong quá trình xử lý ngôn ngữ."
+        ) from e
 
 if __name__ == "__main__":
-    # Chạy server tại cổng 8000
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)  # direct-run entry point: auto-reload on, app given as an import string