# File-viewer metadata captured with this listing: 60 lines, 2.2 KiB, Python.
import logging
from typing import Dict, List, Optional

from search.retriever import SearchRetriever

from .llm_factory import LLMFactory
|
|
|
|
# Module-level logger shared by everything in this file; registered under the
# fixed name "RAGEngine" rather than the module's __name__.
logger = logging.getLogger("RAGEngine")
|
|
|
|
|
|
class RAGEngine:
    """Retrieval-augmented generation engine.

    Runs the Search -> Augment -> Generate pipeline: fetch chunks from the
    retriever, format them into a context string, and ask the LLM provider
    to answer the user's question with that context.

    Attributes:
        retriever: ``SearchRetriever`` instance used to fetch chunks.
        llm: Provider object returned by ``LLMFactory.get_provider()``.
    """

    # Context handed to the LLM when retrieval yields nothing. This is a
    # runtime string that reaches the model and the caller; keep it verbatim.
    _NO_CONTEXT_MESSAGE = "Không tìm thấy thông tin liên quan trong cơ sở dữ liệu nội bộ."

    # Separator placed between formatted chunks inside the context string.
    _CONTEXT_SEPARATOR = "\n---\n"

    def __init__(self):
        """Create the retriever and LLM provider and log the provider class name."""
        self.retriever = SearchRetriever()
        self.llm = LLMFactory.get_provider()
        logger.info("RAG Engine ready with LLM Provider: %s", type(self.llm).__name__)

    def chat(
        self,
        user_query: str,
        history: Optional[List[Dict[str, str]]] = None,
        user_email: Optional[str] = None,
        is_admin: bool = False,
        top_k: int = 5,
    ) -> Dict:
        """Answer ``user_query`` using the RAG flow: Search -> Augment -> Generate.

        Args:
            user_query: The user's question.
            history: Chat history, forwarded unchanged to the LLM provider.
            user_email: User email, forwarded to the retriever so it can
                filter results by permission.
            is_admin: Forwarded to the retriever; ``True`` is meant to bypass
                ACL filtering.
            top_k: Maximum number of chunks requested from the retriever.

        Returns:
            A dict with three keys: ``"answer"`` (the LLM provider's return
            value), ``"context_used"`` (the context string given to the LLM)
            and ``"sources"`` (one dict per retrieved chunk with
            ``file_name``, ``page``, ``url`` and ``download_url``; empty when
            nothing was retrieved).
        """
        # 1. SEARCH
        logger.info(
            "Search query: %s (user=%s, admin=%s)",
            user_query[:100],
            user_email or "none",
            is_admin,
        )
        retrieved = self.retriever.retrieve(
            user_query, top_k=top_k, user_email=user_email, is_admin=is_admin
        )
        # Normalize once: a None/empty result becomes [], so the citation
        # list below can always be built without a TypeError.
        chunks = list(retrieved) if retrieved else []

        if chunks:
            context_text = self._format_context(chunks)
            logger.info(
                "Search result: %s chunks from %s files",
                len(chunks),
                len({chunk.file_name for chunk in chunks}),
            )
        else:
            context_text = self._NO_CONTEXT_MESSAGE
            logger.info("Search result: 0 chunks found")

        # 2. GENERATION
        logger.info("Requesting LLM to generate answer...")
        answer = self.llm.generate_response(
            prompt=user_query,
            context=context_text,
            history=history,
        )
        # A log line must not fail the request if the provider returned None.
        logger.info(
            "LLM response length: %s chars",
            len(answer) if answer is not None else 0,
        )

        # 3. Return with citations
        return {
            "answer": answer,
            "context_used": context_text,
            "sources": self._build_sources(chunks),
        }

    @classmethod
    def _format_context(cls, chunks) -> str:
        """Render each chunk as a source header plus its text, joined by the separator."""
        return cls._CONTEXT_SEPARATOR.join(
            f"[Nguồn: {chunk.file_name}, Trang: {chunk.page_from}]\nNội dung: {chunk.text}"
            for chunk in chunks
        )

    @staticmethod
    def _build_sources(chunks) -> List[Dict]:
        """Build the citation entries returned to the caller, one per chunk."""
        return [
            {
                "file_name": chunk.file_name,
                "page": chunk.page_from,
                "url": chunk.source_url,
                "download_url": chunk.download_url,
            }
            for chunk in chunks
        ]
|