Xu ly SSO

This commit is contained in:
2026-05-09 10:31:28 +00:00
parent 9d04e7484c
commit f937d1a98e
21 changed files with 2515 additions and 271 deletions

View File

@@ -1,4 +1,5 @@
import logging
import os
from typing import List
from opensearchpy import OpenSearch, RequestsHttpConnection
from core.models import DocumentChunk
@@ -10,9 +11,12 @@ class VectorStore:
def __init__(self, index_name: str = "sharepoint_docs"):
self.index_name = index_name
# Kết nối tới OpenSearch Cluster
host = settings.opensearch_host
if host == "opensearch" and os.environ.get("ENV") != "docker":
host = "localhost"
self.client = OpenSearch(
hosts=[{'host': settings.opensearch_host, 'port': settings.opensearch_port}],
hosts=[{'host': host, 'port': settings.opensearch_port}],
http_auth=(settings.opensearch_user, settings.opensearch_pass),
use_ssl=False,
verify_certs=False,
@@ -64,6 +68,7 @@ class VectorStore:
"page_from": { "type": "integer" },
"page_to": { "type": "integer" },
"source_url": { "type": "keyword" },
"download_url": { "type": "keyword" },
"permissions": { "type": "keyword" }
}
}
@@ -71,6 +76,23 @@ class VectorStore:
self.client.indices.create(index=self.index_name, body=mapping)
logger.info(f"Đã tạo OpenSearch Index: {self.index_name}")
def delete_by_file_id(self, file_id: str):
    """Delete every previously indexed chunk of a file before re-ingesting it.

    Sends a delete-by-query request against ``self.index_name`` using a
    ``term`` filter on the ``file_id`` field.

    Args:
        file_id: Identifier of the file whose chunks should be removed.

    Returns:
        The ``deleted`` count taken from the response (0 when the key is
        absent), or 0 when anything in the request/response handling raised.
    """
    term_filter = {"query": {"term": {"file_id": file_id}}}
    try:
        # Everything stays inside the try so that a malformed response is
        # handled the same way as a failed request.
        result = self.client.delete_by_query(
            index=self.index_name,
            body=term_filter,
        )
        deleted = result.get("deleted", 0)
        # Only report when something was actually removed.
        if deleted > 0:
            logger.info(f"Đã xóa {deleted} chunks cũ của file_id={file_id}")
        return deleted
    except Exception as e:
        # Best-effort cleanup: the failure is logged and the caller proceeds.
        logger.warning(f"Không thể xóa chunks cũ (có thể index chưa tồn tại): {e}")
        return 0
def embed_and_index(self, chunks: List[DocumentChunk]):
"""Biến đổi Text thành Vector và lưu vào Database"""
if not chunks: