Phase 7: Hoàn thiện Modular RAG Backend với FastAPI và Đa LLM Provider

This commit is contained in:
2026-05-08 07:30:30 +00:00
commit 26d1298cf6
51 changed files with 5360 additions and 0 deletions

59
paddle_debug.txt Normal file
View File

@@ -0,0 +1,59 @@
[{'input_path': None, 'page_index': None, 'doc_preprocessor_res': {'input_path': None, 'page_index': None, 'input_img': array([[[246, ..., 246],
...,
[248, ..., 248]],
...,
[[248, ..., 248],
...,
[255, ..., 255]]], shape=(1684, 1191, 3), dtype=uint8), 'model_settings': {'use_doc_orientation_classify': True, 'use_doc_unwarping': True}, 'angle': 0, 'rot_img': array([[[246, ..., 246],
...,
[248, ..., 248]],
...,
[[248, ..., 248],
...,
[255, ..., 255]]], shape=(1684, 1191, 3), dtype=uint8), 'output_img': array([[[255, ..., 255],
...,
[254, ..., 254]],
...,
[[255, ..., 255],
...,
[255, ..., 255]]], shape=(1684, 1191, 3), dtype=uint8)}, 'dt_polys': [array([[36, 28],
...,
[36, 61]], shape=(4, 2), dtype=int16), array([[ 39, 134],
...,
[ 39, 161]], shape=(4, 2), dtype=int16), array([[ 41, 206],
...,
[ 41, 233]], shape=(4, 2), dtype=int16), array([[ 44, 280],
...,
[ 43, 304]], shape=(4, 2), dtype=int16), array([[ 45, 350],
...,
[ 45, 374]], shape=(4, 2), dtype=int16), array([[ 47, 422],
...,
[ 47, 445]], shape=(4, 2), dtype=int16), array([[ 46, 490],
...,
[ 46, 513]], shape=(4, 2), dtype=int16), array([[ 854, 1345],
...,
[ 851, 1393]], shape=(4, 2), dtype=int16)], 'model_settings': {'use_doc_preprocessor': True, 'use_textline_orientation': False}, 'text_det_params': {'limit_side_len': 64, 'limit_type': 'min', 'thresh': 0.3, 'max_side_limit': 4000, 'box_thresh': 0.6, 'unclip_ratio': 1.5}, 'text_type': 'general', 'text_rec_score_thresh': 0.0, 'return_word_box': False, 'rec_texts': ['Biên bn bàn giao h sơ lưu tr', 'Đây là tài liu scan mu đ kim th nhánh SCAN PDF.', 'Ni dung đưσc vê thành nh ri nhúng vào PDF đ không có text layer.', 'Ngưi nhn: Nguyn Văn A', 'Phòng ban: Hành chính - Tng hp', 'Ngày bàn giao: 06/05/2026', 'Ghi chú: kim tra OCR ting Viêt có du.', 'ĐÃNHN'], 'rec_scores': [0.9541749954223633, 0.9710000157356262, 0.9784880876541138, 0.981657862663269, 0.9962664246559143, 0.9966237545013428, 0.9740114212036133, 0.9314918518066406], 'rec_polys': [array([[36, 28],
...,
[36, 61]], shape=(4, 2), dtype=int16), array([[ 39, 134],
...,
[ 39, 161]], shape=(4, 2), dtype=int16), array([[ 41, 206],
...,
[ 41, 233]], shape=(4, 2), dtype=int16), array([[ 44, 280],
...,
[ 43, 304]], shape=(4, 2), dtype=int16), array([[ 45, 350],
...,
[ 45, 374]], shape=(4, 2), dtype=int16), array([[ 47, 422],
...,
[ 47, 445]], shape=(4, 2), dtype=int16), array([[ 46, 490],
...,
[ 46, 513]], shape=(4, 2), dtype=int16), array([[ 854, 1345],
...,
[ 851, 1393]], shape=(4, 2), dtype=int16)], 'vis_fonts': [<paddlex.utils.fonts.Font object at 0x7afeb51ed190>, <paddlex.utils.fonts.Font object at 0x7afeb51ed190>, <paddlex.utils.fonts.Font object at 0x7afeb51ed190>, <paddlex.utils.fonts.Font object at 0x7afeb51ed190>, <paddlex.utils.fonts.Font object at 0x7afeb51ed190>, <paddlex.utils.fonts.Font object at 0x7afeb51ed190>, <paddlex.utils.fonts.Font object at 0x7afeb51ed190>, <paddlex.utils.fonts.Font object at 0x7afeb51ed190>], 'textline_orientation_angles': [-1, -1, -1, -1, -1, -1, -1, -1], 'rec_boxes': array([[ 36, ..., 61],
...,
[ 851, ..., 1401]], shape=(8, 4), dtype=int16)}]