39 lines
1.1 KiB
Python
39 lines
1.1 KiB
Python
import os
|
|
import sys
|
|
import json
|
|
import logging
|
|
|
|
# Put the directory containing this script at the front of the import path.
# NOTE(review): presumably this is what lets the `core` and `extraction`
# packages below resolve when the script is launched from another
# directory — confirm against the repository layout.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _SCRIPT_DIR)

from core.models import IngestedDocument
from extraction.dce import DocumentClassificationEngine

# Root logger at INFO, emitting "LEVEL:logger-name:message" lines.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s:%(name)s:%(message)s",
)
|
|
def main() -> None:
    """Run every ingested file through the Document Classification Engine.

    Loads the items stored in ``ingestion_output.json`` (a relative path, so
    it is resolved against the current working directory), skips entries
    whose ``is_folder`` value is truthy, builds an ``IngestedDocument`` from
    each remaining entry, and prints the processing policy and reason
    returned by ``DocumentClassificationEngine.classify``.

    If the input file does not exist, a hint is printed and the function
    returns without classifying anything.
    """
    input_path = "ingestion_output.json"

    print("=== STARTING DCE PIPELINE TEST ===")

    # Open the file directly and handle the failure instead of checking
    # os.path.exists() first: the file could disappear between the check and
    # the open, which would crash with a traceback rather than print the hint.
    try:
        with open(input_path, "r", encoding="utf-8") as f:
            items = json.load(f)
    except FileNotFoundError:
        print(f"File {input_path} not found! Please run ingestion sync first.")
        return

    dce = DocumentClassificationEngine()

    print(f"Loaded {len(items)} items from {input_path}\n")

    # NOTE(review): assumes the JSON top level is a list of objects (each
    # entry must support .get() and ** unpacking) — confirm against the
    # ingestion output format.
    for item in items:
        if item.get("is_folder"):
            continue  # DCE only processes files

        # The entry's keys are passed as keyword arguments to the model.
        doc = IngestedDocument(**item)

        print(f"\n--- Processing: {doc.name} ---")
        result = dce.classify(doc)
        print(f">> Policy: {result.processing_policy.value} | Reason: {result.reason}")
|
|
# Run the pipeline test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()