Files
poc_system/test_dce_pipeline.py

39 lines
1.1 KiB
Python

import os
import sys
import json
import logging
# Put this script's own directory at the front of sys.path before the project
# imports below run. Presumably the `core` and `extraction` packages live next
# to this file -- confirm against the repository layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from core.models import IngestedDocument
from extraction.dce import DocumentClassificationEngine
# Configure the root logger at INFO level with "LEVEL:logger-name:message" lines.
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
def main(input_path: str = "ingestion_output.json") -> None:
    """Classify every ingested file with the Document Classification Engine.

    Loads the JSON content of ``input_path`` (expected to be a list of
    dict-like entries), skips entries whose ``is_folder`` value is truthy,
    builds an ``IngestedDocument`` from each remaining entry and prints the
    processing policy and reason returned by
    ``DocumentClassificationEngine.classify``.

    Args:
        input_path: Path of the ingestion output JSON file. Defaults to
            ``ingestion_output.json``; a relative path is resolved against
            the current working directory.

    Returns:
        None. When the input file does not exist, a hint is printed and the
        function returns before any classification engine is created.
    """
    print("=== STARTING DCE PIPELINE TEST ===")

    # Open directly and handle the missing-file case, rather than checking
    # os.path.exists() first: this avoids a check-then-open race.
    try:
        with open(input_path, "r", encoding="utf-8") as f:
            items = json.load(f)
    except FileNotFoundError:
        print(f"File {input_path} not found! Please run ingestion sync first.")
        return

    dce = DocumentClassificationEngine()
    print(f"Loaded {len(items)} items from {input_path}\n")

    for item in items:
        if item.get("is_folder"):
            continue  # DCE only processes files
        doc = IngestedDocument(**item)
        print(f"\n--- Processing: {doc.name} ---")
        result = dce.classify(doc)
        print(f">> Policy: {result.processing_policy.value} | Reason: {result.reason}")
# Run the pipeline test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()