# Source file: poc_system/ingestion/providers/base_provider.py
# (file-viewer metadata at capture time: 37 lines, 1.2 KiB, Python)

from abc import ABC, abstractmethod
from typing import Dict, List, Tuple
class BaseStorageProvider(ABC):
    """Abstract interface for document storage providers.

    Intended for sources such as SharePoint, Google Drive, a local
    directory, or a NAS. A new storage source plugs into the ingestion
    pipeline by subclassing this class and implementing every abstract
    method; a subclass that leaves one unimplemented cannot be instantiated.
    """

    @abstractmethod
    def fetch_changes(self, sync_state: Dict) -> Tuple[List[Dict], Dict]:
        """Fetch incremental changes (new, updated, or deleted files).

        Args:
            sync_state: The last known synchronization state/token.
                Its exact keys are provider-defined; this base class does
                not prescribe a schema.

        Returns:
            A ``(items, new_sync_state)`` tuple:

            - ``items``: a list of standardized item dictionaries.
            - ``new_sync_state``: the sync state to be saved for the
              next run.
        """

    @abstractmethod
    def download_file(self, target_item: Dict) -> bytes:
        """Download the raw file bytes for a given item.

        Args:
            target_item: A standardized item dictionary as returned by
                ``fetch_changes``.

        Returns:
            The raw file content.
        """