feat: add NER annotation step and integrate into pipeline

This commit is contained in:
2025-08-11 07:13:09 +02:00
parent 6d39c3afc1
commit d5a4aaaf4a
23 changed files with 1108 additions and 160 deletions
+10 -2
View File
@@ -6,9 +6,10 @@ from dataclasses import dataclass
from typing import List, Optional
import pandas as pd
from pydantic import BaseModel
from processing.batch.batch_config import BatchConfig
from core.config.pipeline_config import PipelineConfig
from processing.batch.batch_config import BatchConfig
@dataclass
@@ -25,11 +26,18 @@ class PipelineState:
self.failed_batches = []
class NameAnnotation(BaseModel):
    """Model for name annotation results.

    A pydantic model with two string fields, each of which may be ``None``.

    NOTE(review): neither field declares a default. Under pydantic v2 an
    ``Optional[str]`` field without a default is still required (the caller
    must pass it, even if as ``None``); under pydantic v1 it defaults to
    ``None``. Confirm which major version this project targets.
    """

    # Name found by the annotation, or None; presumably the given (first)
    # name, as opposed to the surname below -- TODO confirm with the producer.
    identified_name: Optional[str]
    # Surname found by the annotation, or None.
    identified_surname: Optional[str]
class PipelineStep(ABC):
"""Abstract base class for pipeline steps"""
def __init__(
self, name: str, pipeline_config: PipelineConfig, batch_config: Optional[BatchConfig] = None
self, name: str, pipeline_config: PipelineConfig, batch_config: Optional[BatchConfig] = None
):
self.name = name
self.pipeline_config = pipeline_config