This commit is contained in:
2025-08-12 23:17:18 +02:00
parent 3977d5c313
commit 47e52d130c
4 changed files with 4 additions and 3 deletions
+3 -3
View File
@@ -3,10 +3,10 @@ debug: true
# Processing settings
processing:
batch_size: 10_000
max_workers: 8
batch_size: 100_000
max_workers: 1
checkpoint_interval: 10
use_multiprocessing: true
use_multiprocessing: false
# Pipeline stages
stages:
View File
@@ -12,6 +12,7 @@ class ConnectorFormatter(BaseNameFormatter):
surname = row['probable_surname'] if pd.notna(row['probable_surname']) else ''
connector = random.choice(self.connectors)
# Connect native parts with a random connector
if len(native_parts) > 1:
connected_native = f" {connector} ".join(native_parts)
full_name = f"{connected_native} {surname}".strip()
View File