This commit is contained in:
2025-08-12 23:17:18 +02:00
parent 3977d5c313
commit 47e52d130c
4 changed files with 4 additions and 3 deletions
+3 -3
View File
@@ -3,10 +3,10 @@ debug: true
# Processing settings # Processing settings
processing: processing:
batch_size: 10_000 batch_size: 100_000
max_workers: 8 max_workers: 1
checkpoint_interval: 10 checkpoint_interval: 10
use_multiprocessing: true use_multiprocessing: false
# Pipeline stages # Pipeline stages
stages: stages:
View File
@@ -12,6 +12,7 @@ class ConnectorFormatter(BaseNameFormatter):
surname = row['probable_surname'] if pd.notna(row['probable_surname']) else '' surname = row['probable_surname'] if pd.notna(row['probable_surname']) else ''
connector = random.choice(self.connectors) connector = random.choice(self.connectors)
# Connect native parts with a random connector
if len(native_parts) > 1: if len(native_parts) > 1:
connected_native = f" {connector} ".join(native_parts) connected_native = f" {connector} ".join(native_parts)
full_name = f"{connected_native} {surname}".strip() full_name = f"{connected_native} {surname}".strip()
View File