hotfixes
This commit is contained in:
@@ -3,10 +3,10 @@ debug: true
|
||||
|
||||
# Processing settings
|
||||
processing:
|
||||
-batch_size: 10_000
|
||||
-max_workers: 8
|
||||
+batch_size: 100_000
|
||||
+max_workers: 1
|
||||
checkpoint_interval: 10
|
||||
-use_multiprocessing: true
|
||||
+use_multiprocessing: false
|
||||
|
||||
# Pipeline stages
|
||||
stages:
|
||||
|
||||
@@ -12,6 +12,7 @@ class ConnectorFormatter(BaseNameFormatter):
|
||||
surname = row['probable_surname'] if pd.notna(row['probable_surname']) else ''
|
||||
connector = random.choice(self.connectors)
|
||||
|
||||
# Connect native parts with a random connector
|
||||
if len(native_parts) > 1:
|
||||
connected_native = f" {connector} ".join(native_parts)
|
||||
full_name = f"{connected_native} {surname}".strip()
|
||||
|
||||
Reference in New Issue
Block a user