From 9f410ca674f0600766f1f80bab88fc342e0ecf01 Mon Sep 17 00:00:00 2001 From: bernard-ng Date: Thu, 24 Jul 2025 14:27:54 +0200 Subject: [PATCH] refactor: fix logging --- processing/prepare.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/processing/prepare.py b/processing/prepare.py index d102f70..4550abb 100644 --- a/processing/prepare.py +++ b/processing/prepare.py @@ -55,6 +55,7 @@ def process(df: pd.DataFrame) -> pd.DataFrame: logging.info("Preprocessing names") df['words'] = df['name'].str.count(' ') + 1 df['length'] = df['name'].str.replace(' ', '', regex=False).str.len() + df['year'] = df['year'].astype(int) # Calculate probable_native and probable_surname name_split = df['name'].str.split() @@ -64,6 +65,7 @@ def process(df: pd.DataFrame) -> pd.DataFrame: df['identified_name'] = None df['identified_surname'] = None df['annotated'] = 0 + df['annotated'] = df['annotated'].astype('Int8') # We can assume that if a name has exactly 3 words, the first two are the native name and the last is the surname # This is a common pattern in Congolese names