feat: stabilize name analysis

This commit is contained in:
2025-09-25 23:17:49 +02:00
parent 4874b178c9
commit 817081b443
3 changed files with 2384 additions and 2002 deletions
+11
View File
@@ -1,3 +1,4 @@
import unicodedata
from typing import Optional, Dict, Tuple
import pandas as pd
@@ -13,6 +14,16 @@ class RegionMapper:
def map(self, series: pd.Series) -> pd.Series:
    """Translate each value of *series* through ``self.mapping``.

    Values are lower-cased before the lookup; any entry that has no
    match in ``self.mapping`` (or is missing) becomes ``"AUTRES"``.
    """
    lowered = series.str.lower()
    regions = lowered.map(self.mapping)
    # Anything the lookup could not resolve falls into the catch-all bucket.
    return regions.fillna("AUTRES")
@staticmethod
def clean_province(series: pd.Series) -> pd.Series:
return (
series.str.upper()
.str.strip()
.apply(lambda x: unicodedata.normalize("NFKD", x)
.encode("ascii", errors="ignore")
.decode("utf-8") if isinstance(x, str) else x)
)
@staticmethod
def get_provinces():
return [
+2373
View File
File diff suppressed because one or more lines are too long
-2002
View File
File diff suppressed because one or more lines are too long