feat: stabilize name analysis
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
import unicodedata
|
||||||
from typing import Optional, Dict, Tuple
|
from typing import Optional, Dict, Tuple
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@@ -13,6 +14,16 @@ class RegionMapper:
|
|||||||
def map(self, series: pd.Series) -> pd.Series:
    """Translate each value of *series* through ``self.mapping``.

    Values are lower-cased before the lookup; anything that has no entry
    in the mapping (including missing values) becomes ``"AUTRES"``.
    """
    lowered = series.str.lower()
    looked_up = lowered.map(self.mapping)
    return looked_up.fillna("AUTRES")
|
||||||
|
|
||||||
|
@staticmethod
def clean_province(series: pd.Series) -> pd.Series:
    """Normalize province names to upper-case, accent-free ASCII.

    Each string is upper-cased, decomposed with Unicode NFKD, stripped of
    every character that has no ASCII representation (accents, symbols),
    and finally trimmed of surrounding whitespace.

    Args:
        series: Series of province names; non-string entries (e.g. missing
            values) are passed through unchanged by the folding step.

    Returns:
        A Series of the same length with the cleaned names.
    """

    def _fold_to_ascii(value):
        # Missing values come out of ``.str.upper()`` as non-strings;
        # leave them as they are.
        if not isinstance(value, str):
            return value
        decomposed = unicodedata.normalize("NFKD", value)
        ascii_only = decomposed.encode("ascii", errors="ignore").decode("ascii")
        # Trim only after folding: removing a non-ASCII character at either
        # end can expose whitespace that an earlier strip would have missed.
        return ascii_only.strip()

    return series.str.upper().apply(_fold_to_ascii)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_provinces():
|
def get_provinces():
|
||||||
return [
|
return [
|
||||||
|
|||||||
Vendored
+2373
File diff suppressed because one or more lines are too long
Vendored
-2002
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user