feat: stabilize name analysis
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import unicodedata
|
||||
from typing import Optional, Dict, Tuple
|
||||
|
||||
import pandas as pd
|
||||
@@ -13,6 +14,16 @@ class RegionMapper:
|
||||
def map(self, series: pd.Series) -> pd.Series:
    """Translate each value of ``series`` through ``self.mapping``.

    Values are lower-cased before the lookup. Any entry that ends up
    missing after the lookup (no matching key, or a missing input value)
    is labelled ``"AUTRES"``.
    """
    lowered = series.str.lower()
    looked_up = lowered.map(self.mapping)
    # Fall back to the catch-all label for everything left unmapped.
    return looked_up.fillna("AUTRES")
|
||||
|
||||
@staticmethod
def clean_province(series: pd.Series) -> pd.Series:
    """Return ``series`` upper-cased, trimmed and reduced to ASCII.

    Each string is upper-cased, stripped of surrounding whitespace,
    decomposed with Unicode NFKD, and then every character that cannot be
    encoded as ASCII (e.g. the combining accents produced by the
    decomposition) is dropped. Entries that are not strings after the
    upper/strip steps are returned unchanged.
    """

    def _to_ascii(value):
        # Non-string entries (such as missing values) pass through as-is.
        if not isinstance(value, str):
            return value
        decomposed = unicodedata.normalize("NFKD", value)
        # errors="ignore" silently discards anything outside ASCII.
        ascii_bytes = decomposed.encode("ascii", errors="ignore")
        return ascii_bytes.decode("utf-8")

    uppercased = series.str.upper()
    trimmed = uppercased.str.strip()
    return trimmed.apply(_to_ascii)
|
||||
|
||||
@staticmethod
|
||||
def get_provinces():
|
||||
return [
|
||||
|
||||
Vendored
+2373
File diff suppressed because one or more lines are too long
Vendored
-2002
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user