feat: add osm data

This commit is contained in:
2025-09-21 16:23:44 +02:00
parent 63e23d6600
commit c1b502c878
39 changed files with 955 additions and 338 deletions
+3 -1
View File
@@ -82,7 +82,9 @@ class EnsembleModel(TraditionalModel):
# Soft voting averages probabilities (preferred when members are calibrated);
# hard voting uses majority class. n_jobs sets how many members are fitted in
# parallel (-1, the default here, uses all available cores).
voting_type = params.get("voting", "soft") # 'hard' or 'soft'
return VotingClassifier(estimators=estimators, voting=voting_type, n_jobs=params.get("n_jobs", -1))
return VotingClassifier(
estimators=estimators, voting=voting_type, n_jobs=params.get("n_jobs", -1)
)
def prepare_features(self, X: pd.DataFrame) -> np.ndarray:
text_features = []
+3 -1
View File
@@ -55,7 +55,9 @@ class RandomForestModel(TraditionalModel):
encoder = self.label_encoders[feature_key]
column_clean = column.fillna("unknown").astype(str)
known_classes = set(encoder.classes_)
default_class = "unknown" if "unknown" in known_classes else encoder.classes_[0]
default_class = (
"unknown" if "unknown" in known_classes else encoder.classes_[0]
)
column_mapped = column_clean.apply(
lambda value: value if value in known_classes else default_class
)
+3 -3
View File
@@ -36,9 +36,9 @@ class TransformerModel(NeuralNetworkModel):
# Add a learned positional embedding (one vector per index in 0..max_len-1) to x
positions = tf.range(start=0, limit=params.get("max_len", 8), delta=1)
pos_embedding = Embedding(input_dim=params.get("max_len", 8), output_dim=params.get("embedding_dim", 64))(
positions
)
pos_embedding = Embedding(
input_dim=params.get("max_len", 8), output_dim=params.get("embedding_dim", 64)
)(positions)
x = x + pos_embedding
x = self._transformer_encoder(x, params)