feat: document models

This commit is contained in:
2025-09-20 23:35:54 +02:00
parent dd2a9f2711
commit e41b15a863
13 changed files with 256 additions and 47 deletions
+3
View File
@@ -13,12 +13,15 @@ class NaiveBayesModel(TraditionalModel):
def build_model(self) -> BaseEstimator:
    """Build the character n-gram Multinomial Naive Bayes pipeline.

    Optional overrides are read from ``self.config.model_params``:
    ``ngram_range`` (default ``(1, 4)``), ``max_features`` (default
    ``8000``) and ``alpha`` (default ``1.0``).

    Returns:
        A ``Pipeline`` whose ``"vectorizer"`` step is a ``CountVectorizer``
        and whose ``"classifier"`` step is a ``MultinomialNB``.
    """
    params = self.config.model_params

    ngram_range = params.get("ngram_range", (1, 4))
    if isinstance(ngram_range, list):
        # If the params come from a serialized config (JSON/YAML have no
        # tuple type) the range arrives as a list; scikit-learn documents
        # ngram_range as a tuple and newer releases reject anything else
        # when fitting. Other values are passed through untouched.
        ngram_range = tuple(ngram_range)

    # Bag-of-character-ngrams aligns with Multinomial NB assumptions; (1,4)
    # includes unigrams for coverage and higher n for suffix/prefix cues.
    vectorizer = CountVectorizer(
        analyzer="char",
        ngram_range=ngram_range,
        max_features=params.get("max_features", 8000),
    )
    # Laplace smoothing (alpha) counters zero counts for rare n-grams.
    classifier = MultinomialNB(alpha=params.get("alpha", 1.0))
    return Pipeline([("vectorizer", vectorizer), ("classifier", classifier)])