feat: document models
This commit is contained in:
@@ -13,17 +13,23 @@ class SVMModel(TraditionalModel):
|
||||
|
||||
def build_model(self) -> BaseEstimator:
|
||||
params = self.config.model_params
|
||||
# TF-IDF downweights very common patterns; char n-grams (2,4) are effective
|
||||
# for distinguishing name morphology under RBF kernels.
|
||||
vectorizer = TfidfVectorizer(
|
||||
analyzer="char",
|
||||
ngram_range=params.get("ngram_range", (2, 4)),
|
||||
max_features=params.get("max_features", 5000),
|
||||
)
|
||||
|
||||
# RBF kernel captures non-linear interactions between n-grams; probability=True
|
||||
# enables predict_proba via internally cross-validated Platt scaling, which slows fit. A larger cache_size (in MB) speeds up kernel computations.
|
||||
classifier = SVC(
|
||||
kernel=params.get("kernel", "rbf"),
|
||||
C=params.get("C", 1.0),
|
||||
gamma=params.get("gamma", "scale"),
|
||||
probability=True, # Enable probability prediction
|
||||
class_weight=params.get("class_weight", None),
|
||||
cache_size=params.get("cache_size", 1000),
|
||||
random_state=self.config.random_seed,
|
||||
verbose=2,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user