refactoring: uv
This commit is contained in:
@@ -0,0 +1,32 @@
|
||||
from dataclasses import field
|
||||
from typing import Dict, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class DataConfig(BaseModel):
|
||||
"""Data handling configuration"""
|
||||
|
||||
input_file: str = "names.csv"
|
||||
output_files: Dict[str, str] = field(
|
||||
default_factory=lambda: {
|
||||
"featured": "names_featured.csv",
|
||||
"evaluation": "names_evaluation.csv",
|
||||
"engineered": "names_engineered.csv",
|
||||
"males": "names_males.csv",
|
||||
"females": "names_females.csv",
|
||||
"ner_data": "names_ner.json",
|
||||
"ner_spacy": "names_ner.spacy",
|
||||
}
|
||||
)
|
||||
selected_columns: list[str] = field(default=["name", "sex", "region"])
|
||||
split_evaluation: bool = False
|
||||
split_by_province: bool = True
|
||||
split_by_gender: bool = True
|
||||
split_ner_data: bool = True
|
||||
evaluation_fraction: float = 0.2
|
||||
random_seed: int = 42
|
||||
|
||||
# Dataset size limiting options
|
||||
max_dataset_size: Optional[int] = None
|
||||
balance_by_sex: bool = False
|
||||
Reference in New Issue
Block a user