refactor: reorganize project structure and enhance model verbosity

This commit is contained in:
2025-08-06 21:57:10 +02:00
parent ad8db43748
commit d7aa24a935
23 changed files with 1209 additions and 1416 deletions
-28
View File
@@ -50,31 +50,3 @@ class DatasetAnalyzer:
"complete_names": complete_names,
"completeness_percentage": (complete_names / total_rows * 100) if total_rows > 0 else 0,
}
def analyze_quality(self) -> Dict:
    """Collect data-quality metrics for the loaded DataFrame.

    Returns:
        An empty dict when ``self.df`` is ``None``. Otherwise a dict with:

        * ``"missing_values"`` -- per-column count of null entries.
        * ``"name_length"`` -- mean/median/min/max of the string length of
          the ``"name"`` column (only when that column exists).
        * ``"word_distribution"`` -- mapping of each distinct value in the
          ``"words"`` column to its frequency, ordered by value (only when
          that column exists).
    """
    frame = self.df
    if frame is None:
        return {}

    # Null counts are always reported, whatever columns are present.
    report = {"missing_values": frame.isnull().sum().to_dict()}

    available = frame.columns

    if "name" in available:
        lengths = frame["name"].str.len()
        # Summary statistics of the name length, in a fixed key order.
        report["name_length"] = {
            stat: getattr(lengths, stat)()
            for stat in ("mean", "median", "min", "max")
        }

    if "words" in available:
        # Frequency of each word count, sorted by the word count itself.
        histogram = frame["words"].value_counts().sort_index()
        report["word_distribution"] = histogram.to_dict()

    return report
+1 -1
View File
@@ -39,7 +39,7 @@ class FeatureExtractionStep(PipelineStep):
@classmethod
def get_name_category(cls, word_count: int) -> NameCategory:
    """Determine the name category from the number of words in a name.

    Args:
        word_count: Number of words in the name.

    Returns:
        ``NameCategory.SIMPLE`` when the name has exactly three words,
        ``NameCategory.COMPOSE`` for every other word count.
    """
    # A single condition decides the category: two stacked ``if`` headers
    # are not valid Python, and only the exact-three-words test is kept.
    if word_count == 3:
        return NameCategory.SIMPLE
    return NameCategory.COMPOSE