refactor: reorganize project structure and enhance model verbosity

This commit is contained in:
2025-08-06 21:57:10 +02:00
parent ad8db43748
commit d7aa24a935
23 changed files with 1209 additions and 1416 deletions
+2 -16
View File
@@ -3,8 +3,8 @@ import argparse
import sys
from core.config.config_manager import ConfigManager
from processing.monitoring.pipeline_monitor import PipelineMonitor
from processing.monitoring.data_analyzer import DatasetAnalyzer
from processing.monitoring.pipeline_monitor import PipelineMonitor
def main():
@@ -112,29 +112,15 @@ def main():
return 1
completion_stats = analyzer.analyze_completion()
quality_stats = analyzer.analyze_quality()
print(f"\n=== Dataset Analysis: {args.file} ===")
print(f"Total rows: {completion_stats['total_rows']:,}")
print(
f"Annotated: {completion_stats['annotated_rows']:,} ({completion_stats['annotation_percentage']:.1f}%)"
)
print(f"Annotated: {completion_stats['annotated_rows']:,} ({completion_stats['annotation_percentage']:.1f}%)")
print(f"Unannotated: {completion_stats['unannotated_rows']:,}")
print(
f"Complete names: {completion_stats['complete_names']:,} ({completion_stats['completeness_percentage']:.1f}%)"
)
if "name_length" in quality_stats:
length_stats = quality_stats["name_length"]
print(f"\nName length statistics:")
print(f" Average: {length_stats['mean']:.1f} characters")
print(f" Range: {length_stats['min']}-{length_stats['max']} characters")
if "word_distribution" in quality_stats:
print(f"\nWord count distribution:")
for words, count in quality_stats["word_distribution"].items():
print(f" {words} words: {count:,} names")
elif args.command == "info":
checkpoint_info = monitor.count_checkpoint_files()