diff --git a/app.py b/app.py index 5535e8a..50f27a4 100644 --- a/app.py +++ b/app.py @@ -1,7 +1,7 @@ #!.venv/bin/python3 import streamlit as st -from core.config import setup_config_and_logging +from core.config import setup_config from core.utils.data_loader import DataLoader from interface.configuration import Configuration from interface.dashboard import Dashboard @@ -26,7 +26,7 @@ st.set_page_config( @st.cache_data def load_config(): """Load application configuration with unified setup""" - return setup_config_and_logging(env="development") + return setup_config(env="development") class StreamlitApp: diff --git a/cli.py b/cli.py index a01c0c7..8bb30c4 100755 --- a/cli.py +++ b/cli.py @@ -6,7 +6,7 @@ from pathlib import Path import pandas as pd -from core.config import setup_config_and_logging +from core.config import setup_config from research.experiment.experiment_runner import ExperimentRunner from research.experiment.experiment_tracker import ExperimentTracker @@ -104,7 +104,7 @@ def show_experiment_details(args): def compare_experiments_cmd(args): """Compare multiple experiments""" - config = setup_config_and_logging(env="development") + config = setup_config(env="development") runner = ExperimentRunner(config) comparison = runner.compare_experiments(args.experiment_ids) @@ -172,7 +172,7 @@ def main(): try: # Load configuration and setup logging - config = setup_config_and_logging(config_path=args.config, env=args.env) + config = setup_config(config_path=args.config, env=args.env) # Override log level if verbose requested if args.verbose: diff --git a/core/config/__init__.py b/core/config/__init__.py index 9c26acb..12cbab4 100644 --- a/core/config/__init__.py +++ b/core/config/__init__.py @@ -21,10 +21,7 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> PipelineConfi return config_manager.get_config() -def setup_config_and_logging( - config_path: Optional[Path] = None, - env: str = "development" -) -> PipelineConfig: +def setup_config(config_path: Optional[Path] = None, env: str = "development") -> PipelineConfig: """ Unified configuration loading and logging setup for all entrypoint scripts. diff --git a/main.py b/main.py index e9c58c6..1d2c7e4 100755 --- a/main.py +++ b/main.py @@ -1,22 +1,21 @@ #!.venv/bin/python3 -import sys import argparse import logging -from pathlib import Path +import sys +import traceback -from core.utils.data_loader import DataLoader -from core.config import setup_config_and_logging +from core.config import setup_config from core.utils import get_data_file_path - -from processing.pipeline import Pipeline +from core.utils.data_loader import DataLoader from processing.batch.batch_config import BatchConfig -from processing.steps.data_splitting_step import DataSplittingStep -from processing.steps.llm_annotation_step import LLMAnnotationStep -from processing.steps.feature_extraction_step import FeatureExtractionStep +from processing.pipeline import Pipeline from processing.steps.data_cleaning_step import DataCleaningStep +from processing.steps.data_splitting_step import DataSplittingStep +from processing.steps.feature_extraction_step import FeatureExtractionStep +from processing.steps.llm_annotation_step import LLMAnnotationStep -def create_pipeline_from_config(config) -> Pipeline: +def create_pipeline(config) -> Pipeline: """Create pipeline from configuration""" batch_config = BatchConfig( batch_size=config.processing.batch_size, @@ -42,14 +41,13 @@ def create_pipeline_from_config(config) -> Pipeline: return pipeline -def run_pipeline(config, resume: bool = False) -> int: +def run_pipeline(config) -> int: """Run the complete pipeline""" try: logging.info(f"Starting pipeline: {config.name} v{config.version}") # Load input data input_file_path = get_data_file_path(config.data.input_file, config) - if not input_file_path.exists(): logging.error(f"Input file not found: {input_file_path}") return 1 @@ -60,7 +58,7 @@ def run_pipeline(config, resume: bool = False) -> int: logging.info(f"Loaded {len(df)} rows, {len(df.columns)} columns") # Create and run pipeline - pipeline = create_pipeline_from_config(config) + pipeline = create_pipeline(config) logging.info("Starting pipeline execution") result_df = pipeline.run(df) @@ -94,46 +92,18 @@ def main(): parser = argparse.ArgumentParser( description="DRC Names Processing Pipeline", formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Configuration File Examples: - config/pipeline.yaml - Main configuration - config/pipeline.development.yaml - Development environment (default) - config/pipeline.production.yaml - Production environment - -Usage Examples: - python main.py # Use development config (default) - python main.py --config config/pipeline.yaml # Use specific config - python main.py --env production # Use production environment - python main.py --resume # Resume from checkpoints - """, - ) - - parser.add_argument("--config", type=Path, help="Path to configuration file") - parser.add_argument( - "--env", type=str, default="development", - help="Environment name (default: development)" - ) - parser.add_argument( - "--resume", action="store_true", help="Resume pipeline from existing checkpoints" - ) - parser.add_argument( - "--validate-config", action="store_true", help="Validate configuration file and exit" ) + parser.add_argument("--config", type=str, help="Path to configuration file") + parser.add_argument("--env", type=str, default="development", help="Environment name") args = parser.parse_args() try: - # Load configuration and setup logging - config = setup_config_and_logging(config_path=args.config, env=args.env) - - if args.validate_config: - print(f"Configuration is valid: {config.name} v{config.version}") - return 0 - - # Run pipeline - return run_pipeline(config, args.resume) + config = setup_config(config_path=args.config, env=args.env) + return run_pipeline(config) except Exception as e: - print(f"Configuration or pipeline failed: {e}") + print(f"Pipeline failed: {e}") + traceback.print_exc() return 1 diff --git a/monitor.py b/monitor.py index 6c53d43..4e37049 100755 --- a/monitor.py +++ b/monitor.py @@ -1,9 +1,10 @@ #!.venv/bin/python3 import argparse import sys +import traceback from pathlib import Path -from core.config import setup_config_and_logging +from core.config import setup_config from processing.monitoring.data_analyzer import DatasetAnalyzer from processing.monitoring.pipeline_monitor import PipelineMonitor @@ -71,7 +72,7 @@ def main(): try: # Load configuration and setup logging - config = setup_config_and_logging(config_path=args.config, env=args.env) + config = setup_config(config_path=args.config, env=args.env) monitor = PipelineMonitor() @@ -126,7 +127,8 @@ def main(): print(f"\n=== Dataset Analysis: {args.file} ===") print(f"Total rows: {completion_stats['total_rows']:,}") - print(f"Annotated: {completion_stats['annotated_rows']:,} ({completion_stats['annotation_percentage']:.1f}%)") + print( + f"Annotated: {completion_stats['annotated_rows']:,} ({completion_stats['annotation_percentage']:.1f}%)") print(f"Unannotated: {completion_stats['unannotated_rows']:,}") print( f"Complete names: {completion_stats['complete_names']:,} ({completion_stats['completeness_percentage']:.1f}%)" @@ -149,7 +151,8 @@ def main(): return 0 except Exception as e: - print(f"Monitor command failed: {e}") + print(f"Monitoring failed: {e}") + traceback.print_exc() return 1 diff --git a/train.py b/train.py index 7a451db..849b8b1 100755 --- a/train.py +++ b/train.py @@ -1,28 +1,24 @@ #!.venv/bin/python3 import argparse +import logging import sys +import traceback -from core.config import setup_config_and_logging +from core.config import setup_config from research.model_trainer import ModelTrainer def main(): parser = argparse.ArgumentParser(description="Train DRC Names Models") - parser.add_argument("--config", type=str, help="Path to configuration file") - parser.add_argument( - "--env", type=str, default="development", - help="Environment name (default: development)" - ) parser.add_argument("--type", type=str, help="Specific model type to train") parser.add_argument("--name", type=str, help="Model name") - + parser.add_argument("--config", type=str, help="Path to configuration file") + parser.add_argument("--env", type=str, default="development", help="Environment name") args = parser.parse_args() try: - # Load configuration and setup logging - config = setup_config_and_logging(config_path=args.config, env=args.env) - - trainer = ModelTrainer() + config = setup_config(config_path=args.config, env=args.env) + trainer = ModelTrainer(config) # Train specific model trainer.train_single_model( @@ -30,11 +26,11 @@ def main(): model_type=args.type, features=["full_name"] ) - return 0 except Exception as e: - print(f"Training failed: {e}") + logging.error(f"Training failed: {e}") + traceback.print_exc() return 1