refactor: rename setup_config_and_logging to setup_config and update references
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
#!.venv/bin/python3
|
#!.venv/bin/python3
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
from core.config import setup_config_and_logging
|
from core.config import setup_config
|
||||||
from core.utils.data_loader import DataLoader
|
from core.utils.data_loader import DataLoader
|
||||||
from interface.configuration import Configuration
|
from interface.configuration import Configuration
|
||||||
from interface.dashboard import Dashboard
|
from interface.dashboard import Dashboard
|
||||||
@@ -26,7 +26,7 @@ st.set_page_config(
|
|||||||
@st.cache_data
|
@st.cache_data
|
||||||
def load_config():
|
def load_config():
|
||||||
"""Load application configuration with unified setup"""
|
"""Load application configuration with unified setup"""
|
||||||
return setup_config_and_logging(env="development")
|
return setup_config(env="development")
|
||||||
|
|
||||||
|
|
||||||
class StreamlitApp:
|
class StreamlitApp:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from core.config import setup_config_and_logging
|
from core.config import setup_config
|
||||||
from research.experiment.experiment_runner import ExperimentRunner
|
from research.experiment.experiment_runner import ExperimentRunner
|
||||||
from research.experiment.experiment_tracker import ExperimentTracker
|
from research.experiment.experiment_tracker import ExperimentTracker
|
||||||
|
|
||||||
@@ -104,7 +104,7 @@ def show_experiment_details(args):
|
|||||||
def compare_experiments_cmd(args):
|
def compare_experiments_cmd(args):
|
||||||
"""Compare multiple experiments"""
|
"""Compare multiple experiments"""
|
||||||
|
|
||||||
config = setup_config_and_logging(env="development")
|
config = setup_config(env="development")
|
||||||
runner = ExperimentRunner(config)
|
runner = ExperimentRunner(config)
|
||||||
comparison = runner.compare_experiments(args.experiment_ids)
|
comparison = runner.compare_experiments(args.experiment_ids)
|
||||||
|
|
||||||
@@ -172,7 +172,7 @@ def main():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Load configuration and setup logging
|
# Load configuration and setup logging
|
||||||
config = setup_config_and_logging(config_path=args.config, env=args.env)
|
config = setup_config(config_path=args.config, env=args.env)
|
||||||
|
|
||||||
# Override log level if verbose requested
|
# Override log level if verbose requested
|
||||||
if args.verbose:
|
if args.verbose:
|
||||||
|
|||||||
@@ -21,10 +21,7 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> PipelineConfi
|
|||||||
return config_manager.get_config()
|
return config_manager.get_config()
|
||||||
|
|
||||||
|
|
||||||
def setup_config_and_logging(
|
def setup_config(config_path: Optional[Path] = None, env: str = "development") -> PipelineConfig:
|
||||||
config_path: Optional[Path] = None,
|
|
||||||
env: str = "development"
|
|
||||||
) -> PipelineConfig:
|
|
||||||
"""
|
"""
|
||||||
Unified configuration loading and logging setup for all entrypoint scripts.
|
Unified configuration loading and logging setup for all entrypoint scripts.
|
||||||
|
|
||||||
|
|||||||
@@ -1,22 +1,21 @@
|
|||||||
#!.venv/bin/python3
|
#!.venv/bin/python3
|
||||||
import sys
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
import sys
|
||||||
|
import traceback
|
||||||
|
|
||||||
from core.utils.data_loader import DataLoader
|
from core.config import setup_config
|
||||||
from core.config import setup_config_and_logging
|
|
||||||
from core.utils import get_data_file_path
|
from core.utils import get_data_file_path
|
||||||
|
from core.utils.data_loader import DataLoader
|
||||||
from processing.pipeline import Pipeline
|
|
||||||
from processing.batch.batch_config import BatchConfig
|
from processing.batch.batch_config import BatchConfig
|
||||||
from processing.steps.data_splitting_step import DataSplittingStep
|
from processing.pipeline import Pipeline
|
||||||
from processing.steps.llm_annotation_step import LLMAnnotationStep
|
|
||||||
from processing.steps.feature_extraction_step import FeatureExtractionStep
|
|
||||||
from processing.steps.data_cleaning_step import DataCleaningStep
|
from processing.steps.data_cleaning_step import DataCleaningStep
|
||||||
|
from processing.steps.data_splitting_step import DataSplittingStep
|
||||||
|
from processing.steps.feature_extraction_step import FeatureExtractionStep
|
||||||
|
from processing.steps.llm_annotation_step import LLMAnnotationStep
|
||||||
|
|
||||||
|
|
||||||
def create_pipeline_from_config(config) -> Pipeline:
|
def create_pipeline(config) -> Pipeline:
|
||||||
"""Create pipeline from configuration"""
|
"""Create pipeline from configuration"""
|
||||||
batch_config = BatchConfig(
|
batch_config = BatchConfig(
|
||||||
batch_size=config.processing.batch_size,
|
batch_size=config.processing.batch_size,
|
||||||
@@ -42,14 +41,13 @@ def create_pipeline_from_config(config) -> Pipeline:
|
|||||||
return pipeline
|
return pipeline
|
||||||
|
|
||||||
|
|
||||||
def run_pipeline(config, resume: bool = False) -> int:
|
def run_pipeline(config) -> int:
|
||||||
"""Run the complete pipeline"""
|
"""Run the complete pipeline"""
|
||||||
try:
|
try:
|
||||||
logging.info(f"Starting pipeline: {config.name} v{config.version}")
|
logging.info(f"Starting pipeline: {config.name} v{config.version}")
|
||||||
|
|
||||||
# Load input data
|
# Load input data
|
||||||
input_file_path = get_data_file_path(config.data.input_file, config)
|
input_file_path = get_data_file_path(config.data.input_file, config)
|
||||||
|
|
||||||
if not input_file_path.exists():
|
if not input_file_path.exists():
|
||||||
logging.error(f"Input file not found: {input_file_path}")
|
logging.error(f"Input file not found: {input_file_path}")
|
||||||
return 1
|
return 1
|
||||||
@@ -60,7 +58,7 @@ def run_pipeline(config, resume: bool = False) -> int:
|
|||||||
logging.info(f"Loaded {len(df)} rows, {len(df.columns)} columns")
|
logging.info(f"Loaded {len(df)} rows, {len(df.columns)} columns")
|
||||||
|
|
||||||
# Create and run pipeline
|
# Create and run pipeline
|
||||||
pipeline = create_pipeline_from_config(config)
|
pipeline = create_pipeline(config)
|
||||||
|
|
||||||
logging.info("Starting pipeline execution")
|
logging.info("Starting pipeline execution")
|
||||||
result_df = pipeline.run(df)
|
result_df = pipeline.run(df)
|
||||||
@@ -94,46 +92,18 @@ def main():
|
|||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="DRC Names Processing Pipeline",
|
description="DRC Names Processing Pipeline",
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
epilog="""
|
|
||||||
Configuration File Examples:
|
|
||||||
config/pipeline.yaml - Main configuration
|
|
||||||
config/pipeline.development.yaml - Development environment (default)
|
|
||||||
config/pipeline.production.yaml - Production environment
|
|
||||||
|
|
||||||
Usage Examples:
|
|
||||||
python main.py # Use development config (default)
|
|
||||||
python main.py --config config/pipeline.yaml # Use specific config
|
|
||||||
python main.py --env production # Use production environment
|
|
||||||
python main.py --resume # Resume from checkpoints
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument("--config", type=Path, help="Path to configuration file")
|
|
||||||
parser.add_argument(
|
|
||||||
"--env", type=str, default="development",
|
|
||||||
help="Environment name (default: development)"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--resume", action="store_true", help="Resume pipeline from existing checkpoints"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--validate-config", action="store_true", help="Validate configuration file and exit"
|
|
||||||
)
|
)
|
||||||
|
parser.add_argument("--config", type=str, help="Path to configuration file")
|
||||||
|
parser.add_argument("--env", type=str, default="development", help="Environment name")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Load configuration and setup logging
|
config = setup_config(config_path=args.config, env=args.env)
|
||||||
config = setup_config_and_logging(config_path=args.config, env=args.env)
|
return run_pipeline(config)
|
||||||
|
|
||||||
if args.validate_config:
|
|
||||||
print(f"Configuration is valid: {config.name} v{config.version}")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# Run pipeline
|
|
||||||
return run_pipeline(config, args.resume)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Configuration or pipeline failed: {e}")
|
print(f"Pipeline failed: {e}")
|
||||||
|
traceback.print_exc()
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+7
-4
@@ -1,9 +1,10 @@
|
|||||||
#!.venv/bin/python3
|
#!.venv/bin/python3
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
|
import traceback
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from core.config import setup_config_and_logging
|
from core.config import setup_config
|
||||||
from processing.monitoring.data_analyzer import DatasetAnalyzer
|
from processing.monitoring.data_analyzer import DatasetAnalyzer
|
||||||
from processing.monitoring.pipeline_monitor import PipelineMonitor
|
from processing.monitoring.pipeline_monitor import PipelineMonitor
|
||||||
|
|
||||||
@@ -71,7 +72,7 @@ def main():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Load configuration and setup logging
|
# Load configuration and setup logging
|
||||||
config = setup_config_and_logging(config_path=args.config, env=args.env)
|
config = setup_config(config_path=args.config, env=args.env)
|
||||||
|
|
||||||
monitor = PipelineMonitor()
|
monitor = PipelineMonitor()
|
||||||
|
|
||||||
@@ -126,7 +127,8 @@ def main():
|
|||||||
|
|
||||||
print(f"\n=== Dataset Analysis: {args.file} ===")
|
print(f"\n=== Dataset Analysis: {args.file} ===")
|
||||||
print(f"Total rows: {completion_stats['total_rows']:,}")
|
print(f"Total rows: {completion_stats['total_rows']:,}")
|
||||||
print(f"Annotated: {completion_stats['annotated_rows']:,} ({completion_stats['annotation_percentage']:.1f}%)")
|
print(
|
||||||
|
f"Annotated: {completion_stats['annotated_rows']:,} ({completion_stats['annotation_percentage']:.1f}%)")
|
||||||
print(f"Unannotated: {completion_stats['unannotated_rows']:,}")
|
print(f"Unannotated: {completion_stats['unannotated_rows']:,}")
|
||||||
print(
|
print(
|
||||||
f"Complete names: {completion_stats['complete_names']:,} ({completion_stats['completeness_percentage']:.1f}%)"
|
f"Complete names: {completion_stats['complete_names']:,} ({completion_stats['completeness_percentage']:.1f}%)"
|
||||||
@@ -149,7 +151,8 @@ def main():
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Monitor command failed: {e}")
|
print(f"Monitoring failed: {e}")
|
||||||
|
traceback.print_exc()
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,28 +1,24 @@
|
|||||||
#!.venv/bin/python3
|
#!.venv/bin/python3
|
||||||
import argparse
|
import argparse
|
||||||
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
import traceback
|
||||||
|
|
||||||
from core.config import setup_config_and_logging
|
from core.config import setup_config
|
||||||
from research.model_trainer import ModelTrainer
|
from research.model_trainer import ModelTrainer
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="Train DRC Names Models")
|
parser = argparse.ArgumentParser(description="Train DRC Names Models")
|
||||||
parser.add_argument("--config", type=str, help="Path to configuration file")
|
|
||||||
parser.add_argument(
|
|
||||||
"--env", type=str, default="development",
|
|
||||||
help="Environment name (default: development)"
|
|
||||||
)
|
|
||||||
parser.add_argument("--type", type=str, help="Specific model type to train")
|
parser.add_argument("--type", type=str, help="Specific model type to train")
|
||||||
parser.add_argument("--name", type=str, help="Model name")
|
parser.add_argument("--name", type=str, help="Model name")
|
||||||
|
parser.add_argument("--config", type=str, help="Path to configuration file")
|
||||||
|
parser.add_argument("--env", type=str, default="development", help="Environment name")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Load configuration and setup logging
|
config = setup_config(config_path=args.config, env=args.env)
|
||||||
config = setup_config_and_logging(config_path=args.config, env=args.env)
|
trainer = ModelTrainer(config)
|
||||||
|
|
||||||
trainer = ModelTrainer()
|
|
||||||
|
|
||||||
# Train specific model
|
# Train specific model
|
||||||
trainer.train_single_model(
|
trainer.train_single_model(
|
||||||
@@ -30,11 +26,11 @@ def main():
|
|||||||
model_type=args.type,
|
model_type=args.type,
|
||||||
features=["full_name"]
|
features=["full_name"]
|
||||||
)
|
)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Training failed: {e}")
|
logging.error(f"Training failed: {e}")
|
||||||
|
traceback.print_exc()
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user