#!.venv/bin/python3
import argparse
import sys

from core.config.config_manager import ConfigManager
from processing.monitoring.pipeline_monitor import PipelineMonitor
from processing.monitoring.data_analyzer import DatasetAnalyzer


# Pipeline steps accepted by ``clean --step`` and by ``reset``.
_PIPELINE_STEPS = ("data_cleaning", "feature_extraction", "llm_annotation", "data_splitting")


def _non_negative_int(value):
    """argparse ``type`` callback: parse *value* as an integer that is >= 0.

    Raises:
        argparse.ArgumentTypeError: if *value* is not an integer or is negative.
            argparse turns this into a usage error (exit status 2).
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {number}")
    return number


def _confirm(prompt):
    """Ask a yes/no question on stdin and return True only for "y" / "yes".

    Surrounding whitespace and letter case are ignored. A closed stdin
    (EOF) is treated like the default answer "N" instead of crashing with
    a traceback, so non-interactive runs without ``--force`` do nothing.
    """
    try:
        answer = input(prompt)
    except EOFError:
        print()  # finish the prompt line that input() left open
        return False
    return answer.strip().lower() in ("y", "yes")


def _build_parser():
    """Create the argument parser with one sub-command per supported action."""
    parser = argparse.ArgumentParser(
        description="Monitor and manage the DRC names processing pipeline"
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Status command
    status_parser = subparsers.add_parser("status", help="Show pipeline status")
    status_parser.add_argument(
        "--detailed",
        action="store_true",
        help="Show detailed information including failed batch IDs",
    )

    # Clean command
    clean_parser = subparsers.add_parser("clean", help="Clean checkpoint files")
    clean_parser.add_argument(
        "--step",
        type=str,
        choices=_PIPELINE_STEPS,
        help="Clean specific step (default: all)",
    )
    clean_parser.add_argument(
        "--keep-last",
        type=_non_negative_int,
        default=1,
        help="Number of recent checkpoints to keep (default: 1)",
    )
    clean_parser.add_argument("--force", action="store_true", help="Clean without confirmation")

    # Reset command
    reset_parser = subparsers.add_parser("reset", help="Reset pipeline step")
    reset_parser.add_argument(
        "step",
        type=str,
        choices=_PIPELINE_STEPS,
        help="Step to reset",
    )
    reset_parser.add_argument("--force", action="store_true", help="Reset without confirmation")

    # Analyze command
    analyze_parser = subparsers.add_parser("analyze", help="Analyze dataset")
    analyze_parser.add_argument(
        "--file",
        type=str,
        default="names_featured.csv",
        help="Dataset file to analyze (default: names_featured.csv)",
    )

    # Checkpoint info command (takes no arguments of its own)
    subparsers.add_parser("info", help="Show checkpoint information")

    return parser


def _run_clean(monitor, args):
    """Handle ``clean``: report storage, confirm, then clean one step or all steps."""
    checkpoint_info = monitor.count_checkpoint_files()
    print(f"Current checkpoint storage: {checkpoint_info['total_size_mb']:.1f} MB")

    if not args.force and not _confirm("Are you sure you want to clean checkpoints? (y/N): "):
        print("Cancelled")
        return 0

    # Without --step every step known to the monitor is cleaned.
    steps = [args.step] if args.step else monitor.steps
    for step in steps:
        monitor.clean_step_checkpoints(step, args.keep_last)

    print("Checkpoint cleaning completed")
    return 0


def _run_reset(monitor, args):
    """Handle ``reset``: confirm, then reset the requested step."""
    if not args.force and not _confirm(
        f"Are you sure you want to reset {args.step}? This will delete all checkpoints. (y/N): "
    ):
        print("Cancelled")
        return 0

    monitor.reset_step(args.step)
    print(f"Reset completed for {args.step}")
    return 0


def _run_analyze(args):
    """Handle ``analyze``: load the dataset and print completion/quality statistics.

    Returns 1 when the file does not exist or the analyzer fails to load it.
    """
    # The file name is resolved against the configured data directory.
    data_dir = ConfigManager().default_paths.data_dir
    filepath = data_dir / args.file

    if not filepath.exists():
        print(f"File not found: {filepath}")
        return 1

    analyzer = DatasetAnalyzer(str(filepath))
    if not analyzer.load_data():
        return 1

    completion_stats = analyzer.analyze_completion()
    quality_stats = analyzer.analyze_quality()

    print(f"\n=== Dataset Analysis: {args.file} ===")
    print(f"Total rows: {completion_stats['total_rows']:,}")
    print(
        f"Annotated: {completion_stats['annotated_rows']:,} ({completion_stats['annotation_percentage']:.1f}%)"
    )
    print(f"Unannotated: {completion_stats['unannotated_rows']:,}")
    print(
        f"Complete names: {completion_stats['complete_names']:,} ({completion_stats['completeness_percentage']:.1f}%)"
    )

    # Both quality sections are optional; print only what the analyzer returned.
    if "name_length" in quality_stats:
        length_stats = quality_stats["name_length"]
        print("\nName length statistics:")
        print(f"  Average: {length_stats['mean']:.1f} characters")
        print(f"  Range: {length_stats['min']}-{length_stats['max']} characters")

    if "word_distribution" in quality_stats:
        print("\nWord count distribution:")
        for words, count in quality_stats["word_distribution"].items():
            print(f"  {words} words: {count:,} names")

    return 0


def _run_info(monitor):
    """Handle ``info``: print total and per-step checkpoint file counts and sizes."""
    checkpoint_info = monitor.count_checkpoint_files()

    print("\n=== Checkpoint Information ===")
    print(f"Total storage: {checkpoint_info['total_size_mb']:.1f} MB")
    print()

    for step in monitor.steps:
        step_info = checkpoint_info[step]
        print(f"{step.replace('_', ' ').title()}:")
        print(f"  Files: {step_info['files']}")
        print(f"  Size: {step_info['size_mb']:.1f} MB")
        print()

    return 0


def main(argv=None) -> int:
    """Parse the command line and run the selected pipeline-management command.

    Args:
        argv: Argument list to parse, without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, which is the
            behaviour of the script entry point.

    Returns:
        Process exit status: 0 on success or when the user declines a
        confirmation prompt, 1 when no command was given or ``analyze``
        could not find/load its file.

    Raises:
        SystemExit: raised by argparse for ``--help`` and for invalid
            arguments (including a negative ``--keep-last``).
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if not args.command:
        parser.print_help()
        return 1

    # NOTE(review): the monitor is constructed for every command, including
    # ``analyze`` which does not use it; kept in case its constructor has
    # side effects that callers rely on.
    monitor = PipelineMonitor()

    if args.command == "status":
        monitor.print_status(detailed=args.detailed)
        return 0
    if args.command == "clean":
        return _run_clean(monitor, args)
    if args.command == "reset":
        return _run_reset(monitor, args)
    if args.command == "analyze":
        return _run_analyze(args)
    if args.command == "info":
        return _run_info(monitor)

    return 0


if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the interpreter as the
    # process exit status.
    raise SystemExit(main())