Files
drc-ners-nlp/src/ners/cli.py
T
2025-10-05 18:14:15 +02:00

227 lines
6.8 KiB
Python

from __future__ import annotations
import os
import subprocess
import sys
from pathlib import Path
from typing import Optional
import typer
from ners.core.config import setup_config, PipelineConfig
# Root Typer application. The command groups defined in this module are
# mounted onto it with `app.add_typer(...)`; `no_args_is_help=True` makes a
# bare invocation print the help text.
app = typer.Typer(help="DRC NERS command-line interface", no_args_is_help=True)
# -------------------------
# Pipeline commands
# -------------------------
# Command group mounted on the root app under the name `pipeline`.
pipeline_app = typer.Typer(help="Data processing pipeline")
app.add_typer(pipeline_app, name="pipeline")
@pipeline_app.command("run")
def pipeline_run(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Run the full processing pipeline."""
    # Imported inside the command rather than at module level (presumably to
    # keep CLI start-up light -- confirm against ners.main's import cost).
    from ners.main import run_pipeline as _run_pipeline

    settings = setup_config(config_path=config, env=env)

    # Whatever the pipeline returns is used as the process exit status.
    exit_status = _run_pipeline(settings)
    raise typer.Exit(exit_status)
# -------------------------
# NER commands
# -------------------------
# Command group mounted on the root app under the name `ner`.
ner_app = typer.Typer(help="NER dataset and model")
app.add_typer(ner_app, name="ner")
def _load_config(config: Optional[Path], env: str) -> PipelineConfig:
    """Return the configuration produced by ``setup_config``.

    Thin wrapper used by the sub-commands so they all resolve configuration
    the same way: ``config`` is forwarded as ``config_path`` and ``env`` as
    ``env``.
    """
    loaded = setup_config(config_path=config, env=env)
    return loaded
@ner_app.command("feature")
def ner_feature(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Run the NER 'feature' step with the loaded configuration."""
    # NOTE: the docstring above is what Typer shows as this command's help
    # text; keep it short and user-facing.
    # Imported inside the command rather than at module level (presumably to
    # avoid loading NER dependencies for unrelated commands -- confirm).
    from ners.ner import feature as _feature

    cfg = _load_config(config, env)
    _feature(cfg)
@ner_app.command("build")
def ner_build(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Run the NER 'build' step with the loaded configuration."""
    # NOTE: the docstring above is what Typer shows as this command's help
    # text; keep it short and user-facing.
    # Imported inside the command rather than at module level (presumably to
    # avoid loading NER dependencies for unrelated commands -- confirm).
    from ners.ner import build as _build

    cfg = _load_config(config, env)
    _build(cfg)
@ner_app.command("train")
def ner_train(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Run the NER 'train' step with the loaded configuration."""
    # NOTE: the docstring above is what Typer shows as this command's help
    # text; keep it short and user-facing.
    # Imported inside the command rather than at module level (presumably to
    # avoid loading NER dependencies for unrelated commands -- confirm).
    from ners.ner import train as _train

    cfg = _load_config(config, env)
    _train(cfg)
@ner_app.command("run")
def ner_run(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
    reset: bool = typer.Option(
        False, help="Reset intermediate outputs and rerun all steps"
    ),
) -> None:
    """Run the NER pipeline and exit with its return code."""
    # NOTE: the docstring above is what Typer shows as this command's help
    # text; keep it short and user-facing.
    from ners.ner import run_pipeline as _ner_pipeline

    cfg = _load_config(config, env)

    # `reset` is forwarded positionally as the pipeline's second argument;
    # the pipeline's return value becomes the process exit status.
    code = _ner_pipeline(cfg, reset)
    raise typer.Exit(code)
# -------------------------
# Research commands
# -------------------------
# Command group mounted on the root app under the name `research`.
research_app = typer.Typer(help="Research experiments and training")
app.add_typer(research_app, name="research")
@research_app.command("train")
def research_train(
    name: str = typer.Option(..., "--name", help="Model name to train"),
    type: str = typer.Option(..., "--type", help="Experiment type"),
    templates: str = typer.Option(
        "research_templates.yaml", help="Templates file path"
    ),
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Train a single model from a named experiment template."""
    # NOTE: the docstring above is what Typer shows as this command's help
    # text; keep it short and user-facing.
    # NOTE: the `type` parameter shadows the builtin inside this function; it
    # is kept as-is because renaming it would change the keyword interface.
    from ners.research.experiment.experiment_builder import ExperimentBuilder
    from ners.research.model_trainer import ModelTrainer

    cfg = _load_config(config, env)

    # Look up the experiment definition matching (name, type) in the
    # templates file.
    exp_builder = ExperimentBuilder(cfg)
    tmpl = exp_builder.load_templates(templates)
    exp_cfg = exp_builder.find_template(tmpl, name, type)

    # What find_template does on a miss is not visible from this module. If it
    # returns None, fail with a readable message instead of an AttributeError
    # from the `.get` calls below.
    if exp_cfg is None:
        typer.echo(
            f"No experiment template found for name={name!r}, type={type!r} "
            f"in {templates!r}",
            err=True,
        )
        raise typer.Exit(1)

    trainer = ModelTrainer(cfg)
    trainer.train_single_model(
        model_name=exp_cfg.get("name"),
        model_type=exp_cfg.get("model_type"),
        features=exp_cfg.get("features"),
        model_params=exp_cfg.get("model_params", {}),
        tags=exp_cfg.get("tags", []),
    )
# -------------------------
# Monitor commands
# -------------------------
# Command group mounted on the root app under the name `monitor`.
monitor_app = typer.Typer(help="Monitor pipeline checkpoints")
app.add_typer(monitor_app, name="monitor")
@monitor_app.command("status")
def monitor_status(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
    detailed: bool = typer.Option(
        False, help="Show detailed status (failed batch IDs)"
    ),
) -> None:
    """Print the pipeline checkpoint status."""
    # NOTE: the docstring above is what Typer shows as this command's help
    # text; keep it short and user-facing.
    # The returned configuration is not used here; the call is kept for
    # whatever set-up `setup_config` performs (not visible from this module).
    _ = _load_config(config, env)

    from ners.processing.monitoring.pipeline_monitor import PipelineMonitor

    PipelineMonitor().print_status(detailed=detailed)
@monitor_app.command("clean")
def monitor_clean(
    step: Optional[str] = typer.Option(None, help="Step to clean; default all"),
    keep_last: int = typer.Option(1, help="Number of latest checkpoint files to keep"),
    force: bool = typer.Option(False, help="Do not ask for confirmation"),
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Clean checkpoint files for one step or, by default, for every step."""
    # NOTE: the docstring above is what Typer shows as this command's help
    # text; keep it short and user-facing.
    # The returned configuration is not used here; the call is kept for
    # whatever set-up `setup_config` performs (not visible from this module).
    _ = _load_config(config, env)

    from ners.processing.monitoring.pipeline_monitor import PipelineMonitor

    mon = PipelineMonitor()

    # `abort=True` makes Typer abort the command when the user declines.
    if not force:
        typer.confirm("Clean checkpoints?", abort=True)

    # A missing (or empty) --step means "every step the monitor knows about".
    targets = [step] if step else mon.steps
    for target in targets:
        mon.clean_step_checkpoints(target, keep_last)
@monitor_app.command("reset")
def monitor_reset(
    step: Optional[str] = typer.Option(None, help="Step to reset; default all"),
    force: bool = typer.Option(False, help="Do not ask for confirmation"),
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Reset one step or, by default, every step (deletes checkpoints)."""
    # NOTE: the docstring above is what Typer shows as this command's help
    # text; keep it short and user-facing.
    # The returned configuration is not used here; the call is kept for
    # whatever set-up `setup_config` performs (not visible from this module).
    _ = _load_config(config, env)

    from ners.processing.monitoring.pipeline_monitor import PipelineMonitor

    mon = PipelineMonitor()

    # `abort=True` makes Typer abort the command when the user declines.
    if not force:
        msg = f"Reset {step or 'all steps'}? This deletes checkpoints."
        typer.confirm(msg, abort=True)

    # A missing (or empty) --step means "every step the monitor knows about".
    targets = [step] if step else mon.steps
    for target in targets:
        mon.reset_step(target)
# -------------------------
# Web commands
# -------------------------
# Command group mounted on the root app under the name `web`.
web_app = typer.Typer(help="Web UI wrapper")
app.add_typer(web_app, name="web")
@web_app.command("run")
def web_run(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Launch the Streamlit web app via subprocess."""
    # The Streamlit entry script lives in the `web` directory next to this
    # module.
    streamlit_script = Path(__file__).parent / "web" / "app.py"

    # Settings travel to the child process through environment variables so
    # the Streamlit script does not need its own argument parsing.
    # NERS_CONFIG is only set when a config path was actually supplied.
    child_env = dict(os.environ)
    if config is not None:
        child_env["NERS_CONFIG"] = str(config)
    child_env["NERS_ENV"] = env

    # Run Streamlit under the same interpreter that is running this CLI.
    command = [sys.executable, "-m", "streamlit", "run", str(streamlit_script)]

    # Propagate the child's return code as this command's exit status.
    exit_status = subprocess.call(command, env=child_env)
    raise typer.Exit(exit_status)
# Script entry point: invoke the root Typer app when this module is executed
# directly.
if __name__ == "__main__":  # pragma: no cover
    app()