import argparse
import json
import os

import pandas as pd
import tensorflow as tf
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix,
)
from tensorflow.keras.preprocessing.sequence import pad_sequences

from misc import (
    GENDER_MODELS_DIR,
    load_csv_dataset,
    save_json_dataset,
    load_pickle,
    GENDER_RESULT_DIR,
)


def load_dataset(path="names.csv", size=None):
    """
    Load a names dataset and normalise the columns used for evaluation.

    Rows with a missing ``name`` or ``sex`` value are dropped; both columns
    are then lower-cased and stripped of leading/trailing whitespace.

    :param path: Path passed to ``load_csv_dataset``. Defaults to "names.csv".
    :type path: str
    :param size: Row limit passed to ``load_csv_dataset``; ``None`` is
        forwarded unchanged (presumably meaning "load everything" -- confirm
        against ``misc.load_csv_dataset``).
    :type size: Optional[int]
    :return: The cleaned dataset.
    :rtype: pandas.DataFrame
    """
    df = pd.DataFrame(load_csv_dataset(path, size)).dropna(subset=["name", "sex"])
    df["name"] = df["name"].str.lower().str.strip()
    df["sex"] = df["sex"].str.lower().str.strip()
    return df


def _positive_class_proba(proba):
    """
    Return a 1-D array with the probability of the positive class (label 1).

    A two-column (or wider) output is read as one column per class and
    column 1 is returned, exactly as the evaluators always did. A 1-D or
    single-column output is flattened and returned as-is.

    NOTE(review): the single-column case assumes a sigmoid head that emits
    P(class encoded as 1) -- confirm against the training scripts.

    :param proba: Raw model output of shape (n,), (n, 1) or (n, k).
    :type proba: numpy.ndarray
    :return: Positive-class probabilities of shape (n,).
    :rtype: numpy.ndarray
    """
    if proba.ndim == 2 and proba.shape[1] > 1:
        return proba[:, 1]
    return proba.ravel()


def _apply_threshold(positive_proba, threshold):
    """
    Turn positive-class probabilities into 0/1 labels, element-wise.

    :param positive_proba: Positive-class probabilities of shape (n,).
    :type positive_proba: numpy.ndarray
    :param threshold: Probabilities greater than or equal to this value are
        labelled 1, all others 0.
    :type threshold: float
    :return: Integer array of 0/1 predictions with the same shape.
    :rtype: numpy.ndarray
    """
    # A Python conditional expression (`1 if arr >= t else 0`) would ask for
    # the truth value of the whole array, which raises ValueError for more
    # than one element; the comparison must stay vectorised.
    return (positive_proba >= threshold).astype(int)


def evaluate_logreg(df, threshold):
    """
    Evaluate the pickled logistic-regression model on ``df``.

    Loads ``regression_model.pkl`` and ``regression_label_encoder.pkl`` from
    ``GENDER_MODELS_DIR``, predicts class probabilities for the raw names and
    thresholds the positive-class probability.

    :param df: Data with a "name" column (inputs) and a "sex" column (true
        labels).
    :type df: pandas.DataFrame
    :param threshold: Probabilities greater than or equal to this value are
        classified into the positive class.
    :type threshold: float
    :return: ``(y_true, y_pred, positive_proba, classes)`` where ``y_true``
        are the encoded labels, ``y_pred`` the 0/1 predictions,
        ``positive_proba`` the positive-class probabilities and ``classes``
        the label encoder's ``classes_``.
    :rtype: tuple
    """
    model = load_pickle(os.path.join(GENDER_MODELS_DIR, "regression_model.pkl"))
    encoder = load_pickle(os.path.join(GENDER_MODELS_DIR, "regression_label_encoder.pkl"))

    X = df["name"].tolist()
    y_true = encoder.transform(df["sex"])

    positive_proba = _positive_class_proba(model.predict_proba(X))
    y_pred = _apply_threshold(positive_proba, threshold)
    return y_true, y_pred, positive_proba, encoder.classes_


def _evaluate_keras_model(df, threshold, max_len, model_file, tokenizer_file, encoder_file):
    """
    Shared evaluation routine for the Keras models (LSTM and transformer).

    Loads the model, tokenizer and label encoder from ``GENDER_MODELS_DIR``,
    tokenizes the "name" column, post-pads the sequences to ``max_len`` and
    thresholds the predicted positive-class probability.

    :param df: Data with a "name" column (inputs) and a "sex" column (true
        labels).
    :type df: pandas.DataFrame
    :param threshold: Decision threshold on the positive-class probability.
    :type threshold: float
    :param max_len: Length passed to ``pad_sequences`` as ``maxlen``.
    :type max_len: int
    :param model_file: File name of the saved Keras model.
    :type model_file: str
    :param tokenizer_file: File name of the pickled tokenizer.
    :type tokenizer_file: str
    :param encoder_file: File name of the pickled label encoder.
    :type encoder_file: str
    :return: ``(y_true, y_pred, positive_proba, classes)``.
    :rtype: tuple
    """
    model = tf.keras.models.load_model(os.path.join(GENDER_MODELS_DIR, model_file))
    tokenizer = load_pickle(os.path.join(GENDER_MODELS_DIR, tokenizer_file))
    encoder = load_pickle(os.path.join(GENDER_MODELS_DIR, encoder_file))

    sequences = tokenizer.texts_to_sequences(df["name"])
    X = pad_sequences(sequences, maxlen=max_len, padding="post")
    y_true = encoder.transform(df["sex"])

    positive_proba = _positive_class_proba(model.predict(X))
    y_pred = _apply_threshold(positive_proba, threshold)
    return y_true, y_pred, positive_proba, encoder.classes_


def evaluate_lstm(df, threshold, max_len=6):
    """
    Evaluate the saved LSTM model (``lstm_model.keras``) on ``df``.

    :param df: Data with a "name" column (inputs) and a "sex" column (true
        labels).
    :type df: pandas.DataFrame
    :param threshold: Decision threshold on the positive-class probability.
    :type threshold: float
    :param max_len: Length tokenized sequences are padded/truncated to.
        Defaults to 6.
    :type max_len: int
    :return: ``(y_true, y_pred, positive_proba, classes)`` -- encoded true
        labels, 0/1 predictions, positive-class probabilities and the label
        encoder's ``classes_``.
    :rtype: tuple
    """
    return _evaluate_keras_model(
        df,
        threshold,
        max_len,
        model_file="lstm_model.keras",
        tokenizer_file="lstm_tokenizer.pkl",
        encoder_file="lstm_label_encoder.pkl",
    )


def evaluate_transformer(df, threshold, max_len=6):
    """
    Evaluate the saved transformer model (``transformer.keras``) on ``df``.

    :param df: Data with a "name" column (inputs) and a "sex" column (true
        labels).
    :type df: pandas.DataFrame
    :param threshold: Decision threshold on the positive-class probability.
    :type threshold: float
    :param max_len: Length tokenized sequences are padded/truncated to.
        Defaults to 6.
    :type max_len: int
    :return: ``(y_true, y_pred, positive_proba, classes)`` -- encoded true
        labels, 0/1 predictions, positive-class probabilities and the label
        encoder's ``classes_``.
    :rtype: tuple
    """
    return _evaluate_keras_model(
        df,
        threshold,
        max_len,
        model_file="transformer.keras",
        tokenizer_file="transformer_tokenizer.pkl",
        encoder_file="transformer_label_encoder.pkl",
    )


def compute_metrics(y_true, y_pred, y_proba, class_names):
    """
    Compute accuracy, binary precision/recall/F1 and the confusion matrix.

    :param y_true: Ground-truth encoded labels.
    :type y_true: list or numpy.ndarray
    :param y_pred: Predicted encoded labels.
    :type y_pred: list or numpy.ndarray
    :param y_proba: Predicted positive-class probabilities. Accepted for
        interface compatibility; not used by any metric computed here.
    :type y_proba: list or numpy.ndarray
    :param class_names: Class names in label-encoder order, so that index
        ``i`` names encoded label ``i``.
    :type class_names: numpy.ndarray
    :return: Dictionary with keys "accuracy", "precision", "recall", "f1"
        and "confusion_matrix" (itself a dict with "labels" and "matrix").
    :rtype: dict
    """
    acc = accuracy_score(y_true, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary")

    # Pin the matrix to every known class: without `labels`, a class absent
    # from both y_true and y_pred is dropped and the matrix no longer lines
    # up with the "labels" list reported next to it.
    label_ids = list(range(len(class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=label_ids).tolist()

    return {
        "accuracy": float(acc),
        "precision": float(pr),
        "recall": float(rc),
        "f1": float(f1),
        "confusion_matrix": {
            "labels": class_names.tolist(),
            "matrix": cm,
        },
    }


def main():
    """
    Command-line entry point: evaluate one model and save its metrics.

    Parses ``--model``, ``--dataset``, ``--size`` and ``--threshold``, runs
    the matching evaluator and writes the metrics through
    ``save_json_dataset`` to ``<GENDER_RESULT_DIR>/<model>_eval``.
    """
    # argparse restricts --model to these keys, so the lookup cannot miss.
    evaluators = {
        "logreg": evaluate_logreg,
        "lstm": evaluate_lstm,
        "transformer": evaluate_transformer,
    }

    parser = argparse.ArgumentParser(description="Evaluate gender prediction model")
    parser.add_argument("--model", choices=list(evaluators), required=True)
    parser.add_argument("--dataset", default="names.csv")
    parser.add_argument("--size", type=int)
    parser.add_argument("--threshold", type=float, default=0.5)
    args = parser.parse_args()

    df = load_dataset(args.dataset, args.size)
    y_true, y_pred, y_proba, classes = evaluators[args.model](df, args.threshold)

    results = compute_metrics(y_true, y_pred, y_proba, classes)
    save_json_dataset(results, os.path.join(GENDER_RESULT_DIR, f'{args.model}_eval'))


if __name__ == "__main__":
    main()