Files
drc-ners-nlp/pipeline/gender/eval.py
T

116 lines
4.8 KiB
Python

import argparse
import os
import tensorflow as tf
from sklearn.metrics import (
accuracy_score, precision_recall_fscore_support, confusion_matrix
)
from tensorflow.keras.preprocessing.sequence import pad_sequences
from misc import GENDER_MODELS_DIR, load_csv_dataset, save_json_dataset, load_pickle, GENDER_RESULT_DIR
def evaluate_logreg(df, threshold):
    """
    Score the pickled logistic-regression gender model on ``df``.

    The model and its label encoder are unpickled from ``GENDER_MODELS_DIR``.
    The "name" column is handed to ``predict_proba`` as a plain list, and the
    "sex" column is converted to integer labels with the encoder. A row is
    predicted as label 1 when the probability in column 1 is at least
    ``threshold``, otherwise as label 0.

    Returns a tuple ``(y_true, y_pred, scores, classes)`` where ``scores`` is
    column 1 of the probability matrix and ``classes`` is the encoder's
    ``classes_`` attribute.

    NOTE(review): this assumes the model's probability columns follow the same
    order as ``encoder.classes_`` -- confirm against the training script.
    """
    classifier = load_pickle(
        os.path.join(GENDER_MODELS_DIR, "regression_model.pkl")
    )
    label_encoder = load_pickle(
        os.path.join(GENDER_MODELS_DIR, "regression_label_encoder.pkl")
    )

    names = df["name"].tolist()
    expected = label_encoder.transform(df["sex"])

    # Only column 1 of the probability matrix is thresholded and returned.
    scores = classifier.predict_proba(names)[:, 1]
    predicted = (scores >= threshold).astype(int)
    return expected, predicted, scores, label_encoder.classes_
def evaluate_lstm(df, threshold, max_len=6):
    """
    Score the saved LSTM gender model ("lstm_model.keras") on ``df``.

    The Keras model, its tokenizer and its label encoder are loaded from
    ``GENDER_MODELS_DIR``. The "name" column is tokenized and post-padded to
    ``max_len`` tokens; the "sex" column is converted to integer labels with
    the encoder. A row is predicted as label 1 when the model output in
    column 1 is at least ``threshold``, otherwise as label 0.

    Returns a tuple ``(y_true, y_pred, scores, classes)`` where ``scores`` is
    column 1 of the model output and ``classes`` is ``encoder.classes_``.

    NOTE(review): indexing column 1 presumes the network emits one score per
    class (at least two output columns) -- confirm against the training code.
    """
    models_dir = GENDER_MODELS_DIR
    network = tf.keras.models.load_model(os.path.join(models_dir, "lstm_model.keras"))
    name_tokenizer = load_pickle(os.path.join(models_dir, "lstm_tokenizer.pkl"))
    label_encoder = load_pickle(os.path.join(models_dir, "lstm_label_encoder.pkl"))

    # Tokenize the names, then pad at the end so every row has max_len entries.
    padded_names = pad_sequences(
        name_tokenizer.texts_to_sequences(df["name"]),
        maxlen=max_len,
        padding="post",
    )
    expected = label_encoder.transform(df["sex"])

    scores = network.predict(padded_names)[:, 1]
    predicted = (scores >= threshold).astype(int)
    return expected, predicted, scores, label_encoder.classes_
def evaluate_transformer(df, threshold, max_len=6):
    """
    Score the saved transformer gender model ("transformer.keras") on ``df``.

    Steps, in order:
      1. Load the Keras model, tokenizer and label encoder from
         ``GENDER_MODELS_DIR``.
      2. Tokenize the "name" column and post-pad each sequence to ``max_len``.
      3. Convert the "sex" column to integer labels with the encoder.
      4. Run the model and predict label 1 wherever the output in column 1 is
         at least ``threshold`` (label 0 otherwise).

    Returns a tuple ``(y_true, y_pred, scores, classes)`` where ``scores`` is
    column 1 of the model output and ``classes`` is ``encoder.classes_``.

    NOTE(review): indexing column 1 presumes the network emits one score per
    class (at least two output columns) -- confirm against the training code.
    """

    def artifact(filename):
        # Every artifact of this model lives directly under GENDER_MODELS_DIR.
        return os.path.join(GENDER_MODELS_DIR, filename)

    network = tf.keras.models.load_model(artifact("transformer.keras"))
    name_tokenizer = load_pickle(artifact("transformer_tokenizer.pkl"))
    label_encoder = load_pickle(artifact("transformer_label_encoder.pkl"))

    token_ids = name_tokenizer.texts_to_sequences(df["name"])
    features = pad_sequences(token_ids, maxlen=max_len, padding="post")
    expected = label_encoder.transform(df["sex"])

    outputs = network.predict(features)
    scores = outputs[:, 1]
    predicted = (scores >= threshold).astype(int)
    return expected, predicted, scores, label_encoder.classes_
def compute_metrics(y_true, y_pred, y_proba, class_names):
    """
    Compute accuracy, precision, recall, F1 and the confusion matrix.

    Parameters:
        y_true: integer-encoded ground-truth labels.
        y_pred: integer-encoded predicted labels.
        y_proba: predicted scores. Accepted so the signature matches the tuple
            returned by the ``evaluate_*`` functions; not used by any metric
            computed here.
        class_names: class labels ordered by their encoded index (a NumPy
            array such as ``encoder.classes_``, or any plain sequence).

    Returns:
        A dict with the keys "accuracy", "precision", "recall", "f1" and
        "confusion_matrix"; the latter holds the class "labels" and the
        "matrix" as nested lists.

    Precision, recall and F1 use ``average="binary"``, i.e. they are reported
    for the positive class (encoded label 1) only.
    """
    label_names = (
        class_names.tolist() if hasattr(class_names, "tolist") else list(class_names)
    )

    acc = accuracy_score(y_true, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary")

    # Pin the matrix to one row/column per known class. Without `labels`,
    # scikit-learn infers the classes from the data, so a sample in which only
    # one class occurs would yield a 1x1 matrix that no longer lines up with
    # the "labels" list reported next to it.
    cm = confusion_matrix(
        y_true, y_pred, labels=list(range(len(label_names)))
    ).tolist()

    return {
        "accuracy": acc,
        "precision": pr,
        "recall": rc,
        "f1": f1,
        "confusion_matrix": {
            "labels": label_names,
            "matrix": cm,
        },
    }
def main(argv=None):
    """
    Command-line entry point: evaluate one gender model and save its metrics.

    Parses the command line, loads the evaluation dataset, runs the evaluation
    function selected by ``--model``, computes the metrics and saves them
    under ``GENDER_RESULT_DIR`` as ``<model>_eval``.

    Parameters:
        argv: optional list of command-line arguments (without the program
            name). When None, argparse reads ``sys.argv[1:]``.

    Raises:
        SystemExit: via argparse (exit status 2) for invalid arguments,
            including a ``--threshold`` outside the range [0, 1].
        ValueError: if the selected model has no registered evaluation
            function.
    """
    parser = argparse.ArgumentParser(description="Evaluate gender prediction model")
    parser.add_argument("--model", choices=["logreg", "lstm", "transformer"], required=True)
    parser.add_argument("--dataset", default="names_evaluation.csv", help="Path to the dataset CSV file")
    parser.add_argument("--size", type=int, help="Number of rows to load from the dataset")
    parser.add_argument("--balanced", action="store_true", help="Load balanced dataset")
    parser.add_argument("--threshold", type=float, default=0.5, help="Probability threshold for classification")
    args = parser.parse_args(argv)

    # The threshold is compared against probabilities, so a value outside
    # [0, 1] (or NaN) can only produce degenerate all-0 / all-1 predictions.
    # Reject it before loading any dataset or model.
    if not 0.0 <= args.threshold <= 1.0:
        parser.error(f"--threshold must be between 0 and 1, got {args.threshold}")

    df = load_csv_dataset(args.dataset, args.size, args.balanced)

    model_funcs = {
        "logreg": evaluate_logreg,
        "lstm": evaluate_lstm,
        "transformer": evaluate_transformer,
    }

    # Guard only the dispatch lookup. Wrapping the evaluation call itself in
    # `except KeyError` would misreport any KeyError raised during evaluation
    # (for example a missing "name" or "sex" column) as an unknown model.
    evaluate = model_funcs.get(args.model)
    if evaluate is None:
        raise ValueError(f"Unknown model: {args.model}")

    y_true, y_pred, y_proba, classes = evaluate(df, args.threshold)
    results = compute_metrics(y_true, y_pred, y_proba, classes)
    save_json_dataset(results, os.path.join(GENDER_RESULT_DIR, f'{args.model}_eval'))


if __name__ == "__main__":
    main()