116 lines
4.8 KiB
Python
116 lines
4.8 KiB
Python
import argparse
|
|
import os
|
|
|
|
import tensorflow as tf
|
|
from sklearn.metrics import (
|
|
accuracy_score, precision_recall_fscore_support, confusion_matrix
|
|
)
|
|
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
|
|
|
from misc import GENDER_MODELS_DIR, load_csv_dataset, save_json_dataset, load_pickle, GENDER_RESULT_DIR
|
|
|
|
|
|
def evaluate_logreg(df, threshold):
    """
    Score the names in ``df`` with the pickled logistic regression model.

    The model (``regression_model.pkl``) and its label encoder
    (``regression_label_encoder.pkl``) are read from ``GENDER_MODELS_DIR``.
    The ``name`` column is handed to ``predict_proba`` as a plain list and the
    ``sex`` column is encoded with the loaded encoder. A row is predicted as
    class 1 when its column-1 probability is greater than or equal to
    ``threshold``.

    Returns a 4-tuple: encoded true labels, thresholded predictions (ints),
    the column-1 probabilities, and the encoder's ``classes_``.
    """

    def _artifact(filename):
        # Every artifact for this model lives in the shared models directory.
        return load_pickle(os.path.join(GENDER_MODELS_DIR, filename))

    classifier = _artifact("regression_model.pkl")
    label_encoder = _artifact("regression_label_encoder.pkl")

    names = df["name"].tolist()
    expected = label_encoder.transform(df["sex"])

    # Only the second probability column is used downstream.
    positive_scores = classifier.predict_proba(names)[:, 1]
    predicted = (positive_scores >= threshold).astype(int)

    return expected, predicted, positive_scores, label_encoder.classes_
|
|
|
|
|
|
def evaluate_lstm(df, threshold, max_len=6):
    """
    Run the saved BiLSTM model over ``df`` and threshold its output.

    Loads ``lstm_model.keras``, ``lstm_tokenizer.pkl`` and
    ``lstm_label_encoder.pkl`` from ``GENDER_MODELS_DIR``. The ``name`` column
    is tokenized and post-padded to ``max_len``; the ``sex`` column is encoded
    with the loaded encoder. A row is predicted as class 1 when column 1 of the
    model output is greater than or equal to ``threshold``.

    Returns a 4-tuple: encoded true labels, thresholded predictions (ints),
    the column-1 scores, and the encoder's ``classes_``.
    """
    models_dir = GENDER_MODELS_DIR
    network = tf.keras.models.load_model(os.path.join(models_dir, "lstm_model.keras"))
    text_tokenizer = load_pickle(os.path.join(models_dir, "lstm_tokenizer.pkl"))
    label_encoder = load_pickle(os.path.join(models_dir, "lstm_label_encoder.pkl"))

    padded_inputs = pad_sequences(
        text_tokenizer.texts_to_sequences(df["name"]),
        maxlen=max_len,
        padding="post",
    )
    expected = label_encoder.transform(df["sex"])

    # Column 1 of the model output is the score that gets thresholded.
    positive_scores = network.predict(padded_inputs)[:, 1]
    predicted = (positive_scores >= threshold).astype(int)

    return expected, predicted, positive_scores, label_encoder.classes_
|
|
|
|
|
|
def evaluate_transformer(df, threshold, max_len=6):
    """
    Run the saved transformer model over ``df`` and threshold its output.

    Three artifacts are read from ``GENDER_MODELS_DIR``: the Keras model
    (``transformer.keras``), the tokenizer (``transformer_tokenizer.pkl``) and
    the label encoder (``transformer_label_encoder.pkl``). Names from the
    ``name`` column are tokenized and post-padded to ``max_len``; the ``sex``
    column is converted to numeric labels by the encoder. Column 1 of the model
    output is compared against ``threshold`` (``>=``) to produce predictions.

    Returns a 4-tuple: encoded true labels, thresholded predictions (ints),
    the column-1 scores, and the encoder's ``classes_``.
    """

    def _in_models_dir(filename):
        return os.path.join(GENDER_MODELS_DIR, filename)

    net = tf.keras.models.load_model(_in_models_dir("transformer.keras"))
    name_tokenizer = load_pickle(_in_models_dir("transformer_tokenizer.pkl"))
    sex_encoder = load_pickle(_in_models_dir("transformer_label_encoder.pkl"))

    token_ids = name_tokenizer.texts_to_sequences(df["name"])
    features = pad_sequences(token_ids, maxlen=max_len, padding="post")
    targets = sex_encoder.transform(df["sex"])

    scores = net.predict(features)
    class_one_scores = scores[:, 1]
    decisions = (class_one_scores >= threshold).astype(int)

    return targets, decisions, class_one_scores, sex_encoder.classes_
|
|
|
|
|
|
def compute_metrics(y_true, y_pred, y_proba, class_names):
    """
    Compute classification metrics for the given true and predicted labels.

    Calculates accuracy, binary-averaged precision / recall / F1 and the
    confusion matrix, and returns them in a dictionary.

    Parameters:
        y_true: true labels.
        y_pred: predicted labels.
        y_proba: predicted probabilities. Accepted for interface compatibility;
            not used by any metric computed here.
        class_names: class names reported next to the confusion matrix. May be
            a NumPy array (e.g. ``LabelEncoder.classes_``) or any plain sequence.

    Returns:
        dict with keys ``accuracy``, ``precision``, ``recall``, ``f1`` and
        ``confusion_matrix`` (itself a dict with ``labels`` and ``matrix``).
    """
    # Accept both NumPy arrays and ordinary sequences for the class names.
    if hasattr(class_names, "tolist"):
        labels = class_names.tolist()
    else:
        labels = list(class_names)

    acc = accuracy_score(y_true, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary")

    # Without an explicit label list, confusion_matrix only covers the labels
    # that actually occur, so a single-class sample would yield a 1x1 matrix
    # that no longer lines up with `labels`. When every observed label is a
    # valid index into `labels` (the case for label-encoded targets), pin the
    # matrix to one row/column per class. Otherwise keep the default behavior.
    index_labels = list(range(len(labels)))
    observed = set(y_true) | set(y_pred)
    if index_labels and observed.issubset(index_labels):
        cm = confusion_matrix(y_true, y_pred, labels=index_labels).tolist()
    else:
        cm = confusion_matrix(y_true, y_pred).tolist()

    return {
        "accuracy": acc,
        "precision": pr,
        "recall": rc,
        "f1": f1,
        "confusion_matrix": {
            "labels": labels,
            "matrix": cm,
        },
    }
|
|
|
|
|
|
def main():
    """
    Command-line entry point: evaluate one gender prediction model.

    Parses ``--model`` (required; ``logreg``, ``lstm`` or ``transformer``),
    ``--dataset``, ``--size``, ``--balanced`` and ``--threshold``, loads the
    dataset, runs the selected evaluator, computes metrics and saves them to
    ``<GENDER_RESULT_DIR>/<model>_eval`` via ``save_json_dataset``.

    Raises:
        ValueError: if the model name has no registered evaluator.
    """
    parser = argparse.ArgumentParser(description="Evaluate gender prediction model")
    parser.add_argument("--model", choices=["logreg", "lstm", "transformer"], required=True)
    parser.add_argument("--dataset", default="names_evaluation.csv", help="Path to the dataset CSV file")
    parser.add_argument("--size", type=int, help="Number of rows to load from the dataset")
    parser.add_argument("--balanced", action="store_true", help="Load balanced dataset")
    parser.add_argument("--threshold", type=float, default=0.5, help="Probability threshold for classification")
    args = parser.parse_args()

    df = load_csv_dataset(args.dataset, args.size, args.balanced)

    model_funcs = {
        "logreg": evaluate_logreg,
        "lstm": evaluate_lstm,
        "transformer": evaluate_transformer,
    }

    # Look up the evaluator separately from calling it, so that a KeyError
    # raised inside the evaluation itself (for example a missing dataset
    # column) is not misreported as an unknown model.
    evaluate = model_funcs.get(args.model)
    if evaluate is None:
        raise ValueError(f"Unknown model: {args.model}")

    y_true, y_pred, y_proba, classes = evaluate(df, args.threshold)

    results = compute_metrics(y_true, y_pred, y_proba, classes)
    save_json_dataset(results, os.path.join(GENDER_RESULT_DIR, f'{args.model}_eval'))
|
|
|
|
|
|
# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|