def test_roc_auc(caplog):
    """Exercise roc_auc_scorer with label-style and probability-style inputs.

    Every combination of (column labels | two-column gold probabilities) and
    (two-column probabilities | single-column scores) is expected to give the
    same AUC.  A final call with golds that contain only the value 1 is
    expected to report NaN.
    """
    caplog.set_level(logging.INFO)

    label_golds = np.array([[1], [0], [1], [0], [1], [0]])
    prob_golds = np.array(
        [[0.4, 0.6], [0.9, 0.1], [0.3, 0.7], [0.8, 0.2], [0.1, 0.9], [0.6, 0.4]]
    )
    two_col_probs = np.array(
        [[0.2, 0.8], [0.4, 0.6], [0.1, 0.9], [0.3, 0.7], [0.3, 0.7], [0.8, 0.2]]
    )
    one_col_scores = np.array([[0.8], [0.6], [0.9], [0.7], [0.7], [0.2]])

    expected_auc = 0.9444444444444444
    pairings = (
        (label_golds, two_col_probs),
        (prob_golds, two_col_probs),
        (label_golds, one_col_scores),
        (prob_golds, one_col_scores),
    )
    for gold_input, score_input in pairings:
        result = roc_auc_scorer(gold_input, score_input, None)
        assert isequal(result, {"roc_auc": expected_auc})

    # Golds holding a single class value: the test expects a NaN score.
    single_class_golds = np.array([1, 1, 1, 1, 1, 1])
    result = roc_auc_scorer(single_class_golds, two_col_probs, None)
    assert isequal(result, {"roc_auc": float("nan")})
def test_accuracy(caplog):
    """Exercise accuracy_scorer with preds only, probs only, and topk=2.

    Both integer golds and two-column gold probabilities are used.
    """
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    prob_golds = np.array(
        [[0.6, 0.4], [0.1, 0.9], [0.7, 0.3], [0.2, 0.8], [0.9, 0.1], [0.4, 0.6]]
    )
    class_probs = np.array(
        [[0.9, 0.1], [0.6, 0.4], [1.0, 0.0], [0.8, 0.2], [0.6, 0.4], [0.05, 0.95]]
    )
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    two_thirds = 0.6666666666666666

    # (positional arguments, keyword arguments, expected metric name, value)
    cases = (
        ((label_golds, None, hard_preds), {}, "accuracy", two_thirds),
        ((label_golds, class_probs, None), {}, "accuracy", two_thirds),
        ((label_golds, class_probs, hard_preds), {"topk": 2}, "accuracy@2", 1.0),
        ((prob_golds, None, hard_preds), {}, "accuracy", two_thirds),
        ((prob_golds, class_probs, hard_preds), {"topk": 2}, "accuracy@2", 1.0),
    )
    for positional, keywords, metric_name, metric_value in cases:
        result = accuracy_scorer(*positional, **keywords)
        assert isequal(result, {metric_name: metric_value})
def test_accuracy_f1(caplog):
    """Exercise accuracy_f1_scorer with default and explicit pos_label.

    The default call is made with integer golds and again with two-column
    gold probabilities.
    """
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    prob_golds = np.array(
        [[0.6, 0.4], [0.1, 0.9], [0.7, 0.3], [0.2, 0.8], [0.9, 0.1], [0.4, 0.6]]
    )
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    # (golds, keyword arguments, expected accuracy_f1)
    cases = (
        (label_golds, {}, 0.5833333333333333),
        (label_golds, {"pos_label": 1}, 0.5833333333333333),
        (label_golds, {"pos_label": 0}, 0.7083333333333333),
        (prob_golds, {}, 0.5833333333333333),
    )
    for gold_input, keywords, expected_value in cases:
        result = accuracy_f1_scorer(gold_input, None, hard_preds, **keywords)
        assert isequal(result, {"accuracy_f1": expected_value})
def test_adagrad_optimizer(caplog):
    """Unit test of Adagrad optimizer.

    Configures an ``EmmentalLearner`` for the "adagrad" optimizer, first with
    only the optimizer name set and then with explicit overrides, and checks
    ``optimizer.defaults`` after each ``_set_optimizer`` call.

    The directory passed to ``emmental.init`` is removed in a ``finally``
    clause so a failing assertion does not leave it behind for later tests.
    """
    caplog.set_level(logging.INFO)

    optimizer = "adagrad"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    try:
        # Test default Adagrad setting
        config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
        emmental.Meta.update_config(config)
        emmental_learner._set_optimizer(model)

        assert isequal(
            emmental_learner.optimizer.defaults,
            {
                "lr": 0.001,
                "lr_decay": 0,
                "initial_accumulator_value": 0,
                "eps": 1e-10,
                "weight_decay": 0,
            },
        )

        # Test new Adagrad setting
        config = {
            "learner_config": {
                "optimizer_config": {
                    "optimizer": optimizer,
                    "lr": 0.02,
                    "l2": 0.05,
                    f"{optimizer}_config": {
                        "lr_decay": 0.1,
                        "initial_accumulator_value": 0.2,
                        "eps": 1e-5,
                    },
                }
            }
        }
        emmental.Meta.update_config(config)
        emmental_learner._set_optimizer(model)

        # The expected dict maps the configured "l2" value to "weight_decay".
        assert isequal(
            emmental_learner.optimizer.defaults,
            {
                "lr": 0.02,
                "lr_decay": 0.1,
                "initial_accumulator_value": 0.2,
                "eps": 1e-5,
                "weight_decay": 0.05,
            },
        )
    finally:
        # Always clean up, including when an assertion above fails.
        shutil.rmtree(dirpath)
def test_mean_squared_error(caplog):
    """Exercise mean_squared_error_scorer on the shared module fixtures.

    Covers GOLDS against UNARY_PROBS and PROB_GOLDS against PROBS.
    """
    caplog.set_level(logging.INFO)

    # (golds, probs, expected mean squared error)
    cases = (
        (GOLDS, UNARY_PROBS, 0.1954166666666667),
        (PROB_GOLDS, PROBS, 0.16708333333333336),
    )
    for gold_input, prob_input, expected_value in cases:
        result = mean_squared_error_scorer(gold_input, prob_input, None)
        assert isequal(result, {"mean_squared_error": expected_value})
def test_adadelta_optimizer(caplog):
    """Unit test of Adadelta optimizer.

    Configures an ``EmmentalLearner`` for the "adadelta" optimizer, first with
    only the optimizer name set and then with explicit overrides, and checks
    ``optimizer.defaults`` after each ``_set_optimizer`` call.

    The directory passed to ``emmental.init`` is removed in a ``finally``
    clause so a failing assertion does not leave it behind for later tests.
    """
    caplog.set_level(logging.INFO)

    optimizer = "adadelta"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    try:
        # Test default Adadelta setting
        config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
        emmental.Meta.update_config(config)
        emmental_learner._set_optimizer(model)

        assert isequal(
            emmental_learner.optimizer.defaults,
            {"lr": 0.001, "rho": 0.9, "eps": 1e-06, "weight_decay": 0},
        )

        # Test new Adadelta setting
        config = {
            "learner_config": {
                "optimizer_config": {
                    "optimizer": optimizer,
                    "lr": 0.02,
                    "l2": 0.05,
                    f"{optimizer}_config": {"rho": 0.6, "eps": 1e-05},
                }
            }
        }
        emmental.Meta.update_config(config)
        emmental_learner._set_optimizer(model)

        # The expected dict maps the configured "l2" value to "weight_decay".
        assert isequal(
            emmental_learner.optimizer.defaults,
            {"lr": 0.02, "rho": 0.6, "eps": 1e-05, "weight_decay": 0.05},
        )
    finally:
        # Always clean up, including when an assertion above fails.
        shutil.rmtree(dirpath)
def test_recall(caplog):
    """Exercise recall_scorer for pos_label=1 and pos_label=0."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    # (pos_label, expected recall)
    for positive, expected_value in ((1, 0.3333333333333333), (0, 1)):
        result = recall_scorer(label_golds, None, hard_preds, pos_label=positive)
        assert isequal(result, {"recall": expected_value})
def test_fbeta(caplog):
    """Exercise fbeta_scorer with beta=2 for pos_label=1 and pos_label=0."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    # (pos_label, expected f2)
    for positive, expected_value in ((1, 0.3846153846153846), (0, 0.8823529411764706)):
        result = fbeta_scorer(
            label_golds, None, hard_preds, pos_label=positive, beta=2
        )
        assert isequal(result, {"f2": expected_value})
def test_precision(caplog):
    """Exercise precision_scorer for pos_label=1 and pos_label=0."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    # (pos_label, expected precision)
    for positive, expected_value in ((1, 1), (0, 0.6)):
        result = precision_scorer(label_golds, None, hard_preds, pos_label=positive)
        assert isequal(result, {"precision": expected_value})
def test_f1(caplog):
    """Exercise f1_scorer for pos_label=1 and pos_label=0."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    # (pos_label, expected f1)
    for positive, expected_value in ((1, 0.5), (0, 0.7499999999999999)):
        result = f1_scorer(label_golds, None, hard_preds, pos_label=positive)
        assert isequal(result, {"f1": expected_value})
def test_mean_squared_error(caplog):
    """Exercise mean_squared_error_scorer on a 1-D and a 2-D example."""
    caplog.set_level(logging.INFO)

    # One-dimensional targets and estimates.
    flat_golds = np.array([3, -0.5, 2, 7])
    flat_probs = np.array([2.5, 0.0, 2, 8])
    flat_result = mean_squared_error_scorer(flat_golds, flat_probs, None)
    assert isequal(flat_result, {"mean_squared_error": 0.375})

    # Two-column targets and estimates.
    table_golds = np.array([[0.5, 1], [-1, 1], [7, -6]])
    table_probs = np.array([[0, 2], [-1, 2], [8, -5]])
    table_result = mean_squared_error_scorer(table_golds, table_probs, None)
    assert isequal(table_result, {"mean_squared_error": 0.7083333333333334})
def test_matthews_corrcoef(caplog):
    """Exercise matthews_correlation_coefficient_scorer on shared fixtures.

    Each of GOLDS and PROB_GOLDS is scored against PREDS, once with PROBS
    supplied and once with ``None`` in its place; all four calls are expected
    to return the same coefficient.
    """
    caplog.set_level(logging.INFO)

    expected_value = 0.4472135954999579
    for gold_input in (GOLDS, PROB_GOLDS):
        for prob_input in (PROBS, None):
            result = matthews_correlation_coefficient_scorer(
                gold_input, prob_input, PREDS
            )
            assert isequal(result, {"matthews_corrcoef": expected_value})
def test_pearson_correlation(caplog):
    """Exercise pearson_correlation_scorer with and without return_pvalue."""
    caplog.set_level(logging.INFO)

    correlation = 0.5667402091575048
    pvalue = 0.24090659530906683

    # Default call: only the correlation is expected in the result.
    plain_result = pearson_correlation_scorer(GOLDS, UNARY_PROBS, None)
    assert isequal(plain_result, {"pearson_correlation": correlation})

    # With return_pvalue=True the p-value is expected alongside it.
    detailed_result = pearson_correlation_scorer(
        GOLDS, UNARY_PROBS, None, return_pvalue=True
    )
    expected_detailed = {
        "pearson_correlation": correlation,
        "pearson_pvalue": pvalue,
    }
    assert isequal(detailed_result, expected_detailed)
def test_spearman_correlation(caplog):
    """Exercise spearman_correlation_scorer with and without return_pvalue."""
    caplog.set_level(logging.INFO)

    correlation = 0.5940885257860046
    pvalue = 0.21370636293028789

    # Default call: only the correlation is expected in the result.
    plain_result = spearman_correlation_scorer(GOLDS, UNARY_PROBS, None)
    assert isequal(plain_result, {"spearman_correlation": correlation})

    # With return_pvalue=True the p-value is expected alongside it.
    detailed_result = spearman_correlation_scorer(
        GOLDS, UNARY_PROBS, None, return_pvalue=True
    )
    expected_detailed = {
        "spearman_correlation": correlation,
        "spearman_pvalue": pvalue,
    }
    assert isequal(detailed_result, expected_detailed)
def test_pearson_spearman(caplog):
    """Exercise pearson_spearman_scorer on a small binary-label example."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([1, 0, 1, 0, 1, 0])
    scores = np.array([0.8, 0.6, 0.9, 0.7, 0.7, 0.2])

    expected_result = {"pearson_spearman": 0.7342997297919428}
    assert isequal(pearson_spearman_scorer(label_golds, scores, None), expected_result)
def test_accuracy_f1(caplog):
    """Exercise accuracy_f1_scorer on the shared module fixtures.

    Covers the default call, explicit pos_label values, and PROB_GOLDS as the
    gold input.
    """
    caplog.set_level(logging.INFO)

    # (golds, keyword arguments, expected accuracy_f1)
    cases = (
        (GOLDS, {}, 0.5833333333333333),
        (GOLDS, {"pos_label": 1}, 0.5833333333333333),
        (GOLDS, {"pos_label": 0}, 0.7083333333333333),
        (PROB_GOLDS, {}, 0.5833333333333333),
    )
    for gold_input, keywords, expected_value in cases:
        result = accuracy_f1_scorer(gold_input, None, PREDS, **keywords)
        assert isequal(result, {"accuracy_f1": expected_value})
def test_accuracy_f1(caplog):
    """Exercise accuracy_f1_scorer with default and explicit pos_label."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    # (keyword arguments, expected accuracy_f1)
    cases = (
        ({}, 0.5833333333333333),
        ({"pos_label": 1}, 0.5833333333333333),
        ({"pos_label": 0}, 0.7083333333333333),
    )
    for keywords, expected_value in cases:
        result = accuracy_f1_scorer(label_golds, None, hard_preds, **keywords)
        assert isequal(result, {"accuracy_f1": expected_value})
def test_matthews_corrcoef(caplog):
    """Exercise matthews_correlation_coefficient_scorer on one example."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    expected_result = {"matthews_corrcoef": 0.4472135954999579}
    actual_result = matthews_correlation_coefficient_scorer(
        label_golds, None, hard_preds
    )
    assert isequal(actual_result, expected_result)
def test_recall(caplog):
    """Exercise recall_scorer for both pos_label values.

    The same expectations are checked with integer golds and with two-column
    gold probabilities.
    """
    caplog.set_level(logging.INFO)

    label_golds = np.array([0, 1, 0, 1, 0, 1])
    prob_golds = np.array(
        [[0.6, 0.4], [0.1, 0.9], [0.7, 0.3], [0.2, 0.8], [0.9, 0.1], [0.4, 0.6]]
    )
    hard_preds = np.array([0, 0, 0, 0, 0, 1])

    # (pos_label, expected recall), checked for each gold representation
    per_label = ((1, 0.3333333333333333), (0, 1))
    for gold_input in (label_golds, prob_golds):
        for positive, expected_value in per_label:
            result = recall_scorer(gold_input, None, hard_preds, pos_label=positive)
            assert isequal(result, {"recall": expected_value})
def test_roc_auc(caplog):
    """Exercise roc_auc_scorer on the shared module fixtures.

    Covers PROBS and UNARY_PROBS as scores, GOLDS and PROB_GOLDS as golds,
    column-reshaped inputs, and golds that contain only the value 1 (expected
    to report NaN).
    """
    caplog.set_level(logging.INFO)

    # Scores given as PROBS.
    assert isequal(
        roc_auc_scorer(GOLDS, PROBS, None), {"roc_auc": 0.8333333333333333}
    )
    assert isequal(
        roc_auc_scorer(PROB_GOLDS, PROBS, None), {"roc_auc": 0.8333333333333333}
    )

    # Scores given as UNARY_PROBS.
    assert isequal(
        roc_auc_scorer(GOLDS, UNARY_PROBS, None), {"roc_auc": 0.8333333333333334}
    )

    # Same inputs reshaped into single-column 2-D arrays.
    column_result = roc_auc_scorer(
        GOLDS.reshape(GOLDS.shape[0], 1),
        UNARY_PROBS.reshape(UNARY_PROBS.shape[0], 1),
        None,
    )
    assert isequal(column_result, {"roc_auc": 0.8333333333333334})

    assert isequal(
        roc_auc_scorer(PROB_GOLDS, UNARY_PROBS, None),
        {"roc_auc": 0.8333333333333334},
    )

    # Golds holding a single class value: the test expects a NaN score.
    single_class_golds = np.array([1, 1, 1, 1, 1, 1])
    nan_result = roc_auc_scorer(single_class_golds, PROBS, None)
    assert isequal(nan_result, {"roc_auc": float("nan")})
def test_pearson_correlation(caplog):
    """Exercise pearson_correlation_scorer with and without return_pvalue."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([1, 0, 1, 0, 1, 0])
    scores = np.array([0.8, 0.6, 0.9, 0.7, 0.7, 0.2])

    correlation = 0.6764814252025461
    pvalue = 0.14006598491201777

    # Default call: only the correlation is expected in the result.
    plain_result = pearson_correlation_scorer(label_golds, scores, None)
    assert isequal(plain_result, {"pearson_correlation": correlation})

    # With return_pvalue=True the p-value is expected alongside it.
    detailed_result = pearson_correlation_scorer(
        label_golds, scores, None, return_pvalue=True
    )
    expected_detailed = {
        "pearson_correlation": correlation,
        "pearson_pvalue": pvalue,
    }
    assert isequal(detailed_result, expected_detailed)
def test_spearman_correlation(caplog):
    """Exercise spearman_correlation_scorer with and without return_pvalue."""
    caplog.set_level(logging.INFO)

    label_golds = np.array([1, 0, 1, 0, 1, 0])
    scores = np.array([0.8, 0.6, 0.9, 0.7, 0.7, 0.2])

    correlation = 0.7921180343813395
    pvalue = 0.06033056705743058

    # Default call: only the correlation is expected in the result.
    plain_result = spearman_correlation_scorer(label_golds, scores, None)
    assert isequal(plain_result, {"spearman_correlation": correlation})

    # With return_pvalue=True the p-value is expected alongside it.
    detailed_result = spearman_correlation_scorer(
        label_golds, scores, None, return_pvalue=True
    )
    expected_detailed = {
        "spearman_correlation": correlation,
        "spearman_pvalue": pvalue,
    }
    assert isequal(detailed_result, expected_detailed)
def test_accuracy(caplog):
    """Exercise accuracy_scorer on the shared module fixtures.

    Covers preds and/or probs being supplied, ``normalize=False``, ``topk=2``,
    and PROB_GOLDS as the gold input.
    """
    caplog.set_level(logging.INFO)

    two_thirds = 0.6666666666666666

    # (positional arguments, keyword arguments, expected metric name, value)
    cases = (
        ((GOLDS, PROBS, PREDS), {}, "accuracy", two_thirds),
        ((GOLDS, None, PREDS), {}, "accuracy", two_thirds),
        ((GOLDS, PROBS, None), {}, "accuracy", two_thirds),
        ((GOLDS, None, PREDS), {"normalize": False}, "accuracy", 4),
        ((GOLDS, PROBS, PREDS), {"topk": 2}, "accuracy@2", 1.0),
        ((PROB_GOLDS, None, PREDS), {}, "accuracy", two_thirds),
        ((PROB_GOLDS, PROBS, PREDS), {"topk": 2}, "accuracy@2", 1.0),
        (
            (PROB_GOLDS, PROBS, PREDS),
            {"topk": 2, "normalize": False},
            "accuracy@2",
            6,
        ),
    )
    for positional, keywords, metric_name, metric_value in cases:
        result = accuracy_scorer(*positional, **keywords)
        assert isequal(result, {metric_name: metric_value})
def test_f1(caplog):
    """Exercise f1_scorer on the shared module fixtures.

    The same three calls (PROBS supplied or ``None``, pos_label 1 or 0) are
    made first with GOLDS and then with PROB_GOLDS.
    """
    caplog.set_level(logging.INFO)

    # (probs, pos_label, expected f1), checked for each gold representation
    per_gold_cases = (
        (PROBS, 1, 0.5),
        (None, 1, 0.5),
        (None, 0, 0.7499999999999999),
    )
    for gold_input in (GOLDS, PROB_GOLDS):
        for prob_input, positive, expected_value in per_gold_cases:
            result = f1_scorer(gold_input, prob_input, PREDS, pos_label=positive)
            assert isequal(result, {"f1": expected_value})
def test_recall(caplog):
    """Exercise recall_scorer on the shared module fixtures.

    The same three calls (PROBS supplied or ``None``, pos_label 1 or 0) are
    made first with GOLDS and then with PROB_GOLDS.
    """
    caplog.set_level(logging.INFO)

    # (probs, pos_label, expected recall), checked for each gold representation
    per_gold_cases = (
        (PROBS, 1, 0.3333333333333333),
        (None, 1, 0.3333333333333333),
        (None, 0, 1),
    )
    for gold_input in (GOLDS, PROB_GOLDS):
        for prob_input, positive, expected_value in per_gold_cases:
            result = recall_scorer(gold_input, prob_input, PREDS, pos_label=positive)
            assert isequal(result, {"recall": expected_value})
def test_fbeta(caplog):
    """Exercise fbeta_scorer with beta=2 on the shared module fixtures.

    The same three calls (PROBS supplied or ``None``, pos_label 1 or 0) are
    made first with GOLDS and then with PROB_GOLDS.
    """
    caplog.set_level(logging.INFO)

    # (probs, pos_label, expected f2), checked for each gold representation
    per_gold_cases = (
        (PROBS, 1, 0.3846153846153846),
        (None, 1, 0.3846153846153846),
        (None, 0, 0.8823529411764706),
    )
    for gold_input in (GOLDS, PROB_GOLDS):
        for prob_input, positive, expected_value in per_gold_cases:
            result = fbeta_scorer(
                gold_input, prob_input, PREDS, pos_label=positive, beta=2
            )
            assert isequal(result, {"f2": expected_value})
def test_precision(caplog):
    """Exercise precision_scorer on the shared module fixtures.

    The same three calls (PROBS supplied or ``None``, pos_label 1 or 0) are
    made first with GOLDS and then with PROB_GOLDS.
    """
    caplog.set_level(logging.INFO)

    # (probs, pos_label, expected precision), checked for each gold representation
    per_gold_cases = (
        (PROBS, 1, 1),
        (None, 1, 1),
        (None, 0, 0.6),
    )
    for gold_input in (GOLDS, PROB_GOLDS):
        for prob_input, positive, expected_value in per_gold_cases:
            result = precision_scorer(
                gold_input, prob_input, PREDS, pos_label=positive
            )
            assert isequal(result, {"precision": expected_value})
# Test default ASGD setting config = { "learner_config": { "optimizer_config": { "optimizer": optimizer, "lr": 0.02, "l2": 0.05, f"{optimizer}_config": { "lambd": 0.1, "alpha": 0.5, "t0": 1e5 }, } } } emmental.Meta.update_config(config) emmental_learner._set_optimizer(model) assert isequal( emmental_learner.optimizer.defaults, { "lr": 0.02, "lambd": 0.1, "alpha": 0.5, "t0": 1e5, "weight_decay": 0.05 }, ) shutil.rmtree(dirpath)
def test_pearson_spearman(caplog):
    """Exercise pearson_spearman_scorer on GOLDS against UNARY_PROBS."""
    caplog.set_level(logging.INFO)

    expected_result = {"pearson_spearman": 0.5804143674717547}
    actual_result = pearson_spearman_scorer(GOLDS, UNARY_PROBS, None)
    assert isequal(actual_result, expected_result)