def __init__(self):
    super().__init__()
    self.model_param = EvaluateParam()
    self.eval_results = defaultdict(list)
    self.save_single_value_metric_list = [
        consts.AUC,
        consts.EXPLAINED_VARIANCE,
        consts.MEAN_ABSOLUTE_ERROR,
        consts.MEAN_SQUARED_ERROR,
        consts.MEAN_SQUARED_LOG_ERROR,
        consts.MEDIAN_ABSOLUTE_ERROR,
        consts.R2_SCORE,
        consts.ROOT_MEAN_SQUARED_ERROR,
    ]
    self.save_curve_metric_list = [
        consts.KS,
        consts.ROC,
        consts.LIFT,
        consts.GAIN,
        consts.PRECISION,
        consts.RECALL,
        consts.ACCURACY,
    ]
    self.special_metric_list = [consts.PSI]
    self.metrics = None
    self.round_num = 6
    self.validate_metric = {}
    self.train_metric = {}

    # where to call metric computations
    self.metric_interface: MetricInterface = None

    self.psi_train_scores, self.psi_validate_scores = None, None
    self.psi_train_labels, self.psi_validate_labels = None, None
def test_precision(self):
    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_predict = np.array(
        [0.57, 0.70, 0.25, 0.30, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
    # expected per-threshold precision pairs for the two classes;
    # consecutive cuts that fall between the same two scores repeat the pair
    true_score = (
        [[0.5, 1.0]] * 12
        + [[0.5555555555555556, 1.0]] * 11
        + [[0.625, 1.0]] * 11
        + [[0.7142857142857143, 1.0]] * 11
        + [[0.8333333333333334, 1.0]] * 11
        + [[1.0, 1.0]] * 11
        + [[1.0, 0.7142857142857143]] * 11
        + [[1.0, 0.625]] * 11
        + [[1.0, 0.5555555555555556]] * 11
        + [[0.0, 0.5]]
    )
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.BINARY, pos_label=1))
    rs = eva.precision(y_true, y_predict)
    self.assertListEqual(true_score, rs[0])
def test_median_absolute_error(self):
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.REGRESSION, pos_label=1))
    y_true = [3, -0.5, 2, 7]
    y_pred = [2.5, 0.0, 2, 8]
    self.assertFloatEqual(eva.median_absolute_error(y_true, y_pred), 0.5)
    y_true = [3, -0.6, 2, 7]
    y_pred = [2.5, 0.0, 2, 8]
    self.assertFloatEqual(eva.median_absolute_error(y_true, y_pred), 0.55)
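# Cross-check sketch (hypothetical helper, not part of the original suite):
# the expected values above follow directly from the metric's definition,
# the median of the element-wise absolute errors.
def _sketch_median_absolute_error():
    import numpy as np

    y_true = np.array([3, -0.6, 2, 7])
    y_pred = np.array([2.5, 0.0, 2, 8])
    errors = np.abs(y_true - y_pred)  # [0.5, 0.6, 0.0, 1.0]
    # even sample count -> mean of the two middle values: (0.5 + 0.6) / 2
    assert np.isclose(np.median(errors), 0.55)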
def test_auc(self):
    y_true = np.array([0, 0, 1, 1])
    y_predict = np.array([0.1, 0.4, 0.35, 0.8])
    ground_true_auc = 0.75
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.BINARY, pos_label=1))
    auc = eva.auc(y_true, y_predict)
    auc = round(auc, 2)
    self.assertFloatEqual(auc, ground_true_auc)
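# Cross-check sketch (hypothetical helper, not part of the original suite):
# AUC can be read as the fraction of (positive, negative) score pairs ranked
# correctly, ties counting 0.5; 3 of the 4 pairs above are ordered correctly.
def _sketch_auc_as_pair_ranking():
    import numpy as np

    y_true = np.array([0, 0, 1, 1])
    y_score = np.array([0.1, 0.4, 0.35, 0.8])
    pos = y_score[y_true == 1]
    neg = y_score[y_true == 0]
    wins = [1.0 if p > n else 0.5 if p == n else 0.0 for p in pos for n in neg]
    assert np.isclose(np.mean(wins), 0.75)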
def test_recall(self):
    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_predict = np.array(
        [0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
    # expected per-threshold recall pairs for the two classes;
    # consecutive cuts that fall between the same two scores repeat the pair
    true_score = (
        [[1.0, 0.0]] * 12
        + [[1.0, 0.2]] * 11
        + [[1.0, 0.4]] * 11
        + [[1.0, 0.6]] * 11
        + [[1.0, 0.8]] * 11
        + [[1.0, 1.0]] * 11
        + [[0.6, 1.0]] * 11
        + [[0.4, 1.0]] * 11
        + [[0.2, 1.0]] * 11
        + [[0.0, 1.0]]
    )
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.BINARY, pos_label=1))
    rs = eva.recall(y_true, y_predict)
    self.assertListEqual(rs[0], true_score)
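# Illustrative sketch (hypothetical helper, not part of the original suite):
# each entry of the expected curves in test_precision and test_recall is a
# per-threshold score; at one threshold the values reduce to the usual
# confusion-matrix ratios for the positive class.
def _sketch_precision_recall_at_threshold(threshold=0.5):
    import numpy as np

    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_score = np.array(
        [0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
    y_hat = (y_score > threshold).astype(int)
    tp = int(np.sum((y_hat == 1) & (y_true == 1)))
    fp = int(np.sum((y_hat == 1) & (y_true == 0)))
    fn = int(np.sum((y_hat == 0) & (y_true == 1)))
    precision = tp / (tp + fp)  # 5 / 5 = 1.0 at threshold 0.5
    recall = tp / (tp + fn)     # 5 / 5 = 1.0 at threshold 0.5
    return precision, recall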
def test_mean_squared_error(self):
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.REGRESSION, pos_label=1))
    y_true = [3, -0.5, 2, 7]
    y_pred = [2.5, 0.0, 2, 8]
    self.assertFloatEqual(eva.mean_squared_error(y_true, y_pred), 0.375)
    y_true = [[0.5, 1], [-1, 1], [7, -6]]
    y_pred = [[0, 2], [-1, 2], [8, -5]]
    self.assertFloatEqual(
        np.around(eva.mean_squared_error(y_true, y_pred), 4), 0.7083)
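# Cross-check sketch (hypothetical helper, not part of the original suite):
# the single-output expectation is just the mean of squared residuals.
def _sketch_mean_squared_error():
    import numpy as np

    y_true = np.array([3, -0.5, 2, 7])
    y_pred = np.array([2.5, 0.0, 2, 8])
    # (0.25 + 0.25 + 0.0 + 1.0) / 4 = 0.375
    assert np.isclose(np.mean((y_true - y_pred) ** 2), 0.375)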
def test_mean_squared_log_error(self):
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.REGRESSION, pos_label=1))
    y_true = [3, 5, 2.5, 7]
    y_pred = [2.5, 5, 4, 8]
    self.assertFloatEqual(
        np.around(eva.mean_squared_log_error(y_true, y_pred), 4), 0.0397)
    y_true = [[0.5, 1], [1, 2], [7, 6]]
    y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
    self.assertFloatEqual(
        np.around(eva.mean_squared_log_error(y_true, y_pred), 4), 0.0442)
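# Cross-check sketch (hypothetical helper, not part of the original suite):
# MSLE is the mean squared difference of log1p-transformed targets and
# predictions.
def _sketch_mean_squared_log_error():
    import numpy as np

    y_true = np.array([3, 5, 2.5, 7])
    y_pred = np.array([2.5, 5, 4, 8])
    msle = np.mean((np.log1p(y_true) - np.log1p(y_pred)) ** 2)
    assert round(float(msle), 4) == 0.0397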
def test_explained_variance(self):
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.REGRESSION, pos_label=1))
    y_true = [3, -0.5, 2, 7]
    y_pred = [2.5, 0.0, 2, 8]
    self.assertFloatEqual(
        np.around(eva.explained_variance(y_true, y_pred), 4), 0.9572)
    y_true = [[0.5, 1], [-1, 1], [7, -6]]
    y_pred = [[0, 2], [-1, 2], [8, -5]]
    self.assertFloatEqual(
        np.around(eva.explained_variance(y_true, y_pred), 4), 0.9839)
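# Cross-check sketch (hypothetical helper, not part of the original suite):
# explained variance compares the variance of the residuals against the
# variance of the targets.
def _sketch_explained_variance():
    import numpy as np

    y_true = np.array([3, -0.5, 2, 7])
    y_pred = np.array([2.5, 0.0, 2, 8])
    ev = 1 - np.var(y_true - y_pred) / np.var(y_true)
    assert round(float(ev), 4) == 0.9572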
def test_multi_accuracy(self):
    y_true = np.array(
        [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4])
    y_predict = [
        1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4
    ]
    gt_score = 0.6
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.MULTY, pos_label=1))
    acc = eva.accuracy(y_true, y_predict)
    self.assertFloatEqual(acc, gt_score)
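# Cross-check sketch (hypothetical helper, not part of the original suite):
# multiclass accuracy is the fraction of exact label matches, here 12 of 20.
def _sketch_multi_accuracy():
    import numpy as np

    y_true = np.array(
        [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4])
    y_pred = np.array(
        [1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4])
    assert np.isclose(np.mean(y_true == y_pred), 0.6)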
def test_ks(self):
    y_true = np.array(
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0])
    y_predict = np.array([
        0.42, 0.73, 0.55, 0.37, 0.57, 0.70, 0.25, 0.23, 0.46, 0.62, 0.76,
        0.46, 0.55, 0.56, 0.56, 0.38, 0.37, 0.73, 0.77, 0.21, 0.39
    ])
    ground_true_ks = 0.75
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.BINARY, pos_label=1))
    ks, fpr, tpr, score_threshold, cuts = eva.ks(y_true, y_predict)
    ks = round(ks, 2)
    self.assertFloatEqual(ks, ground_true_ks)
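# Cross-check sketch (hypothetical helper, assuming scikit-learn is available
# in the test environment): the KS statistic is the maximum gap between the
# TPR and FPR curves, which for the data above peaks at 0.75.
def _sketch_ks_from_roc():
    import numpy as np
    from sklearn.metrics import roc_curve

    y_true = np.array(
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0])
    y_score = np.array([
        0.42, 0.73, 0.55, 0.37, 0.57, 0.70, 0.25, 0.23, 0.46, 0.62, 0.76,
        0.46, 0.55, 0.56, 0.56, 0.38, 0.37, 0.73, 0.77, 0.21, 0.39
    ])
    fpr, tpr, _ = roc_curve(y_true, y_score)
    assert round(float(np.max(tpr - fpr)), 2) == 0.75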
def test_multi_precision(self):
    y_true = np.array([
        1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5
    ])
    y_predict = np.array([
        1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4,
        6, 6, 6, 6, 6
    ])
    gt_score = [0.33333333, 0.25, 0.8, 1., 0., 0.]
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.MULTY, pos_label=1))
    precision_scores, _ = eva.precision(y_true, y_predict)
    for a, b in zip(precision_scores, gt_score):
        assert round(a, 2) == round(b, 2)
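# Cross-check sketch (hypothetical helper, not part of the original suite):
# per-class precision is the share of correct predictions among all
# predictions of that class; a class never predicted (label 5 here) scores 0.
def _sketch_per_class_precision():
    import numpy as np

    y_true = np.array([1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5)
    y_pred = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2,
                       4, 4, 4, 4, 4, 6, 6, 6, 6, 6])
    labels = np.unique(np.concatenate([y_true, y_pred]))  # [1 2 3 4 5 6]
    prec = [np.sum((y_pred == c) & (y_true == c)) / max(np.sum(y_pred == c), 1)
            for c in labels]
    assert [round(float(p), 2) for p in prec] == [0.33, 0.25, 0.8, 1.0, 0.0, 0.0]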
def test_multi_recall(self):
    y_true = np.array([
        1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5
    ])
    y_predict = np.array([
        1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4,
        6, 6, 6, 6, 6
    ])
    gt_score = [0.4, 0.2, 0.8, 1, 0, 0]
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.MULTY, pos_label=1))
    recall_scores, _ = eva.recall(y_true, y_predict)
    for a, b in zip(recall_scores, gt_score):
        assert round(a, 2) == round(b, 2)
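# Cross-check sketch (hypothetical helper, not part of the original suite):
# per-class recall divides by the true count of each class instead; label 6
# never occurs in y_true, so it scores 0.
def _sketch_per_class_recall():
    import numpy as np

    y_true = np.array([1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5)
    y_pred = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2,
                       4, 4, 4, 4, 4, 6, 6, 6, 6, 6])
    labels = np.unique(np.concatenate([y_true, y_pred]))
    rec = [np.sum((y_pred == c) & (y_true == c)) / max(np.sum(y_true == c), 1)
           for c in labels]
    assert [round(float(r), 2) for r in rec] == [0.4, 0.2, 0.8, 1.0, 0.0, 0.0]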
def __init__(self):
    super().__init__()
    self.model_param = EvaluateParam()
    self.eval_results = defaultdict(list)
    self.save_single_value_metric_list = [
        consts.AUC,
        consts.EXPLAINED_VARIANCE,
        consts.MEAN_ABSOLUTE_ERROR,
        consts.MEAN_SQUARED_ERROR,
        consts.MEAN_SQUARED_LOG_ERROR,
        consts.MEDIAN_ABSOLUTE_ERROR,
        consts.R2_SCORE,
        consts.ROOT_MEAN_SQUARED_ERROR,
    ]
    self.save_curve_metric_list = [
        consts.KS,
        consts.ROC,
        consts.LIFT,
        consts.GAIN,
        consts.PRECISION,
        consts.RECALL,
        consts.ACCURACY,
    ]
    self.metrics = None
    self.round_num = 6
    self.validate_metric = {}
    self.train_metric = {}
def __init__(self):
    super().__init__()
    self.model_param = EvaluateParam()
    self.eval_results = defaultdict(list)
    self.save_single_value_metric_list = [
        consts.AUC,
        consts.EXPLAINED_VARIANCE,
        consts.MEAN_ABSOLUTE_ERROR,
        consts.MEAN_SQUARED_ERROR,
        consts.MEAN_SQUARED_LOG_ERROR,
        consts.MEDIAN_ABSOLUTE_ERROR,
        consts.R2_SCORE,
        consts.ROOT_MEAN_SQUARED_ERROR,
    ]
    self.save_curve_metric_list = [
        consts.KS,
        consts.ROC,
        consts.LIFT,
        consts.GAIN,
        consts.PRECISION,
        consts.RECALL,
        consts.ACCURACY,
    ]
    self.regression_support_func = [
        consts.EXPLAINED_VARIANCE,
        consts.MEAN_ABSOLUTE_ERROR,
        consts.MEAN_SQUARED_ERROR,
        consts.MEAN_SQUARED_LOG_ERROR,
        consts.MEDIAN_ABSOLUTE_ERROR,
        consts.R2_SCORE,
        consts.ROOT_MEAN_SQUARED_ERROR,
    ]
    self.binary_classification_support_func = [
        consts.AUC,
        consts.KS,
        consts.LIFT,
        consts.GAIN,
        consts.ACCURACY,
        consts.PRECISION,
        consts.RECALL,
        consts.ROC,
    ]
    self.multi_classification_support_func = [
        consts.ACCURACY,
        consts.PRECISION,
        consts.RECALL,
    ]
    self.metrics = {
        consts.BINARY: self.binary_classification_support_func,
        consts.MULTY: self.multi_classification_support_func,
        consts.REGRESSION: self.regression_support_func,
    }
    self.round_num = 6
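# Illustrative sketch (hypothetical, not from the module): a mapping like
# self.metrics above is a natural place to validate a user's requested
# metrics before an evaluation runs; the names below are plain-string
# stand-ins for the consts values.
def _sketch_filter_requested_metrics(requested, eval_type, supported):
    # split the request into metrics the eval_type supports and the rest
    allowed = [m for m in requested if m in supported[eval_type]]
    rejected = [m for m in requested if m not in supported[eval_type]]
    return allowed, rejected

# Usage sketch:
# _sketch_filter_requested_metrics(
#     ["auc", "ks", "mean_squared_error"], "binary",
#     {"binary": ["auc", "ks", "lift", "gain", "accuracy", "precision",
#                 "recall", "roc"]},
# )  # -> (["auc", "ks"], ["mean_squared_error"])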
def test_bin_accuracy(self):
    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_predict = np.array(
        [0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
    # expected accuracy at each threshold cut; consecutive cuts that fall
    # between the same two scores repeat the value
    gt_score = (
        [0.5] * 12 + [0.6] * 11 + [0.7] * 11 + [0.8] * 11 + [0.9] * 11
        + [1.0] * 11 + [0.8] * 11 + [0.7] * 11 + [0.6] * 11
    )
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.BINARY, pos_label=1))
    acc, cuts, thresholds = eva.accuracy(y_true, y_predict)
    self.assertListEqual(acc, gt_score)
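# Cross-check sketch (hypothetical helper, not part of the original suite):
# each expected value above is the accuracy after thresholding the scores at
# one cut; at 0.5 every sample is classified correctly, matching the 1.0
# plateau in gt_score.
def _sketch_accuracy_at_threshold(threshold=0.5):
    import numpy as np

    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_score = np.array(
        [0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
    y_hat = (y_score > threshold).astype(int)
    return float(np.mean(y_hat == y_true))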
def __init__(self):
    super().__init__()
    self.model_param = EvaluateParam()
    self.eval_results = defaultdict(list)
    self.save_single_value_metric_list = [
        consts.AUC,
        consts.EXPLAINED_VARIANCE,
        consts.MEAN_ABSOLUTE_ERROR,
        consts.MEAN_SQUARED_ERROR,
        consts.MEAN_SQUARED_LOG_ERROR,
        consts.MEDIAN_ABSOLUTE_ERROR,
        consts.R2_SCORE,
        consts.ROOT_MEAN_SQUARED_ERROR,
        consts.JACCARD_SIMILARITY_SCORE,
        consts.ADJUSTED_RAND_SCORE,
        consts.FOWLKES_MALLOWS_SCORE,
        consts.DAVIES_BOULDIN_INDEX,
    ]
    self.special_metric_list = [consts.PSI]
    self.clustering_intra_metric_list = [
        consts.DAVIES_BOULDIN_INDEX,
        consts.DISTANCE_MEASURE,
    ]
    self.metrics = None
    self.round_num = 6
    self.eval_type = None

    # where to call metric computations
    self.metric_interface: MetricInterface = None

    self.psi_train_scores, self.psi_validate_scores = None, None
    self.psi_train_labels, self.psi_validate_labels = None, None

    # multi unfold setting
    self.need_unfold_multi_result = False

    # summaries
    self.metric_summaries = {}
def test_lift(self):
    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_predict = np.array(
        [0.57, 0.70, 0.25, 0.30, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
    # expected per-threshold lift pairs for the two classes; consecutive
    # cuts that fall between the same two scores repeat the pair
    lift_y_true = (
        [[1.0, 2.0]] * 12
        + [[1.1111111111111112, 2.0]] * 11
        + [[1.25, 2.0]] * 11
        + [[1.4285714285714286, 2.0]] * 11
        + [[1.6666666666666667, 2.0]] * 11
        + [[2.0, 2.0]] * 11
        + [[2.0, 1.4285714285714286]] * 11
        + [[2.0, 1.25]] * 11
        + [[2.0, 1.1111111111111112]] * 11
    )
    # matching per-threshold predicted-class proportions
    lift_x_true = (
        [[1.0, 0.0]] * 12
        + [[0.9, 0.1]] * 11
        + [[0.8, 0.2]] * 11
        + [[0.7, 0.3]] * 11
        + [[0.6, 0.4]] * 11
        + [[0.5, 0.5]] * 11
        + [[0.3, 0.7]] * 11
        + [[0.2, 0.8]] * 11
        + [[0.1, 0.9]] * 11
    )
    eva = Evaluation()
    eva._init_model(EvaluateParam(eval_type=consts.BINARY, pos_label=1))
    lift_y, lift_x, thresholds = eva.lift(y_true, y_predict)
    self.assertListEqual(lift_y, lift_y_true)
    self.assertListEqual(lift_x, lift_x_true)
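# Cross-check sketch (hypothetical helper, not part of the original suite):
# lift is precision relative to the base positive rate; with half the
# samples positive, a perfectly precise threshold doubles the base rate,
# giving the 2.0 plateau in lift_y_true.
def _sketch_lift_at_threshold(threshold=0.5):
    import numpy as np

    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_score = np.array(
        [0.57, 0.70, 0.25, 0.30, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
    base_rate = np.mean(y_true)  # 0.5
    y_hat = (y_score > threshold).astype(int)
    precision = np.sum((y_hat == 1) & (y_true == 1)) / np.sum(y_hat == 1)
    return float(precision / base_rate)  # 2.0 at threshold 0.5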