def test_compute_for_regression(self):
    """Regression report should include MAE, MSE, RMSE and R2 metrics."""
    y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    y_pred = np.array([0.01, 0.2, 0.1, 0.1, 0.8, 0.8, 0.8, 0.8])
    info = AdditionalMetrics.compute(y_true, y_pred, None, REGRESSION)
    reported = list(info["max_metrics"]["Metric"].values)
    for metric_name in ["MAE", "MSE", "RMSE", "R2"]:
        self.assertTrue(metric_name in reported)
def get_additional_metrics(self):
    """Compute (and cache) extra metrics from out-of-folds predictions.

    Uses the processed training target and the model's OOF predictions;
    for binary classification the decision threshold is cached as well.
    Returns the cached result on subsequent calls.
    """
    if self._additional_metrics is not None:
        return self._additional_metrics

    # 'target' - the target after processing used for model training
    # 'prediction' - out of folds predictions of the model
    oof = self.get_out_of_folds()
    prediction_cols = [c for c in oof.columns if "prediction" in c]
    target_cols = [c for c in oof.columns if "target" in c]
    target = oof[target_cols]

    if self._ml_task == MULTICLASS_CLASSIFICATION:
        # map raw per-class prediction columns back to label predictions
        oof_preds = self.preprocessings[0].prepare_target_labels(
            oof[prediction_cols].values)
    else:
        oof_preds = oof[prediction_cols]

    sample_weight = (
        oof["sample_weight"] if "sample_weight" in oof.columns else None
    )

    self._additional_metrics = AdditionalMetrics.compute(
        target, oof_preds, sample_weight, self._ml_task)
    if self._ml_task == BINARY_CLASSIFICATION:
        self._threshold = float(self._additional_metrics["threshold"])
    return self._additional_metrics
def get_additional_metrics(self):
    """Compute (and cache) extra metrics for the Ensemble from OOF predictions.

    Returns the dict produced by ``AdditionalMetrics.compute``; for binary
    classification the decision threshold is cached on the instance as well.
    """
    if self._additional_metrics is None:
        logger.debug("Get additional metrics for Ensemble")
        # 'target' - the target after processing used for model training
        # 'prediction' - out of folds predictions of the model
        oof_predictions = self.get_out_of_folds()
        prediction_cols = [
            c for c in oof_predictions.columns if "prediction" in c
        ]
        target_cols = [c for c in oof_predictions.columns if "target" in c]

        # Copy so that adding the "label" column below does not mutate
        # oof_predictions through a view (pandas SettingWithCopy hazard).
        oof_preds = oof_predictions[prediction_cols].copy()
        if self._ml_task == MULTICLASS_CLASSIFICATION:
            cols = oof_preds.columns.tolist()
            # Column names look like "prediction_<label>"; strip the
            # 11-character "prediction_" prefix to recover the class label.
            labels = {i: v[11:] for i, v in enumerate(cols)}

            oof_preds["label"] = np.argmax(
                np.array(oof_preds[prediction_cols]), axis=1
            )
            oof_preds["label"] = oof_preds["label"].map(labels)

        sample_weight = None
        if "sample_weight" in oof_predictions.columns:
            sample_weight = oof_predictions["sample_weight"]

        self._additional_metrics = AdditionalMetrics.compute(
            oof_predictions[target_cols], oof_preds, sample_weight,
            self._ml_task)
        if self._ml_task == BINARY_CLASSIFICATION:
            self._threshold = float(self._additional_metrics["threshold"])
    return self._additional_metrics
def test_compute_constant_preds(self):
    """Constant predictions must not yield perfect f1/mcc scores."""
    target = np.array([0, 0, 1, 1, 0, 0, 0, 0])
    pred = np.array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
    # NOTE(review): other call sites pass sample_weight before the task
    # constant — confirm this 3-argument signature of compute().
    info = AdditionalMetrics.compute(target, pred, BINARY_CLASSIFICATION)
    details = info["metric_details"]
    max_metrics = info["max_metrics"]
    conf = info["confusion_matrix"]
    self.assertTrue(max_metrics["f1"]["score"] < 1)
    self.assertTrue(max_metrics["mcc"]["score"] < 1)
    # previously fetched but unchecked; assert them, matching sibling tests
    self.assertTrue(details is not None)
    self.assertTrue(conf is not None)
def test_compute_f1(self):
    """Perfectly separable predictions should reach an f1 score of 1."""
    target = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    pred = np.array([0.01, 0.2, 0.1, 0.1, 0.8, 0.8, 0.8, 0.8])
    info = AdditionalMetrics.compute(target, pred, BINARY_CLASSIFICATION)
    details, max_metrics, conf = (
        info["metric_details"],
        info["max_metrics"],
        info["confusion_matrix"],
    )
    self.assertEqual(max_metrics["f1"]["score"], 1)
    self.assertTrue(details is not None)
    self.assertTrue(conf is not None)
def test_compute(self):
    """Confusion matrix diagonal should count the correct predictions."""
    y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    y_pred = np.array([0.1, 0.8, 0.1, 0.1, 0.8, 0.1, 0.8, 0.8])
    info = AdditionalMetrics.compute(y_true, y_pred, BINARY_CLASSIFICATION)
    details = info["metric_details"]
    max_metrics = info["max_metrics"]
    conf = info["confusion_matrix"]
    # 3 true negatives and 3 true positives given one error per class
    self.assertEqual(conf.iloc[0, 0], 3)
    self.assertEqual(conf.iloc[1, 1], 3)
    self.assertTrue(details is not None)
    self.assertTrue(max_metrics is not None)
def get_additional_metrics(self):
    """Compute (and cache) extra metrics for the Ensemble from OOF predictions.

    Returns the dict produced by ``AdditionalMetrics.compute``; for binary
    classification the decision threshold is cached on the instance as well.
    """
    if self._additional_metrics is None:
        logger.debug("Get additional metrics for Ensemble")
        # 'target' - the target after processing used for model training
        # 'prediction' - out of folds predictions of the model
        oof_predictions = self.get_out_of_folds()
        prediction_cols = [
            c for c in oof_predictions.columns if "prediction" in c
        ]
        target_cols = [c for c in oof_predictions.columns if "target" in c]

        # Copy so that adding the "label" column below does not mutate
        # oof_predictions through a view (pandas SettingWithCopy hazard).
        oof_preds = oof_predictions[prediction_cols].copy()
        if self._ml_task == MULTICLASS_CLASSIFICATION:
            cols = oof_preds.columns.tolist()
            # Column names look like "prediction_<label>"; strip the
            # 11-character "prediction_" prefix to recover the class label.
            labels = {i: v[11:] for i, v in enumerate(cols)}

            oof_preds["label"] = np.argmax(
                np.array(oof_preds[prediction_cols]), axis=1
            )
            oof_preds["label"] = oof_preds["label"].map(labels)

        self._additional_metrics = AdditionalMetrics.compute(
            oof_predictions[target_cols],
            oof_preds,
            self._ml_task,
        )
        if self._ml_task == BINARY_CLASSIFICATION:
            self._threshold = float(self._additional_metrics["threshold"])
    return self._additional_metrics