def test_auc_perf_metric(self):
    np.random.seed(1)
    groundtruths = np.random.normal(0, 1.0, [4, 10]) + np.tile(np.array([1, 2, 3, 4]), [10, 1]).T
    predictions = [1, 2, 3, 4]
    metric = AucPerfMetric(groundtruths, predictions)
    result = metric.evaluate()
    self.assertAlmostEqual(result['score'], 0.9375, places=6)
    self.assertAlmostEqual(result['AUC_BW'], 0.9999999999999999, places=6)
    self.assertAlmostEqual(result['AUC_DS'], 0.9375, places=6)
    self.assertAlmostEqual(result['CC_0'], 1.0, places=6)
    self.assertAlmostEqual(result['THR'], 3.0, places=6)
def test_auc_metrics_performance(self):
    mat_filepath = VmafConfig.test_resource_path('data_Toyama.mat')
    mat_dict = scipy.io.loadmat(mat_filepath)
    results = AucPerfMetric._metrics_performance(mat_dict['objScoDif'], mat_dict['signif'])
    self.assertAlmostEqual(np.mean(results['AUC_DS']), 0.69767003960902052, places=6)
    self.assertAlmostEqual(np.mean(results['AUC_BW']), 0.94454700301894534, places=6)
    self.assertAlmostEqual(np.mean(results['CC_0']), 0.88105386206276415, places=6)
    self.assertAlmostEqual(np.mean(results['THR']), 6.2392849606450556, places=6)
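# Note on the fixture above (an assumption, not verified here): data_Toyama.mat appears
# to carry precomputed pairwise inputs for the AUC-style performance analysis, where
# 'objScoDif' holds objective score differences for stimulus pairs and 'signif' marks
# whether the corresponding subjective differences are statistically significant.
# _metrics_performance returns one value per objective metric for each key
# ('AUC_DS', 'AUC_BW', 'CC_0', 'THR'), which the test averages with np.mean.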
def test_auc_perf_multiple_metrics(self):
    np.random.seed(1)
    groundtruths = np.random.normal(0, 1.0, [4, 10]) + np.tile(np.array([1, 2, 3, 4]), [10, 1]).T
    predictions = [[1, 2, 3, 4], [3, 1, 2, 4]]
    metric = AucPerfMetric(groundtruths, predictions)
    result = metric.evaluate()
    self.assertAlmostEqual(result['score'][0], 0.9999999999999999, places=6)
    self.assertAlmostEqual(result['AUC_BW'][0], 0.9999999999999999, places=6)
    self.assertAlmostEqual(result['AUC_DS'][0], 0.9375, places=6)
    self.assertAlmostEqual(result['CC_0'][0], 1.0, places=6)
    self.assertAlmostEqual(result['THR'][0], 1.0, places=6)
    self.assertAlmostEqual(result['score'][1], 0.8125, places=6)
    self.assertAlmostEqual(result['AUC_BW'][1], 0.8125, places=6)
    self.assertAlmostEqual(result['AUC_DS'][1], 0.6250, places=6)
    self.assertAlmostEqual(result['CC_0'][1], 0.75, places=6)
    self.assertAlmostEqual(result['THR'][1], 2, places=6)
    self.assertAlmostEqual(result['pDS_DL'][0, 1], 0.02746864, places=6)
    self.assertAlmostEqual(result['pBW_DL'][0, 1], 0.06136883, places=6)
    self.assertAlmostEqual(result['pCC0_b'][0, 1], 0.03250944, places=6)
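# A minimal usage sketch (not part of the test suite), restating what the tests above
# exercise: groundtruths is a 2D array of raw per-subject scores (one row per stimulus)
# and predictions is either a single list of objective scores or a list of such lists.
# With multiple prediction lists, each returned key ('score', 'AUC_BW', 'AUC_DS',
# 'CC_0', 'THR') is indexed per metric, and pairwise significance matrices such as
# 'pDS_DL', 'pBW_DL', 'pCC0_b' compare metric i against metric j.
#
#     groundtruths = np.random.normal(0, 1.0, [4, 10]) + np.tile(np.array([1, 2, 3, 4]), [10, 1]).T
#     result = AucPerfMetric(groundtruths, [[1, 2, 3, 4], [3, 1, 2, 4]]).evaluate()
#     first_metric_auc_ds = result['AUC_DS'][0]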
def get_stats(cls, ys_label, ys_label_pred, **kwargs):

    # cannot have None
    assert all(x is not None for x in ys_label)
    assert all(x is not None for x in ys_label_pred)

    # RMSE
    rmse = RmsePerfMetric(ys_label, ys_label_pred) \
        .evaluate(enable_mapping=True)['score']

    # spearman
    srcc = SrccPerfMetric(ys_label, ys_label_pred) \
        .evaluate(enable_mapping=True)['score']

    # pearson
    pcc = PccPerfMetric(ys_label, ys_label_pred) \
        .evaluate(enable_mapping=True)['score']

    # kendall
    kendall = KendallPerfMetric(ys_label, ys_label_pred) \
        .evaluate(enable_mapping=True)['score']

    stats = {'RMSE': rmse,
             'SRCC': srcc,
             'PCC': pcc,
             'KENDALL': kendall,
             'ys_label': list(ys_label),
             'ys_label_pred': list(ys_label_pred)}

    ys_label_raw = kwargs['ys_label_raw'] if 'ys_label_raw' in kwargs else None

    if ys_label_raw is not None:
        try:
            # AUC
            auc = AucPerfMetric(ys_label_raw, ys_label_pred) \
                .evaluate()['score']
            stats['AUC'] = auc
        except TypeError:
            # AUC would not work with dictionary-style dataset
            stats['AUC'] = float('nan')

        try:
            # ResPow
            respow = ResolvingPowerPerfMetric(ys_label_raw, ys_label_pred) \
                .evaluate()['score']
            stats['ResPow'] = respow
        except TypeError:
            # ResPow would not work with dictionary-style dataset
            stats['ResPow'] = float('nan')

    if 'ys_label_stddev' in kwargs and kwargs['ys_label_stddev'] is not None:
        stats['ys_label_stddev'] = kwargs['ys_label_stddev']

    return stats
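# A minimal usage sketch (not part of the library code), assuming get_stats is exposed as
# a classmethod on a model/mixin class; the class name below is hypothetical. ys_label and
# ys_label_pred are per-stimulus subjective and predicted scores; ys_label_raw (optional,
# passed via kwargs) carries raw per-subject scores so that AUC and resolving power can be
# computed in addition to RMSE/SRCC/PCC/KENDALL.
#
#     stats = SomeRegressorMixinSubclass.get_stats(
#         ys_label=[1.0, 2.0, 3.0, 4.0],
#         ys_label_pred=[1.1, 1.9, 3.2, 3.8],
#         ys_label_raw=groundtruths,  # hypothetical 2D array of raw per-subject scores
#     )
#     print(stats['SRCC'], stats['PCC'], stats['RMSE'])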