# Imports assumed by the snippets below; `compute_sed_eval_metrics`, `logger`
# and the `metadata` fixture come from the surrounding test and training code
# and are not defined here.
import numpy as np
import pandas as pd
import pytest

from psds_eval import PSDSEval, PSDSEvalError


def test_compute_f_score_no_gt():
    """Test PSDSEvalError raised if gt is missing"""
    det_t, _ = read_gt_and_det()
    psds_eval = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3)
    with pytest.raises(PSDSEvalError,
                       match="Ground Truth must be provided"):
        psds_eval.compute_macro_f_score(det_t)
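
# The tests in this section rely on a `read_gt_and_det` helper that loads
# example detection and ground truth tables. A minimal sketch is given below;
# the file paths are assumptions for illustration, not the real fixture files.
def read_gt_and_det():
    """Read detections and ground truth into dataframes with the
    filename/onset/offset/event_label columns expected by PSDSEval."""
    det_t = pd.read_csv("data/baseline_detections.tsv", sep="\t")
    gt_t = pd.read_csv("data/ground_truth.tsv", sep="\t")
    return det_t, gt_t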

def compute_metrics(predictions, gtruth_df, meta_df):
    # Event-based macro F1 from sed_eval.
    events_metric = compute_sed_eval_metrics(predictions, gtruth_df)
    macro_f1_event = events_metric.results_class_wise_average_metrics()[
        'f_measure']['f_measure']
    # Intersection-based macro F1 from psds_eval.
    dtc_threshold, gtc_threshold, cttc_threshold = 0.5, 0.5, 0.3
    psds = PSDSEval(dtc_threshold, gtc_threshold, cttc_threshold,
                    ground_truth=gtruth_df, metadata=meta_df)
    psds_macro_f1, psds_f1_classes = psds.compute_macro_f_score(predictions)
    logger.info(
        f"F1_score (psds_eval) accounting cross triggers: {psds_macro_f1}")
    return macro_f1_event, psds_macro_f1

def test_compute_f_score_no_det(metadata):
    """Test that F-scores are NaN when there are no detections"""
    det_t, gt_t = read_gt_and_det()
    # Keep the column structure but drop all detections.
    det_t = pd.DataFrame(columns=det_t.columns)
    psds_eval = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3, ground_truth=gt_t,
                         metadata=metadata)
    f_avg, per_class_f = psds_eval.compute_macro_f_score(det_t)
    per_class_f_array = np.fromiter(per_class_f.values(), dtype=float)
    assert np.isnan(f_avg), "The average F-score was incorrect"
    assert np.all(np.isnan(per_class_f_array)), "Per-class F-score incorrect"

# Variant of compute_metrics that also returns the sed_eval metric object.
def compute_metrics(predictions, gtruth_df, meta_df):
    events_metric, _ = compute_sed_eval_metrics(predictions, gtruth_df)
    macro_f1_event = events_metric.results_class_wise_average_metrics()[
        'f_measure']['f_measure']
    dtc_threshold, gtc_threshold, cttc_threshold = 0.5, 0.5, 0.3
    psds = PSDSEval(dtc_threshold, gtc_threshold, cttc_threshold,
                    ground_truth=gtruth_df, metadata=meta_df)
    psds_macro_f1, psds_f1_classes = psds.compute_macro_f_score(predictions)
    return events_metric, psds_macro_f1, macro_f1_event
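
# A minimal usage sketch for compute_metrics above, assuming tab-separated
# files in the usual DCASE format (filename, onset, offset, event_label) and
# a durations table (filename, duration). The file paths are hypothetical.
def example_compute_metrics_usage():
    predictions = pd.read_csv("predictions.tsv", sep="\t")
    gtruth_df = pd.read_csv("groundtruth.tsv", sep="\t")
    meta_df = pd.read_csv("durations.tsv", sep="\t")
    events_metric, psds_macro_f1, macro_f1_event = compute_metrics(
        predictions, gtruth_df, meta_df)
    print(f"Event-based macro F1 (sed_eval): {macro_f1_event}")
    print(f"Intersection-based macro F1 (psds_eval): {psds_macro_f1}")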

def test_compute_f_score_gt_later(metadata):
    """Test computation is correct when gt is not passed at init time"""
    det_t, gt_t = read_gt_and_det()
    psds_eval = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3)
    psds_eval.set_ground_truth(gt_t, metadata)
    f_avg, per_class_f = psds_eval.compute_macro_f_score(det_t)
    expected_class_f = [
        0.7752161383285303, 0.7421383647798742, 0.548936170212766,
        0.44747612551159616, 0.6548881036513545, 0.7663551401869159,
        0.9405405405405406, 0.6978021978021978, 0.7102941176470589,
        0.8427672955974843
    ]
    assert f_avg == pytest.approx(0.712641), \
        "The average F-score was incorrect"
    for exp_f, class_f in zip(expected_class_f, per_class_f.values()):
        assert exp_f == pytest.approx(class_f), "Per-class F-score incorrect"

def test_compute_f_score_gt_later(metadata):
    """Test computation is correct when gt is not passed at init time"""
    det_t, gt_t = read_gt_and_det()
    psds_eval = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3)
    psds_eval.set_ground_truth(gt_t, metadata)
    f_avg, per_class_f = psds_eval.compute_macro_f_score(det_t)
    expected_class_f = [
        0.7752161383285303, 0.7468354430379747, 0.548936170212766,
        0.39943342776203966, 0.6548881036513545, 0.7663551401869159,
        0.9405405405405406, 0.6978021978021978, 0.7105553512320706,
        0.8427672955974843
    ]
    assert f_avg == pytest.approx(0.7083329808351875), \
        "The average F-score was incorrect"
    for exp_f, class_f in zip(expected_class_f, per_class_f.values()):
        assert exp_f == pytest.approx(class_f), "Per-class F-score incorrect"

def compute_per_intersection_macro_f1(
    prediction_dfs,
    ground_truth_file,
    durations_file,
    dtc_threshold=0.5,
    gtc_threshold=0.5,
    cttc_threshold=0.3,
):
    """Compute the intersection-based macro F1-score, using the default
    PSDSEval thresholds.

    Args:
        prediction_dfs: dict, a dictionary with thresholds as keys and
            prediction dataframes as values
        ground_truth_file: str, path to the tab-separated ground truth file
        durations_file: str, path to the tab-separated file of audio durations
        dtc_threshold: float, PSDSEval parameter, minimum proportion of a
            prediction that must intersect the ground truth for the
            prediction to count as valid
        gtc_threshold: float, PSDSEval parameter, minimum proportion of a
            ground truth event that must intersect valid predictions for
            the event to count as detected
        cttc_threshold: float, PSDSEval parameter, minimum intersection with
            events of another class needed to count a false positive as a
            cross-trigger

    Returns:
        float, the macro F1-score averaged over the prediction thresholds
    """
    gt = pd.read_csv(ground_truth_file, sep="\t")
    durations = pd.read_csv(durations_file, sep="\t")
    psds = PSDSEval(
        ground_truth=gt,
        metadata=durations,
        dtc_threshold=dtc_threshold,
        gtc_threshold=gtc_threshold,
        cttc_threshold=cttc_threshold,
    )
    psds_macro_f1 = []
    for threshold in prediction_dfs.keys():
        if not prediction_dfs[threshold].empty:
            threshold_f1, _ = psds.compute_macro_f_score(
                prediction_dfs[threshold])
        else:
            # No predictions at this threshold: score it as zero.
            threshold_f1 = 0.0
        if np.isnan(threshold_f1):
            threshold_f1 = 0.0
        psds_macro_f1.append(threshold_f1)
    psds_macro_f1 = np.mean(psds_macro_f1)
    return psds_macro_f1
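
# A minimal usage sketch for compute_per_intersection_macro_f1, assuming one
# prediction dataframe per decision threshold (e.g. posteriors binarised at
# several operating points). Paths and thresholds here are hypothetical.
def example_per_intersection_usage():
    prediction_dfs = {
        thr: pd.read_csv(f"predictions_th_{thr:.2f}.tsv", sep="\t")
        for thr in (0.3, 0.5, 0.7)
    }
    macro_f1 = compute_per_intersection_macro_f1(
        prediction_dfs,
        ground_truth_file="groundtruth.tsv",
        durations_file="durations.tsv",
    )
    print(f"Intersection-based macro F1, averaged over thresholds: {macro_f1}")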