Example #1
def test_compute_f_score_no_gt():
    """Test PSDSEvalError raised if gt is missing"""
    det_t, _ = read_gt_and_det()
    psds_eval = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3)
    with pytest.raises(PSDSEvalError, match="Ground Truth must be provided"):
        psds_eval.compute_macro_f_score(det_t)
Example #2
def compute_metrics(predictions, gtruth_df, meta_df):
    events_metric = compute_sed_eval_metrics(predictions, gtruth_df)
    macro_f1_event = events_metric.results_class_wise_average_metrics()['f_measure']['f_measure']
    dtc_threshold, gtc_threshold, cttc_threshold = 0.5, 0.5, 0.3
    psds = PSDSEval(dtc_threshold, gtc_threshold, cttc_threshold, ground_truth=gtruth_df, metadata=meta_df)
    psds_macro_f1, psds_f1_classes = psds.compute_macro_f_score(predictions)
    logger.info(f"F1_score (psds_eval) accounting cross triggers: {psds_macro_f1}")
    return macro_f1_event, psds_macro_f1
Example #3
def test_compute_f_score_no_det(metadata):
    det_t, gt_t = read_gt_and_det()
    det_t = pd.DataFrame(columns=det_t.columns)
    psds_eval = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3,
                         ground_truth=gt_t,
                         metadata=metadata)
    f_avg, per_class_f = psds_eval.compute_macro_f_score(det_t)
    per_class_f_array = np.fromiter(per_class_f.values(), dtype=float)
    assert np.isnan(f_avg), "The average F-score was incorrect"
    assert np.all(np.isnan(per_class_f_array)), "Per-class F-score incorrect"
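The tests above cover the failure modes (ground truth missing, detections empty). For reference, here is a minimal end-to-end sketch that is not part of the original test suite; it assumes the column layout expected by psds_eval, i.e. filename/onset/offset/event_label frames for the ground truth and the detections, and a filename/duration frame for the audio metadata, with a single made-up 10-second file.
import pandas as pd
from psds_eval import PSDSEval

# Toy data: one 10-second file with two annotated events and two detections.
ground_truth = pd.DataFrame({
    "filename": ["a.wav", "a.wav"],
    "onset": [0.0, 5.0],
    "offset": [4.0, 9.0],
    "event_label": ["Speech", "Dog"],
})
detections = pd.DataFrame({
    "filename": ["a.wav", "a.wav"],
    "onset": [0.5, 5.5],
    "offset": [4.2, 8.7],
    "event_label": ["Speech", "Dog"],
})
metadata = pd.DataFrame({"filename": ["a.wav"], "duration": [10.0]})

psds_eval = PSDSEval(dtc_threshold=0.5,
                     gtc_threshold=0.5,
                     cttc_threshold=0.3,
                     ground_truth=ground_truth,
                     metadata=metadata)
f_avg, per_class_f = psds_eval.compute_macro_f_score(detections)
print(f_avg)        # macro-average over the classes
print(per_class_f)  # dict mapping each class to its F-score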
Example #4
def compute_metrics(predictions, gtruth_df, meta_df):
    events_metric, _ = compute_sed_eval_metrics(predictions, gtruth_df)
    macro_f1_event = events_metric.results_class_wise_average_metrics(
    )['f_measure']['f_measure']
    dtc_threshold, gtc_threshold, cttc_threshold = 0.5, 0.5, 0.3
    psds = PSDSEval(dtc_threshold,
                    gtc_threshold,
                    cttc_threshold,
                    ground_truth=gtruth_df,
                    metadata=meta_df)
    psds_macro_f1, psds_f1_classes = psds.compute_macro_f_score(predictions)
    return events_metric, psds_macro_f1, macro_f1_event
Example #5
def test_compute_f_score_gt_later(metadata):
    """Test computation is correct when gt is not passed at init time"""
    det_t, gt_t = read_gt_and_det()
    psds_eval = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3)
    psds_eval.set_ground_truth(gt_t, metadata)
    f_avg, per_class_f = psds_eval.compute_macro_f_score(det_t)
    expected_class_f = [
        0.7752161383285303, 0.7421383647798742, 0.548936170212766,
        0.44747612551159616, 0.6548881036513545, 0.7663551401869159,
        0.9405405405405406, 0.6978021978021978, 0.7102941176470589,
        0.8427672955974843
    ]
    assert f_avg == pytest.approx(
        0.712641), "The average F-score was incorrect"
    for exp_f, class_f in zip(expected_class_f, per_class_f.values()):
        assert exp_f == pytest.approx(class_f), "Per-class F-score incorrect"
Example #6
def test_compute_f_score_gt_later(metadata):
    """Test computation is correct when gt is not passed at init time"""
    det_t, gt_t = read_gt_and_det()
    psds_eval = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3)
    psds_eval.set_ground_truth(gt_t, metadata)
    f_avg, per_class_f = psds_eval.compute_macro_f_score(det_t)
    expected_class_f = [
        0.7752161383285303, 0.7468354430379747, 0.548936170212766,
        0.39943342776203966, 0.6548881036513545, 0.7663551401869159,
        0.9405405405405406, 0.6978021978021978, 0.7105553512320706,
        0.8427672955974843
    ]
    assert f_avg == pytest.approx(0.7083329808351875), \
        "The average F-score was incorrect"
    for exp_f, class_f in zip(expected_class_f, per_class_f.values()):
        assert exp_f == pytest.approx(class_f), "Per-class F-score incorrect"
Example #7
def compute_per_intersection_macro_f1(
    prediction_dfs,
    ground_truth_file,
    durations_file,
    dtc_threshold=0.5,
    gtc_threshold=0.5,
    cttc_threshold=0.3,
):
    """ Compute F1-score per intersection, using the defautl
    Args:
        prediction_dfs: dict, a dictionary with thresholds keys and predictions dataframe
        ground_truth_file: pd.DataFrame, the groundtruth dataframe
        durations_file: pd.DataFrame, the duration dataframe
        dtc_threshold: float, the parameter used in PSDSEval, percentage of tolerance for groundtruth intersection
            with predictions
        gtc_threshold: float, the parameter used in PSDSEval percentage of tolerance for predictions intersection
            with groundtruth
        gtc_threshold: float, the parameter used in PSDSEval to know the percentage needed to count FP as cross-trigger

    Returns:

    """
    gt = pd.read_csv(ground_truth_file, sep="\t")
    durations = pd.read_csv(durations_file, sep="\t")

    psds = PSDSEval(
        ground_truth=gt,
        metadata=durations,
        dtc_threshold=dtc_threshold,
        gtc_threshold=gtc_threshold,
        cttc_threshold=cttc_threshold,
    )
    psds_macro_f1 = []
    for threshold in prediction_dfs.keys():
        if not prediction_dfs[threshold].empty:
            threshold_f1, _ = psds.compute_macro_f_score(
                prediction_dfs[threshold])
        else:
            threshold_f1 = 0
        # compute_macro_f_score returns NaN when nothing can be scored
        # (e.g. empty detections), so treat that as a score of 0.
        if np.isnan(threshold_f1):
            threshold_f1 = 0.0
        psds_macro_f1.append(threshold_f1)
    psds_macro_f1 = np.mean(psds_macro_f1)
    return psds_macro_f1
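A hypothetical usage sketch of compute_per_intersection_macro_f1 follows; the file paths and threshold keys are invented for illustration, and the TSV files are assumed to use the same filename/onset/offset/event_label and filename/duration layouts as above.
import pandas as pd

# Predictions decoded at two made-up probability thresholds; the empty frame
# at 0.7 illustrates the "counted as 0" branch inside the loop.
prediction_dfs = {
    0.5: pd.DataFrame({
        "filename": ["a.wav"],
        "onset": [0.4],
        "offset": [4.2],
        "event_label": ["Speech"],
    }),
    0.7: pd.DataFrame(columns=["filename", "onset", "offset", "event_label"]),
}

mean_macro_f1 = compute_per_intersection_macro_f1(
    prediction_dfs,
    ground_truth_file="metadata/validation.tsv",          # hypothetical path
    durations_file="metadata/validation_durations.tsv",   # hypothetical path
)
print(f"Mean intersection-based macro F1 over thresholds: {mean_macro_f1}")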