示例#1
0
def test_setting_ground_truth_more_than_once():
    """Setting the ground truth a second time must raise PSDSEvalError"""
    truth = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
    meta = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
    # The ground truth is already provided at construction time ...
    evaluator = PSDSEval(metadata=meta, ground_truth=truth)

    # ... so providing it again has to be rejected.
    expected_message = ("You cannot set the ground truth "
                        "more than once per evaluation")
    with pytest.raises(PSDSEvalError, match=expected_message):
        evaluator.set_ground_truth(gt_t=truth, meta_t=meta)
示例#2
0
def test_that_add_operating_point_added_a_point():
    """One call to add_operating_point must store exactly one operating
    point carrying the expected id"""
    expected_id = \
        "423089ce6d6554174881f69f9d0e57a8be9f5bc682dfce301462a8753aa6ec5f"

    def load(name):
        # All fixture tables are tab-separated files living in DATADIR.
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    detections = load("test_1.det")
    evaluator = PSDSEval(metadata=load("test.metadata"),
                         ground_truth=load("test_1.gt"))
    evaluator.add_operating_point(detections)
    assert evaluator.num_operating_points() == 1
    assert evaluator.operating_points["id"][0] == expected_id
示例#3
0
def test_that_add_operating_point_added_a_point():
    """Check that adding a detection table registers a single operating
    point and that its id equals the expected value"""
    det_path = os.path.join(DATADIR, "test_1.det")
    meta_path = os.path.join(DATADIR, "test.metadata")
    gt_path = os.path.join(DATADIR, "test_1.gt")

    detections = pd.read_csv(det_path, sep="\t")
    meta = pd.read_csv(meta_path, sep="\t")
    truth = pd.read_csv(gt_path, sep="\t")

    evaluator = PSDSEval(metadata=meta, ground_truth=truth)
    evaluator.add_operating_point(detections)

    assert evaluator.num_operating_points() == 1
    stored_id = evaluator.operating_points["id"][0]
    assert stored_id == \
        "6f504797195d2df3bae13e416b8bf96ca89ec4e4e4d031dadadd72e382640387"
示例#4
0
def test_add_operating_point_with_empty_dataframe():
    """A detection table without any columns must be rejected by
    add_operating_point"""
    meta = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
    truth = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
    evaluator = PSDSEval(metadata=meta, ground_truth=truth)

    # A bare DataFrame has neither rows nor the expected columns.
    column_less_det = pd.DataFrame()
    expected_message = "The data columns need to match the following"
    with pytest.raises(PSDSEvalError, match=expected_message):
        evaluator.add_operating_point(column_less_det)
示例#5
0
def test_add_operating_point_with_zero_detections():
    """A detection file with no detections must be accepted and stored as
    an operating point"""
    expected_id = \
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    no_detections = load("empty.det")
    evaluator = PSDSEval(metadata=load("test.metadata"),
                         ground_truth=load("test_1.gt"))
    # Must not raise even though the table holds no detections.
    evaluator.add_operating_point(no_detections)
    assert evaluator.num_operating_points() == 1
    assert evaluator.operating_points["id"][0] == expected_id
示例#6
0
def test_add_operating_point_with_wrong_data_format():
    """Detections handed over as a numpy array instead of a pandas table
    must make add_operating_point raise an error"""
    det_table = pd.read_csv(os.path.join(DATADIR, "test_1.det"), sep="\t")
    # Strip the DataFrame wrapper so only the raw array is passed on.
    det_as_array = det_table.to_numpy()
    meta = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
    truth = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
    evaluator = PSDSEval(metadata=meta, ground_truth=truth)

    expected_message = ("The detection data must be provided "
                        "in a pandas.DataFrame")
    with pytest.raises(PSDSEvalError, match=expected_message):
        evaluator.add_operating_point(det_as_array)
def test_two_operating_points_second_has_filtered_out_gtc():
    """Covers the case where the gt coverage df ends up empty for the
    second operating point"""
    def load(name):
        return pd.read_csv(join(DATADIR, name), sep="\t")

    truth = load("test_1.gt")
    meta = load("test.metadata")
    evaluator = PSDSEval(1, 1, 1, ground_truth=truth, metadata=meta)

    first_det = load("test_1.det")
    second_det = load("test_1a.det")
    evaluator.add_operating_point(first_det)
    evaluator.add_operating_point(second_det)

    score = evaluator.psds(0.0, 0.0, 100.0)
    assert score.value == pytest.approx(0.0), \
        "PSDS value was calculated incorrectly"
示例#8
0
def compute_metrics(predictions, gtruth_df, meta_df):
    """Compute event-based and PSDS-based macro F-scores for predictions.

    Args:
        predictions: detections, passed to ``compute_sed_eval_metrics`` and
            to ``PSDSEval.compute_macro_f_score``.
        gtruth_df: ground truth, passed to ``compute_sed_eval_metrics`` and
            used as the ``ground_truth`` of ``PSDSEval``.
        meta_df: used as the ``metadata`` of ``PSDSEval``.

    Returns:
        A tuple ``(events_metric, psds_macro_f1, macro_f1_event)`` where
        ``events_metric`` is the first value returned by
        ``compute_sed_eval_metrics``.
    """
    events_metric, _ = compute_sed_eval_metrics(predictions, gtruth_df)
    class_wise_average = events_metric.results_class_wise_average_metrics()
    macro_f1_event = class_wise_average['f_measure']['f_measure']

    evaluator = PSDSEval(0.5,
                         0.5,
                         0.3,
                         ground_truth=gtruth_df,
                         metadata=meta_df)
    # The per-class scores (second value) are not needed here.
    psds_macro_f1, _ = evaluator.compute_macro_f_score(predictions)
    return events_metric, psds_macro_f1, macro_f1_event
def test_two_operating_points_one_with_no_detections():
    """Covers the case where the dtc and gtc df's are empty for the second
    operating point"""
    def load(name):
        return pd.read_csv(join(DATADIR, name), sep="\t")

    truth = load("test_1.gt")
    meta = load("test.metadata")
    evaluator = PSDSEval(ground_truth=truth, metadata=meta)

    first_det = load("test_1.det")
    second_det = load("test_4.det")
    evaluator.add_operating_point(first_det)
    evaluator.add_operating_point(second_det)

    score = evaluator.psds(0.0, 0.0, 100.0)
    assert score.value == pytest.approx(0.9142857142857143), \
        "PSDS value was calculated incorrectly"
示例#10
0
def test_compute_f_score_no_det(metadata):
    """With a detection table that has columns but no rows, the average
    and every per-class F-score must be NaN"""
    det_t, gt_t = read_gt_and_det()
    # Keep only the column layout; all detection rows are dropped.
    empty_det = pd.DataFrame(columns=det_t.columns)
    evaluator = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3, ground_truth=gt_t,
                         metadata=metadata)
    f_avg, per_class_f = evaluator.compute_macro_f_score(empty_det)
    class_scores = np.fromiter(per_class_f.values(), dtype=float)
    assert np.isnan(f_avg), "The average F-score was incorrect"
    assert np.all(np.isnan(class_scores)), "Per-class F-score incorrect"
示例#11
0
def compute_metrics(predictions, gtruth_df, meta_df):
    """Compute the event-based macro F1 and the PSDS macro F-score.

    Args:
        predictions: detections, passed to ``compute_sed_eval_metrics`` and
            to ``PSDSEval.compute_macro_f_score``.
        gtruth_df: ground truth, passed to ``compute_sed_eval_metrics`` and
            used as the ``ground_truth`` of ``PSDSEval``.
        meta_df: used as the ``metadata`` of ``PSDSEval``.

    Returns:
        A tuple ``(macro_f1_event, psds_macro_f1)``. The PSDS value is also
        written to the log at info level.
    """
    events_metric = compute_sed_eval_metrics(predictions, gtruth_df)
    class_wise_average = events_metric.results_class_wise_average_metrics()
    macro_f1_event = class_wise_average['f_measure']['f_measure']

    evaluator = PSDSEval(0.5,
                         0.5,
                         0.3,
                         ground_truth=gtruth_df,
                         metadata=meta_df)
    # The per-class scores (second value) are not needed here.
    psds_macro_f1, _ = evaluator.compute_macro_f_score(predictions)
    logger.info(
        f"F1_score (psds_eval) accounting cross triggers: {psds_macro_f1}")
    return macro_f1_event, psds_macro_f1
示例#12
0
def test_add_operating_point_with_info_using_column_names():
    """An info dict that uses a reserved key ("counts" here) must be
    rejected"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    meta = load("test.metadata")
    detections = load("test_1.det")
    truth = load("test_1.gt")
    evaluator = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3,
                         ground_truth=truth,
                         metadata=meta)

    forbidden_info = {"counts": 0, "threshold1": 1}
    expected_message = ("the 'info' cannot contain the keys 'id', "
                        "'counts', 'tpr', 'fpr' or 'ctr'")
    with pytest.raises(PSDSEvalError, match=expected_message):
        evaluator.add_operating_point(detections, info=forbidden_info)
示例#13
0
def compute_psds_from_operating_points(list_predictions,
                                       groundtruth_df,
                                       meta_df,
                                       dtc_threshold=0.5,
                                       gtc_threshold=0.5,
                                       cttc_threshold=0.3):
    """Build a PSDSEval object and register every prediction table on it.

    Args:
        list_predictions: iterable of prediction tables; each one is added
            as an operating point.
        groundtruth_df: used as the ``ground_truth`` of ``PSDSEval``.
        meta_df: used as the ``metadata`` of ``PSDSEval``.
        dtc_threshold: first threshold handed to ``PSDSEval``.
        gtc_threshold: second threshold handed to ``PSDSEval``.
        cttc_threshold: third threshold handed to ``PSDSEval``.

    Returns:
        The ``PSDSEval`` instance holding all added operating points.
    """
    evaluator = PSDSEval(
        dtc_threshold,
        gtc_threshold,
        cttc_threshold,
        ground_truth=groundtruth_df,
        metadata=meta_df,
    )
    for operating_point in list_predictions:
        evaluator.add_operating_point(operating_point)
    return evaluator
示例#14
0
def test_set_ground_truth_with_overlapping_events(table_name, raise_error):
    """Ground truth with overlapping events must raise an error"""
    meta = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
    truth = pd.read_csv(os.path.join(DATADIR, table_name), sep="\t")
    evaluator = PSDSEval()

    if not raise_error:
        # Valid table: it must be accepted and stored as a DataFrame.
        evaluator.set_ground_truth(truth, meta)
        assert isinstance(evaluator.ground_truth, pd.DataFrame)
        return

    expected_message = ("The ground truth dataframe provided has "
                        "intersecting events/labels for the same class.")
    with pytest.raises(PSDSEvalError, match=expected_message):
        evaluator.set_ground_truth(truth, meta)
示例#15
0
def test_unknown_class_constraint_check():
    """A constraint naming a class that is not known to the evaluator must
    raise PSDSEvalError"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    meta = load("test.metadata")
    detections = load("test_1.det")
    truth = load("test_1.gt")
    evaluator = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3,
                         ground_truth=truth,
                         metadata=meta)
    evaluator.add_operating_point(
        detections, info={"name": "test_1", "threshold1": 1})

    unknown_class_row = {
        "class_name": "class1", "constraint": "tpr", "value": 1.}
    constraints = pd.DataFrame([unknown_class_row])

    with pytest.raises(PSDSEvalError, match="Unknown class: class1"):
        evaluator.select_operating_points_per_class(
            constraints, alpha_ct=1., beta=1.)
示例#16
0
def test_delete_ops():
    """Clearing all operating points must leave the table empty again"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    meta = load("test.metadata")
    first_det = load("test_1.det")
    second_det = load("test_1a.det")
    truth = load("test_1.gt")
    evaluator = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3,
                         ground_truth=truth,
                         metadata=meta)

    # Nothing has been added yet.
    assert evaluator.operating_points.empty
    for detections in (first_det, second_det):
        evaluator.add_operating_point(detections)
    assert evaluator.num_operating_points() == 2

    evaluator.clear_all_operating_points()
    assert evaluator.operating_points.empty
示例#17
0
def test_adding_shuffled_operating_points():
    """Re-adding the same detections with shuffled rows or reordered
    columns must not create additional operating points"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    detections = load("test_1.det")
    evaluator = PSDSEval(metadata=load("test.metadata"),
                         ground_truth=load("test_1.gt"))
    evaluator.add_operating_point(detections)

    # Same rows in a random order.
    rows_shuffled = detections.copy(deep=True).sample(frac=1.)
    rows_shuffled = rows_shuffled.reset_index(drop=True)
    evaluator.add_operating_point(rows_shuffled)

    # Same rows with the columns in a different order.
    reordered_columns = ["onset", "event_label", "offset", "filename"]
    columns_shuffled = detections.copy(deep=True)[reordered_columns]
    evaluator.add_operating_point(columns_shuffled)

    assert evaluator.num_operating_points() == 1
    assert evaluator.operating_points["id"][0] == \
        "423089ce6d6554174881f69f9d0e57a8be9f5bc682dfce301462a8753aa6ec5f"
示例#18
0
def test_compute_f_score_gt_later(metadata):
    """The F-scores must be correct when the ground truth is set after
    construction instead of at init time"""
    expected_average = 0.7083329808351875
    expected_class_f = [
        0.7752161383285303,
        0.7468354430379747,
        0.548936170212766,
        0.39943342776203966,
        0.6548881036513545,
        0.7663551401869159,
        0.9405405405405406,
        0.6978021978021978,
        0.7105553512320706,
        0.8427672955974843,
    ]

    det_t, gt_t = read_gt_and_det()
    evaluator = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3)
    evaluator.set_ground_truth(gt_t, metadata)
    f_avg, per_class_f = evaluator.compute_macro_f_score(det_t)

    assert f_avg == pytest.approx(expected_average), \
        "The average F-score was incorrect"
    for wanted, computed in zip(expected_class_f, per_class_f.values()):
        assert wanted == pytest.approx(computed), \
            "Per-class F-score incorrect"
示例#19
0
def test_compute_f_score_gt_later(metadata):
    """The F-scores must be correct when the ground truth is set after
    construction instead of at init time"""
    expected_average = 0.712641
    expected_class_f = [
        0.7752161383285303,
        0.7421383647798742,
        0.548936170212766,
        0.44747612551159616,
        0.6548881036513545,
        0.7663551401869159,
        0.9405405405405406,
        0.6978021978021978,
        0.7102941176470589,
        0.8427672955974843,
    ]

    det_t, gt_t = read_gt_and_det()
    evaluator = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3)
    evaluator.set_ground_truth(gt_t, metadata)
    f_avg, per_class_f = evaluator.compute_macro_f_score(det_t)

    assert f_avg == pytest.approx(expected_average), \
        "The average F-score was incorrect"
    for wanted, computed in zip(expected_class_f, per_class_f.values()):
        assert wanted == pytest.approx(computed), \
            "Per-class F-score incorrect"
def compute_per_intersection_macro_f1(
    prediction_dfs,
    ground_truth_file,
    durations_file,
    dtc_threshold=0.5,
    gtc_threshold=0.5,
    cttc_threshold=0.3,
):
    """Compute the intersection-based macro F1-score averaged over thresholds.

    Args:
        prediction_dfs: dict, a dictionary with thresholds as keys and
            prediction dataframes as values
        ground_truth_file: tab-separated ground truth file, read with
            ``pd.read_csv``
        durations_file: tab-separated durations (metadata) file, read with
            ``pd.read_csv``
        dtc_threshold: float, the parameter used in PSDSEval, percentage of
            tolerance for groundtruth intersection with predictions
        gtc_threshold: float, the parameter used in PSDSEval, percentage of
            tolerance for predictions intersection with groundtruth
        cttc_threshold: float, the parameter used in PSDSEval to know the
            percentage needed to count FP as cross-trigger

    Returns:
        The mean over all thresholds of the macro F-score returned by
        ``PSDSEval.compute_macro_f_score``. A threshold whose prediction
        dataframe is empty, or whose F-score is NaN, contributes 0. With an
        empty ``prediction_dfs`` the result is the NaN that ``np.mean``
        returns for an empty list.
    """
    gt = pd.read_csv(ground_truth_file, sep="\t")
    durations = pd.read_csv(durations_file, sep="\t")

    psds = PSDSEval(
        ground_truth=gt,
        metadata=durations,
        dtc_threshold=dtc_threshold,
        gtc_threshold=gtc_threshold,
        cttc_threshold=cttc_threshold,
    )
    psds_macro_f1 = []
    # Only the dataframes are needed; the threshold keys are not used.
    for prediction_df in prediction_dfs.values():
        if prediction_df.empty:
            # No detections at this threshold: counted as a zero F-score
            # without calling the evaluator.
            threshold_f1 = 0.0
        else:
            threshold_f1, _ = psds.compute_macro_f_score(prediction_df)
            if np.isnan(threshold_f1):
                # An undefined F-score is treated as zero in the average.
                threshold_f1 = 0.0
        psds_macro_f1.append(threshold_f1)
    return np.mean(psds_macro_f1)
示例#21
0
def test_setting_ground_truth_and_metadata_with_extra_columns():
    """Extra columns in the input tables must not show up in the ground
    truth and metadata tables held by PSDSEval"""
    expected_metadata_cols = ["filename", "duration"]
    expected_gt_cols = [
        "filename", "onset", "offset", "event_label", "duration", "id"
    ]

    truth = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
    truth["extra_gt_col"] = True
    # Sanity check: the table really has a surplus column.
    assert len(truth.columns) > len(PSDSEval.detection_cols), \
        "There should be more columns in this test"

    meta = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
    meta["additional_info"] = "VALID"
    assert len(meta.columns) > len(expected_metadata_cols), \
        "There are too few metadata columns for this test"

    evaluator = PSDSEval(metadata=meta, ground_truth=truth)

    np.testing.assert_array_equal(evaluator.ground_truth.columns,
                                  expected_gt_cols)
    np.testing.assert_array_equal(evaluator.metadata.columns,
                                  expected_metadata_cols)
示例#22
0
def test_add_operating_points_with_overlapping_events(table_name, raise_error):
    """Detections with overlapping events must raise an error"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    meta = load("test.metadata")
    detections = load(table_name)
    truth = load("test_1.gt")
    evaluator = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3, ground_truth=truth,
                         metadata=meta)

    if not raise_error:
        # Valid table: it must be stored as the only operating point.
        evaluator.add_operating_point(detections)
        assert evaluator.num_operating_points() == 1
        return

    expected_message = ("The detection dataframe provided has intersecting "
                        "events/labels for the same class.")
    with pytest.raises(PSDSEvalError, match=expected_message):
        evaluator.add_operating_point(detections)
示例#23
0
def test_impossible_constraint_check():
    """Constraints that no operating point satisfies must give NaN TPR
    entries in the selection result"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    meta = load("test.metadata")
    detections = load("test_1.det")
    truth = load("test_1.gt")
    evaluator = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3,
                         ground_truth=truth,
                         metadata=meta)
    evaluator.add_operating_point(
        detections, info={"name": "test_1", "threshold1": 1})

    constraint_rows = [
        {"class_name": "c2", "constraint": "fpr", "value": 11.},
        {"class_name": "c1", "constraint": "tpr", "value": 1.1},
    ]
    constraints = pd.DataFrame(constraint_rows)
    selected = evaluator.select_operating_points_per_class(
        constraints, alpha_ct=1., beta=1.)

    assert np.isnan(selected.TPR[0]), \
        "NaN value is not returned for 0, 0 operating point"
    assert np.isnan(selected.TPR[1]), \
        "NaN value is not returned for non-existing operating point"
示例#24
0
def test_add_same_operating_point_with_different_info():
    """Adding the same detections twice with different info must keep only
    the first operating point and its info"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    meta = load("test.metadata")
    detections = load("test_1.det")
    truth = load("test_1.gt")
    first_info = {"name": "test_1", "threshold1": 1}
    second_info = {"name": "test_1_2", "threshold2": 0}
    evaluator = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3,
                         ground_truth=truth,
                         metadata=meta)

    evaluator.add_operating_point(detections, info=first_info)
    # Same detections again, only the attached info differs.
    evaluator.add_operating_point(detections, info=second_info)

    assert evaluator.num_operating_points() == 1
    stored = evaluator.operating_points
    assert stored.name[0] == "test_1", \
        "The info name is not correctly reported."
    assert stored.threshold1[0] == 1, \
        "The info threshold1 is not correctly reported."
    assert "threshold2" not in evaluator.operating_points.columns, \
        "The info of ignored operating point modified the operating " \
        "points table."
示例#25
0
def test_retrieve_desired_operating_point():
    """Operating points must be selectable through tpr, fpr, efpr and
    fscore constraints"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    meta = load("test.metadata")
    first_det = load("test_1.det")
    second_det = load("test_2.det")
    truth = load("test_1.gt")
    evaluator = PSDSEval(dtc_threshold=0.5,
                         gtc_threshold=0.5,
                         cttc_threshold=0.3,
                         ground_truth=truth,
                         metadata=meta)
    evaluator.add_operating_point(
        first_det, info={"name": "test_1", "threshold1": 1})
    evaluator.add_operating_point(
        second_det, info={"name": "test_2", "threshold2": 0})

    # One (class, constraint, value) triple per requested selection.
    requested = [
        ("c1", "tpr", 1.),
        ("c1", "tpr", 0.8),
        ("c2", "fpr", 13.),
        ("c3", "efpr", 240.),
        ("c3", "efpr", 26.),
        ("c1", "fscore", np.nan),
    ]
    constraints = pd.DataFrame([
        {"class_name": class_name, "constraint": kind, "value": value}
        for class_name, kind, value in requested
    ])
    selected = evaluator.select_operating_points_per_class(
        constraints, alpha_ct=1., beta=1.)

    # Failure message for each row of the selection, in row order.
    failure_messages = [
        "Correct operating point is not chosen for tpr criteria with equality",
        "Correct operating point is not chosen for tpr criteria with "
        "inequality",
        "Correct operating point is not chosen for fpr criteria with "
        "inequality",
        "Correct operating point is not chosen for efpr criteria with "
        "equality",
        "Correct operating point is not chosen for efpr criteria with "
        "inequality",
        "Correct operating point is not chosen for fscore criteria",
    ]
    for row, message in enumerate(failure_messages):
        assert selected.name[row] == "test_1", message
    assert selected.Fscore[5] == pytest.approx(2./3.), \
        "Correct operating point is not chosen for fscore criteria"
示例#26
0
def test_full_dcase_validset():
    """Run PSDSEval on the full DCASE example data and check the counts,
    the PSDS value and that the inputs are left unmodified"""
    def load(name):
        return pd.read_csv(join(DATADIR, name), sep="\t")

    def row_hashes(frame):
        # Per-row hashes used to detect any in-place change of a table.
        return pd.util.hash_pandas_object(frame).values

    det = load("baseline_validation_AA_0.005.csv")
    gt = load("baseline_validation_gt.csv")
    metadata = load("baseline_validation_metadata.csv")

    # Record the checksums of the incoming data
    meta_hash = row_hashes(metadata)
    gt_hash = row_hashes(gt)
    det_hash = row_hashes(det)

    evaluator = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3, ground_truth=gt,
                         metadata=metadata)

    # matrix (n_class, n_class) last col/row is world (for FP)
    exp_counts = np.array([
        [269, 9, 63, 41, 120, 13, 7, 18, 128, 2, 302],
        [5, 59, 4, 45, 29, 31, 35, 46, 86, 58, 416],
        [54, 17, 129, 19, 105, 13, 14, 16, 82, 20, 585],
        [37, 43, 8, 164, 56, 9, 63, 63, 87, 7, 1100],
        [45, 10, 79, 73, 278, 7, 24, 51, 154, 22, 1480],
        [14, 22, 11, 24, 30, 41, 51, 26, 62, 43, 386],
        [3, 20, 12, 136, 96, 35, 87, 103, 97, 27, 840],
        [8, 41, 13, 119, 93, 48, 135, 127, 185, 32, 662],
        [89, 120, 74, 493, 825, 203, 403, 187, 966, 89, 1340],
        [0, 83, 1, 12, 58, 27, 46, 46, 120, 67, 390],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ])

    evaluator.add_operating_point(det)
    assert np.all(evaluator.operating_points.counts[0] == exp_counts)

    # Check that all the psds metrics match
    score = evaluator.psds(0.0, 0.0, 100.0)
    assert score.value == pytest.approx(0.0044306914546640595), \
        "PSDS value was calculated incorrectly"

    # Check that the data has not been messed about with
    assert np.all(row_hashes(gt) == gt_hash)
    assert np.all(row_hashes(metadata) == meta_hash)
    assert np.all(row_hashes(det) == det_hash)
示例#27
0
def test_full_psds():
    """Run a complete PSDSEval example and check the counts and the PSDS
    value"""
    def load(name):
        return pd.read_csv(os.path.join(DATADIR, name), sep="\t")

    meta = load("test.metadata")
    detections = load("test_1.det")
    truth = load("test_1.gt")
    evaluator = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3, ground_truth=truth,
                         metadata=meta)

    # matrix (n_class, n_class) last col/row is world (for FP)
    exp_counts = np.array([
        [1, 0, 0, 1],
        [0, 1, 0, 1],
        [1, 0, 1, 0],
        [0, 0, 0, 0],
    ])

    evaluator.add_operating_point(detections)
    stored_counts = evaluator.operating_points.counts[0]
    assert np.all(stored_counts == exp_counts), \
        "Expected counts do not match"

    score = evaluator.psds(0.0, 0.0, 100.0)
    assert score.value == pytest.approx(0.9142857142857143), \
        "PSDS was calculated incorrectly"
示例#28
0
def psds_results(predictions, gtruth_df, gtruth_durations):
    """Print the PSD-Score of one operating point for three settings.

    The score is evaluated and printed for ``(alpha_ct, alpha_st)`` equal
    to (0, 0), (1, 0) and (0, 1), always with ``max_efpr=100``. If a
    ``PSDSEvalError`` is raised along the way it is logged as an error and
    not re-raised. Nothing is returned.

    Args:
        predictions: detections added as the only operating point.
        gtruth_df: used as the ``ground_truth`` of ``PSDSEval``.
        gtruth_durations: used as the ``metadata`` of ``PSDSEval``.
    """
    dtc_threshold, gtc_threshold, cttc_threshold = 0.5, 0.5, 0.3
    try:
        # Instantiate PSDSEval
        evaluator = PSDSEval(dtc_threshold, gtc_threshold, cttc_threshold,
                             ground_truth=gtruth_df,
                             metadata=gtruth_durations)
        evaluator.add_operating_point(predictions)

        psds_score = evaluator.psds(alpha_ct=0, alpha_st=0, max_efpr=100)
        print(f"\nPSD-Score (0, 0, 100): {psds_score.value:.5f}")

        psds_score = evaluator.psds(alpha_ct=1, alpha_st=0, max_efpr=100)
        print(f"\nPSD-Score (1, 0, 100): {psds_score.value:.5f}")

        psds_score = evaluator.psds(alpha_ct=0, alpha_st=1, max_efpr=100)
        print(f"\nPSD-Score (0, 1, 100): {psds_score.value:.5f}")
    except psds_eval.psds.PSDSEvalError as err:
        logger.error("psds did not work ....")
        logger.error(err)
示例#29
0
def test_example_2_paper_icassp(metadata):
    """Run PSDSEval on sample data from the ICASSP paper and check the
    counts, the PSDS value and that the inputs are left unmodified"""
    def row_hashes(frame):
        # Per-row hashes used to detect any in-place change of a table.
        return pd.util.hash_pandas_object(frame).values

    det = pd.read_csv(join(DATADIR, "test_2.det"), sep="\t")
    gt = pd.read_csv(join(DATADIR, "test_2.gt"), sep="\t")
    # Record the checksums of the incoming data
    gt_hash = row_hashes(gt)
    det_hash = row_hashes(det)

    evaluator = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3, ground_truth=gt,
                         metadata=metadata)
    exp_counts = np.array([
        [0, 0, 1, 1],
        [1, 0, 1, 0],
        [0, 0, 1, 1],
        [0, 0, 0, 0],
    ])

    evaluator.add_operating_point(det)
    assert np.all(evaluator.operating_points.counts[0] == exp_counts)

    score = evaluator.psds(0.0, 0.0, 100.0)
    assert score.value == pytest.approx(0.29047619047619044), \
        "PSDS value was calculated incorrectly"

    # Check that the data has not been messed about with
    assert np.all(row_hashes(gt) == gt_hash)
    assert np.all(row_hashes(det) == det_hash)
示例#30
0
def test_files_from_dcase(metadata):
    """Run PSDSEval on example data from DCASE and check the counts, the
    PSDS value and that the inputs are left unmodified"""
    def row_hashes(frame):
        # Per-row hashes used to detect any in-place change of a table.
        return pd.util.hash_pandas_object(frame).values

    base_name = "Y23R6_ppquxs_247.000_257000"
    det = pd.read_csv(join(DATADIR, base_name + ".det"), sep="\t")
    gt = pd.read_csv(join(DATADIR, base_name + ".gt"), sep="\t")
    # Record the checksums of the incoming data
    gt_hash = row_hashes(gt)
    det_hash = row_hashes(det)

    evaluator = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3, ground_truth=gt,
                         metadata=metadata)
    # matrix (n_class, n_class) last col/row is world (for FP)
    exp_counts = np.array([
        [1., 0., 1.],
        [1., 4., 0.],
        [0., 0., 0.],
    ])

    evaluator.add_operating_point(det)
    assert np.all(evaluator.operating_points.counts[0] == exp_counts)

    score = evaluator.psds(0.0, 0.0, 100.0)
    assert score.value == pytest.approx(0.6089285714285714), \
        "PSDS value was calculated incorrectly"

    # Check that the data has not been messed about with
    assert np.all(row_hashes(gt) == gt_hash)
    assert np.all(row_hashes(det) == det_hash)