def test_dawid_skene_overlap(overlap):
    """Check DawidSkene output on a synthetic 3-task dataset with ``overlap`` workers.

    Worker ``perf_id`` answers 'no' on task ``task_id`` when
    ``(perf_id - task_id)`` is divisible by 3 and 'yes' otherwise.  After 20
    iterations every task is expected to be labelled 'yes', with priors of
    1/3 for 'no' and 2/3 for 'yes' (absolute tolerance 0.005).
    """
    answers = []
    for perf_id in range(overlap):
        for task_id in range(3):
            # A non-zero remainder is truthy and maps to 'yes'.
            remainder = (perf_id - task_id) % 3
            answers.append({
                'task': task_id,
                'worker': perf_id,
                'label': 'yes' if remainder else 'no',
            })
    data = pd.DataFrame(answers)

    ds = DawidSkene(20).fit(data)

    # Expected values are built with the module's helpers; presumably the two
    # numeric columns are the 'no' and 'yes' probabilities -- confirm against
    # _make_probas.
    proba_rows = [[task_id, 1 / 3, 2 / 3] for task_id in range(3)]
    expected_probas = _make_probas(proba_rows)
    label_rows = [[task_id, 'yes'] for task_id in range(3)]
    expected_labels = _make_tasks_labels(label_rows)
    expected_priors = pd.Series({'no': 1 / 3, 'yes': 2 / 3}, name='prior')

    # TODO: check errors_
    assert_frame_equal(expected_probas, ds.probas_, check_like=True, atol=0.005)
    assert_series_equal(expected_labels, ds.labels_, atol=0.005)
    assert_series_equal(expected_priors, ds.priors_, atol=0.005)
def test_aggregate_ds_on_simple(n_iter, tol, simple_answers_df, simple_ground_truth):
    """Compare DawidSkene labels on the simple dataset with its ground truth.

    Both series are sorted by index before the comparison so that row order
    does not matter.
    """
    # Fix the NumPy global seed before fitting.
    np.random.seed(42)

    aggregator = DawidSkene(n_iter=n_iter, tol=tol)
    predicted = aggregator.fit(simple_answers_df).labels_.sort_index()
    expected = simple_ground_truth.sort_index()

    assert_series_equal(predicted, expected)
def test_aggregate_ds_on_toy_ysda(n_iter, tol, toy_answers_df, toy_ground_truth_df):
    """Compare DawidSkene labels on the toy dataset with its ground truth.

    Both sides are sorted by index before the comparison so that row order
    does not matter.
    """
    # Fix the NumPy global seed before fitting.
    np.random.seed(42)

    aggregator = DawidSkene(n_iter=n_iter, tol=tol)
    predicted = aggregator.fit(toy_answers_df).labels_.sort_index()
    expected = toy_ground_truth_df.sort_index()

    assert_series_equal(predicted, expected)
def test_dawid_skene_on_empty_input(request, data):
    """Fitting DawidSkene on an empty answers frame must yield empty results.

    ``request`` and ``data`` are not used in the body; they are kept so the
    test keeps requesting the same pytest fixtures/parameters.
    """
    empty_answers = pd.DataFrame([], columns=['task', 'worker', 'label'])
    ds = DawidSkene(10).fit(empty_answers)

    # Frame-valued attributes are expected to be completely empty.
    assert_frame_equal(pd.DataFrame(), ds.probas_, check_like=True, atol=0.005)
    assert_frame_equal(pd.DataFrame(), ds.errors_, check_like=True, atol=0.005)

    # Series-valued attributes are expected to be empty but correctly named.
    empty_priors = pd.Series(dtype=float, name='prior')
    assert_series_equal(empty_priors, ds.priors_, atol=0.005)
    empty_labels = pd.Series(dtype=float, name='agg_label')
    assert_series_equal(empty_labels, ds.labels_, atol=0.005)
def test_dawid_skene_step_by_step(request, data, n_iter):
    """Check every fitted attribute of DawidSkene after exactly ``n_iter`` iterations.

    The expected values are looked up by name from fixtures suffixed with the
    iteration count (e.g. ``probas_iter_<n_iter>``) and compared with an
    absolute tolerance of 0.005.
    """
    get_fixture = request.getfixturevalue
    expected_probas = get_fixture(f'probas_iter_{n_iter}')
    expected_labels = get_fixture(f'tasks_labels_iter_{n_iter}')
    expected_errors = get_fixture(f'errors_iter_{n_iter}')
    expected_priors = get_fixture(f'priors_iter_{n_iter}')

    ds = DawidSkene(n_iter).fit(data)

    # check_like=True makes the frame comparisons ignore row/column order.
    assert_frame_equal(expected_probas, ds.probas_, check_like=True, atol=0.005)
    assert_frame_equal(expected_errors, ds.errors_, check_like=True, atol=0.005)
    assert_series_equal(expected_priors, ds.priors_, atol=0.005)
    assert_series_equal(expected_labels, ds.labels_, atol=0.005)
def test_on_bool_labels(data_with_bool_labels, bool_labels_ground_truth):
    """Check that DawidSkene (20 iterations) matches the ground truth for the bool-label fixture."""
    aggregator = DawidSkene(20)
    fitted = aggregator.fit(data_with_bool_labels)
    assert_series_equal(bool_labels_ground_truth, fitted.labels_, atol=0.005)
def track_accuracy_dawid_skene(self):
    """Return the accuracy of 5-iteration DawidSkene predictions on ``self.crowd_labels``.

    The score is whatever ``self._calc_accuracy`` returns for the predicted
    labels.
    """
    aggregator = DawidSkene(n_iter=5)
    predicted = aggregator.fit_predict(self.crowd_labels)
    return self._calc_accuracy(predicted)
def peakmem_dawid_skene(self):
    """Run 5-iteration DawidSkene ``fit_predict`` on ``self.crowd_labels``.

    The prediction is intentionally discarded; presumably the benchmark
    harness measures peak memory of the call -- confirm against the runner.
    """
    aggregator = DawidSkene(n_iter=5)
    aggregator.fit_predict(self.crowd_labels)