def test_cohens_kappa_perc_agreement_no_agreement(
    setup_celery,
    test_project_half_irr_data,
    test_half_irr_all_queues,
    test_profile,
    test_profile2,
    test_labels_half_irr,
    test_redis,
    tmpdir,
    settings,
):
    """Check Cohen's kappa and percent agreement when the two coders never agree."""
    project = test_project_half_irr_data
    label_options = test_labels_half_irr
    normal_queue, _admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(
        normal_queue, "random", irr_queue, project.percentage_irr, project.batch_size
    )

    # Five IRR items: the second coder always picks the next label in the
    # three-label rotation, so the pair disagrees on every single item.
    for step in range(5):
        irr_item = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        first_choice = label_options[step % 3]
        label_data(first_choice, irr_item, test_profile, 3)
        second_choice = label_options[(step + 1) % 3]
        label_data(second_choice, irr_item, test_profile2, 3)

    kappa_value, percent_agreement = cohens_kappa(project)
    assert round(kappa_value, 3) == -0.471
    assert percent_agreement == 0.0
def test_model_task_redis_no_dupes_data_left_in_queue(
        test_project_labeled_and_tfidf, test_queue_labeled,
        test_irr_queue_labeled, test_admin_queue_labeled, test_redis, tmpdir,
        settings):
    """Run the model task with data still queued; redis must hold no duplicates.

    Only the non-IRR share of a batch is labeled before the model task runs,
    so unlabeled items remain in the queue when it is refilled.
    """
    project = test_project_labeled_and_tfidf
    starting_set_number = project.get_current_training_set().set_number

    # Enlarge both queues to 40 slots.
    normal_queue = project.queue_set.get(type="normal")
    normal_queue.length = 40
    normal_queue.save()

    reliability_queue = project.queue_set.get(type="irr")
    reliability_queue.length = 40
    reliability_queue.save()

    # Point model pickles at a throwaway directory.
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    batch_size = project.batch_size
    fill_queue(normal_queue, 'random', reliability_queue,
               irr_percent=project.percentage_irr, batch_size=batch_size)

    label_pool = project.labels.all()
    items_to_label = int(batch_size * ((100 - project.percentage_irr) / 100))
    for _ in range(items_to_label):
        item = assign_datum(project.creator, project)
        label_data(random.choice(label_pool), item, project.creator, 3)

    # Block until the model task has finished.
    tasks.send_model_task.delay(project.pk).get()

    current_set_number = project.get_current_training_set().set_number
    assert current_set_number == starting_set_number + 1

    queued = test_redis.lrange(redis_serialize_queue(normal_queue), 0, -1)
    assert len(queued) == len(set(queued))
def test_queue_refill(setup_celery, test_project_data, test_all_queues,
                      test_profile, test_labels, test_redis, tmpdir, settings):
    """Check how the queues look after one coder labels a whole batch.

    One profile labels every normal item and then every IRR item of the
    batch. Afterwards the normal queue is expected to hold a fresh non-IRR
    share, while the IRR queue is expected to hold twice the IRR share.
    """
    project = test_project_data
    normal_queue, _admin_queue, irr_queue = test_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    irr_count = math.ceil((project.percentage_irr / 100) * project.batch_size)
    non_irr_count = math.ceil(
        ((100 - project.percentage_irr) / 100) * project.batch_size)

    # Normal items first, then IRR items; each label is followed by the
    # model/refill check.
    for queue_type, how_many in (("normal", non_irr_count), ("irr", irr_count)):
        for _ in range(how_many):
            item = assign_datum(test_profile, project, queue_type)
            assert item is not None
            label_data(test_labels[0], item, test_profile, 3)
            check_and_trigger_model(item, test_profile)

    normal_total = DataQueue.objects.filter(queue=normal_queue).count()
    assert normal_total == non_irr_count

    irr_total = DataQueue.objects.filter(queue=irr_queue).count()
    assert irr_total == irr_count * 2
def test_unassign_after_fillqueue(db, test_profile, test_project_data,
                                  test_queue, test_labels, test_redis):
    """Track the redis list and set sizes through fill, assign, label, refill."""
    list_key = 'queue:' + str(test_queue.pk)
    set_key = 'set:' + str(test_queue.pk)

    def redis_sizes():
        """Return (length of the redis list, cardinality of the redis set)."""
        return test_redis.llen(list_key), test_redis.scard(set_key)

    # A fresh fill puts every item in both structures.
    fill_queue(test_queue, 'random')
    assert redis_sizes() == (test_queue.length, test_queue.length)

    # Assigning ten items shortens the list only.
    assigned = get_assignments(test_profile, test_project_data, 10)
    assert redis_sizes() == (test_queue.length - 10, test_queue.length)

    # Labeling five of the assigned items shrinks the set only.
    chosen_label = test_labels[0]
    for position in range(5):
        label_data(chosen_label, assigned[position], test_profile, 3)
    assert redis_sizes() == (test_queue.length - 10, test_queue.length - 5)

    # After a refill the five still-assigned items are absent from the list
    # but counted in the set.
    fill_queue(test_queue, 'random')
    assert redis_sizes() == (test_queue.length - 5, test_queue.length)
def test_model_task_redis_no_dupes_data_unassign_assigned_data(
        test_project_labeled_and_tfidf, test_queue_labeled,
        test_irr_queue_labeled, test_admin_queue_labeled, test_redis, tmpdir,
        settings):
    """Redis must stay duplicate-free across model runs and a batch unassign.

    The creator labels a full batch and runs the model, then takes 40
    assignments, labels one batch worth of them, runs the model again and
    finally unassigns whatever is left. The normal queue's redis list is
    checked for duplicates after each stage.
    """
    project = test_project_labeled_and_tfidf

    # Register two additional coders on the project.
    extra_coders = [
        create_profile(username, 'password', '*****@*****.**')
        for username in ('test_profilezzz', 'test_profile2')
    ]
    for coder in extra_coders:
        ProjectPermissions.objects.create(profile=coder,
                                          project=project,
                                          permission='CODER')

    starting_set_number = project.get_current_training_set().set_number

    # Enlarge both queues to 40 slots.
    normal_queue = project.queue_set.get(type="normal")
    normal_queue.length = 40
    normal_queue.save()

    reliability_queue = project.queue_set.get(type="irr")
    reliability_queue.length = 40
    reliability_queue.save()

    # Point model pickles at a throwaway directory.
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    def assert_no_duplicates_in_redis():
        """Fail if the normal queue's redis list contains a repeated entry."""
        queued = test_redis.lrange(redis_serialize_queue(normal_queue), 0, -1)
        assert len(queued) == len(set(queued))

    batch_size = project.batch_size
    fill_queue(normal_queue, 'random', reliability_queue,
               irr_percent=project.percentage_irr, batch_size=batch_size)

    label_pool = project.labels.all()

    # Stage 1: label a full batch, then run the model.
    first_batch = get_assignments(project.creator, project, batch_size)
    for item in first_batch:
        label_data(random.choice(label_pool), item, project.creator, 3)

    tasks.send_model_task.delay(project.pk).get()
    assert (project.get_current_training_set().set_number
            == starting_set_number + 1)
    assert_no_duplicates_in_redis()

    # Stage 2: take 40 assignments but only label one batch worth of them.
    second_batch = get_assignments(project.creator, project, 40)
    for item in second_batch[:batch_size]:
        label_data(random.choice(label_pool), item, project.creator, 3)

    tasks.send_model_task.delay(project.pk).get()
    assert (project.get_current_training_set().set_number
            == starting_set_number + 2)
    assert_no_duplicates_in_redis()

    # Stage 3: hand the unlabeled assignments back.
    batch_unassign(project.creator)
    assert_no_duplicates_in_redis()
def test_skip_irr(
    setup_celery,
    test_project_half_irr_data,
    test_half_irr_all_queues,
    test_profile,
    test_profile2,
    test_profile3,
    test_labels_half_irr,
    test_redis,
    tmpdir,
    settings,
):
    """Check where an IRR datum ends up after coders skip it."""
    project = test_project_half_irr_data
    normal_queue, admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(
        normal_queue, "random", irr_queue, project.percentage_irr, project.batch_size
    )

    # An IRR datum must be available for the first coder.
    skipped = assign_datum(test_profile, project, "irr")
    assert skipped is not None

    # First skip: the datum stays in the IRR queue, is logged once for this
    # coder, and appears in neither the admin queue nor DataLabel.
    skip_data(skipped, test_profile)
    in_admin = DataQueue.objects.filter(data=skipped, queue=admin_queue)
    in_irr = DataQueue.objects.filter(data=skipped, queue=irr_queue)
    assert in_admin.count() == 0
    assert in_irr.count() == 1
    assert IRRLog.objects.filter(data=skipped, profile=test_profile).count() == 1
    assert DataLabel.objects.filter(data=skipped, profile=test_profile).count() == 0

    # Second skip by another coder on the same datum: it moves to the admin
    # queue, leaves the IRR queue, has two log rows and still no label.
    skipped_again = assign_datum(test_profile2, project, "irr")
    assert skipped.pk == skipped_again.pk
    skip_data(skipped_again, test_profile2)
    assert DataQueue.objects.filter(data=skipped, queue=admin_queue).count() == 1
    assert DataQueue.objects.filter(data=skipped, queue=irr_queue).count() == 0
    assert IRRLog.objects.filter(data=skipped).count() == 2
    assert DataLabel.objects.filter(data=skipped).count() == 0

    # A new datum is assigned to all three coders; two label it identically
    # and the third skips it afterwards.
    for_first = assign_datum(test_profile, project, "irr")
    for_second = assign_datum(test_profile2, project, "irr")
    assert for_first.pk != skipped.pk
    assert for_first.pk == for_second.pk
    for_third = assign_datum(test_profile3, project, "irr")
    assert for_second.pk == for_third.pk

    agreed_label = test_labels_half_irr[0]
    label_data(agreed_label, for_first, test_profile, 3)
    label_data(agreed_label, for_second, test_profile2, 3)
    skip_data(for_third, test_profile3)

    # The late skip is logged (three rows) but the datum is in neither queue
    # and keeps its single label.
    assert DataQueue.objects.filter(data=for_third, queue=admin_queue).count() == 0
    assert DataQueue.objects.filter(data=for_third, queue=irr_queue).count() == 0
    assert IRRLog.objects.filter(data=for_third).count() == 3
    assert DataLabel.objects.filter(data=for_third).count() == 1
def test_annotate_irr(setup_celery, test_project_half_irr_data,
                      test_half_irr_all_queues, test_profile, test_profile2,
                      test_profile3, test_labels_half_irr, test_redis, tmpdir,
                      settings):
    """Walk through the IRR labeling workflow and check each model's rows."""
    project = test_project_half_irr_data
    normal_queue, admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    first_label = test_labels_half_irr[0]
    other_label = test_labels_half_irr[1]

    # An IRR datum must be available for the first coder.
    shared = assign_datum(test_profile, project, "irr")
    assert shared is not None

    # One coder labels: a DataLabel row exists, nothing is logged for that
    # coder yet, and the datum is still in the IRR queue.
    label_data(first_label, shared, test_profile, 3)
    assert DataLabel.objects.filter(data=shared, profile=test_profile).count() > 0
    assert IRRLog.objects.filter(data=shared, profile=test_profile).count() == 0
    assert DataQueue.objects.filter(data=shared, queue=irr_queue).count() > 0

    # The other two coders receive the very same datum.
    shared_for_second = assign_datum(test_profile2, project, "irr")
    assert shared.pk == shared_for_second.pk
    shared_for_third = assign_datum(test_profile3, project, "irr")
    assert shared.pk == shared_for_third.pk

    # Second coder agrees: a single DataLabel owned by the project creator,
    # two IRRLog rows, and the datum has left the IRR queue.
    label_data(first_label, shared_for_second, test_profile2, 3)
    assert DataLabel.objects.filter(data=shared_for_second).count() == 1
    assert (DataLabel.objects.get(data=shared_for_second).profile.pk
            == project.creator.pk)
    assert IRRLog.objects.filter(data=shared_for_second).count() == 2
    assert DataQueue.objects.filter(data=shared_for_second,
                                    queue=irr_queue).count() == 0

    # Third coder labels the already-resolved datum (this call also uses the
    # first label): it is logged, but the existing DataLabel is untouched.
    label_data(first_label, shared_for_third, test_profile3, 3)
    assert IRRLog.objects.filter(data=shared_for_third).count() == 3
    assert DataLabel.objects.filter(data=shared_for_third).count() == 1
    assert (DataLabel.objects.get(data=shared_for_third).profile.pk
            == project.creator.pk)

    # Two coders disagree on a fresh datum: it goes to the admin queue,
    # leaves the IRR queue, has no DataLabel and two IRRLog rows.
    disputed = assign_datum(test_profile, project, "irr")
    assert shared.pk != disputed.pk
    disputed_for_second = assign_datum(test_profile2, project, "irr")

    label_data(first_label, disputed, test_profile, 3)
    label_data(other_label, disputed_for_second, test_profile2, 3)
    assert DataQueue.objects.filter(data=disputed_for_second,
                                    queue=admin_queue).count() == 1
    assert DataQueue.objects.filter(data=disputed_for_second,
                                    queue=irr_queue).count() == 0
    assert DataLabel.objects.filter(data=disputed_for_second).count() == 0
    assert IRRLog.objects.filter(data=disputed_for_second).count() == 2
def test_g_naivebayes_classifier(
    setup_celery,
    test_project_gnb_data_tfidf,
    test_gnb_labels,
    test_gnb_queue_list,
    test_profile,
    test_redis,
    tmpdir,
    settings,
):
    """Check that a Gaussian Naive Bayes project trains and stores predictions."""
    normal_queue, _admin_queue, _irr_queue = test_gnb_queue_list
    label_options = test_gnb_labels
    project = test_project_gnb_data_tfidf

    learning_method = project.learning_method
    batch_size = project.batch_size
    starting_training_set = project.get_current_training_set()

    # Point model pickles at a throwaway directory.
    pickle_dir = tmpdir.listdir()[0].mkdir("model_pickles")
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    assert project.classifier == "gnb"
    assert learning_method == "least confident"

    fill_queue(normal_queue, "random")
    assert DataQueue.objects.filter(queue=normal_queue).count() == batch_size

    # Label a whole batch, cycling through the first three labels.
    for position in range(batch_size):
        item = assign_datum(test_profile, project)
        label_data(label_options[position % 3], item, test_profile, 3)

    # The trigger check is made once, with the last labeled item.
    trigger_result = check_and_trigger_model(item)
    assert trigger_result == "model running"

    # A model row exists and its pickle is on disk at the expected path.
    assert_obj_exists(Model, {"project": project})
    trained = Model.objects.get(project=project)
    assert os.path.isfile(trained.pickle_path)

    expected_file = (
        "project_"
        + str(project.pk)
        + "_training_"
        + str(starting_training_set.set_number)
        + ".pkl"
    )
    assert trained.pickle_path == os.path.join(str(pickle_dir), expected_file)

    # One prediction per (datum without labelers, label) pair.
    predictions = DataPrediction.objects.filter(data__project=project)
    unlabeled_total = Data.objects.filter(project=project, labelers=None).count()
    assert len(predictions) == unlabeled_total * project.labels.count()
def label_project(project, profile, num_labels):
    """Label up to ``num_labels`` assignments for ``profile`` and run the model task.

    The first assignments receive the project's labels in order, one label
    each; any remaining assignments receive a randomly chosen label. Every
    label is recorded with a random labeling time between 0 and 25. The model
    task is then applied and its result is stored on the current training
    set as ``celery_task_id``.
    """
    label_pool = project.labels.all()
    training_set = project.get_current_training_set()
    assigned = get_assignments(profile, project, num_labels)

    label_total = len(label_pool)
    for position, item in enumerate(assigned):
        if position < label_total:
            chosen = label_pool[position]
        else:
            chosen = random.choice(label_pool)
        label_data(chosen, item, profile, random.randint(0, 25))

    training_set.celery_task_id = tasks.send_model_task.apply(args=[project.pk])
    training_set.save()
def test_randomforest_classifier(setup_celery,
                                 test_project_randomforest_data_tfidf,
                                 test_randomforest_labels,
                                 test_randomforest_queue_list, test_profile,
                                 test_redis, tmpdir, settings):
    """Check that a random forest project trains and stores predictions."""
    normal_queue, _admin_queue, _irr_queue = test_randomforest_queue_list
    label_options = test_randomforest_labels
    project = test_project_randomforest_data_tfidf

    learning_method = project.learning_method
    batch_size = project.batch_size
    starting_training_set = project.get_current_training_set()

    # Point model pickles at a throwaway directory.
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    assert project.classifier == "random forest"
    assert learning_method == 'least confident'

    fill_queue(normal_queue, 'random')
    assert DataQueue.objects.filter(queue=normal_queue).count() == batch_size

    # Label a whole batch, cycling through the first three labels.
    for position in range(batch_size):
        item = assign_datum(test_profile, project)
        label_data(label_options[position % 3], item, test_profile, 3)

    # The trigger check is made once, with the last labeled item.
    trigger_result = check_and_trigger_model(item)
    assert trigger_result == 'model running'

    # A model row exists and its pickle is on disk at the expected path.
    assert_obj_exists(Model, {'project': project})
    trained = Model.objects.get(project=project)
    assert os.path.isfile(trained.pickle_path)

    expected_file = ('project_' + str(project.pk) + '_training_'
                     + str(starting_training_set.set_number) + '.pkl')
    assert trained.pickle_path == os.path.join(str(pickle_dir), expected_file)

    # One prediction per (datum without labelers, label) pair.
    predictions = DataPrediction.objects.filter(data__project=project)
    unlabeled_total = Data.objects.filter(project=project,
                                          labelers=None).count()
    assert len(predictions) == unlabeled_total * project.labels.count()
def annotate_data(request, data_pk):
    """Annotate a single datum that is assigned to the requesting user.

    Three cases are handled:

    * The datum is in the recycle bin: it is no longer in use, so only the
      user's assignment is removed.
    * The datum already has at least ``project.num_users_irr`` IRR log
      entries: it has already been processed, so the label is only appended
      to the IRR log and the assignment is removed.
    * Otherwise the label is recorded through ``label_data``; for
      reliability (IRR) data the IRR processing step is run as well.

    In every case the model/queue-refill check is run afterwards.

    Args:
        request: The POST request. ``request.data`` must provide ``labelID``
            and ``labeling_time``.
        data_pk: Primary key of the data

    Returns:
        A ``Response`` wrapping an empty dict.

    Raises:
        Data.DoesNotExist / Label.DoesNotExist / AssignedData.DoesNotExist:
            raised by the ``.get()`` lookups when no matching row exists.
    """
    data = Data.objects.get(pk=data_pk)
    project = data.project
    profile = request.user.profile
    response = {}
    label = Label.objects.get(pk=request.data['labelID'])
    labeling_time = request.data['labeling_time']

    num_history = IRRLog.objects.filter(data=data).count()

    # exists() asks the database for presence only instead of counting rows.
    if RecycleBin.objects.filter(data=data).exists():
        # this data is no longer in use. delete the assignment
        AssignedData.objects.get(data=data, profile=profile).delete()
    elif num_history >= project.num_users_irr:
        # the IRR history already has the needed number of labels, so the
        # datum is processed: just add this label to the history.
        IRRLog.objects.create(data=data,
                              profile=profile,
                              label=label,
                              timestamp=timezone.now())
        AssignedData.objects.get(data=data, profile=profile).delete()
    else:
        label_data(label, data, profile, labeling_time)
        if data.irr_ind:
            # if it is reliability data, run processing step
            process_irr_label(data, label)

    # for all data, check if we need to refill queue
    check_and_trigger_model(data, profile)

    return Response(response)
def test_check_and_trigger_model_first_labeled(
    setup_celery, test_project_data, test_labels, test_queue, test_profile
):
    """A single label must not trigger the model or change any model data."""
    training_set_before = test_project_data.get_current_training_set()

    fill_queue(test_queue, orderby="random")
    item = assign_datum(test_profile, test_queue.project)
    label_data(test_labels[0], item, test_profile, 3)

    outcome = check_and_trigger_model(item)
    assert outcome == "no trigger"

    # Training set unchanged; no models, predictions or uncertainties created.
    assert test_project_data.get_current_training_set() == training_set_before
    assert test_project_data.model_set.count() == 0

    predictions = DataPrediction.objects.filter(data__project=test_project_data)
    assert predictions.count() == 0

    uncertainties = DataUncertainty.objects.filter(data__project=test_project_data)
    assert uncertainties.count() == 0

    # Exactly one item has left the queue.
    remaining = DataQueue.objects.filter(queue=test_queue).count()
    assert remaining == TEST_QUEUE_LEN - 1
def test_label_data(db, test_profile, test_queue, test_redis):
    """Labeling a datum records the label and removes the assignment."""
    fill_queue(test_queue, orderby='random')

    item = assign_datum(test_profile, test_queue.project)
    new_label = Label.objects.create(name='test', project=test_queue.project)
    label_data(new_label, item, test_profile, 3)

    # The datum is among the profile's labeled data, with a matching row.
    assert item in test_profile.labeled_data.all()
    expected_row = {
        'data': item,
        'profile': test_profile,
        'label': new_label,
        'time_to_label': 3,
    }
    assert_obj_exists(DataLabel, expected_row)

    # The assignment is gone.
    leftover = AssignedData.objects.filter(profile=test_profile,
                                           data=item,
                                           queue=test_queue)
    assert not leftover.exists()
def test_label_data(db, test_profile, test_queue, test_redis):
    """Check that ``label_data`` stores a DataLabel and clears the assignment."""
    owning_project = test_queue.project
    fill_queue(test_queue, orderby="random")

    assigned = assign_datum(test_profile, owning_project)
    created_label = Label.objects.create(name="test", project=owning_project)
    label_data(created_label, assigned, test_profile, 3)

    # Label recorded for this profile.
    assert assigned in test_profile.labeled_data.all()
    assert_obj_exists(
        DataLabel,
        dict(
            data=assigned,
            profile=test_profile,
            label=created_label,
            time_to_label=3,
        ),
    )

    # Assignment removed.
    still_assigned = AssignedData.objects.filter(
        profile=test_profile, data=assigned, queue=test_queue
    ).exists()
    assert not still_assigned
def test_fleiss_kappa_perc_agreement(
        setup_celery, test_project_all_irr_3_coders_data,
        test_all_irr_3_coders_all_queues, test_profile, test_profile2,
        test_profile3, test_labels_all_irr_3_coders, test_redis, tmpdir,
        settings):
    """Check Fleiss's kappa and percent agreement over a series of scenarios."""
    project = test_project_all_irr_3_coders_data
    label_options = test_labels_all_irr_3_coders
    normal_queue, _admin_queue, irr_queue = test_all_irr_3_coders_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    coders = (test_profile, test_profile2, test_profile3)

    def next_shared_datum():
        """Assign the next IRR datum to all three coders; return the first's."""
        handed_out = [assign_datum(coder, project, "irr") for coder in coders]
        return handed_out[0]

    def assert_no_irr_data_error():
        """fleiss_kappa must raise ValueError mentioning 'No irr data'."""
        with pytest.raises(ValueError) as excinfo:
            fleiss_kappa(project)
        assert 'No irr data' in str(excinfo.value)

    # Nothing labeled yet -> error.
    assert_no_irr_data_error()

    # Only two of the three coders have labeled -> same error.
    item = next_shared_datum()
    label_data(label_options[0], item, test_profile, 3)
    label_data(label_options[1], item, test_profile2, 3)
    assert_no_irr_data_error()

    # All three coders pick different labels.
    # [1 1 1], kappa = -0.5, pa = 0
    label_data(label_options[2], item, test_profile3, 3)
    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 1) == -0.5
    assert perc == 0.0

    # A second datum labeled by only two coders leaves the values unchanged.
    item = next_shared_datum()
    label_data(label_options[0], item, test_profile, 3)
    label_data(label_options[0], item, test_profile2, 3)
    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 1) == -0.5
    assert perc == 0.0

    # The third coder agrees on that second datum.
    # [[1 1 1],[3 0 0]], kappa = 0.0, pa = 0.5
    label_data(label_options[0], item, test_profile3, 3)
    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 2) == 0.0
    assert perc == 0.5

    # Two coders agree, one disagrees.
    # [[1 1 1],[3 0 0],[2 1 0]], kappa = -0.13, pa=0.333
    item = next_shared_datum()
    label_data(label_options[0], item, test_profile, 3)
    label_data(label_options[0], item, test_profile2, 3)
    label_data(label_options[1], item, test_profile3, 3)
    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 2) == -0.13
    assert round(perc, 2) == 0.33

    # Same idea with the majority on the other label.
    # [[1 1 1],[3 0 0],[2 1 0],[1 2 0]], kappa = -0.08, pa=0.25
    item = next_shared_datum()
    label_data(label_options[0], item, test_profile, 3)
    label_data(label_options[1], item, test_profile2, 3)
    label_data(label_options[1], item, test_profile3, 3)
    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 2) == -0.08
    assert round(perc, 2) == 0.25
def test_cohens_kappa_perc_agreement(setup_celery, test_project_half_irr_data,
                                     test_half_irr_all_queues, test_profile,
                                     test_profile2, test_labels_half_irr,
                                     test_redis, tmpdir, settings):
    """Check Cohen's kappa and percent agreement across several configurations.

    Covers: no IRR data yet (error), only one label represented (error),
    perfect agreement, and a mix of agreement and disagreement.
    """
    project = test_project_half_irr_data
    label_options = test_labels_half_irr
    normal_queue, _admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    def label_next_pair(first_choice, second_choice):
        """Assign the next IRR datum to both coders and record their labels."""
        item = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        label_data(first_choice, item, test_profile, 3)
        label_data(second_choice, item, test_profile2, 3)

    # Nothing labeled yet -> error.
    with pytest.raises(ValueError) as excinfo:
        cohens_kappa(project)
    assert 'No irr data' in str(excinfo.value)

    # Both coders give two items the same single label.
    for _ in range(2):
        label_next_pair(label_options[0], label_options[0])

    # Kappa needs at least two distinct labels to be represented.
    with pytest.raises(ValueError) as excinfo:
        cohens_kappa(project)
    assert 'Need at least two labels represented' in str(excinfo.value)

    # A third agreed item with a different label -> perfect agreement.
    label_next_pair(label_options[1], label_options[1])
    kappa, perc = cohens_kappa(project)
    assert kappa == 1.0
    assert perc == 1.0

    # Two disagreements on top of the three agreements.
    label_next_pair(label_options[1], label_options[2])
    label_next_pair(label_options[0], label_options[1])
    kappa, perc = cohens_kappa(project)
    assert round(kappa, 3) == 0.333
    assert perc == 0.6
def test_percent_agreement_table(setup_celery,
                                 test_project_all_irr_3_coders_data,
                                 test_all_irr_3_coders_all_queues,
                                 test_profile, test_profile2, test_profile3,
                                 test_labels_all_irr_3_coders, test_redis,
                                 tmpdir, settings):
    """Check the pairwise percent agreement table as coders label and skip."""
    project = test_project_all_irr_3_coders_data
    for coder in (test_profile2, test_profile3):
        ProjectPermissions.objects.create(profile=coder,
                                          project=project,
                                          permission='CODER')
    label_options = test_labels_all_irr_3_coders
    normal_queue, _admin_queue, irr_queue = test_all_irr_3_coders_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    everyone = (test_profile, test_profile2, test_profile3)

    def next_shared_datum():
        """Assign the next IRR datum to all three coders; return the first's."""
        handed_out = [assign_datum(coder, project, "irr") for coder in everyone]
        return handed_out[0]

    def first_three_percentages():
        """Return the first three "Percent Agreement" cells of a fresh table."""
        column = pd.DataFrame(
            perc_agreement_table_data(project))["Percent Agreement"].tolist()
        return column[0], column[1], column[2]

    # Before any labeling every pair reports "No samples".
    initial_column = pd.DataFrame(
        perc_agreement_table_data(project))["Percent Agreement"].tolist()
    assert len(initial_column) == 3
    assert (initial_column[0], initial_column[1],
            initial_column[2]) == ("No samples", "No samples", "No samples")

    # Everyone gives the same label -> 100% for all pairs.
    item = next_shared_datum()
    for coder in everyone:
        label_data(label_options[0], item, coder, 3)
    assert first_three_percentages() == ("100.0%", "100.0%", "100.0%")

    # user1 == user2 != user3.
    # Per the original author's note the rows go in the order
    # [prof2, prof3], [prof2, prof], [prof3, prof].
    item = next_shared_datum()
    label_data(label_options[0], item, test_profile, 3)
    label_data(label_options[0], item, test_profile2, 3)
    label_data(label_options[1], item, test_profile3, 3)
    assert first_three_percentages() == ("50.0%", "100.0%", "50.0%")

    # All three skip the next datum; the lower percentages asserted here
    # show the skips count as disagreement.
    item = next_shared_datum()
    for coder in everyone:
        skip_data(item, coder)
    assert first_three_percentages() == ("33.3%", "66.7%", "33.3%")

    # Only two coders label the next datum; the table must not change.
    item = next_shared_datum()
    label_data(label_options[0], item, test_profile, 3)
    label_data(label_options[0], item, test_profile2, 3)
    assert first_three_percentages() == ("33.3%", "66.7%", "33.3%")
def test_heatmap_data(setup_celery, test_project_half_irr_data,
                      test_half_irr_all_queues, test_profile, test_profile2,
                      test_labels_half_irr, test_redis, tmpdir, settings):
    """Check that the IRR heatmap accurately reflects the labeled data.

    The heatmap returned by ``irr_heatmap_data`` is indexed by
    ``"<pk>_<pk>"`` keys; each entry converts to a frame with ``label1``,
    ``label2`` and ``count`` columns. The test checks the user-pair entry and
    both same-user entries as the two coders label, skip and agree.
    """
    project = test_project_half_irr_data
    ProjectPermissions.objects.create(profile=test_profile,
                                      project=project,
                                      permission='CODER')
    ProjectPermissions.objects.create(profile=test_profile2,
                                      project=project,
                                      permission='CODER')
    labels = test_labels_half_irr
    normal_queue, admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    combo1 = str(test_profile.pk) + "_" + str(test_profile2.pk)
    same1 = str(test_profile.pk) + "_" + str(test_profile.pk)
    same2 = str(test_profile2.pk) + "_" + str(test_profile2.pk)

    def cell_count(frame, first_label, second_label):
        """Return the count for the (label1, label2) cell of ``frame``.

        Both halves of the row mask come from the same frame; the original
        skip/skip lookup mixed columns from two different frames.
        """
        mask = (frame["label1"] == first_label) & (frame["label2"] == second_label)
        return frame.loc[mask]["count"].tolist()[0]

    # don't label anything. The heatmap should have all zeros for user pair
    heatmap = irr_heatmap_data(project)
    assert combo1 in heatmap
    counts = pd.DataFrame(heatmap[combo1])["count"].tolist()
    assert np.all(np.equal(counts, [0] * len(counts)))

    # have one user label 3 things (one per label) and the other skip them.
    for i in range(3):
        datum = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        label_data(labels[i], datum, test_profile, 3)
        skip_data(datum, test_profile2)

    # check that the user1-user1 map has exactly one count per diagonal cell
    heatmap = irr_heatmap_data(project)
    same_frame = pd.DataFrame(heatmap[same1])
    for label in (labels[0], labels[1], labels[2]):
        assert cell_count(same_frame, label.name, label.name) == 1
    assert np.sum(same_frame["count"].tolist()) == 3

    # check the second user only has 3 in the skip-skip spot
    same_frame2 = pd.DataFrame(heatmap[same2])
    assert cell_count(same_frame2, "Skip", "Skip") == 3
    assert np.sum(same_frame2["count"].tolist()) == 3

    # check that the between-user heatmap has label-skip = 1 for each label
    heatmap = irr_heatmap_data(project)
    pair_frame = pd.DataFrame(heatmap[combo1])
    for label in (labels[0], labels[1], labels[2]):
        assert cell_count(pair_frame, label.name, "Skip") == 1
    assert np.sum(pair_frame["count"].tolist()) == 3

    # have users agree on 5 datums (labels cycle 0,1,2,0,1), check heatmap
    for i in range(5):
        datum = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        label_data(labels[i % 3], datum, test_profile, 3)
        label_data(labels[i % 3], datum, test_profile2, 3)

    heatmap = irr_heatmap_data(project)
    pair_frame = pd.DataFrame(heatmap[combo1])
    assert cell_count(pair_frame, labels[0].name, labels[0].name) == 2
    assert cell_count(pair_frame, labels[1].name, labels[1].name) == 2
    assert cell_count(pair_frame, labels[2].name, labels[2].name) == 1
    assert np.sum(pair_frame["count"].tolist()) == 8

    # have one user label something alone; the totals must not change
    datum = assign_datum(test_profile, project, "irr")
    label_data(labels[0], datum, test_profile, 3)

    heatmap = irr_heatmap_data(project)
    same_map = heatmap[same1]
    assert np.sum(pd.DataFrame(same_map)["count"].tolist()) == 8
    pair_frame = pd.DataFrame(heatmap[combo1])
    assert np.sum(pair_frame["count"].tolist()) == 8
def test_all_irr(setup_celery, test_project_all_irr_3_coders_data,
                 test_all_irr_3_coders_all_queues, test_profile, test_profile2,
                 test_profile3, test_labels_all_irr_3_coders, test_redis,
                 tmpdir, settings):
    """Check the 100% IRR workflow when three coders must handle each datum."""
    project = test_project_all_irr_3_coders_data
    label_options = test_labels_all_irr_3_coders
    normal_queue, admin_queue, irr_queue = test_all_irr_3_coders_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    def take_three(coder):
        """Assign three IRR items to ``coder`` and return them in order."""
        return [assign_datum(coder, project, "irr") for _ in range(3)]

    # The whole batch sits in the IRR queue and the normal queue is empty.
    assert DataQueue.objects.filter(queue=irr_queue).count() == project.batch_size
    assert DataQueue.objects.filter(queue=normal_queue).count() == 0

    # Every queued item is flagged as IRR data.
    flagged = DataQueue.objects.filter(queue=irr_queue, data__irr_ind=True)
    assert flagged.count() == project.batch_size

    # First coder labels three items; all three remain in the IRR queue.
    first_a, first_b, first_c = take_three(test_profile)
    assert first_a.pk != first_b.pk
    assert first_c.pk != first_b.pk
    for item in (first_a, first_b, first_c):
        label_data(label_options[0], item, test_profile, 3)
    assert DataQueue.objects.filter(
        queue=irr_queue, data__in=[first_a, first_b, first_c]).count() == 3

    # Second coder gets the same three items and skips them all: they stay
    # in the IRR queue, are absent from the admin queue, and have three
    # IRRLog rows and three DataLabel rows in total.
    second_a, second_b, second_c = take_three(test_profile2)
    assert first_a.pk == second_a.pk
    assert first_b.pk == second_b.pk
    assert first_c.pk == second_c.pk
    for item in (second_a, second_b, second_c):
        skip_data(item, test_profile2)

    second_items = [second_a, second_b, second_c]
    assert DataQueue.objects.filter(data__in=second_items,
                                    queue=irr_queue).count() == 3
    assert DataQueue.objects.filter(data__in=second_items,
                                    queue=admin_queue).count() == 0
    assert IRRLog.objects.filter(data__in=second_items).count() == 3
    assert DataLabel.objects.filter(data__in=second_items).count() == 3

    # Third coder labels the same three items: they move to the admin queue,
    # leave the IRR queue, are fully logged (nine rows) and no longer have
    # DataLabel rows.
    third_a, third_b, third_c = take_three(test_profile3)
    assert first_a.pk == third_a.pk
    assert first_b.pk == third_b.pk
    assert first_c.pk == third_c.pk
    label_data(label_options[0], third_a, test_profile3, 3)
    label_data(label_options[1], third_b, test_profile3, 3)
    label_data(label_options[0], third_c, test_profile3, 3)

    third_items = [third_a, third_b, third_c]
    assert DataQueue.objects.filter(data__in=third_items,
                                    queue=irr_queue).count() == 0
    assert DataQueue.objects.filter(data__in=third_items,
                                    queue=admin_queue).count() == 3
    assert IRRLog.objects.filter(data__in=third_items).count() == 9
    assert DataLabel.objects.filter(data__in=third_items).count() == 0