def test_no_irr_all_queues(db, test_project_no_irr_data):
    """Create the default, ``admin`` and ``irr`` queues for the no-IRR test project.

    The default and ``admin`` queues use ``TEST_QUEUE_LEN``; the ``irr`` queue
    uses ``MAX_DATA_LEN``.

    Returns:
        A list ``[default_queue, admin_queue, irr_queue]``.
    """
    project = test_project_no_irr_data
    # List elements are evaluated left to right, so creation order is preserved.
    return [
        add_queue(project, TEST_QUEUE_LEN),
        add_queue(project, TEST_QUEUE_LEN, type="admin"),
        add_queue(project, MAX_DATA_LEN, type="irr"),
    ]
def test_gnb_queue_list(db, test_project_gnb_data_tfidf):
    """Create the default, ``admin`` and ``irr`` queues for the GNB test project.

    Returns:
        A list ``[default_queue, admin_queue, irr_queue]``.
    """
    project = test_project_gnb_data_tfidf
    # List elements are evaluated left to right, so creation order is preserved.
    return [
        add_queue(project, TEST_QUEUE_LEN),
        add_queue(project, TEST_QUEUE_LEN, type="admin"),
        add_queue(project, MAX_DATA_LEN, type="irr"),
    ]
def test_init_redis_one_empty_queue(db, test_project, test_redis):
    """Rebuild Redis via ``init_redis`` with a single, unfilled queue in the DB.

    Redis is flushed first; the outcome is checked with
    ``assert_redis_matches_db``.
    """
    queue_length = 10
    add_queue(test_project, queue_length)

    # Start from an empty Redis so init_redis() has to rebuild everything.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_add_queue_no_profile(test_project):
    """A queue added without a profile is stored with ``profile`` set to ``None``."""
    expected_length = 10
    add_queue(test_project, expected_length)

    expected_fields = {
        'project': test_project,
        'length': expected_length,
        'profile': None,
    }
    assert_obj_exists(Queue, expected_fields)
def test_add_queue_profile(test_project, test_profile):
    """A queue added with a profile is stored with that profile attached."""
    expected_length = 10
    add_queue(test_project, expected_length, profile=test_profile)

    expected_fields = {
        'project': test_project,
        'length': expected_length,
        'profile': test_profile,
    }
    assert_obj_exists(Queue, expected_fields)
def test_randomforest_queue_list(db, test_project_randomforest_data_tfidf):
    """Create the ``normal``, ``admin`` and ``irr`` queues for the random-forest test project.

    All three queues are created with length ``TEST_QUEUE_LEN``.

    Returns:
        A list ``[normal_queue, admin_queue, irr_queue]``.
    """
    project = test_project_randomforest_data_tfidf
    # NOTE(review): sibling queue-list helpers size the "irr" queue with
    # MAX_DATA_LEN; this one uses TEST_QUEUE_LEN -- confirm that is intended.
    return [
        add_queue(project, TEST_QUEUE_LEN, type=queue_type)
        for queue_type in ("normal", "admin", "irr")
    ]
def test_init_redis_multiple_queues(db, test_project_data, test_redis):
    """Rebuild Redis via ``init_redis`` with one filled and one unfilled queue in the DB.

    Redis is flushed first; the outcome is checked with
    ``assert_redis_matches_db``.
    """
    queue_length = 10

    filled_queue = add_queue(test_project_data, queue_length)
    fill_queue(filled_queue, orderby="random")
    # Second queue on the same project is deliberately left unfilled.
    add_queue(test_project_data, queue_length)

    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_get_nonempty_queue_noprofile(db, test_project_data):
    """Check which queue ``get_nonempty_queue`` returns as queues are filled.

    With two unfilled queues the result is ``None``; after filling only the
    second it is the second; after also filling the first it is the first.
    """
    length = 10
    first_queue = add_queue(test_project_data, length)
    second_queue = add_queue(test_project_data, length)

    # Nothing filled yet.
    assert get_nonempty_queue(test_project_data) is None

    # Only the later-created queue has data.
    fill_queue(second_queue, orderby="random")
    assert get_nonempty_queue(test_project_data) == second_queue

    # Both have data: the earlier-created queue is returned.
    fill_queue(first_queue, orderby="random")
    assert get_nonempty_queue(test_project_data) == first_queue
def test_get_nonempty_profile_queue(db, test_project_data, test_profile):
    """Check which profile queue ``get_nonempty_queue`` returns as queues are filled.

    A profile-less queue plus two queues for ``test_profile`` are created. The
    lookup with ``profile=test_profile`` yields ``None`` while both profile
    queues are unfilled, then the second once it is filled, then the first
    once it is filled as well.
    """
    length = 10
    # Queue without a profile; created but never filled.
    add_queue(test_project_data, length)
    first_profile_queue = add_queue(test_project_data, length, profile=test_profile)
    second_profile_queue = add_queue(test_project_data, length, profile=test_profile)

    assert get_nonempty_queue(test_project_data, profile=test_profile) is None

    fill_queue(second_profile_queue, orderby="random")
    found = get_nonempty_queue(test_project_data, profile=test_profile)
    assert found == second_profile_queue

    fill_queue(first_profile_queue, orderby="random")
    found = get_nonempty_queue(test_project_data, profile=test_profile)
    assert found == first_profile_queue
def seed_project(creator, name, description, data_file, label_list, perm_list, classifier):
    """Create a project together with its labels, coder permissions, queues and data.

    Args:
        creator: Stored as the project's ``creator``.
        name: Stored as the project's ``name``.
        description: Stored as the project's ``description``.
        data_file: Passed as ``file`` to ``read_test_data_backend`` to load the data.
        label_list: Iterable of label names; one ``Label`` is created per entry.
        perm_list: Iterable of profiles; each receives a ``"CODER"``
            ``ProjectPermissions`` row on the new project.
        classifier: Stored as the project's ``classifier``.

    Returns:
        The newly created ``Project``.
    """
    project = Project.objects.create(
        name=name, description=description, creator=creator, classifier=classifier
    )
    TrainingSet.objects.create(project=project, set_number=0)

    # Use dedicated loop names: the previous loop variable was called ``name``
    # and overwrote the project-name parameter.
    labels = [
        Label.objects.create(name=label_name, project=project)
        for label_name in label_list
    ]
    permissions = [
        ProjectPermissions.objects.create(
            profile=profile, project=project, permission="CODER"
        )
        for profile in perm_list
    ]

    # Batch size is ten items per label.
    batch_size = 10 * len(labels)
    project.batch_size = batch_size
    project.save()

    # One more than the number of explicit coders (presumably the creator).
    num_coders = len(permissions) + 1
    q_length = find_queue_length(batch_size, num_coders)
    queue = add_queue(project=project, length=q_length, type="normal")

    # Data
    f_data = read_test_data_backend(file=data_file)
    data_length = len(f_data)
    # The admin queue is sized to the loaded data; the irr queue gets a fixed
    # large length.
    add_queue(project=project, length=data_length, type="admin")
    irr_queue = add_queue(project=project, length=2000000, type="irr")

    new_df = add_data(project, f_data)
    fill_queue(queue, irr_queue=irr_queue, orderby="random", batch_size=batch_size)
    save_data_file(new_df, project.pk)

    tasks.send_tfidf_creation_task.apply(args=[project.pk])
    tasks.send_check_and_trigger_model_task.apply(args=[project.pk])

    return project
def test_init_redis_one_nonempty_queue(db, test_project_data, test_redis):
    """Rebuild Redis via ``init_redis`` with a single filled queue in the DB.

    Redis is flushed first; the outcome is checked with
    ``assert_redis_matches_db``.
    """
    queue_length = 10
    filled_queue = add_queue(test_project_data, queue_length)
    fill_queue(filled_queue, orderby="random")

    # Start from an empty Redis so init_redis() has to rebuild everything.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_pop_only_affects_one_queue(db, test_project_data, test_redis):
    """Popping from one filled queue must leave a second filled queue untouched.

    After one pop, the popped queue's Redis list is one shorter while its Redis
    set and DB relation still hold the full count; the other queue keeps the
    full count everywhere.
    """
    length = 10
    popped_queue = add_queue(test_project_data, length)
    untouched_queue = add_queue(test_project_data, length)

    fill_queue(popped_queue, orderby="random")
    fill_queue(untouched_queue, orderby="random")

    popped = pop_queue(popped_queue)
    assert isinstance(popped, Data)

    # Queue that was popped: only the Redis list shrinks.
    popped_key = str(popped_queue.pk)
    assert test_redis.llen("queue:" + popped_key) == length - 1
    assert test_redis.scard("set:" + popped_key) == length
    assert popped_queue.data.count() == length

    # Other queue: nothing changes.
    untouched_key = str(untouched_queue.pk)
    assert test_redis.llen("queue:" + untouched_key) == length
    assert test_redis.scard("set:" + untouched_key) == length
    assert untouched_queue.data.count() == length
def test_pop_empty_queue(db, test_project, test_redis):
    """Popping an unfilled queue returns ``None`` and creates no Redis list for it."""
    empty_queue = add_queue(test_project, 10)

    popped = pop_queue(empty_queue)

    assert popped is None
    redis_key = "queue:" + str(empty_queue.pk)
    assert not test_redis.exists(redis_key)
    assert empty_queue.data.count() == 0
def test_pop_nonempty_queue(db, test_project_data, test_redis):
    """Popping a filled queue returns a ``Data`` item and shortens only the Redis list.

    The queue's Redis set and DB relation still report the full count after
    the pop.
    """
    length = 10
    filled_queue = add_queue(test_project_data, length)
    fill_queue(filled_queue, orderby='random')

    popped = pop_queue(filled_queue)
    assert isinstance(popped, Data)

    pk_text = str(filled_queue.pk)
    assert test_redis.llen('queue:' + pk_text) == length - 1
    assert test_redis.scard('set:' + pk_text) == length
    assert filled_queue.data.count() == length
def test_pop_first_nonempty_queue_multiple_queues(db, test_project_data, test_queue, test_redis):
    """Check which queue ``pop_first_nonempty_queue`` pops from with two queues present.

    While only the newly added queue is filled, that queue is used; once
    ``test_queue`` is filled too, ``test_queue`` is used.
    """
    extra_queue = add_queue(test_project_data, 10)
    fill_queue(extra_queue, orderby='random')

    # Only the extra queue holds data at this point.
    popped_from, _datum = pop_first_nonempty_queue(test_project_data)
    assert isinstance(popped_from, Queue)
    assert popped_from == extra_queue

    # With both queues filled, the pop comes from test_queue.
    fill_queue(test_queue, orderby='random')
    popped_from, _datum = pop_first_nonempty_queue(test_project_data)
    assert isinstance(popped_from, Queue)
    assert popped_from == test_queue
def test_init_redis_multiple_projects(db, test_project_data, test_redis, test_profile):
    """Rebuild Redis via ``init_redis`` across two projects with mixed queue states.

    Each project gets one filled and one unfilled queue. Redis is flushed,
    rebuilt, and the outcome is checked with ``assert_redis_matches_db``.
    """
    queue_length = 10

    # Project 1: one filled queue, one left unfilled.
    first_project_queue = add_queue(test_project_data, queue_length)
    fill_queue(first_project_queue, orderby="random")
    add_queue(test_project_data, queue_length)

    # Project 2: created here and loaded from the unlabeled test file.
    second_project = create_project("test_project2", test_profile)
    second_project_data = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv"
    )
    add_data(second_project, second_project_data)

    # Same mix for project 2: one filled queue, one left unfilled.
    second_project_queue = add_queue(second_project, queue_length)
    fill_queue(second_project_queue, orderby="random")
    add_queue(second_project, queue_length)

    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def done(self, form_list, form_dict, **kwargs):
    """Persist the project described by the submitted forms and redirect to it.

    Reads the ``project``, ``labels``, ``permissions``, ``advanced``, ``data``
    and ``codebook`` entries of ``form_dict``. ``form_list`` and ``kwargs`` are
    not used in this body.

    Inside ``transaction.atomic()`` it saves the project, its codebook file
    path, the advanced settings, an initial training set, the labels and
    permissions, creates three queues (default, ``admin``, ``irr``) and uploads
    the data.

    Returns:
        ``HttpResponseRedirect`` to ``proj_obj.get_absolute_url()``.
    """
    proj = form_dict['project']
    labels = form_dict['labels']
    permissions = form_dict['permissions']
    advanced = form_dict['advanced']
    data = form_dict['data']
    codebook_data = form_dict['codebook']

    # NOTE(review): extent of the atomic block was reconstructed from
    # whitespace-collapsed source -- confirm against the original layout.
    with transaction.atomic():
        # Project
        proj_obj = proj.save(commit=False)
        advanced_data = advanced.cleaned_data

        proj_obj.creator = self.request.user.profile
        # Advanced Options
        # First save: gives the project a primary key, which the codebook
        # file helper below takes as an argument.
        proj_obj.save()
        proj_pk = proj_obj.pk

        # Save the codebook file
        cb_data = codebook_data.cleaned_data['data']
        if cb_data != "":
            cb_filepath = save_codebook_file(cb_data, proj_pk)
        else:
            # No codebook supplied: store an empty path.
            cb_filepath = ""
        proj_obj.codebook_file = cb_filepath

        # A batch size of 0 means "derive it": ten items per label form that
        # is non-empty and not marked for deletion.
        if advanced_data["batch_size"] == 0:
            batch_size = 10 * len([x for x in labels if x.cleaned_data != {} and not x.cleaned_data['DELETE']])
        else:
            batch_size = advanced_data["batch_size"]
        proj_obj.batch_size = batch_size
        proj_obj.learning_method = advanced_data["learning_method"]
        proj_obj.percentage_irr = advanced_data["percentage_irr"]
        proj_obj.num_users_irr = advanced_data["num_users_irr"]
        proj_obj.classifier = advanced_data["classifier"]
        # Second save: persists the codebook path and advanced settings.
        proj_obj.save()

        # Training Set
        TrainingSet.objects.create(project=proj_obj, set_number=0)

        # Labels
        labels.instance = proj_obj
        labels.save()

        # Permissions
        permissions.instance = proj_obj
        permissions.save()

        # Queue
        # Count permission forms that are non-empty and not marked for
        # deletion, plus one (presumably the project creator).
        num_coders = len([x for x in permissions if x.cleaned_data != {} and not x.cleaned_data['DELETE']]) + 1
        q_length = find_queue_length(batch_size, num_coders)

        queue = add_queue(project=proj_obj, length=q_length)

        # Data
        f_data = data.cleaned_data['data']

        # The admin and irr queues both get a fixed large length.
        add_queue(project=proj_obj, length=2000000, type="admin")
        irr_queue = add_queue(project=proj_obj, length=2000000, type="irr")
        upload_data(f_data, proj_obj, queue, irr_queue, batch_size)

    return HttpResponseRedirect(proj_obj.get_absolute_url())
def test_add_queue_no_profile(test_project):
    """A queue added without a profile is stored with ``profile`` set to ``None``."""
    expected_length = 10
    add_queue(test_project, expected_length)

    expected_fields = {
        "project": test_project,
        "length": expected_length,
        "profile": None,
    }
    assert_obj_exists(Queue, expected_fields)
def test_add_queue_profile(test_project, test_profile):
    """A queue added with a profile is stored with that profile attached."""
    expected_length = 10
    add_queue(test_project, expected_length, profile=test_profile)

    expected_fields = {
        "project": test_project,
        "length": expected_length,
        "profile": test_profile,
    }
    assert_obj_exists(Queue, expected_fields)
def test_admin_queue(db, test_project_data):
    """Create and return an ``admin``-type queue on the test project with data.

    The queue length is ``TEST_QUEUE_LEN``.
    """
    admin_queue = add_queue(test_project_data, TEST_QUEUE_LEN, type="admin")
    return admin_queue
def test_queue_labeled(db, test_project_labeled):
    """Create and return a ``normal``-type queue on the labeled test project.

    The queue length is ``TEST_QUEUE_LEN``.
    """
    normal_queue = add_queue(test_project_labeled, TEST_QUEUE_LEN, type="normal")
    return normal_queue
def test_queue(db, test_project_data):
    """Create and return a default-type queue on the test project with data.

    The queue length is ``TEST_QUEUE_LEN``.
    """
    default_queue = add_queue(test_project_data, TEST_QUEUE_LEN)
    return default_queue
def test_irr_queue(db, test_project_data):
    """Create and return an ``irr``-type queue on the test project with data.

    The queue length is ``MAX_DATA_LEN``.
    """
    irr_queue = add_queue(test_project_data, MAX_DATA_LEN, type="irr")
    return irr_queue
def test_irr_queue_labeled(db, test_project_labeled):
    """Create and return an ``irr``-type queue on the labeled test project.

    The queue length is ``MAX_DATA_LEN``.
    """
    irr_queue = add_queue(test_project_labeled, MAX_DATA_LEN, type="irr")
    return irr_queue
def test_admin_queue_labeled(db, test_project_labeled):
    """Create and return an ``admin``-type queue on the labeled test project.

    The queue length is ``TEST_QUEUE_LEN``.
    """
    admin_queue = add_queue(test_project_labeled, TEST_QUEUE_LEN, type="admin")
    return admin_queue
def test_profile_queue(db, test_profile, test_project_data):
    """Create and return a queue on the data test project tied to ``test_profile``.

    The queue length is ``TEST_QUEUE_LEN``.
    """
    profile_queue = add_queue(test_project_data, TEST_QUEUE_LEN, profile=test_profile)
    return profile_queue
def test_profile_queue2(db, test_profile2, test_project_data):
    """Create and return a queue on the data test project tied to ``test_profile2``.

    Intended for tests that need several profiles/queues on the same project.
    The queue length is ``TEST_QUEUE_LEN``.
    """
    second_profile_queue = add_queue(
        test_project_data, TEST_QUEUE_LEN, profile=test_profile2
    )
    return second_profile_queue
def test_half_irr_all_queues(db, test_project_half_irr):
    """Create the default, ``admin`` and ``irr`` queues for the half-IRR test project.

    The default and ``admin`` queues use ``TEST_QUEUE_LEN``; the ``irr`` queue
    uses ``MAX_DATA_LEN``.

    Returns:
        A list ``[default_queue, admin_queue, irr_queue]``.
    """
    project = test_project_half_irr
    # List elements are evaluated left to right, so creation order is preserved.
    return [
        add_queue(project, TEST_QUEUE_LEN),
        add_queue(project, TEST_QUEUE_LEN, type="admin"),
        add_queue(project, MAX_DATA_LEN, type="irr"),
    ]