def test_init_redis_one_empty_queue(db, test_project, test_redis):
    """A single empty queue should round-trip through init_redis cleanly."""
    add_queue(test_project, 10)

    # Wipe redis, then rebuild it from the database state.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_init_redis_one_nonempty_queue(db, test_project_data, test_redis):
    """init_redis should restore a queue that already contains data."""
    nonempty_queue = add_queue(test_project_data, 10)
    fill_queue(nonempty_queue, orderby="random")

    # Wipe redis, then rebuild it from the database state.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_check_and_trigger_queue_changes_success(
        setup_celery, test_project_labeled_and_tfidf, test_queue_labeled,
        test_irr_queue_labeled, test_redis, tmpdir, settings, test_profile2):
    """Triggering the model after a coder is added should retrain, refill
    both queues at a recomputed per-coder length, and advance the training
    set."""
    project = test_project_labeled_and_tfidf
    test_queue = test_queue_labeled
    initial_training_set = project.get_current_training_set()

    # Route model pickles into a per-test temporary directory.
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    # Add another user to permissions
    ProjectPermissions.objects.create(profile=test_profile2,
                                      project=project,
                                      permission='CODER')

    labeled_datum = DataLabel.objects.filter(data__project=project).first().data
    status = check_and_trigger_model(labeled_datum)
    assert status == 'model running'

    # A model record exists and its pickle landed at the expected path.
    assert_obj_exists(Model, {'project': project})
    trained_model = Model.objects.get(project=project)
    assert os.path.isfile(trained_model.pickle_path)
    expected_path = os.path.join(
        str(pickle_dir),
        'project_' + str(project.pk) + '_training_'
        + str(initial_training_set.set_number) + '.pkl')
    assert trained_model.pickle_path == expected_path

    # One prediction per label for every datum nobody has labeled yet.
    predictions = DataPrediction.objects.filter(data__project=project)
    unlabeled_count = Data.objects.filter(project=project,
                                          labelers=None).count()
    assert len(predictions) == unlabeled_count * project.labels.count()

    # Both queues together hold a full batch and redis mirrors the DB.
    batch_size = project.batch_size
    normal_queue = project.queue_set.get(type="normal")
    irr_queue = project.queue_set.get(type="irr")
    assert (normal_queue.data.count() + irr_queue.data.count()) == batch_size
    assert_redis_matches_db(test_redis)

    # Queue length is recomputed for the new coder count (+1 for the owner).
    num_coders = len(project.projectpermissions_set.all()) + 1
    assert normal_queue.length == find_queue_length(batch_size, num_coders)

    # Queue contents stay unlabeled and sorted by descending least-confident.
    ordered = get_ordered_data(test_queue.data.all(), 'least confident')
    last_lc = ordered[0].datauncertainty_set.get().least_confident
    for item in ordered:
        assert len(item.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {'data': item})
        current_lc = item.datauncertainty_set.get().least_confident
        assert current_lc <= last_lc
        last_lc = current_lc

    assert (DataQueue.objects.filter(queue=test_queue).count()
            + DataQueue.objects.filter(queue=test_irr_queue_labeled).count()
            ) == batch_size

    # The project moved on to the next training set.
    assert project.get_current_training_set() != initial_training_set
    assert (project.get_current_training_set().set_number
            == initial_training_set.set_number + 1)
def test_fill_queue_random_predicted_data(test_project_predicted_data,
                                          test_queue, test_redis):
    """Random fill should load the queue to capacity with unlabeled data
    that carries uncertainty records, keeping redis in sync."""
    fill_queue(test_queue, 'random')

    assert_redis_matches_db(test_redis)
    assert test_queue.data.count() == test_queue.length

    for queued in test_queue.data.all():
        assert len(queued.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {'data': queued})
def test_init_redis_multiple_queues(db, test_project_data, test_redis):
    """init_redis should handle a mix of filled and empty queues."""
    filled_queue = add_queue(test_project_data, 10)
    fill_queue(filled_queue, orderby="random")
    add_queue(test_project_data, 10)  # second queue stays empty

    # Wipe redis, then rebuild it from the database state.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_model_task(
    test_project_labeled_and_tfidf,
    test_queue_labeled,
    test_irr_queue_labeled,
    test_redis,
    tmpdir,
    settings,
):
    """Run send_model_task end to end: a model is trained and pickled,
    predictions are generated, both queues are refilled, and the training
    set advances by one."""
    project = test_project_labeled_and_tfidf
    queue = test_queue_labeled
    starting_training_set = project.get_current_training_set()
    starting_queue_length = queue.length

    # Route model pickles into a per-test temporary directory.
    pickle_dir = tmpdir.listdir()[0].mkdir("model_pickles")
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    tasks.send_model_task.delay(project.pk).get()

    # A model record exists and its pickle landed at the expected path.
    assert_obj_exists(Model, {"project": project})
    trained_model = Model.objects.get(project=project)
    assert os.path.isfile(trained_model.pickle_path)
    expected_path = os.path.join(
        str(pickle_dir),
        "project_" + str(project.pk) + "_training_"
        + str(starting_training_set.set_number) + ".pkl",
    )
    assert trained_model.pickle_path == expected_path

    # One prediction per label for every datum nobody has labeled yet.
    predictions = DataPrediction.objects.filter(data__project=project)
    unlabeled_count = Data.objects.filter(project=project,
                                          labelers=None).count()
    assert len(predictions) == unlabeled_count * project.labels.count()

    # Both queues together were refilled and redis mirrors the database.
    assert (queue.data.count()
            + test_irr_queue_labeled.data.count()) == queue.length
    assert_redis_matches_db(test_redis)

    # The queue capacity itself did not change.
    assert queue.length == starting_queue_length

    # Queue contents stay unlabeled and sorted by descending least-confident.
    ordered = get_ordered_data(queue.data.all(), "least confident")
    last_lc = ordered[0].datauncertainty_set.get().least_confident
    for item in ordered:
        assert len(item.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {"data": item})
        current_lc = item.datauncertainty_set.get().least_confident
        assert current_lc <= last_lc
        last_lc = current_lc

    # The project moved on to the next training set.
    assert project.get_current_training_set() != starting_training_set
    assert (project.get_current_training_set().set_number
            == starting_training_set.set_number + 1)
def test_check_and_trigger_batched_success(setup_celery,
                                           test_project_labeled_and_tfidf,
                                           test_queue_labeled,
                                           test_irr_queue_labeled,
                                           test_redis, tmpdir, settings):
    """check_and_trigger_model on a labeled datum should kick off training,
    produce predictions, refill both queues to a full batch, and advance the
    training set."""
    project = test_project_labeled_and_tfidf
    test_queue = test_queue_labeled
    initial_training_set = project.get_current_training_set()
    initial_queue_size = test_queue.length

    # Route model pickles into a per-test temporary directory.
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    labeled_datum = DataLabel.objects.filter(data__project=project).first().data
    status = check_and_trigger_model(labeled_datum)
    assert status == 'model running'

    # A model record exists and its pickle landed at the expected path.
    assert_obj_exists(Model, {'project': project})
    trained_model = Model.objects.get(project=project)
    assert os.path.isfile(trained_model.pickle_path)
    expected_path = os.path.join(
        str(pickle_dir),
        'project_' + str(project.pk) + '_training_'
        + str(initial_training_set.set_number) + '.pkl')
    assert trained_model.pickle_path == expected_path

    # One prediction per label for every datum nobody has labeled yet.
    predictions = DataPrediction.objects.filter(data__project=project)
    unlabeled_count = Data.objects.filter(project=project,
                                          labelers=None).count()
    assert len(predictions) == unlabeled_count * project.labels.count()

    # Both queues together were refilled and redis mirrors the database.
    assert (test_queue.data.count()
            + test_irr_queue_labeled.data.count()) == test_queue.length
    assert_redis_matches_db(test_redis)
    assert test_queue.length == initial_queue_size

    # Queue contents stay unlabeled and sorted by descending least-confident.
    ordered = get_ordered_data(test_queue.data.all(), 'least confident')
    last_lc = ordered[0].datauncertainty_set.get().least_confident
    for item in ordered:
        assert len(item.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {'data': item})
        current_lc = item.datauncertainty_set.get().least_confident
        assert current_lc <= last_lc
        last_lc = current_lc

    assert (DataQueue.objects.filter(queue=test_queue).count()
            + DataQueue.objects.filter(queue=test_irr_queue_labeled).count()
            ) == TEST_QUEUE_LEN

    # The project moved on to the next training set.
    assert project.get_current_training_set() != initial_training_set
    assert (project.get_current_training_set().set_number
            == initial_training_set.set_number + 1)
def test_fill_queue_entropy_predicted_data(test_project_predicted_data,
                                           test_queue, test_redis):
    """Entropy fill should load the queue to capacity with unlabeled data
    ordered by non-increasing entropy, keeping redis in sync."""
    fill_queue(test_queue, 'entropy')

    assert_redis_matches_db(test_redis)
    assert test_queue.data.count() == test_queue.length

    ordered = get_ordered_data(test_queue.data.all(), 'entropy')
    last_entropy = ordered[0].datauncertainty_set.get().entropy
    for item in ordered:
        assert len(item.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {'data': item})
        current_entropy = item.datauncertainty_set.get().entropy
        assert current_entropy <= last_entropy
        last_entropy = current_entropy
def test_fill_queue_margin_sampling_predicted_data(test_project_predicted_data,
                                                   test_queue, test_redis):
    """Margin-sampling fill should load the queue to capacity with unlabeled
    data ordered by non-decreasing margin (smaller margin = more uncertain),
    keeping redis in sync."""
    fill_queue(test_queue, 'margin sampling')

    assert_redis_matches_db(test_redis)
    assert test_queue.data.count() == test_queue.length

    ordered = get_ordered_data(test_queue.data.all(), 'margin sampling')
    last_margin = ordered[0].datauncertainty_set.get().margin_sampling
    for item in ordered:
        assert len(item.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {'data': item})
        current_margin = item.datauncertainty_set.get().margin_sampling
        assert current_margin >= last_margin
        last_margin = current_margin
def test_fill_queue_least_confident_predicted_data(test_project_predicted_data,
                                                   test_queue, test_redis):
    """Least-confident fill should load the queue to capacity with unlabeled
    data ordered by non-increasing least-confident score, keeping redis in
    sync."""
    fill_queue(test_queue, "least confident")

    assert_redis_matches_db(test_redis)
    assert test_queue.data.count() == test_queue.length

    ordered = get_ordered_data(test_queue.data.all(), "least confident")
    last_lc = ordered[0].datauncertainty_set.get().least_confident
    for item in ordered:
        assert len(item.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {"data": item})
        current_lc = item.datauncertainty_set.get().least_confident
        assert current_lc <= last_lc
        last_lc = current_lc
def test_init_redis_multiple_projects(db, test_project_data, test_redis,
                                      test_profile):
    """Initialize redis over two projects, each with one filled and one empty
    queue, and verify redis ends up mirroring the database."""
    # Project 1: one filled queue plus one empty queue.
    first_queue = add_queue(test_project_data, 10)
    fill_queue(first_queue, orderby="random")
    add_queue(test_project_data, 10)

    # Project 2: same layout, built from a fresh unlabeled dataset.
    project2 = create_project("test_project2", test_profile)
    project2_data = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv")
    add_data(project2, project2_data)
    second_queue = add_queue(project2, 10)
    fill_queue(second_queue, orderby="random")
    add_queue(project2, 10)

    # Wipe redis, then rebuild it from the database state.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_init_redis_empty(db, test_redis):
    """init_redis on a database with no queues should leave redis empty
    but consistent."""
    init_redis()
    assert_redis_matches_db(test_redis)