def test_init_redis_one_empty_queue(db, test_project, test_redis):
    """After a flush, init_redis should rebuild redis to match a DB that
    contains a single queue with no data in it."""
    add_queue(test_project, 10)

    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_init_redis_one_nonempty_queue(db, test_project_data, test_redis):
    """After a flush, init_redis should rebuild redis to match a DB that
    contains a single queue populated with data."""
    populated_queue = add_queue(test_project_data, 10)
    fill_queue(populated_queue, "random")

    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
# Example #3 (separator from source scrape, converted to a comment so the file parses)
def test_check_and_trigger_queue_changes_success(
        setup_celery, test_project_labeled_and_tfidf, test_queue_labeled,
        test_irr_queue_labeled, test_redis, tmpdir, settings, test_profile2):
    """Labeling a datum should trigger a model run; this verifies the full
    fallout: the pickled model file, predictions for all unlabeled data,
    refilled normal+IRR queues synced with redis, the queue length recomputed
    for the newly added coder, least-confident ordering, and a bumped
    training set.
    """
    project = test_project_labeled_and_tfidf
    test_queue = test_queue_labeled
    initial_training_set = project.get_current_training_set()
    # Redirect model pickle output into the tmpdir so the test is hermetic.
    model_path_temp = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(model_path_temp)

    # Add another user to permissions so the coder count (and therefore the
    # expected queue length) changes.
    ProjectPermissions.objects.create(profile=test_profile2,
                                      project=project,
                                      permission='CODER')

    datum = DataLabel.objects.filter(data__project=project).first().data
    check = check_and_trigger_model(datum)
    assert check == 'model running'

    # Assert model created and saved under the expected pickle path
    assert_obj_exists(Model, {'project': project})
    model = Model.objects.get(project=project)
    assert os.path.isfile(model.pickle_path)
    assert model.pickle_path == os.path.join(
        str(model_path_temp), 'project_' + str(project.pk) + '_training_' +
        str(initial_training_set.set_number) + '.pkl')

    # Assert predictions created: one per (unlabeled datum, label) pair
    predictions = DataPrediction.objects.filter(data__project=project)
    assert len(predictions) == Data.objects.filter(
        project=project, labelers=None).count() * project.labels.count()

    # Assert queue filled and redis synced
    batch_size = project.batch_size
    q = project.queue_set.get(type="normal")
    q_irr = project.queue_set.get(type="irr")
    assert (q.data.count() + q_irr.data.count()) == batch_size
    assert_redis_matches_db(test_redis)

    # Queue length should have been recomputed for the extra coder added above
    num_coders = len(project.projectpermissions_set.all()) + 1
    new_queue_length = find_queue_length(batch_size, num_coders)
    assert q.length == new_queue_length

    # Assert least confident in queue: unlabeled, has uncertainty record,
    # and ordered by non-increasing least_confident score
    data_list = get_ordered_data(test_queue.data.all(), 'least confident')
    previous_lc = data_list[0].datauncertainty_set.get().least_confident
    for datum in data_list:
        assert len(datum.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {'data': datum})
        assert datum.datauncertainty_set.get().least_confident <= previous_lc
        previous_lc = datum.datauncertainty_set.get().least_confident
    assert (DataQueue.objects.filter(queue=test_queue).count() +
            DataQueue.objects.filter(queue=test_irr_queue_labeled).count()
            ) == batch_size

    # Assert new training set was started with an incremented set number
    assert project.get_current_training_set() != initial_training_set
    assert project.get_current_training_set(
    ).set_number == initial_training_set.set_number + 1
# Example #4 (separator from source scrape, converted to a comment so the file parses)
def test_fill_queue_random_predicted_data(test_project_predicted_data,
                                          test_queue, test_redis):
    """Random fill on a project with predictions: the queue is filled to
    capacity, redis matches the DB, and every queued datum is unlabeled
    and has an uncertainty record."""
    fill_queue(test_queue, 'random')

    assert_redis_matches_db(test_redis)
    assert test_queue.data.count() == test_queue.length
    for queued_datum in test_queue.data.all():
        assert queued_datum.datalabel_set.count() == 0
        assert_obj_exists(DataUncertainty, {'data': queued_datum})
def test_init_redis_multiple_queues(db, test_project_data, test_redis):
    """init_redis should handle a project holding both a populated queue
    and an empty one."""
    filled = add_queue(test_project_data, 10)
    fill_queue(filled, "random")

    # Second queue in the same project is left empty.
    add_queue(test_project_data, 10)

    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
# Example #6 (separator from source scrape, converted to a comment so the file parses)
def test_model_task(
    test_project_labeled_and_tfidf,
    test_queue_labeled,
    test_irr_queue_labeled,
    test_redis,
    tmpdir,
    settings,
):
    """Run the celery model task synchronously and verify its full fallout:
    the pickled model file, predictions for all unlabeled data, refilled
    normal+IRR queues synced with redis, an unchanged queue length, the
    least-confident ordering of the queue, and a bumped training set.
    """
    project = test_project_labeled_and_tfidf
    test_queue = test_queue_labeled
    initial_training_set = project.get_current_training_set()
    initial_queue_length = test_queue.length

    # Redirect model pickle output into the tmpdir so the test is hermetic.
    model_path_temp = tmpdir.listdir()[0].mkdir("model_pickles")
    settings.MODEL_PICKLE_PATH = str(model_path_temp)

    # .get() blocks until the celery task finishes.
    tasks.send_model_task.delay(project.pk).get()

    # Assert model created and saved under the expected pickle path
    assert_obj_exists(Model, {"project": project})
    model = Model.objects.get(project=project)
    assert os.path.isfile(model.pickle_path)
    assert model.pickle_path == os.path.join(
        str(model_path_temp),
        "project_" + str(project.pk) + "_training_" +
        str(initial_training_set.set_number) + ".pkl",
    )

    # Assert predictions created: one per (unlabeled datum, label) pair
    predictions = DataPrediction.objects.filter(data__project=project)
    assert (len(predictions) == Data.objects.filter(
        project=project, labelers=None).count() * project.labels.count())

    # Assert both queues are filled and redis synced
    assert (test_queue.data.count() +
            test_irr_queue_labeled.data.count()) == test_queue.length
    assert_redis_matches_db(test_redis)

    # Assert queue size is unchanged by the model run
    assert test_queue.length == initial_queue_length

    # Assert least confident in queue: unlabeled, has uncertainty record,
    # and ordered by non-increasing least_confident score
    data_list = get_ordered_data(test_queue.data.all(), "least confident")
    previous_lc = data_list[0].datauncertainty_set.get().least_confident
    for datum in data_list:
        assert len(datum.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {"data": datum})
        assert datum.datauncertainty_set.get().least_confident <= previous_lc
        previous_lc = datum.datauncertainty_set.get().least_confident

    # Assert new training set was started with an incremented set number
    assert project.get_current_training_set() != initial_training_set
    assert (project.get_current_training_set().set_number ==
            initial_training_set.set_number + 1)
# Example #7 (separator from source scrape, converted to a comment so the file parses)
def test_check_and_trigger_batched_success(setup_celery,
                                           test_project_labeled_and_tfidf,
                                           test_queue_labeled,
                                           test_irr_queue_labeled, test_redis,
                                           tmpdir, settings):
    """Labeling a datum should trigger a batched model run; this verifies the
    pickled model file, predictions for all unlabeled data, refilled
    normal+IRR queues synced with redis, an unchanged queue size, the
    least-confident ordering, the combined queue count against
    TEST_QUEUE_LEN, and a bumped training set.
    """
    project = test_project_labeled_and_tfidf
    test_queue = test_queue_labeled
    initial_training_set = project.get_current_training_set()
    initial_queue_size = test_queue.length
    # Redirect model pickle output into the tmpdir so the test is hermetic.
    model_path_temp = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(model_path_temp)

    datum = DataLabel.objects.filter(data__project=project).first().data
    check = check_and_trigger_model(datum)
    assert check == 'model running'

    # Assert model created and saved under the expected pickle path
    assert_obj_exists(Model, {'project': project})
    model = Model.objects.get(project=project)
    assert os.path.isfile(model.pickle_path)
    assert model.pickle_path == os.path.join(
        str(model_path_temp), 'project_' + str(project.pk) + '_training_' +
        str(initial_training_set.set_number) + '.pkl')

    # Assert predictions created: one per (unlabeled datum, label) pair
    predictions = DataPrediction.objects.filter(data__project=project)
    assert len(predictions) == Data.objects.filter(
        project=project, labelers=None).count() * project.labels.count()

    # Assert queue filled and redis synced
    assert (test_queue.data.count() +
            test_irr_queue_labeled.data.count()) == test_queue.length
    assert_redis_matches_db(test_redis)
    assert test_queue.length == initial_queue_size

    # Assert least confident in queue: unlabeled, has uncertainty record,
    # and ordered by non-increasing least_confident score
    data_list = get_ordered_data(test_queue.data.all(), 'least confident')
    previous_lc = data_list[0].datauncertainty_set.get().least_confident
    for datum in data_list:
        assert len(datum.datalabel_set.all()) == 0
        assert_obj_exists(DataUncertainty, {'data': datum})
        assert datum.datauncertainty_set.get().least_confident <= previous_lc
        previous_lc = datum.datauncertainty_set.get().least_confident
    assert (DataQueue.objects.filter(queue=test_queue).count() +
            DataQueue.objects.filter(queue=test_irr_queue_labeled).count()
            ) == TEST_QUEUE_LEN

    # Assert new training set was started with an incremented set number
    assert project.get_current_training_set() != initial_training_set
    assert project.get_current_training_set(
    ).set_number == initial_training_set.set_number + 1
# Example #8 (separator from source scrape, converted to a comment so the file parses)
def test_fill_queue_entropy_predicted_data(test_project_predicted_data,
                                           test_queue, test_redis):
    """Entropy fill: the queue is filled to capacity, redis matches the DB,
    and queued data are unlabeled, have uncertainty records, and are
    ordered by non-increasing entropy."""
    fill_queue(test_queue, 'entropy')

    assert_redis_matches_db(test_redis)
    assert test_queue.data.count() == test_queue.length

    ordered = get_ordered_data(test_queue.data.all(), 'entropy')
    prev_entropy = ordered[0].datauncertainty_set.get().entropy
    for item in ordered:
        assert item.datalabel_set.count() == 0
        assert_obj_exists(DataUncertainty, {'data': item})
        current_entropy = item.datauncertainty_set.get().entropy
        assert current_entropy <= prev_entropy
        prev_entropy = current_entropy
# Example #9 (separator from source scrape, converted to a comment so the file parses)
def test_fill_queue_margin_sampling_predicted_data(test_project_predicted_data,
                                                   test_queue, test_redis):
    """Margin-sampling fill: the queue is filled to capacity, redis matches
    the DB, and queued data are unlabeled, have uncertainty records, and
    are ordered by non-decreasing margin (smallest margin first)."""
    fill_queue(test_queue, 'margin sampling')

    assert_redis_matches_db(test_redis)
    assert test_queue.data.count() == test_queue.length

    ordered = get_ordered_data(test_queue.data.all(), 'margin sampling')
    prev_margin = ordered[0].datauncertainty_set.get().margin_sampling
    for item in ordered:
        assert item.datalabel_set.count() == 0
        assert_obj_exists(DataUncertainty, {'data': item})
        current_margin = item.datauncertainty_set.get().margin_sampling
        assert current_margin >= prev_margin
        prev_margin = current_margin
def test_fill_queue_least_confident_predicted_data(
    test_project_predicted_data, test_queue, test_redis
):
    """Least-confident fill: the queue is filled to capacity, redis matches
    the DB, and queued data are unlabeled, have uncertainty records, and
    are ordered by non-increasing least_confident score."""
    fill_queue(test_queue, "least confident")

    assert_redis_matches_db(test_redis)
    assert test_queue.data.count() == test_queue.length

    ordered = get_ordered_data(test_queue.data.all(), "least confident")
    prev_lc = ordered[0].datauncertainty_set.get().least_confident
    for item in ordered:
        assert item.datalabel_set.count() == 0
        assert_obj_exists(DataUncertainty, {"data": item})
        current_lc = item.datauncertainty_set.get().least_confident
        assert current_lc <= prev_lc
        prev_lc = current_lc
def test_init_redis_multiple_projects(db, test_project_data, test_redis,
                                      test_profile):
    """init_redis across two projects, each holding one populated queue and
    one empty queue, should leave redis consistent with the DB."""
    # Project 1: a filled queue plus an empty one.
    p1_filled = add_queue(test_project_data, 10)
    fill_queue(p1_filled, "random")
    add_queue(test_project_data, 10)

    # Project 2: a fresh project with its own data and the same queue mix.
    project2 = create_project("test_project2", test_profile)
    project2_data = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv")
    add_data(project2, project2_data)
    p2_filled = add_queue(project2, 10)
    fill_queue(p2_filled, "random")
    add_queue(project2, 10)

    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_init_redis_empty(db, test_redis):
    """init_redis on a database with no queues should still leave redis
    consistent (i.e. empty) with the DB."""
    init_redis()

    assert_redis_matches_db(test_redis)