Пример #1
0
def test_model_task_redis_no_dupes_data_unassign_assigned_data(
        test_project_labeled_and_tfidf, test_queue_labeled,
        test_irr_queue_labeled, test_admin_queue_labeled, test_redis, tmpdir,
        settings):
    """The normal queue's Redis list must never contain duplicate entries.

    The check is made after each of two model task runs and once more after
    calling batch_unassign() for the project creator.
    """
    project = test_project_labeled_and_tfidf

    def assert_redis_queue_is_unique(target_queue):
        # Fetch the full Redis list for the queue and compare it to its set.
        entries = test_redis.lrange(redis_serialize_queue(target_queue), 0, -1)
        assert len(entries) == len(set(entries))

    # Two extra coders on the project, both created before any permission.
    coders = [
        create_profile(username, 'password', '*****@*****.**')
        for username in ('test_profilezzz', 'test_profile2')
    ]
    for coder in coders:
        ProjectPermissions.objects.create(profile=coder,
                                          project=project,
                                          permission='CODER')

    starting_set_number = project.get_current_training_set().set_number

    normal_queue = project.queue_set.get(type="normal")
    normal_queue.length = 40
    normal_queue.save()

    irr_queue = project.queue_set.get(type="irr")
    irr_queue.length = 40
    irr_queue.save()

    # Point model pickling at a scratch directory for this test run.
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    batch_size = project.batch_size
    fill_queue(normal_queue,
               'random',
               irr_queue,
               irr_percent=project.percentage_irr,
               batch_size=batch_size)

    labels = project.labels.all()

    # Round one: label every assignment handed out for a full batch.
    for assigned in get_assignments(project.creator, project, batch_size):
        label_data(random.choice(labels), assigned, project.creator, 3)

    tasks.send_model_task.delay(project.pk).get()
    current_set_number = project.get_current_training_set().set_number
    assert current_set_number == starting_set_number + 1
    assert_redis_queue_is_unique(normal_queue)

    # Round two: request 40 assignments but label only the first batch.
    second_round = get_assignments(project.creator, project, 40)
    for assigned in second_round[:batch_size]:
        label_data(random.choice(labels), assigned, project.creator, 3)

    tasks.send_model_task.delay(project.pk).get()
    current_set_number = project.get_current_training_set().set_number
    assert current_set_number == starting_set_number + 2
    assert_redis_queue_is_unique(normal_queue)

    batch_unassign(project.creator)
    assert_redis_queue_is_unique(normal_queue)
Пример #2
0
def test_model_task_redis_no_dupes_data_left_in_queue(
        test_project_labeled_and_tfidf, test_queue_labeled,
        test_irr_queue_labeled, test_admin_queue_labeled, test_redis, tmpdir,
        settings):
    """The normal queue's Redis list has no duplicates after a model run.

    Only the non-IRR share of one batch is labeled before the model task
    is triggered.
    """
    project = test_project_labeled_and_tfidf
    starting_set_number = project.get_current_training_set().set_number

    normal_queue = project.queue_set.get(type="normal")
    normal_queue.length = 40
    normal_queue.save()

    irr_queue = project.queue_set.get(type="irr")
    irr_queue.length = 40
    irr_queue.save()

    # Point model pickling at a scratch directory for this test run.
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    batch_size = project.batch_size
    fill_queue(normal_queue,
               'random',
               irr_queue,
               irr_percent=project.percentage_irr,
               batch_size=batch_size)

    labels = project.labels.all()

    # Number of items to label: the batch size scaled by the non-IRR percent.
    label_count = int(batch_size * ((100 - project.percentage_irr) / 100))
    for _ in range(label_count):
        assigned = assign_datum(project.creator, project)
        label_data(random.choice(labels), assigned, project.creator, 3)

    tasks.send_model_task.delay(project.pk).get()
    current_set_number = project.get_current_training_set().set_number
    assert current_set_number == starting_set_number + 1

    entries = test_redis.lrange(redis_serialize_queue(normal_queue), 0, -1)
    assert len(entries) == len(set(entries))
def test_redis_parse_list_dataids(test_queue, test_redis):
    """Ids parsed from the queue's Redis list must match the queue's data pks."""
    fill_queue(test_queue, orderby="random")

    data_ids = [d.pk for d in test_queue.data.all()]
    redis_ids = test_redis.lrange(redis_serialize_queue(test_queue), 0, -1)
    parsed_ids = redis_parse_list_dataids(redis_ids)

    # list.sort() sorts in place and returns None, so comparing the results of
    # two .sort() calls is always `None == None` and can never fail.  Compare
    # sorted copies instead.
    # NOTE(review): ids are normalised to str because the element type
    # returned by redis_parse_list_dataids() is not visible here; drop the
    # str() conversion if it is confirmed to return ints.
    assert sorted(map(str, data_ids)) == sorted(map(str, parsed_ids))
def test_redis_parse_data(test_queue, test_redis):
    """A data key popped from Redis parses to an object known to the database.

    The parsed object's pk must exist both as a Data row and as the data_id
    of a DataQueue row.
    """
    fill_queue(test_queue, orderby="random")

    queue_key = redis_serialize_queue(test_queue)
    raw_data_key = test_redis.lpop(queue_key)
    datum = redis_parse_data(raw_data_key)

    for model, lookup_field in ((Data, "pk"), (DataQueue, "data_id")):
        assert_obj_exists(model, {lookup_field: datum.pk})
Пример #5
0
def unassign_datum(datum, profile):
    """Drop the AssignedData row linking ``profile`` to ``datum``.

    After the row is deleted, the datum's serialized key is pushed onto the
    left end of the Redis list for the queue the assignment belonged to.
    """
    record = AssignedData.objects.get(profile=profile, data=datum)

    # Read the queue before deleting the row it hangs off.
    owning_queue = record.queue
    record.delete()

    queue_key = redis_serialize_queue(owning_queue)
    settings.REDIS.lpush(queue_key, redis_serialize_data(datum))
Пример #6
0
def pop_queue(queue):
    """Pop one datum key from the queue's Redis list and return its Data object.

    The key is taken from the right end of the list.  Returns None, without
    touching the database, when Redis has nothing to pop.

    Client code should prefer pop_first_nonempty_queue() if the
    intent is to pop the first nonempty queue, as it avoids
    concurrency issues.

    NOTE(review): this function only reads from the database; no rows are
    deleted here.
    """
    # Redis first, since this op is guaranteed to be atomic
    raw_key = settings.REDIS.rpop(redis_serialize_queue(queue))
    if raw_key is None:
        return None

    # The primary key is the second ":"-separated field of the decoded key.
    data_pk = raw_key.decode().split(":")[1]
    return Data.objects.get(pk=data_pk)
def test_redis_serialize_queue(test_queue):
    """A queue serializes to the Redis key ``queue:<pk>``."""
    expected_key = "queue:{}".format(test_queue.pk)

    assert redis_serialize_queue(test_queue) == expected_key
Пример #8
0
def pop_first_nonempty_queue(project, profile=None, type="normal"):
    '''
    Determine which queues are eligible to be popped (and in what order)
    and take a datum from the first nonempty one.

    Queues belonging to ``profile`` (if given) are tried before the
    project's profile-less queues; ties are broken by queue pk.

    For ``type == "irr"`` nothing is popped from redis.  Instead the first
    eligible queue that still holds a DataQueue entry the profile has not
    labeled, been assigned, or skipped supplies the datum.  For every other
    type a Lua script atomically pops the first item of the first nonempty
    redis list.

    Args:
        project: project whose ``queue_set`` is searched.
        profile: optional profile whose own queues take priority.
        type: queue type to consider (name kept for caller compatibility
            even though it shadows the builtin).

    Returns:
        A (queue, data item) tuple if one was found; a (None, None)
        tuple if not.
    '''
    if profile is not None:
        # Use priority to ensure we set profile queues above project queues
        # in the resulting list; break ties by pk
        profile_queues = project.queue_set.filter(profile=profile, type=type)
    else:
        profile_queues = Queue.objects.none()
    profile_queues = profile_queues.annotate(priority=Value(1, IntegerField()))

    project_queues = (project.queue_set.filter(
        profile=None, type=type).annotate(priority=Value(2, IntegerField())))

    eligible_queue_ids = [
        redis_serialize_queue(queue) for queue in (
            profile_queues.union(project_queues).order_by('priority', 'pk'))
    ]

    if not eligible_queue_ids:
        return (None, None)

    if type == "irr":
        # These exclusions depend only on the profile, so build the lazy
        # querysets once rather than once per queue.
        labeled_irr_data = DataLabel.objects.filter(
            profile=profile).values_list('data', flat=True)
        skipped_data = IRRLog.objects.filter(
            profile=profile, label__isnull=True).values_list('data',
                                                             flat=True)

        for queue_id in eligible_queue_ids:
            queue = redis_parse_queue(queue_id.encode())

            # data already assigned to this profile from this queue
            assigned_data = AssignedData.objects.filter(
                profile=profile, queue=queue).values_list('data', flat=True)
            assigned_unlabeled = DataQueue.objects.filter(queue=queue).exclude(
                data__in=labeled_irr_data).exclude(
                    data__in=assigned_data).exclude(data__in=skipped_data)

            # Nothing usable in this queue: move on to the next eligible one
            # instead of giving up, so that the *first nonempty* queue wins.
            if not assigned_unlabeled:
                continue

            # get the first element off the group and return it
            datum = Data.objects.get(pk=assigned_unlabeled[0].data.pk)
            return (queue, datum)

        # Every eligible irr queue was exhausted for this profile.
        return (None, None)

    # Use a custom Lua script here to find the first nonempty queue atomically
    # and pop its first item.  If all queues are empty, return nil.
    script = settings.REDIS.register_script('''
    for _, k in pairs(KEYS) do
      local m = redis.call('LPOP', k)
      if m then
        return {k, m}
      end
    end
    return nil
    ''')

    result = script(keys=eligible_queue_ids)

    if result is None:
        return (None, None)

    queue_key, data_key = result
    return (redis_parse_queue(queue_key), redis_parse_data(data_key))