def process_irr_label(data, label):
    """Resolve an IRR datum once enough users have labeled or skipped it.

    Nothing is changed until the number of DataLabel rows plus skip entries
    (IRRLog rows with a null label) for ``data`` reaches
    ``project.num_users_irr``. Once it does, every DataLabel row for the
    datum is copied into IRRLog and removed from DataLabel. If all of the
    labels are identical and nobody skipped, ``irr_ind`` is cleared, a single
    DataLabel owned by the project creator is written with ``label``, and the
    datum's DataQueue rows are deleted. Otherwise the datum's DataQueue rows
    are pointed at the project's admin queue. Redis is then updated to match.

    Args:
        data: The IRR datum that was just labeled.
        label: The label stored as the final label when everyone agrees.
    """
    user_labels = DataLabel.objects.filter(data=data)
    user_skips = IRRLog.objects.filter(label__isnull=True, data=data)
    project = data.project
    current_training_set = project.get_current_training_set()
    admin_queue = Queue.objects.get(project=project, type="admin")

    # Fewer labels + skips than the project calls for: nothing to resolve yet.
    if user_labels.count() + user_skips.count() < project.num_users_irr:
        return

    # Mirror every existing label into the IRR history.
    history = []
    for entry in user_labels:
        history.append(
            IRRLog(
                data=data,
                profile=entry.profile,
                label=entry.label,
                timestamp=entry.timestamp,
            )
        )

    with transaction.atomic():
        IRRLog.objects.bulk_create(history)

        # Read the label values before the DataLabel rows are deleted.
        distinct_labels = set(user_labels.values_list("label", flat=True))
        DataLabel.objects.filter(data=data).delete()

        unanimous = len(distinct_labels) == 1 and user_skips.count() == 0
        if unanimous:
            # The datum is no longer IRR, so it can be part of the training set.
            Data.objects.filter(pk=data.pk).update(irr_ind=False)
            # Store one agreed label under the project creator and drop the
            # datum from its queue.
            DataLabel.objects.create(
                data=data,
                profile=project.creator,
                label=label,
                training_set=current_training_set,
                time_to_label=None,
                timestamp=timezone.now(),
            )
            DataQueue.objects.filter(data=data).delete()
        else:
            # Disagreement (or a skip): hand the datum over to the admin queue.
            DataQueue.objects.filter(data=data).update(queue=admin_queue)

    # Reflect the queue changes in redis.
    irr_queue = Queue.objects.get(project=project, type="irr")
    serialized_irr_set = redis_serialize_set(irr_queue)
    settings.REDIS.srem(serialized_irr_set, redis_serialize_data(data))

    if not unanimous:
        serialized_admin_set = redis_serialize_set(admin_queue)
        settings.REDIS.sadd(serialized_admin_set, redis_serialize_data(data))
def restore_data(request, data_pk):
    """Move a datum out of the RecycleBin and into the project's admin queue.

    Args:
        request: The POST request
        data_pk: Primary key of the data
    Returns:
        A Response wrapping {} on success, or {"error": ...} when the
        requesting profile's permission level on the project is not above 1.
    Raises:
        RecycleBin.DoesNotExist: if the datum has no RecycleBin entry. In that
            case neither the database nor redis is modified.
    """
    data = Data.objects.get(pk=data_pk)
    profile = request.user.profile
    response = {}

    # Make sure coder is an admin
    if project_extras.proj_permission_level(data.project, profile) > 1:
        queue = Queue.objects.get(project=data.project, type="admin")

        with transaction.atomic():
            # Remove the recycle bin entry first: if the datum is not in the
            # bin this raises before anything has been queued.
            RecycleBin.objects.get(data=data).delete()
            DataQueue.objects.create(data=data, queue=queue)

        # update redis only once the database changes have succeeded
        settings.REDIS.sadd(redis_serialize_set(queue), redis_serialize_data(data))
    else:
        response["error"] = "Invalid credentials. Must be an admin."

    return Response(response)
def move_skipped_to_admin_queue(datum, profile, project):
    """Hand a skipped datum over to the project's admin queue.

    The profile's AssignedData row for the datum is deleted and the DataQueue
    rows linking the datum to the previously assigned queue are pointed at the
    admin queue. The datum is then removed from the old queue's redis set and
    added to the admin queue's redis set.

    Args:
        datum: The data item being skipped.
        profile: The profile the datum was assigned to.
        project: The project whose admin queue receives the datum.
    """
    admin_queue = Queue.objects.get(project=project, type="admin")

    with transaction.atomic():
        # Drop the assignment, remembering which queue it came from.
        assigned = AssignedData.objects.get(data=datum, profile=profile)
        source_queue = assigned.queue
        assigned.delete()

        # Re-home the datum in the admin queue.
        DataQueue.objects.filter(data=datum, queue=source_queue).update(
            queue=admin_queue
        )

    # Mirror the move in redis.
    source_set = redis_serialize_set(source_queue)
    settings.REDIS.srem(source_set, redis_serialize_data(datum))
    admin_set = redis_serialize_set(admin_queue)
    settings.REDIS.sadd(admin_set, redis_serialize_data(datum))
def label_admin_label(request, data_pk):
    """Label a single datum from the admin annotation page.

    A DataLabel is created for the requesting profile with the label given by
    ``request.data["labelID"]`` and a null ``time_to_label``. The datum is then
    removed from the project's admin queue (database and redis) and its
    ``irr_ind`` flag is cleared if it was set.

    Args:
        request: The POST request
        data_pk: Primary key of the data
    Returns:
        {}
    """
    datum = Data.objects.get(pk=data_pk)
    project = datum.project
    chosen_label = Label.objects.get(pk=request.data["labelID"])
    admin_profile = request.user.profile
    training_set = project.get_current_training_set()

    with transaction.atomic():
        admin_queue = project.queue_set.get(type="admin")

        DataLabel.objects.create(
            data=datum,
            label=chosen_label,
            profile=admin_profile,
            training_set=training_set,
            time_to_label=None,
            timestamp=timezone.now(),
        )
        DataQueue.objects.filter(data=datum, queue=admin_queue).delete()

        # update redis
        admin_set = redis_serialize_set(admin_queue)
        settings.REDIS.srem(admin_set, redis_serialize_data(datum))

        # make sure the data is no longer irr
        if datum.irr_ind:
            Data.objects.filter(pk=datum.pk).update(irr_ind=False)

    # NOTE: this checks if the model needs to be triggered, but not if the
    # queues need to be refilled. This is because for something to be in the
    # admin queue, annotate or skip would have already checked for an empty queue
    check_and_trigger_model(datum)

    return Response({})
def modify_label_to_skip(request, data_pk):
    """Replace an existing label on a datum with a skip.

    The DataLabel rows for the datum carrying the old label
    (``request.data["oldLabelID"]``) are deleted. For IRR data a skip entry
    (an IRRLog row with a null label) is recorded for the requesting profile
    unless that profile already has an IRRLog row for the datum. Non-IRR data
    is added to the project's admin queue, in the database and in redis. In
    both cases the change is recorded in LabelChangeLog with "skip" as the
    new label.

    Args:
        request: The POST request
        data_pk: Primary key of the data
    Returns:
        {}
    """
    data = Data.objects.get(pk=data_pk)
    profile = request.user.profile
    response = {}
    project = data.project
    old_label = Label.objects.get(pk=request.data["oldLabelID"])
    queue = Queue.objects.get(project=project, type="admin")

    with transaction.atomic():
        DataLabel.objects.filter(data=data, label=old_label).delete()

        if data.irr_ind:
            # if it was irr, add it to the log (once per profile); exists()
            # asks the database for emptiness instead of loading every row
            if not IRRLog.objects.filter(data=data, profile=profile).exists():
                IRRLog.objects.create(
                    data=data, profile=profile, label=None, timestamp=timezone.now()
                )
        else:
            # if it's not irr, add it to the admin queue immediately
            DataQueue.objects.create(data=data, queue=queue)

            # update redis
            settings.REDIS.sadd(redis_serialize_set(queue), redis_serialize_data(data))

        LabelChangeLog.objects.create(
            project=project,
            data=data,
            profile=profile,
            old_label=old_label.name,
            new_label="skip",
            change_timestamp=timezone.now(),
        )

    return Response(response)
def label_data(label, datum, profile, time):
    """Record a profile's label for a datum and clear that profile's assignment.

    A DataLabel is created and the profile's AssignedData row for the datum is
    deleted. Non-IRR data is also removed from the DataQueue and from the
    associated redis set. IRR data is either handed to ``process_irr_label``
    or, when the IRR history already holds at least
    ``project.num_users_irr`` entries, logged straight to IRRLog with the
    freshly created DataLabel removed again.

    Args:
        label: The label chosen for the datum.
        datum: The data item being labeled.
        profile: The profile doing the labeling.
        time: Value stored as the label's ``time_to_label``.
    """
    project = datum.project
    training_set = project.get_current_training_set()
    is_irr = datum.irr_ind

    with transaction.atomic():
        DataLabel.objects.create(
            data=datum,
            label=label,
            profile=profile,
            training_set=training_set,
            time_to_label=time,
            timestamp=timezone.now(),
        )

        # There's a unique constraint on data/profile, so this is
        # guaranteed to return one object
        assignment = AssignedData.objects.get(data=datum, profile=profile)
        queue = assignment.queue
        assignment.delete()

        if is_irr:
            irr_history_size = IRRLog.objects.filter(data=datum).count()
            if irr_history_size < project.num_users_irr:
                process_irr_label(datum, label)
            else:
                # The history already holds the needed number of entries, so
                # the datum was processed before: only log this label.
                IRRLog.objects.create(
                    data=datum, profile=profile, label=label, timestamp=timezone.now()
                )
                DataLabel.objects.get(data=datum, profile=profile).delete()
        else:
            DataQueue.objects.filter(data=datum, queue=queue).delete()

    if not is_irr:
        queue_set = redis_serialize_set(queue)
        settings.REDIS.srem(queue_set, redis_serialize_data(datum))
def discard_data(request, data_pk):
    """Move a datum to the RecycleBin.

    This removes it from the admin dataqueue, deletes its IRR log records and
    clears its ``irr_ind`` flag. This is used only in the skew table by the
    admin.

    Args:
        request: The POST request
        data_pk: Primary key of the data
    Returns:
        A Response wrapping {} on success, or {"error": ...} when the
        requesting profile's permission level on the project is not above 1.
    Raises:
        DataQueue.DoesNotExist: if the datum is not in the project's admin
            queue. In that case neither the database nor redis is modified.
    """
    data = Data.objects.get(pk=data_pk)
    profile = request.user.profile
    project = data.project
    response = {}

    # Make sure coder is an admin
    if project_extras.proj_permission_level(project, profile) > 1:
        queue = Queue.objects.get(project=project, type="admin")

        # Apply all database changes together so a failure part-way through
        # cannot leave the datum half-discarded.
        with transaction.atomic():
            # remove it from the admin queue
            DataQueue.objects.get(data=data, queue=queue).delete()
            # remove any IRR log data (a single delete is sufficient)
            IRRLog.objects.filter(data=data).delete()
            Data.objects.filter(pk=data_pk).update(irr_ind=False)
            RecycleBin.objects.create(data=data, timestamp=timezone.now())

        # update redis only once the database changes have succeeded
        settings.REDIS.srem(redis_serialize_set(queue), redis_serialize_data(data))
    else:
        response["error"] = "Invalid credentials. Must be an admin."

    return Response(response)
def test_redis_serialzie_set(test_queue):
    """The redis key for a queue's set is "set:" followed by the queue's pk."""
    actual_key = redis_serialize_set(test_queue)
    expected_key = f"set:{test_queue.pk!s}"
    assert actual_key == expected_key