def predict_many_tasks(self, tasks):
    """Request predictions from the ML backend for a batch of tasks and store them.

    Accepts either a queryset or a plain list of task objects (the latter is
    normalized into a queryset). If the backend answers with a single result
    for a multi-task request, falls back to per-task prediction via
    ``self.predict_one_task``. Saves all created predictions through
    ``PredictionSerializer`` inside ``conditional_atomic()``.
    """
    self.update_state()
    if self.not_ready:
        logger.debug(f'ML backend {self} is not ready')
        return

    # Normalize a plain list of task objects into a queryset.
    if isinstance(tasks, list):
        from tasks.models import Task
        task_ids = [task.id for task in tasks]
        tasks = Task.objects.filter(id__in=task_ids)

    serialized = TaskSimpleSerializer(tasks, many=True).data
    api_result = self.api.make_predictions(serialized, self.model_version, self.project)
    if api_result.is_error:
        logger.error(
            f'Prediction not created for project {self}: {api_result.error_message}'
        )
        return

    results = api_result.response['results']
    result_count = len(results)
    if result_count == 0:
        logger.error(f'ML backend returned empty prediction for project {self}')
        return

    if result_count == 1:
        # ML Backend doesn't support batch of tasks, do it one by one
        logger.warning(
            f"'ML backend '{self.title}' doesn't support batch processing of tasks, "
            f"switched to one-by-one task retrieving")
        for task in tasks:
            self.predict_one_task(task)
        return

    if result_count != len(serialized):
        # wrong result number; logged only — zip below silently truncates to the shorter side
        logger.error(
            f'ML backend returned response number {result_count} != task number {len(serialized)}'
        )

    predictions = [
        {
            'task': task['id'],
            'result': response['result'],
            'score': response.get('score'),
            'model_version': self.model_version,
        }
        for task, response in zip(serialized, results)
    ]

    with conditional_atomic():
        serializer = PredictionSerializer(data=predictions, many=True)
        serializer.is_valid(raise_exception=True)
        serializer.save()
def __predict_one_task(self, task):
    """Request a prediction for a single task and persist it as a ``Prediction``.

    Skips work when the backend is not ready or when a prediction with the
    current ``model_version`` already exists for the task. Returns the created
    ``Prediction`` instance, or ``None`` when nothing was created.
    """
    self.update_state()
    if self.not_ready:
        logger.debug(f'ML backend {self} is not ready to predict {task}')
        return

    if task.predictions.filter(model_version=self.model_version).exists():
        # prediction already exists
        logger.info(
            f'Skip creating prediction with ML backend {self} for task {task}: model version '
            f'{self.model_version} is up-to-date')
        return

    serialized_task = TaskSimpleSerializer(task).data
    api_result = self.api.make_predictions([serialized_task], self.model_version, self.project)
    if api_result.is_error:
        logger.warning(
            f'Prediction not created for project {self}: {api_result.error_message}'
        )
        return

    results = api_result.response['results']
    if not results:
        logger.error(
            f'ML backend returned empty prediction for project {self}',
            extra={'sentry_skip': True})
        return

    # Only the first response entry is used for a single-task request.
    first = results[0]
    with conditional_atomic():
        prediction = Prediction.objects.create(
            result=first['result'],
            score=safe_float(first.get('score')),
            model_version=self.model_version,
            task_id=serialized_task['id'],
            cluster=first.get('cluster'),
            neighbors=first.get('neighbors'),
            mislabeling=safe_float(first.get('mislabeling', 0)),
        )
        logger.debug(f'Prediction {prediction} created')

    return prediction
def get(self, request, *args, **kwargs):
    """Return the next task the current user should annotate.

    Queues are tried in priority order: externally prepared tasks (data
    manager), an already-held task lock, ground-truth-first, overlap-first
    filtering, breadth-first on in-progress tasks, and finally the project's
    configured sampling strategy (uncertainty / uniform / sequence).
    Raises NotFound when no task is available or all remaining ones are locked.
    """
    project = get_object_with_check_and_log(request, Project, pk=self.kwargs['pk'])
    # TODO: LSE option
    # if not project.is_published:
    #     raise PermissionDenied('Project is not published.')
    self.check_object_permissions(request, project)
    user = request.user

    # support actions api call from actions/next_task.py
    if hasattr(self, 'prepared_tasks'):
        project.prepared_tasks = self.prepared_tasks
        external_prepared_tasks_used = True
    # get prepared tasks from request params (filters, selected items)
    else:
        project.prepared_tasks = get_prepared_queryset(self.request, project)
        external_prepared_tasks_used = False

    # detect solved and not solved tasks: pks of tasks this user already
    # annotated (ground-truth annotations excluded)
    user_solved_tasks_array = user.annotations.filter(
        ground_truth=False).filter(Q(task__isnull=False)).values_list(
            'task__pk', flat=True)

    with conditional_atomic():
        # candidate pool: not solved by this user and not fully labeled yet
        not_solved_tasks = project.prepared_tasks.\
            exclude(pk__in=user_solved_tasks_array).filter(is_labeled=False)
        not_solved_tasks_count = not_solved_tasks.count()

        # return nothing if there are no tasks remain
        if not_solved_tasks_count == 0:
            raise NotFound(
                f'There are no tasks remaining to be annotated by the user={user}'
            )
        logger.debug(
            f'{not_solved_tasks_count} tasks that still need to be annotated for user={user}'
        )

        # ordered by data manager
        if external_prepared_tasks_used:
            next_task = not_solved_tasks.first()
            if not next_task:
                raise NotFound('No more tasks found')
            return self._make_response(next_task, request)

        # If current user has already lock one task - return it (without setting the lock again)
        next_task = Task.get_locked_by(user, project)
        if next_task:
            return self._make_response(next_task, request, use_task_lock=False)

        if project.show_ground_truth_first:
            logger.debug(
                f'User={request.user} tries ground truth from {not_solved_tasks_count} tasks'
            )
            next_task = self._try_ground_truth(not_solved_tasks, project)
            if next_task:
                return self._make_response(next_task, request)

        if project.show_overlap_first:
            # don't output anything - just filter tasks with overlap
            logger.debug(
                f'User={request.user} tries overlap first from {not_solved_tasks_count} tasks'
            )
            _, not_solved_tasks = self._try_tasks_with_overlap(not_solved_tasks)

        # if there any tasks in progress (with maximum number of annotations), randomly sampling from them
        logger.debug(
            f'User={request.user} tries depth first from {not_solved_tasks_count} tasks'
        )
        next_task = self._try_breadth_first(not_solved_tasks)
        if next_task:
            return self._make_response(next_task, request)

        # fall through to the project-configured sampling strategy
        if project.sampling == project.UNCERTAINTY:
            logger.debug(
                f'User={request.user} tries uncertainty sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._try_uncertainty_sampling(
                not_solved_tasks, project, user_solved_tasks_array)
        elif project.sampling == project.UNIFORM:
            logger.debug(
                f'User={request.user} tries random sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._get_random_unlocked(not_solved_tasks)
        elif project.sampling == project.SEQUENCE:
            logger.debug(
                f'User={request.user} tries sequence sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._get_first_unlocked(
                not_solved_tasks.all().order_by('id'))

        if next_task:
            return self._make_response(next_task, request)
        else:
            raise NotFound(
                f'There exist some unsolved tasks for the user={user}, but they seem to be locked by another users'
            )
def get(self, request, *args, **kwargs):
    """Return the next task the current user should annotate.

    Variant that honors an ``assignee_flag`` (manually assigned tasks are
    served first, regardless of labeling state) and limits the candidate pool
    by annotation count for unassigned users. Falls through the same queues as
    the other variants: task lock, ground truth, overlap, breadth-first
    (skipped for SEQUENCE sampling), then the configured sampling strategy.
    Raises NotFound when nothing is available or all candidates are locked.
    """
    project = get_object_with_check_and_log(request, Project, pk=self.kwargs['pk'])
    self.check_object_permissions(request, project)
    user = request.user

    # support actions api call from actions/next_task.py
    if hasattr(self, 'prepared_tasks'):
        project.prepared_tasks = self.prepared_tasks
    # get prepared tasks from request params (filters, selected items)
    else:
        project.prepared_tasks = get_prepared_queryset(self.request, project)

    # detect solved and not solved tasks: pks of tasks this user already
    # annotated (ground-truth annotations excluded)
    user_solved_tasks_array = user.annotations.filter(
        ground_truth=False).filter(Q(task__isnull=False)).values_list(
            'task__pk', flat=True)

    with conditional_atomic():
        not_solved_tasks = project.prepared_tasks.\
            exclude(pk__in=user_solved_tasks_array)

        # if annotator is assigned for tasks, he must to solve it regardless of is_labeled=True
        assigned_flag = hasattr(self, 'assignee_flag') and self.assignee_flag
        if not assigned_flag:
            # keep only tasks that still have annotation capacity left
            not_solved_tasks = not_solved_tasks.annotate(
                annotation_number=Count('annotations')).filter(
                    annotation_number__lte=project.maximum_annotations)
        not_solved_tasks_count = not_solved_tasks.count()

        # return nothing if there are no tasks remain
        if not_solved_tasks_count == 0:
            raise NotFound(
                f'There are no tasks remaining to be annotated by the user={user}'
            )
        logger.debug(
            f'{not_solved_tasks_count} tasks that still need to be annotated for user={user}'
        )

        # ordered by data manager
        if assigned_flag:
            next_task = not_solved_tasks.first()
            if not next_task:
                raise NotFound('No more tasks found')
            return self._make_response(next_task, request, use_task_lock=False)

        # If current user has already lock one task - return it (without setting the lock again)
        next_task = Task.get_locked_by(user, tasks=not_solved_tasks)
        if next_task:
            return self._make_response(next_task, request, use_task_lock=False)

        if project.show_ground_truth_first:
            logger.debug(
                f'User={request.user} tries ground truth from {not_solved_tasks_count} tasks'
            )
            next_task = self._try_ground_truth(not_solved_tasks, project)
            if next_task:
                return self._make_response(next_task, request)

        if project.show_overlap_first:
            # don't output anything - just filter tasks with overlap
            logger.debug(
                f'User={request.user} tries overlap first from {not_solved_tasks_count} tasks'
            )
            _, not_solved_tasks = self._try_tasks_with_overlap(not_solved_tasks)

        # don't use this mode for data manager sorting, because the sorting becomes not obvious
        if project.sampling != project.SEQUENCE:
            # if there any tasks in progress (with maximum number of annotations), randomly sampling from them
            logger.debug(
                f'User={request.user} tries depth first from {not_solved_tasks_count} tasks'
            )
            next_task = self._try_breadth_first(not_solved_tasks)
            if next_task:
                return self._make_response(next_task, request)

        # fall through to the project-configured sampling strategy
        if project.sampling == project.UNCERTAINTY:
            logger.debug(
                f'User={request.user} tries uncertainty sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._try_uncertainty_sampling(
                not_solved_tasks, project, user_solved_tasks_array)
        elif project.sampling == project.UNIFORM:
            logger.debug(
                f'User={request.user} tries random sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._get_random_unlocked(not_solved_tasks)
        elif project.sampling == project.SEQUENCE:
            logger.debug(
                f'User={request.user} tries sequence sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._get_first_unlocked(not_solved_tasks)

        if next_task:
            return self._make_response(next_task, request)
        else:
            raise NotFound(
                f'There are still some tasks to complete for the user={user}, but they seem to be locked by another user.'
            )
def get(self, request, *args, **kwargs):
    """Return the next task the current user should annotate.

    Variant that tracks which queue produced the task (reported via the
    ``queue`` argument of ``_make_response``) and supports an explicit data
    manager queue (``dm_queue``): when filters/ordering/selected items exist in
    the request, most priority queues are bypassed and tasks are served in the
    prepared order. Raises NotFound when nothing is available or all remaining
    candidates are locked by other users.
    """
    project = get_object_with_check_and_log(request, Project, pk=self.kwargs['pk'])
    self.check_object_permissions(request, project)
    user = request.user
    self.current_user = user
    # True when the request carries data-manager filters/ordering/selection
    dm_queue = filters_ordering_selected_items_exist(request.data)

    # support actions api call from actions/next_task.py
    if hasattr(self, 'prepared_tasks'):
        project.prepared_tasks = self.prepared_tasks
    # get prepared tasks from request params (filters, selected items)
    else:
        project.prepared_tasks = get_prepared_queryset(self.request, project)

    # detect solved and not solved tasks
    assigned_flag = hasattr(self, 'assignee_flag') and self.assignee_flag
    # pks of tasks this user already annotated (ground-truth annotations excluded)
    user_solved_tasks_array = user.annotations.filter(ground_truth=False)
    user_solved_tasks_array = user_solved_tasks_array.filter(task__isnull=False)\
        .distinct().values_list('task__pk', flat=True)

    with conditional_atomic():
        not_solved_tasks = project.prepared_tasks.\
            exclude(pk__in=user_solved_tasks_array)

        # if annotator is assigned for tasks, he must to solve it regardless of is_labeled=True
        if not assigned_flag:
            not_solved_tasks = not_solved_tasks.filter(is_labeled=False)

        # used only for debug logging, disabled for performance reasons
        not_solved_tasks_count = 'unknown'
        next_task = None

        # ordered by data manager
        if assigned_flag and not dm_queue:
            next_task = not_solved_tasks.first()
            if not next_task:
                raise NotFound('No more tasks found')
            return self._make_response(next_task, request, use_task_lock=False, queue='Manually assigned queue')

        # If current user has already lock one task - return it (without setting the lock again)
        next_task = Task.get_locked_by(user, tasks=not_solved_tasks)
        if next_task and not dm_queue:
            return self._make_response(next_task, request, use_task_lock=False, queue='Task lock')

        if project.show_ground_truth_first and not dm_queue:
            logger.debug(
                f'User={request.user} tries ground truth from {not_solved_tasks_count} tasks'
            )
            next_task = self._try_ground_truth(not_solved_tasks, project)
            if next_task:
                return self._make_response(next_task, request, queue='Ground truth queue')

        # accumulates the names of queues that contributed to the final pick
        queue_info = ''

        # show tasks with overlap > 1 first
        if project.show_overlap_first and not dm_queue:
            # don't output anything - just filter tasks with overlap
            logger.debug(
                f'User={request.user} tries overlap first from {not_solved_tasks_count} tasks'
            )
            _, not_solved_tasks = self._try_tasks_with_overlap(not_solved_tasks)
            queue_info += 'Show overlap first'

        # if there any tasks in progress (with maximum number of annotations), randomly sampling from them
        logger.debug(
            f'User={request.user} tries depth first from {not_solved_tasks_count} tasks'
        )
        if project.maximum_annotations > 1 and not dm_queue:
            next_task = self._try_breadth_first(not_solved_tasks)
            if next_task:
                queue_info += (' & ' if queue_info else '') + 'Breadth first queue'
                return self._make_response(next_task, request, queue=queue_info)

        # data manager queue
        if dm_queue:
            queue_info += (' & ' if queue_info else '') + 'Data manager queue'
            logger.debug(
                f'User={request.user} tries sequence sampling from {not_solved_tasks_count} tasks'
            )
            next_task = not_solved_tasks.first()
        elif project.sampling == project.SEQUENCE:
            queue_info += (' & ' if queue_info else '') + 'Sequence queue'
            logger.debug(
                f'User={request.user} tries sequence sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._get_first_unlocked(not_solved_tasks)
        elif project.sampling == project.UNCERTAINTY:
            queue_info += (' & ' if queue_info else '') + 'Active learning or random queue'
            logger.debug(
                f'User={request.user} tries uncertainty sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._try_uncertainty_sampling(
                not_solved_tasks, project, user_solved_tasks_array)
        elif project.sampling == project.UNIFORM:
            queue_info += (' & ' if queue_info else '') + 'Uniform random queue'
            logger.debug(
                f'User={request.user} tries random sampling from {not_solved_tasks_count} tasks'
            )
            next_task = self._get_random_unlocked(not_solved_tasks)

        if next_task:
            return self._make_response(next_task, request, queue=queue_info)
        else:
            raise NotFound(
                f'There are still some tasks to complete for the user={user}, '
                f'but they seem to be locked by another user.')