Example #1
0
    def _make_response(self, next_task, request, use_task_lock=True):
        """Once next task has chosen, this function triggers inference and prepare the API response"""
        user = request.user
        project = next_task.project

        if use_task_lock:
            # set lock for the task with a TTL 3x the current average lead time (or 1 hour by default)
            next_task.set_lock(request.user)

        # call machine learning api and format response
        if project.show_collab_predictions:
            for ml_backend in project.ml_backends.all():
                ml_backend.predict_one_task(next_task)

        # serialize task
        context = {
            'request': request,
            'project': project,
            'resolve_uri': True,
            'proxy': bool_from_request(request.GET, 'proxy', True)
        }
        serializer = TaskWithAnnotationsAndPredictionsAndDraftsSerializer(
            next_task, context=context)
        response = serializer.data

        annotations = []
        for c in response.get('annotations', []):
            if c.get('completed_by') == user.id and not (c.get('ground_truth')
                                                         or c.get('honeypot')):
                annotations.append(c)
        response['annotations'] = annotations

        return Response(response)
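
The loop at the end of _make_response keeps only the requesting user's own annotations and drops ground-truth/honeypot entries. Below is a minimal standalone sketch of that same rule; the helper name and the sample dicts are illustrative only, not part of the API.

# Sketch of the annotation filter above, extracted as a standalone helper.
# filter_own_annotations and the sample data are hypothetical names for illustration.
def filter_own_annotations(serialized_annotations, user_id):
    """Keep the requesting user's annotations that are neither ground truth nor honeypot."""
    return [
        a for a in serialized_annotations
        if a.get('completed_by') == user_id
        and not (a.get('ground_truth') or a.get('honeypot'))
    ]

# Only the first annotation survives for user_id=1:
filter_own_annotations(
    [{'completed_by': 1, 'ground_truth': False},
     {'completed_by': 2, 'ground_truth': False},
     {'completed_by': 1, 'honeypot': True}],
    user_id=1)
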
Example #2
0
class ProjectNextTaskAPI(generics.RetrieveAPIView):
    """get:
    Get next task to label

    Get the next task for labeling. If you enable Machine Learning in
    your project, the response might include a "predictions"
    field. It contains a machine learning prediction result for
    this task.

    """
    permission_classes = (IsAuthenticated, ProjectNextTaskAPIPermissions)
    serializer_class = TaskWithAnnotationsAndPredictionsAndDraftsSerializer  # using it for swagger API docs

    def _get_random_unlocked(self, task_query, upper_limit=None):
        # get a random task from the task query, ignoring locked tasks
        n = task_query.count()
        if n > 0:
            upper_limit = upper_limit or n
            random_indices = np.random.permutation(upper_limit)
            task_query_only = task_query.only('overlap', 'id')

            for i in random_indices:
                try:
                    task = task_query_only[int(i)]
                except IndexError as exc:
                    logger.error(
                        f'Task query out of range for {int(i)}, count={task_query_only.count()}. '
                        f'Reason: {exc}',
                        exc_info=True)
                except Exception as exc:
                    logger.error(exc, exc_info=True)
                else:
                    try:
                        task = Task.objects.select_for_update(
                            skip_locked=True).get(pk=task.id)
                        if not task.has_lock():
                            return task
                    except Task.DoesNotExist:
                        logger.debug('Task with id {} locked'.format(task.id))

    def _get_first_unlocked(self, tasks_query):
        # Skip tasks that are locked due to being taken by collaborators
        for task in tasks_query.all():
            try:
                task = Task.objects.select_for_update(skip_locked=True).get(
                    pk=task.id)
                if not task.has_lock():
                    return task
            except Task.DoesNotExist:
                logger.debug('Task with id {} locked'.format(task.id))

    def _try_ground_truth(self, tasks, project):
        """Returns task from ground truth set"""
        ground_truth = Annotation.objects.filter(task=OuterRef('pk'),
                                                 ground_truth=True)
        not_solved_tasks_with_ground_truths = tasks.annotate(
            has_ground_truths=Exists(ground_truth)).filter(
                has_ground_truths=True)
        if not_solved_tasks_with_ground_truths.exists():
            if project.sampling == project.SEQUENCE:
                return self._get_first_unlocked(
                    not_solved_tasks_with_ground_truths)
            return self._get_random_unlocked(
                not_solved_tasks_with_ground_truths)

    def _try_tasks_with_overlap(self, tasks):
        """Filter out tasks without overlap (doesn't return next task)"""
        tasks_with_overlap = tasks.filter(overlap__gt=1)
        if tasks_with_overlap.exists():
            return None, tasks_with_overlap
        else:
            return None, tasks.filter(overlap=1)

    def _try_breadth_first(self, tasks):
        """Try to find tasks with maximum amount of annotations, since we are trying to label tasks as fast as possible
        """

        # =======
        # This commented-out part solves breadth-first in a slightly different way:
        # it selects the first task where any annotations have already been created.
        # We've left it here so that it can be enabled through the project settings later.
        # =======
        # annotations = Annotation.objects.filter(task=OuterRef('pk'), ground_truth=False)
        # not_solved_tasks_labeling_started = tasks.annotate(labeling_started=Exists(annotations))
        # not_solved_tasks_labeling_started_true = not_solved_tasks_labeling_started.filter(labeling_started=True)
        # if not_solved_tasks_labeling_started_true.exists():
        #     # try to complete tasks that are already in progress
        #     next_task = self._get_random(not_solved_tasks_labeling_started_true)
        #     return next_task

        tasks = tasks.annotate(annotations_count=Count('annotations'))
        max_annotations_count = tasks.aggregate(
            Max('annotations_count'))['annotations_count__max']
        if max_annotations_count == 0:
            # no labeled tasks found
            return

        # find any task with the maximal number of created annotations
        not_solved_tasks_labeling_started = tasks.annotate(
            reach_max_annotations_count=Case(When(
                annotations_count=max_annotations_count, then=Value(True)),
                                             default=Value(False),
                                             output_field=BooleanField()))
        not_solved_tasks_labeling_with_max_annotations = not_solved_tasks_labeling_started.filter(
            reach_max_annotations_count=True)
        if not_solved_tasks_labeling_with_max_annotations.exists():
            # try to complete tasks that are already in progress
            return self._get_random_unlocked(
                not_solved_tasks_labeling_with_max_annotations)

    def _try_uncertainty_sampling(self, tasks, project,
                                  user_solved_tasks_array):
        task_with_current_predictions = tasks.filter(
            predictions__model_version=project.model_version)
        if task_with_current_predictions.exists():
            logger.debug('Use uncertainty sampling')
            # collect all clusters already solved by the user and count the number of solved tasks in each
            user_solved_clusters = project.prepared_tasks.filter(
                pk__in=user_solved_tasks_array).annotate(
                    cluster=Max('predictions__cluster')).values_list('cluster',
                                                                     flat=True)
            user_solved_clusters = Counter(user_solved_clusters)
            # order each task by the number of tasks solved in its cluster
            cluster_num_solved_map = [
                When(predictions__cluster=k, then=v)
                for k, v in user_solved_clusters.items()
            ]

            # WARNING! this call doesn't work after subsequent annotate() calls
            num_tasks_with_current_predictions = task_with_current_predictions.count()
            if cluster_num_solved_map:
                task_with_current_predictions = task_with_current_predictions.annotate(
                    cluster_num_solved=Case(*cluster_num_solved_map,
                                            default=0,
                                            output_field=DecimalField()))
                # next task is chosen from least solved cluster and with lowest prediction score
                possible_next_tasks = task_with_current_predictions.order_by(
                    'cluster_num_solved', 'predictions__score')
            else:
                possible_next_tasks = task_with_current_predictions.order_by(
                    'predictions__score')

            num_annotators = project.annotators().count()
            if num_annotators > 1 and num_tasks_with_current_predictions > 0:
                # try to randomize tasks to avoid concurrent labeling between several annotators
                next_task = self._get_random_unlocked(
                    possible_next_tasks,
                    upper_limit=min(num_annotators + 1,
                                    num_tasks_with_current_predictions))
            else:
                next_task = self._get_first_unlocked(possible_next_tasks)
        else:
            # uncertainty sampling fallback: choose by random sampling
            logger.debug(
                f'Uncertainty sampling falls back to random sampling '
                f'(current project.model_version={str(project.model_version)})'
            )
            next_task = self._get_random_unlocked(tasks)
        return next_task

    def _make_response(self, next_task, request, use_task_lock=True):
        """Once next task has chosen, this function triggers inference and prepare the API response"""
        user = request.user
        project = next_task.project

        if use_task_lock:
            # set lock for the task with a TTL 3x the current average lead time (or 1 hour by default)
            next_task.set_lock(request.user)

        # call machine learning api and format response
        if project.show_collab_predictions:
            for ml_backend in project.ml_backends.all():
                ml_backend.predict_one_task(next_task)

        # serialize task
        context = {
            'request': request,
            'project': project,
            'resolve_uri': True,
            'proxy': bool_from_request(request.GET, 'proxy', True)
        }
        serializer = TaskWithAnnotationsAndPredictionsAndDraftsSerializer(
            next_task, context=context)
        response = serializer.data

        annotations = []
        for c in response.get('annotations', []):
            if c.get('completed_by') == user.id and not (c.get('ground_truth')
                                                         or c.get('honeypot')):
                annotations.append(c)
        response['annotations'] = annotations

        return Response(response)

    @swagger_auto_schema(
        tags=['Projects'],
        responses={
            200: TaskWithAnnotationsAndPredictionsAndDraftsSerializer()
        })
    def get(self, request, *args, **kwargs):
        project = get_object_with_check_and_log(request,
                                                Project,
                                                pk=self.kwargs['pk'])
        # TODO: LSE option
        # if not project.is_published:
        #     raise PermissionDenied('Project is not published.')
        self.check_object_permissions(request, project)
        user = request.user

        # support actions api call from actions/next_task.py
        if hasattr(self, 'prepared_tasks'):
            project.prepared_tasks = self.prepared_tasks
            external_prepared_tasks_used = True
        # get prepared tasks from request params (filters, selected items)
        else:
            project.prepared_tasks = get_prepared_queryset(
                self.request, project)
            external_prepared_tasks_used = False

        # detect solved and not solved tasks
        user_solved_tasks_array = user.annotations.filter(
            ground_truth=False).filter(Q(task__isnull=False)).values_list(
                'task__pk', flat=True)

        with conditional_atomic():
            not_solved_tasks = project.prepared_tasks.\
                exclude(pk__in=user_solved_tasks_array).filter(is_labeled=False)
            not_solved_tasks_count = not_solved_tasks.count()

            # return nothing if no tasks remain
            if not_solved_tasks_count == 0:
                raise NotFound(
                    f'There are no tasks remaining to be annotated by the user={user}'
                )
            logger.debug(
                f'{not_solved_tasks_count} tasks that still need to be annotated for user={user}'
            )

            # ordered by data manager
            if external_prepared_tasks_used:
                next_task = not_solved_tasks.first()
                if not next_task:
                    raise NotFound('No more tasks found')
                return self._make_response(next_task, request)

            # If the current user has already locked a task, return it (without setting the lock again)
            next_task = Task.get_locked_by(user, project)
            if next_task:
                return self._make_response(next_task,
                                           request,
                                           use_task_lock=False)

            if project.show_ground_truth_first:
                logger.debug(
                    f'User={request.user} tries ground truth from {not_solved_tasks_count} tasks'
                )
                next_task = self._try_ground_truth(not_solved_tasks, project)
                if next_task:
                    return self._make_response(next_task, request)

            if project.show_overlap_first:
                # don't output anything - just filter tasks with overlap
                logger.debug(
                    f'User={request.user} tries overlap first from {not_solved_tasks_count} tasks'
                )
                _, not_solved_tasks = self._try_tasks_with_overlap(
                    not_solved_tasks)

            # if there are any tasks in progress (with the maximum number of annotations), sample randomly from them
            logger.debug(
                f'User={request.user} tries breadth first from {not_solved_tasks_count} tasks'
            )
            next_task = self._try_breadth_first(not_solved_tasks)
            if next_task:
                return self._make_response(next_task, request)

            if project.sampling == project.UNCERTAINTY:
                logger.debug(
                    f'User={request.user} tries uncertainty sampling from {not_solved_tasks_count} tasks'
                )
                next_task = self._try_uncertainty_sampling(
                    not_solved_tasks, project, user_solved_tasks_array)

            elif project.sampling == project.UNIFORM:
                logger.debug(
                    f'User={request.user} tries random sampling from {not_solved_tasks_count} tasks'
                )
                next_task = self._get_random_unlocked(not_solved_tasks)

            elif project.sampling == project.SEQUENCE:
                logger.debug(
                    f'User={request.user} tries sequence sampling from {not_solved_tasks_count} tasks'
                )
                next_task = self._get_first_unlocked(
                    not_solved_tasks.all().order_by('id'))

            if next_task:
                return self._make_response(next_task, request)
            else:
                raise NotFound(
                    f'There are some unsolved tasks for the user={user}, but they seem to be locked by other users'
                )
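
A client-side usage sketch for the endpoint above. The URL path, host, and token auth header are assumptions; the actual route and auth scheme depend on the project's urls.py and settings, which are not shown here.

# Hedged sketch of calling the next-task endpoint with the requests library.
# '/api/projects/1/next', the host, and token auth are assumed, not confirmed by this code.
import requests

resp = requests.get(
    'http://localhost:8080/api/projects/1/next',          # hypothetical host and route
    headers={'Authorization': 'Token <your-api-token>'},  # hypothetical auth scheme
)
if resp.status_code == 200:
    task = resp.json()
    # the serialized task may carry 'predictions' and the user's own 'annotations'
    print(task.get('id'), len(task.get('predictions', [])))
elif resp.status_code == 404:
    # the view raises NotFound when no unsolved, unlocked tasks remain
    print('No tasks left to annotate')
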
Example #3
0
        return super(ProjectAPI, self).put(request, *args, **kwargs)


@method_decorator(
    name='get',
    decorator=swagger_auto_schema(
        tags=['Projects'],
        operation_summary='Get next task to label',
        operation_description="""
    Get the next task for labeling. If you enable Machine Learning in
    your project, the response might include a "predictions"
    field. It contains a machine learning prediction result for
    this task.
    """,
        responses={
            200: TaskWithAnnotationsAndPredictionsAndDraftsSerializer()
        })
)  # we keep this method decorator here in case we put it back into the swagger API docs
class ProjectNextTaskAPI(generics.RetrieveAPIView):

    permission_required = all_permissions.tasks_view
    serializer_class = TaskWithAnnotationsAndPredictionsAndDraftsSerializer  # using it for swagger API docs
    swagger_schema = None  # this endpoint doesn't need to be in swagger API docs

    def _get_random_unlocked(self, task_query, upper_limit=None):
        # get a random task from the task query, ignoring locked tasks
        n = task_query.count()
        if n > 0:
            upper_limit = upper_limit or n
            random_indices = np.random.permutation(upper_limit)
            task_query_only = task_query.only('overlap', 'id')