示例#1
0
    def create(self, request, *args, **kwargs):
        # check project permissions
        project = self.get_object()
        tasks_ids = set(Task.objects.filter(project=project).values_list('id', flat=True))
        logger.debug(f'Importing {len(self.request.data)} predictions to project {project} with {len(tasks_ids)} tasks')
        predictions = []
        for item in self.request.data:
            if item.get('task') not in tasks_ids:
                raise LabelStudioValidationErrorSentryIgnored(
                    f'{item} contains invalid "task" field: corresponding task ID couldn\'t be retrieved '
                    f'from project {project} tasks')
            predictions.append(Prediction(
                task_id=item['task'],
                result=Prediction.prepare_prediction_result(item.get('result'), project),
                score=item.get('score'),
                model_version=item.get('model_version', 'undefined')
            ))
        predictions_obj = Prediction.objects.bulk_create(predictions, batch_size=settings.BATCH_SIZE)

        return Response({'created': len(predictions_obj)}, status=status.HTTP_201_CREATED)
示例#2
0
    def create(self, validated_data):
        """ Create Tasks and Annotations in bulk
        """
        db_tasks, db_annotations, db_predictions, validated_tasks = [], [], [], validated_data
        logging.info(f'Try to serialize tasks with annotations, data len = {len(validated_data)}')
        user = self.context.get('user', None)

        organization = user.active_organization \
            if not self.project.created_by.active_organization else self.project.created_by.active_organization
        members_email_to_id = dict(organization.members.values_list('user__email', 'user__id'))
        members_ids = set(members_email_to_id.values())
        logger.debug(f"{len(members_email_to_id)} members found in organization {organization}")

        # to be sure we add tasks with annotations at the same time
        with transaction.atomic():

            # extract annotations and predictions
            task_annotations, task_predictions = [], []
            for task in validated_tasks:
                annotations = task.pop('annotations', [])
                # insert a valid "completed_by_id" by existing member
                self._insert_valid_completed_by_id_or_raise(
                    annotations, members_email_to_id, members_ids, user or self.project.created_by)
                predictions = task.pop('predictions', [])
                task_annotations.append(annotations)
                task_predictions.append(predictions)

            # add tasks first
            max_overlap = self.project.maximum_annotations

            # identify max inner id
            tasks = Task.objects.filter(project=self.project)
            max_inner_id = (tasks.order_by("-inner_id")[0].inner_id + 1) if tasks else 1

            for i, task in enumerate(validated_tasks):
                t = Task(
                    project=self.project,
                    data=task['data'],
                    meta=task.get('meta', {}),
                    overlap=max_overlap,
                    is_labeled=len(task_annotations[i]) >= max_overlap,
                    file_upload_id=task.get('file_upload_id'),
                    inner_id=max_inner_id + i
                )
                db_tasks.append(t)

            if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
                self.db_tasks = []
                try:
                    last_task = Task.objects.latest('id')
                    current_id = last_task.id + 1
                except Task.DoesNotExist:
                    current_id = 1

                for task in db_tasks:
                    task.id = current_id
                    current_id += 1
                self.db_tasks = Task.objects.bulk_create(db_tasks, batch_size=settings.BATCH_SIZE)
            else:
                self.db_tasks = Task.objects.bulk_create(db_tasks, batch_size=settings.BATCH_SIZE)
            logging.info(f'Tasks serialization success, len = {len(self.db_tasks)}')

            # add annotations
            for i, annotations in enumerate(task_annotations):
                for annotation in annotations:
                    if not isinstance(annotation, dict):
                        continue
                        
                    # support both "ground_truth" and "ground_truth"
                    ground_truth = annotation.pop('ground_truth', True)
                    was_cancelled = annotation.pop('was_cancelled', False)
                    lead_time = annotation.pop('lead_time', None)

                    db_annotations.append(Annotation(task=self.db_tasks[i],
                                                     ground_truth=ground_truth,
                                                     was_cancelled=was_cancelled,
                                                     completed_by_id=annotation['completed_by_id'],
                                                     result=annotation['result'],
                                                     lead_time=lead_time))

            # add predictions
            last_model_version = None
            for i, predictions in enumerate(task_predictions):
                for prediction in predictions:
                    if not isinstance(prediction, dict):
                        continue

                    # we need to call result normalizer here since "bulk_create" doesn't call save() method
                    result = Prediction.prepare_prediction_result(prediction['result'], self.project)
                    prediction_score = prediction.get('score')
                    if prediction_score is not None:
                        try:
                            prediction_score = float(prediction_score)
                        except ValueError as exc:
                            logger.error(
                                f'Can\'t upload prediction score: should be in float format. Reason: {exc}.'
                                f'Fallback to score=None', exc_info=True)
                            prediction_score = None

                    last_model_version = prediction.get('model_version', 'undefined')
                    db_predictions.append(Prediction(task=self.db_tasks[i],
                                                     result=result,
                                                     score=prediction_score,
                                                     model_version=last_model_version))

            # annotations: DB bulk create
            if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
                self.db_annotations = []
                try:
                    last_annotation = Annotation.objects.latest('id')
                    current_id = last_annotation.id + 1
                except Annotation.DoesNotExist:
                    current_id = 1

                for annotation in db_annotations:
                    annotation.id = current_id
                    current_id += 1
                self.db_annotations = Annotation.objects.bulk_create(db_annotations, batch_size=settings.BATCH_SIZE)
            else:
                self.db_annotations = Annotation.objects.bulk_create(db_annotations, batch_size=settings.BATCH_SIZE)
            logging.info(f'Annotations serialization success, len = {len(self.db_annotations)}')

            # predictions: DB bulk create
            self.db_predictions = Prediction.objects.bulk_create(db_predictions, batch_size=settings.BATCH_SIZE)
            logging.info(f'Predictions serialization success, len = {len(self.db_predictions)}')

            # renew project model version if it's empty
            if not self.project.model_version and last_model_version is not None:
                self.project.model_version = last_model_version
                self.project.save()

        self.post_process_annotations(self.db_annotations)
        return db_tasks