import logging

from django.conf import settings
from django.db import transaction
from rest_framework import status
from rest_framework.response import Response

# Model and exception imports are project-specific; adjust the paths to your layout.
from tasks.models import Annotation, Prediction, Task
from core.utils.exceptions import LabelStudioValidationErrorSentryIgnored

logger = logging.getLogger(__name__)


def create(self, request, *args, **kwargs):
    # check project permissions
    project = self.get_object()
    tasks_ids = set(Task.objects.filter(project=project).values_list('id', flat=True))
    logger.debug(f'Importing {len(request.data)} predictions to project {project} '
                 f'with {len(tasks_ids)} tasks')

    predictions = []
    for item in request.data:
        if item.get('task') not in tasks_ids:
            raise LabelStudioValidationErrorSentryIgnored(
                f'{item} contains invalid "task" field: corresponding task ID couldn\'t be retrieved '
                f'from project {project} tasks')
        predictions.append(Prediction(
            task_id=item['task'],
            result=Prediction.prepare_prediction_result(item.get('result'), project),
            score=item.get('score'),
            model_version=item.get('model_version', 'undefined')
        ))

    predictions_obj = Prediction.objects.bulk_create(predictions, batch_size=settings.BATCH_SIZE)
    return Response({'created': len(predictions_obj)}, status=status.HTTP_201_CREATED)
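
# --- Usage sketch (illustrative, not part of the original module) -------------
# A minimal client-side example of calling the bulk prediction import above.
# The route, host, and token below are assumptions (nothing in this file pins
# them down); the "result" payload follows the standard Label Studio
# prediction-result format.
def example_import_predictions():
    import requests  # third-party client library, assumed available

    payload = [{
        'task': 123,  # must be an ID of a task that belongs to the project
        'result': [{  # standard Label Studio result format
            'from_name': 'label', 'to_name': 'text', 'type': 'choices',
            'value': {'choices': ['Positive']},
        }],
        'score': 0.95,                    # optional; stored as-is
        'model_version': 'my-model-v1',   # optional; defaults to 'undefined'
    }]
    resp = requests.post(
        'http://localhost:8080/api/projects/1/import/predictions',  # assumed route
        headers={'Authorization': 'Token <YOUR_API_TOKEN>'},         # assumed auth scheme
        json=payload,
    )
    resp.raise_for_status()
    return resp.json()  # e.g. {'created': 1}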
def create(self, validated_data):
    """ Create Tasks and Annotations in bulk """
    db_tasks, db_annotations, db_predictions, validated_tasks = [], [], [], validated_data
    logger.info(f'Trying to serialize tasks with annotations, data len = {len(validated_data)}')
    user = self.context.get('user', None)
    # prefer the project creator's organization; fall back to the requesting user's
    organization = self.project.created_by.active_organization or user.active_organization
    members_email_to_id = dict(organization.members.values_list('user__email', 'user__id'))
    members_ids = set(members_email_to_id.values())
    logger.debug(f'{len(members_email_to_id)} members found in organization {organization}')

    # make sure tasks, annotations and predictions are inserted in one transaction
    with transaction.atomic():

        # extract annotations and predictions
        task_annotations, task_predictions = [], []
        for task in validated_tasks:
            annotations = task.pop('annotations', [])
            # insert a valid "completed_by_id" belonging to an existing member
            self._insert_valid_completed_by_id_or_raise(
                annotations, members_email_to_id, members_ids, user or self.project.created_by)
            predictions = task.pop('predictions', [])
            task_annotations.append(annotations)
            task_predictions.append(predictions)

        # add tasks first
        max_overlap = self.project.maximum_annotations

        # identify the maximum inner_id among the existing project tasks
        tasks = Task.objects.filter(project=self.project)
        max_inner_id = (tasks.order_by('-inner_id')[0].inner_id + 1) if tasks.exists() else 1

        for i, task in enumerate(validated_tasks):
            t = Task(
                project=self.project,
                data=task['data'],
                meta=task.get('meta', {}),
                overlap=max_overlap,
                is_labeled=len(task_annotations[i]) >= max_overlap,
                file_upload_id=task.get('file_upload_id'),
                inner_id=max_inner_id + i
            )
            db_tasks.append(t)

        # SQLite doesn't return auto-generated IDs from bulk_create(),
        # so pre-assign sequential IDs before inserting
        if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
            self.db_tasks = []
            try:
                current_id = Task.objects.latest('id').id + 1
            except Task.DoesNotExist:
                current_id = 1
            for task in db_tasks:
                task.id = current_id
                current_id += 1
            self.db_tasks = Task.objects.bulk_create(db_tasks, batch_size=settings.BATCH_SIZE)
        else:
            self.db_tasks = Task.objects.bulk_create(db_tasks, batch_size=settings.BATCH_SIZE)
        logger.info(f'Tasks serialization success, len = {len(self.db_tasks)}')

        # add annotations
        for i, annotations in enumerate(task_annotations):
            for annotation in annotations:
                if not isinstance(annotation, dict):
                    continue

                # pop annotation-level flags; "ground_truth" defaults to True
                ground_truth = annotation.pop('ground_truth', True)
                was_cancelled = annotation.pop('was_cancelled', False)
                lead_time = annotation.pop('lead_time', None)

                db_annotations.append(Annotation(
                    task=self.db_tasks[i],
                    ground_truth=ground_truth,
                    was_cancelled=was_cancelled,
                    completed_by_id=annotation['completed_by_id'],
                    result=annotation['result'],
                    lead_time=lead_time
                ))

        # add predictions
        last_model_version = None
        for i, predictions in enumerate(task_predictions):
            for prediction in predictions:
                if not isinstance(prediction, dict):
                    continue

                # call the result normalizer here since bulk_create() doesn't call save()
                result = Prediction.prepare_prediction_result(prediction['result'], self.project)
                prediction_score = prediction.get('score')
                if prediction_score is not None:
                    try:
                        prediction_score = float(prediction_score)
                    except ValueError as exc:
                        logger.error(
                            f'Can\'t upload prediction score: should be in float format. '
                            f'Reason: {exc}. Fallback to score=None', exc_info=True)
                        prediction_score = None

                last_model_version = prediction.get('model_version', 'undefined')
                db_predictions.append(Prediction(
                    task=self.db_tasks[i],
                    result=result,
                    score=prediction_score,
                    model_version=last_model_version
                ))

        # annotations: DB bulk create, with the same SQLite ID workaround as for tasks
        if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
            self.db_annotations = []
            try:
                current_id = Annotation.objects.latest('id').id + 1
            except Annotation.DoesNotExist:
                current_id = 1
            for annotation in db_annotations:
                annotation.id = current_id
                current_id += 1
            self.db_annotations = Annotation.objects.bulk_create(db_annotations, batch_size=settings.BATCH_SIZE)
        else:
            self.db_annotations = Annotation.objects.bulk_create(db_annotations, batch_size=settings.BATCH_SIZE)
        logger.info(f'Annotations serialization success, len = {len(self.db_annotations)}')

        # predictions: DB bulk create
        self.db_predictions = Prediction.objects.bulk_create(db_predictions, batch_size=settings.BATCH_SIZE)
        logger.info(f'Predictions serialization success, len = {len(self.db_predictions)}')

        # renew the project model version if it's empty
        if not self.project.model_version and last_model_version is not None:
            self.project.model_version = last_model_version
            self.project.save()

    self.post_process_annotations(self.db_annotations)
    return db_tasks
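
# --- Design note (illustrative sketch) ----------------------------------------
# The SQLite branches above exist because Django's bulk_create() does not
# populate auto-increment primary keys on every backend (PostgreSQL returns
# them; older SQLite/Django combinations do not), yet the code must reference
# self.db_tasks[i] with a usable PK right after insertion. A generic form of
# that workaround might look like the helper below ("bulk_create_with_ids" is
# a hypothetical name, not part of the original code). It assumes it runs
# inside transaction.atomic(), as above, so the manually assigned IDs can't
# race with concurrent inserts.
def bulk_create_with_ids(model_cls, objs, batch_size):
    """Pre-assign sequential PKs so objs carry usable IDs after bulk_create()."""
    try:
        next_id = model_cls.objects.latest('id').id + 1
    except model_cls.DoesNotExist:
        next_id = 1  # empty table: start from the first ID
    for obj in objs:
        obj.id = next_id
        next_id += 1
    return model_cls.objects.bulk_create(objs, batch_size=batch_size)

# With such a helper, both SQLite branches above would collapse to calls like
# bulk_create_with_ids(Task, db_tasks, settings.BATCH_SIZE).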