def perform_create(self, ser): task = get_object_with_check_and_log(self.request, Task, pk=self.kwargs['pk']) # annotator has write access only to annotations and it can't be checked it after serializer.save() user = self.request.user # updates history result = ser.validated_data.get('result') extra_args = {'task_id': self.kwargs['pk']} # save stats about how well annotator annotations coincide with current prediction # only for finished task annotations if result is not None: prediction = Prediction.objects.filter( task=task, model_version=task.project.model_version) if prediction.exists(): prediction = prediction.first() prediction_ser = PredictionSerializer(prediction).data else: logger.debug( f'User={self.request.user}: there are no predictions for task={task}' ) prediction_ser = {} # serialize annotation extra_args.update({ 'prediction': prediction_ser, }) if 'was_cancelled' in self.request.GET: extra_args['was_cancelled'] = bool_from_request( self.request.GET, 'was_cancelled', False) if 'completed_by' not in ser.validated_data: extra_args['completed_by'] = self.request.user # create annotation logger.debug(f'User={self.request.user}: save annotation') annotation = ser.save(**extra_args) logger.debug(f'Save activity for user={self.request.user}') self.request.user.activity_at = timezone.now() self.request.user.save() # Release task if it has been taken at work (it should be taken by the same user, or it makes sentry error logger.debug(f'User={user} releases task={task}') task.release_lock(user) # if annotation created from draft - remove this draft draft_id = self.request.data.get('draft_id') if draft_id is not None: logger.debug( f'Remove draft {draft_id} after creating annotation {annotation.id}' ) AnnotationDraft.objects.filter(id=draft_id).delete() if self.request.data.get('ground_truth'): annotation.task.ensure_unique_groundtruth( annotation_id=annotation.id) return annotation
class ExportDataSerializer(FlexFieldsModelSerializer):
    """Task serializer for exports.

    By default ``drafts`` and ``predictions`` are rendered as primary keys;
    both can be expanded to full nested objects via drf-flex-fields
    (``expandable_fields``).
    """
    annotations = AnnotationSerializer(many=True, read_only=True)
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    drafts = serializers.PrimaryKeyRelatedField(many=True, read_only=True)
    # NOTE: the original class body declared `predictions` twice — a
    # `PredictionSerializer(many=True, read_only=True)` assignment that was
    # immediately shadowed by this PrimaryKeyRelatedField. The dead first
    # declaration has been removed; expansion to the full serializer is
    # handled by `expandable_fields` below.
    predictions = serializers.PrimaryKeyRelatedField(many=True, read_only=True)

    def to_representation(self, task):
        """Resolve the $undefined$ key in task data, if any, before serializing."""
        project = task.project
        data = task.data
        replace_task_data_undefined_with_config_field(data, project)
        return super().to_representation(task)

    class Meta:
        model = Task
        exclude = ('overlap', 'is_labeled')
        expandable_fields = {
            'drafts': (AnnotationDraftSerializer, {
                'many': True
            }),
            'predictions': (PredictionSerializer, {
                'many': True
            }),
        }
def get_predictions(self, task):
    """Serialize the task's predictions, but only when the serializer
    context explicitly enables them; otherwise return an empty list."""
    include_predictions = self.context.get('predictions')
    if include_predictions:
        serializer = PredictionSerializer(
            task.predictions, many=True, default=[], read_only=True)
        return serializer.data
    return []
class ImportApiSerializer(TaskSerializer):
    """ Tasks serializer for Import API (TaskBulkCreateAPI) """
    # Nested annotations/predictions are writable here (no read_only) so that
    # imported payloads can create them alongside the task.
    annotations = AnnotationSerializer(many=True, default=[])
    predictions = PredictionSerializer(many=True, default=[])

    class Meta:
        model = Task
        # Bulk creation is delegated to TaskSerializerBulk when many=True.
        list_serializer_class = TaskSerializerBulk
        # `project` is excluded because it is supplied by the import view,
        # not by the uploaded payload.
        exclude = ('is_labeled', 'project')
def predict_many_tasks(self, tasks):
    """Request predictions for a batch of tasks from the ML backend and
    persist them as Prediction objects.

    :param tasks: a list of Task instances or a Task queryset
    :return: None (early-returns on backend not ready, API error,
             or empty/one-by-one responses)
    """
    self.update_state()
    if self.not_ready:
        logger.debug(f'ML backend {self} is not ready')
        return

    # Normalize a plain list of tasks into a queryset.
    if isinstance(tasks, list):
        from tasks.models import Task
        tasks = Task.objects.filter(id__in=[task.id for task in tasks])

    tasks_ser = TaskSimpleSerializer(tasks, many=True).data
    ml_api_result = self.api.make_predictions(tasks_ser, self.model_version,
                                              self.project)
    if ml_api_result.is_error:
        logger.error(
            f'Prediction not created for project {self}: {ml_api_result.error_message}'
        )
        return

    responses = ml_api_result.response['results']
    if len(responses) == 0:
        logger.error(
            f'ML backend returned empty prediction for project {self}')
        return

    # ML Backend doesn't support batch of tasks, do it one by one
    elif len(responses) == 1:
        logger.warning(
            f"'ML backend '{self.title}' doesn't support batch processing of tasks, "
            f"switched to one-by-one task retrieving")
        for task in tasks:
            self.predict_one_task(task)
        return

    # wrong result number
    elif len(responses) != len(tasks_ser):
        logger.error(
            f'ML backend returned response number {len(responses)} != task number {len(tasks_ser)}'
        )
        # NOTE(review): there is no `return` here, so execution falls through
        # and zip() below silently pairs only the shorter of the two lists —
        # confirm whether partial saving is intended or a missing early return.

    predictions = []
    for task, response in zip(tasks_ser, responses):
        predictions.append({
            'task': task['id'],
            'result': response['result'],
            'score': response.get('score'),
            'model_version': self.model_version
        })

    # conditional_atomic: wraps the bulk save in a transaction when supported.
    with conditional_atomic():
        prediction_ser = PredictionSerializer(data=predictions, many=True)
        prediction_ser.is_valid(raise_exception=True)
        prediction_ser.save()
class ExportDataSerializer(serializers.ModelSerializer):
    """Task serializer for exports: nested read-only annotations and
    predictions, plus the original upload file name."""
    annotations = AnnotationSerializer(many=True, read_only=True)
    predictions = PredictionSerializer(many=True, read_only=True)
    file_upload = serializers.ReadOnlyField(source='file_upload_name')

    def to_representation(self, task):
        """Resolve the $undefined$ key in task.data (if present) before
        delegating to the default representation."""
        replace_task_data_undefined_with_config_field(task.data, task.project)
        return super().to_representation(task)

    class Meta:
        model = Task
        exclude = ('overlap', 'is_labeled')
class ExportDataSerializer(serializers.ModelSerializer):
    """Task serializer for exports: nested read-only annotations and
    predictions, plus the original upload file name."""
    annotations = AnnotationSerializer(many=True, read_only=True)
    predictions = PredictionSerializer(many=True, read_only=True)
    file_upload = serializers.ReadOnlyField(source='file_upload_name')

    def to_representation(self, task):
        """Resolve the $undefined$ key in task data, if any: move its value
        under the project's first configured data key."""
        data = task.data
        config_keys = list(task.project.data_types.keys())
        if config_keys and settings.DATA_UNDEFINED_NAME in data:
            data[config_keys[0]] = data.pop(settings.DATA_UNDEFINED_NAME)
        return super().to_representation(task)

    class Meta:
        model = Task
        exclude = ('overlap', 'is_labeled')
def _scan_and_create_links(self, link_class):
    """Scan the storage for new keys and create a Task (plus its predictions
    and annotations) for each key that does not yet have a link.

    :param link_class: the storage-link model used to record key -> task mapping
    :raises ValueError: when a key is not valid JSON, or when 'predictions' /
        'annotations' are present without a 'data' field
    """
    tasks_created = 0
    maximum_annotations = self.project.maximum_annotations
    # Continue inner_id numbering from the project's current maximum.
    task = self.project.tasks.order_by('-inner_id').first()
    max_inner_id = (task.inner_id + 1) if task else 1

    for key in self.iterkeys():
        logger.debug(f'Scanning key {key}')

        # skip if task already exists
        if link_class.exists(key, self):
            logger.debug(
                f'{self.__class__.__name__} link {key} already exists')
            continue

        logger.debug(f'{self}: found new key {key}')
        try:
            data = self.get_data(key)
        except (UnicodeDecodeError, json.decoder.JSONDecodeError) as exc:
            logger.error(exc, exc_info=True)
            raise ValueError(
                f'Error loading JSON from file "{key}".\nIf you\'re trying to import non-JSON data '
                f'(images, audio, text, etc.), edit storage settings and enable '
                f'"Treat every bucket object as a source file"')

        # predictions
        predictions = data.get('predictions', [])
        if predictions:
            if 'data' not in data:
                raise ValueError(
                    'If you use "predictions" field in the task, '
                    'you must put "data" field in the task too')

        # annotations
        annotations = data.get('annotations', [])
        if annotations:
            if 'data' not in data:
                raise ValueError(
                    'If you use "annotations" field in the task, '
                    'you must put "data" field in the task too')

        # Full task payload: unwrap the actual task data.
        if 'data' in data and isinstance(data['data'], dict):
            data = data['data']

        # Task + link + predictions + annotations are committed atomically
        # per key, so a failure leaves no half-imported task behind.
        with transaction.atomic():
            task = Task.objects.create(
                data=data,
                project=self.project,
                overlap=maximum_annotations,
                is_labeled=len(annotations) >= maximum_annotations,
                inner_id=max_inner_id)
            max_inner_id += 1

            link_class.create(task, key, self)
            logger.debug(
                f'Create {self.__class__.__name__} link with key={key} for task={task}'
            )
            tasks_created += 1

            # add predictions
            logger.debug(
                f'Create {len(predictions)} predictions for task={task}')
            for prediction in predictions:
                prediction['task'] = task.id
            prediction_ser = PredictionSerializer(data=predictions, many=True)
            prediction_ser.is_valid(raise_exception=True)
            prediction_ser.save()

            # add annotations
            logger.debug(
                f'Create {len(annotations)} annotations for task={task}')
            for annotation in annotations:
                annotation['task'] = task.id
            annotation_ser = AnnotationSerializer(data=annotations, many=True)
            annotation_ser.is_valid(raise_exception=True)
            annotation_ser.save()

    # Record sync bookkeeping and refresh project task-state counters.
    self.last_sync = timezone.now()
    self.last_sync_count = tasks_created
    self.save()

    self.project.update_tasks_states(
        maximum_annotations_changed=False,
        overlap_cohort_percentage_changed=False,
        tasks_number_changed=True)
class DataManagerTaskSerializer(TaskSerializer):
    """Task serializer for the Data Manager grid: nested annotations and
    predictions plus aggregate/derived columns (counts, timestamps,
    concatenated results, mean prediction score, annotator list)."""
    predictions = PredictionSerializer(many=True, default=[], read_only=True)
    annotations = AnnotationSerializer(many=True, default=[], read_only=True)
    cancelled_annotations = serializers.SerializerMethodField()
    completed_at = serializers.SerializerMethodField()
    annotations_results = serializers.SerializerMethodField()
    predictions_results = serializers.SerializerMethodField()
    predictions_score = serializers.SerializerMethodField()
    total_annotations = serializers.SerializerMethodField()
    total_predictions = serializers.SerializerMethodField()
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    annotators = serializers.SerializerMethodField()

    class Meta:
        model = Task
        ref_name = 'data_manager_task_serializer'
        fields = [
            "cancelled_annotations", "completed_at", "created_at",
            "annotations_results", "data", "id", "predictions_results",
            "predictions_score", "total_annotations", "total_predictions",
            "annotations", "predictions", "file_upload", "annotators",
            "project"
        ]

    @staticmethod
    def get_cancelled_annotations(obj):
        """Number of skipped/cancelled annotations on the task."""
        return obj.annotations.filter(was_cancelled=True).count()

    @staticmethod
    def get_completed_at(obj):
        """Timestamp of the most recent annotation, or None if there are none."""
        annotations = obj.annotations.all()
        if annotations:
            return max(c.created_at for c in annotations)
        return None

    @staticmethod
    def get_annotations_results(obj):
        """All annotation results as one JSON string ("" when empty)."""
        annotations = obj.annotations.all()
        if annotations:
            return json.dumps([item.result for item in annotations])
        else:
            return ""

    @staticmethod
    def get_predictions_results(obj):
        """All prediction results as one JSON string ("" when empty)."""
        predictions = obj.predictions.all()
        if predictions:
            return json.dumps([item.result for item in predictions])
        else:
            return ""

    @staticmethod
    def get_predictions_score(obj):
        """Mean of the numeric prediction scores, or None if none are numeric."""
        predictions = obj.predictions.all()
        if predictions:
            values = [
                item.score for item in predictions
                if isinstance(item.score, (float, int))
            ]
            if values:
                return sum(values) / float(len(values))
        return None

    @staticmethod
    def get_total_predictions(obj):
        """Total number of predictions on the task."""
        return obj.predictions.count()

    @staticmethod
    def get_total_annotations(obj):
        """Number of non-cancelled annotations on the task."""
        return obj.annotations.filter(was_cancelled=False).count()

    @staticmethod
    def get_annotators(obj):
        """Distinct user ids who annotated the task (None values removed)."""
        result = obj.annotations.values_list('completed_by',
                                             flat=True).distinct()
        result = [r for r in result if r is not None]
        return result
def get_predictions(self, task):
    """Serialize every prediction attached to *task*."""
    serializer = PredictionSerializer(
        task.predictions, many=True, default=[], read_only=True)
    return serializer.data
class DataManagerTaskSerializer(TaskSerializer):
    """Task serializer for the Data Manager grid: nested annotations,
    predictions and per-user drafts, plus aggregate/derived columns
    (counts, timestamps, concatenated results, mean prediction score,
    annotator list)."""
    predictions = PredictionSerializer(many=True, default=[], read_only=True)
    annotations = AnnotationSerializer(many=True, default=[], read_only=True)
    drafts = serializers.SerializerMethodField()
    cancelled_annotations = serializers.SerializerMethodField()
    completed_at = serializers.SerializerMethodField()
    annotations_results = serializers.SerializerMethodField()
    predictions_results = serializers.SerializerMethodField()
    predictions_score = serializers.SerializerMethodField()
    total_annotations = serializers.SerializerMethodField()
    total_predictions = serializers.SerializerMethodField()
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    annotators = serializers.SerializerMethodField()

    class Meta:
        model = Task
        ref_name = 'data_manager_task_serializer'
        fields = [
            "cancelled_annotations", "completed_at", "created_at",
            "annotations_results", "data", "id", "predictions_results",
            "predictions_score", "total_annotations", "total_predictions",
            "annotations", "predictions", "drafts", "file_upload",
            "annotators", "project"
        ]

    @staticmethod
    def get_cancelled_annotations(obj):
        """Number of skipped/cancelled annotations on the task."""
        return obj.annotations.filter(was_cancelled=True).count()

    @staticmethod
    def get_completed_at(obj):
        """Timestamp of the latest annotation, but only once the task is
        fully labeled; otherwise None."""
        annotations = obj.annotations.all()
        if obj.is_labeled and annotations:
            return max(c.created_at for c in annotations)
        return None

    @staticmethod
    def get_annotations_results(obj):
        """All annotation results as one JSON string ("" when empty)."""
        annotations = obj.annotations.all()
        if annotations:
            return json.dumps([item.result for item in annotations])
        else:
            return ""

    @staticmethod
    def get_predictions_results(obj):
        """All prediction results as one JSON string ("" when empty)."""
        predictions = obj.predictions.all()
        if predictions:
            return json.dumps([item.result for item in predictions])
        else:
            return ""

    @staticmethod
    def get_predictions_score(obj):
        """Mean of the numeric prediction scores, or None if none are numeric."""
        predictions = obj.predictions.all()
        if predictions:
            values = [
                item.score for item in predictions
                if isinstance(item.score, (float, int))
            ]
            if values:
                return sum(values) / float(len(values))
        return None

    @staticmethod
    def get_total_predictions(obj):
        """Total number of predictions on the task."""
        return obj.predictions.count()

    @staticmethod
    def get_total_annotations(obj):
        """Number of non-cancelled annotations on the task."""
        return obj.annotations.filter(was_cancelled=False).count()

    @staticmethod
    def get_annotators(obj):
        """Distinct user ids who annotated the task (None values removed)."""
        result = obj.annotations.values_list('completed_by',
                                             flat=True).distinct()
        result = [r for r in result if r is not None]
        return result

    def get_drafts(self, task):
        """Return drafts only for the current user"""
        # it's for swagger documentation
        if not isinstance(task, Task):
            return AnnotationDraftSerializer(many=True)

        drafts = task.drafts
        if 'request' in self.context and hasattr(self.context['request'],
                                                 'user'):
            user = self.context['request'].user
            drafts = drafts.filter(user=user)

        # FIX: the original passed `default=True`, which is not a valid
        # fallback for a many=True serializer; use an empty list, matching
        # the `default=[]` convention of the predictions/annotations fields.
        return AnnotationDraftSerializer(drafts,
                                         many=True,
                                         read_only=True,
                                         default=[],
                                         context=self.context).data