示例#1
0
 def get_annotations(self, task):
     """Serialize the task's annotations, or return an empty list when the
     'annotations' serializer-context flag is absent or falsy."""
     if not self.context.get('annotations'):
         return []
     serializer = AnnotationSerializer(
         task.annotations, many=True, default=[], read_only=True)
     return serializer.data
示例#2
0
 def save_annotation(self, annotation):
     """Export a single annotation to Redis, keyed by its export storage link."""
     redis = self.get_client()
     logger.debug(
         f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}'
     )
     serialized = AnnotationSerializer(annotation).data
     with transaction.atomic():
         # Create export storage link
         link = RedisExportStorageLink.create(annotation, self)
         redis.set(link.key, json.dumps(serialized))
示例#3
0
class ImportApiSerializer(TaskSerializer):
    """ Tasks serializer for Import API (TaskBulkCreateAPI)
    """
    # Nested writable lists: imported tasks may carry annotations/predictions;
    # both default to empty lists when the payload omits them.
    annotations = AnnotationSerializer(many=True, default=[])
    predictions = PredictionSerializer(many=True, default=[])

    class Meta:
        model = Task
        # bulk creation goes through the list serializer for many=True input
        list_serializer_class = TaskSerializerBulk
        # is_labeled and project are not accepted from the import payload
        # (NOTE(review): presumably computed/set server-side — confirm in the view)
        exclude = ('is_labeled', 'project')
示例#4
0
    def post(self, request, *args, **kwargs):
        """Create a cancelled (skipped) annotation on the task given by pk.

        Validates the request body as an annotation, stamps the annotator's
        last-activity time, saves the annotation with was_cancelled=True and
        returns the serialized annotation.
        """
        # get the cancelled task (permission-checked lookup)
        task = get_object_with_permissions(
            self.request, Task, self.kwargs['pk'], 'tasks.change_task')

        # validate data from annotation
        serializer = AnnotationSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        # set annotator last activity
        user = request.user
        user.activity_at = timezone.now()
        user.save()

        # serialize annotation, update task and save
        cancelled = serializer.save(completed_by=user, was_cancelled=True, task=task)
        task.annotations.add(cancelled)
        task.save()
        return Response(serializer.data, status=status.HTTP_200_OK)
示例#5
0
 def save_annotation(self, annotation):
     """Export a single annotation to the configured S3 bucket.

     Upload failures are logged (not raised) so a single bad object does
     not abort a bulk export; the storage link is still created inside the
     transaction.
     """
     import json  # NOTE(review): hoist to module-level imports if not already there

     client, s3 = self.get_client_and_resource()
     logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}')
     ser_annotation = AnnotationSerializer(annotation).data
     with transaction.atomic():
         # Create export storage link
         link = S3ExportStorageLink.create(annotation, self)
         try:
             # FIX: serialize the dict to a JSON string — boto3 put() expects
             # Body to be bytes/str/file-like, not a dict. This also matches
             # the Redis/GCS/Azure exporters, which json.dumps before writing.
             s3.Object(self.bucket, link.key).put(Body=json.dumps(ser_annotation))
         except Exception as exc:
             logger.error(f"Can't export annotation {annotation} to S3 storage {self}. Reason: {exc}", exc_info=True)
示例#6
0
 def save_annotation(self, annotation):
     """Export a single annotation to the configured GCS bucket; upload
     failures are logged rather than raised."""
     target_bucket = self.get_bucket()
     logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}')
     payload = AnnotationSerializer(annotation).data
     with transaction.atomic():
         # Create export storage link
         link = GCSExportStorageLink.create(annotation, self)
         try:
             target_bucket.blob(link.key).upload_from_string(json.dumps(payload))
         except Exception as exc:
             logger.error(f"Can't export annotation {annotation} to GCS storage {self}. Reason: {exc}", exc_info=True)
示例#7
0
 def save_annotation(self, annotation):
     """Export a single annotation to the configured Azure Blob container;
     upload failures are logged rather than raised."""
     container = self.get_container()
     logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}')
     serialized = AnnotationSerializer(annotation).data
     with transaction.atomic():
         # Create export storage link
         link = AzureBlobExportStorageLink.create(annotation, self)
         try:
             blob_client = container.get_blob_client(link.key)
             blob_client.upload_blob(json.dumps(serialized))
         except Exception as exc:
             logger.error(f"Can't export annotation {annotation} to Azure storage {self}. Reason: {exc}", exc_info=True)
def predictions_to_annotations(project, queryset, **kwargs):
    """Convert predictions on the given task queryset into annotations.

    Only predictions without existing child annotations are converted;
    an optional 'model_version' in the request body narrows the set.
    Returns a dict with response_code and a human-readable detail message.
    """
    request = kwargs['request']
    user = request.user
    model_version = request.data.get('model_version')
    queryset = queryset.filter(predictions__isnull=False)
    # skip predictions that were already converted (have child annotations)
    predictions = Prediction.objects.filter(task__in=queryset, child_annotations__isnull=True)

    # model version filter
    if model_version is not None:
        predictions = predictions.filter(model_version=model_version)

    predictions_values = list(predictions.values_list(
        'result', 'model_version', 'task_id', 'id'
    ))

    # prepare annotations; parent_prediction links each annotation back to
    # its source prediction (the underscore avoids shadowing model_version)
    annotations = [
        {
            'result': result,
            'completed_by': user.pk,
            'task': task_id,
            'parent_prediction': prediction_id,
        }
        for result, _model_version, task_id, prediction_id in predictions_values
    ]

    count = len(annotations)
    # FIX: log message typo ("converter" -> "converted")
    logger.debug(f'{count} predictions will be converted to annotations')
    annotation_ser = AnnotationSerializer(data=annotations, many=True)
    annotation_ser.is_valid(raise_exception=True)
    annotation_ser.save()

    return {'response_code': 200, 'detail': f'Created {count} annotations'}
示例#9
0
    def _scan_and_create_links(self, link_class):
        """Scan the storage for keys and import each unseen key as a Task.

        For every key without an existing link, loads its JSON payload,
        creates a Task (plus any embedded predictions/annotations) and a
        storage link atomically, then records sync time/count and refreshes
        project task states.

        :param link_class: storage-link model; must provide exists() and create()
        :raises ValueError: when a key holds non-JSON data or when
            predictions/annotations are given without a "data" field
        """
        tasks_created = 0
        maximum_annotations = self.project.maximum_annotations
        # continue inner_id numbering after the project's current highest task
        task = self.project.tasks.order_by('-inner_id').first()
        max_inner_id = (task.inner_id + 1) if task else 1

        for key in self.iterkeys():
            logger.debug(f'Scanning key {key}')

            # skip if task already exists
            if link_class.exists(key, self):
                logger.debug(
                    f'{self.__class__.__name__} link {key} already exists')
                continue

            logger.debug(f'{self}: found new key {key}')
            try:
                data = self.get_data(key)
            except (UnicodeDecodeError, json.decoder.JSONDecodeError) as exc:
                # non-JSON content: surface a hint about the storage setting
                logger.error(exc, exc_info=True)
                raise ValueError(
                    f'Error loading JSON from file "{key}".\nIf you\'re trying to import non-JSON data '
                    f'(images, audio, text, etc.), edit storage settings and enable '
                    f'"Treat every bucket object as a source file"')

            # predictions
            predictions = data.get('predictions', [])
            if predictions:
                if 'data' not in data:
                    raise ValueError(
                        'If you use "predictions" field in the task, '
                        'you must put "data" field in the task too')

            # annotations
            annotations = data.get('annotations', [])
            if annotations:
                if 'data' not in data:
                    raise ValueError(
                        'If you use "annotations" field in the task, '
                        'you must put "data" field in the task too')

            # unwrap the task payload when wrapped in a "data" dict
            if 'data' in data and isinstance(data['data'], dict):
                data = data['data']

            # task + link + nested objects are created all-or-nothing
            with transaction.atomic():
                task = Task.objects.create(
                    data=data,
                    project=self.project,
                    overlap=maximum_annotations,
                    is_labeled=len(annotations) >= maximum_annotations,
                    inner_id=max_inner_id)
                max_inner_id += 1

                link_class.create(task, key, self)
                logger.debug(
                    f'Create {self.__class__.__name__} link with key={key} for task={task}'
                )
                tasks_created += 1

                # add predictions
                logger.debug(
                    f'Create {len(predictions)} predictions for task={task}')
                for prediction in predictions:
                    prediction['task'] = task.id
                prediction_ser = PredictionSerializer(data=predictions,
                                                      many=True)
                prediction_ser.is_valid(raise_exception=True)
                prediction_ser.save()

                # add annotations
                logger.debug(
                    f'Create {len(annotations)} annotations for task={task}')
                for annotation in annotations:
                    annotation['task'] = task.id
                annotation_ser = AnnotationSerializer(data=annotations,
                                                      many=True)
                annotation_ser.is_valid(raise_exception=True)
                annotation_ser.save()

        # record sync bookkeeping on the storage itself
        self.last_sync = timezone.now()
        self.last_sync_count = tasks_created
        self.save()

        # task count changed; overlap/annotation settings did not
        self.project.update_tasks_states(
            maximum_annotations_changed=False,
            overlap_cohort_percentage_changed=False,
            tasks_number_changed=True)
示例#10
0
def test_export(business_client, configured_project, finished, aggregator_type,
                return_task, num_task_in_result, annotation_items,
                aggregated_class):
    """Exercise the project results export endpoint across aggregation modes.

    Creates annotations on one task, calls /results/ with the parametrized
    finished/aggregator_type/return_task flags, and checks the number of
    exported objects, the task payload shape, and either the raw annotations
    (no aggregation) or the aggregated class.
    """
    # majority_vote aggregation only exists in the closed-source 'businesses' app
    if aggregator_type == 'majority_vote' and not apps.is_installed(
            'businesses'):
        pytest.skip('Not supported aggregation for open-source version')

    task_query = Task.objects.filter(project=configured_project.id)
    task = task_query.first()

    # build the expected annotation set as JSON strings (hashable for set compare);
    # server-assigned fields are copied back from the serialized DB row
    expected_annotations_for_task = set()
    for annotation in annotation_items:
        db_annotation = Annotation.objects.create(
            task=task,
            result=annotation['result'],
            completed_by=business_client.admin)
        db_annotation = AnnotationSerializer(db_annotation).data
        annotation['id'] = db_annotation['id']
        annotation['created_at'] = db_annotation['created_at']
        annotation['updated_at'] = db_annotation['updated_at']
        annotation['completed_by'] = business_client.admin.id
        expected_annotations_for_task.add(json.dumps(annotation))

    r = business_client.get(f'/api/projects/{configured_project.id}/results/',
                            data={
                                'finished': finished,
                                'aggregator_type': aggregator_type,
                                'return_task': return_task
                            })
    assert r.status_code == 200
    exports = r.json()

    # test expected number of objects returned
    assert len(exports) == num_task_in_result

    # test whether "id" or full task included in results
    if return_task == '0':
        task_with_annotation = next((t for t in exports if t['id'] == task.id))
        assert task_with_annotation['id'] == task.id
    elif return_task == '1':
        task_with_annotation = next((t for t in exports if t['id'] == task.id))
        assert task_with_annotation['data'] == task.data
    else:
        raise Exception('Incorrect return_task param in test: ' +
                        str(return_task))

    # test how aggregation affects annotations
    if aggregator_type == 'no_aggregation':
        # exported annotations must match exactly what was created above
        exported_annotations = set()
        for annotation in task_with_annotation['annotations']:
            exported_annotations.add(json.dumps(annotation))
        assert exported_annotations == expected_annotations_for_task
        if finished != '1':
            # we expect to see all tasks in exports...
            assert len(exports) == task_query.count()
            # ...as well as task without annotations (with empty results)
            assert all(
                len(t['annotations']) == 0 for t in exports
                if t['id'] != task.id)
    else:
        # aggregated export collapses annotations to a single chosen class
        assert task_with_annotation['annotations'][0]['result'][0]['value'][
            'choices'][0] == aggregated_class
示例#11
0
class DataManagerTaskSerializer(TaskSerializer):
    """Task serializer for the Data Manager grid: nested annotations and
    predictions plus per-task aggregate columns (counts, timestamps,
    flattened results, mean prediction score, annotator list)."""

    predictions = PredictionSerializer(many=True, default=[], read_only=True)
    annotations = AnnotationSerializer(many=True, default=[], read_only=True)

    cancelled_annotations = serializers.SerializerMethodField()
    completed_at = serializers.SerializerMethodField()
    annotations_results = serializers.SerializerMethodField()
    predictions_results = serializers.SerializerMethodField()
    predictions_score = serializers.SerializerMethodField()
    total_annotations = serializers.SerializerMethodField()
    total_predictions = serializers.SerializerMethodField()
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    annotators = serializers.SerializerMethodField()

    class Meta:
        model = Task
        ref_name = 'data_manager_task_serializer'

        fields = [
            "cancelled_annotations", "completed_at", "created_at",
            "annotations_results", "data", "id", "predictions_results",
            "predictions_score", "total_annotations", "total_predictions",
            "annotations", "predictions", "file_upload", "annotators",
            "project"
        ]

    @staticmethod
    def get_cancelled_annotations(obj):
        # count of skipped/cancelled annotations on this task
        return obj.annotations.filter(was_cancelled=True).count()

    @staticmethod
    def get_completed_at(obj):
        # latest annotation creation time, or None when unannotated
        annotations = obj.annotations.all()
        if not annotations:
            return None
        return max(a.created_at for a in annotations)

    @staticmethod
    def get_annotations_results(obj):
        # all annotation results flattened into one JSON string ("" if none)
        results = [a.result for a in obj.annotations.all()]
        return json.dumps(results) if results else ""

    @staticmethod
    def get_predictions_results(obj):
        # all prediction results flattened into one JSON string ("" if none)
        results = [p.result for p in obj.predictions.all()]
        return json.dumps(results) if results else ""

    @staticmethod
    def get_predictions_score(obj):
        # mean over numeric prediction scores; None when there are none
        scores = [
            p.score for p in obj.predictions.all()
            if isinstance(p.score, (float, int))
        ]
        if not scores:
            return None
        return sum(scores) / float(len(scores))

    @staticmethod
    def get_total_predictions(obj):
        return obj.predictions.count()

    @staticmethod
    def get_total_annotations(obj):
        # cancelled annotations are excluded from the total
        return obj.annotations.filter(was_cancelled=False).count()

    @staticmethod
    def get_annotators(obj):
        # distinct non-null user ids that annotated this task
        user_ids = obj.annotations.values_list('completed_by',
                                               flat=True).distinct()
        return [uid for uid in user_ids if uid is not None]
示例#12
0
 def get_annotations(self, task):
     """Serialize every annotation attached to the given task."""
     serializer = AnnotationSerializer(
         task.annotations, many=True, default=[], read_only=True)
     return serializer.data
示例#13
0
class DataManagerTaskSerializer(TaskSerializer):
    """Task serializer for the Data Manager grid: nested annotations,
    predictions and per-user drafts, plus per-task aggregate columns
    (counts, timestamps, flattened results, mean prediction score,
    annotator list)."""
    predictions = PredictionSerializer(many=True, default=[], read_only=True)
    annotations = AnnotationSerializer(many=True, default=[], read_only=True)
    drafts = serializers.SerializerMethodField()

    cancelled_annotations = serializers.SerializerMethodField()
    completed_at = serializers.SerializerMethodField()
    annotations_results = serializers.SerializerMethodField()
    predictions_results = serializers.SerializerMethodField()
    predictions_score = serializers.SerializerMethodField()
    total_annotations = serializers.SerializerMethodField()
    total_predictions = serializers.SerializerMethodField()
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    annotators = serializers.SerializerMethodField()

    class Meta:
        model = Task
        ref_name = 'data_manager_task_serializer'

        fields = [
            "cancelled_annotations", "completed_at", "created_at",
            "annotations_results", "data", "id", "predictions_results",
            "predictions_score", "total_annotations", "total_predictions",
            "annotations", "predictions", "drafts", "file_upload",
            "annotators", "project"
        ]

    @staticmethod
    def get_cancelled_annotations(obj):
        # count of skipped/cancelled annotations on this task
        return obj.annotations.filter(was_cancelled=True).count()

    @staticmethod
    def get_completed_at(obj):
        # latest annotation creation time, but only once the task is labeled
        annotations = obj.annotations.all()
        if obj.is_labeled and annotations:
            return max(c.created_at for c in annotations)
        return None

    @staticmethod
    def get_annotations_results(obj):
        # all annotation results flattened into one JSON string ("" if none)
        annotations = obj.annotations.all()
        if annotations:
            return json.dumps([item.result for item in annotations])
        else:
            return ""

    @staticmethod
    def get_predictions_results(obj):
        # all prediction results flattened into one JSON string ("" if none)
        predictions = obj.predictions.all()
        if predictions:
            return json.dumps([item.result for item in predictions])
        else:
            return ""

    @staticmethod
    def get_predictions_score(obj):
        # mean over numeric prediction scores; None when there are none
        predictions = obj.predictions.all()
        if predictions:
            values = [
                item.score for item in predictions
                if isinstance(item.score, (float, int))
            ]
            if values:
                return sum(values) / float(len(values))
        return None

    @staticmethod
    def get_total_predictions(obj):
        return obj.predictions.count()

    @staticmethod
    def get_total_annotations(obj):
        # cancelled annotations are excluded from the total
        return obj.annotations.filter(was_cancelled=False).count()

    @staticmethod
    def get_annotators(obj):
        # distinct non-null user ids that annotated this task
        result = obj.annotations.values_list('completed_by',
                                             flat=True).distinct()
        result = [r for r in result if r is not None]
        return result

    def get_drafts(self, task):
        """Return drafts only for the current user"""
        # it's for swagger documentation: schema generation passes a non-Task
        # placeholder, for which a serializer instance (not data) is returned
        if not isinstance(task, Task):
            return AnnotationDraftSerializer(many=True)

        drafts = task.drafts
        if 'request' in self.context and hasattr(self.context['request'],
                                                 'user'):
            user = self.context['request'].user
            drafts = drafts.filter(user=user)

        return AnnotationDraftSerializer(drafts,
                                         many=True,
                                         read_only=True,
                                         # NOTE(review): default=True looks odd
                                         # for a many=True field — likely meant
                                         # default=[]; confirm (unused here
                                         # since an instance is passed)
                                         default=True,
                                         context=self.context).data