Example #1
def _import_annotations(request, rq_id, rq_func, pk, format_name):
    format_desc = {f.DISPLAY_NAME: f
        for f in dm.views.get_import_formats()}.get(format_name)
    if format_desc is None:
        raise serializers.ValidationError(
            "Unknown input format '{}'".format(format_name))
    elif not format_desc.ENABLED:
        return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)

    queue = django_rq.get_queue("default")
    rq_job = queue.fetch_job(rq_id)

    if not rq_job:
        serializer = AnnotationFileSerializer(data=request.data)
        if serializer.is_valid(raise_exception=True):
            anno_file = serializer.validated_data['annotation_file']
            fd, filename = mkstemp(prefix='cvat_{}'.format(pk))
            with open(filename, 'wb+') as f:
                for chunk in anno_file.chunks():
                    f.write(chunk)

            av_scan_paths(filename)
            rq_job = queue.enqueue_call(
                func=rq_func,
                args=(pk, filename, format_name),
                job_id=rq_id
            )
            rq_job.meta['tmp_file'] = filename
            rq_job.meta['tmp_file_descriptor'] = fd
            rq_job.save_meta()
    else:
        if rq_job.is_finished:
            os.close(rq_job.meta['tmp_file_descriptor'])
            os.remove(rq_job.meta['tmp_file'])
            rq_job.delete()
            return Response(status=status.HTTP_201_CREATED)
        elif rq_job.is_failed:
            os.close(rq_job.meta['tmp_file_descriptor'])
            os.remove(rq_job.meta['tmp_file'])
            exc_info = str(rq_job.exc_info)
            rq_job.delete()
            return Response(data=exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    return Response(status=status.HTTP_202_ACCEPTED)
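
The status codes above imply a simple polling workflow for callers: the endpoint answers 202 ACCEPTED while the RQ job is still running, 201 CREATED once the import finished (and the temporary file was cleaned up), and 500 with the job's exc_info if it failed. Below is a minimal client-side sketch of that loop, assuming a hypothetical upload URL and the requests library; the 'annotation_file' field name comes from the serializer above, everything else is illustrative.

import time
import requests  # any HTTP client with multipart upload support would do

def upload_annotations(session: requests.Session, url: str, path: str, format_name: str):
    # First request uploads the file and enqueues the import job; 202 is expected.
    with open(path, 'rb') as f:
        response = session.put(url, params={'format': format_name},
                               files={'annotation_file': f})
    while response.status_code == 202:  # job is still running
        time.sleep(1)
        response = session.put(url, params={'format': format_name})
    if response.status_code == 201:     # finished, server removed its temp file
        return
    # 500: the body contains str(rq_job.exc_info) of the failed job
    raise RuntimeError('Annotation import failed: {} {}'.format(
        response.status_code, response.text))
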
Example #2
def import_task(filename, user):
    av_scan_paths(filename)
    task_importer = TaskImporter(filename, user)
    db_task = task_importer.import_task()
    return db_task.id
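
import_task performs the whole import synchronously; a caller that wants the Example #1 behaviour would hand it to an RQ worker rather than run it in the request thread. A minimal sketch, assuming the same django_rq 'default' queue used above (the wrapper name and job id format are illustrative):

import django_rq

def enqueue_task_import(filename, user):
    # Illustrative wrapper: run import_task (Example #2) on the 'default' RQ queue,
    # mirroring the enqueue_call pattern from Example #1.
    queue = django_rq.get_queue('default')
    rq_job = queue.enqueue_call(
        func=import_task,
        args=(filename, user),
        job_id='import:task-by-{}'.format(user.id),  # illustrative job id
    )
    return rq_job.id
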
Example #3
def _create_thread(tid, data):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    meta_info_file = []
    media = _count_files(data, meta_info_file)
    media, task_mode = _validate_data(media, meta_info_file)
    if meta_info_file:
        assert settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        _copy_data_from_share(data['server_files'], upload_dir)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=[os.path.join(upload_dir, f) for f in media_files],
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
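        # A function attribute is used as a persistent call counter, so the spinner ('|/-\')
        # advances on each call when no numeric progress is available.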
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(
                progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter +
                                        1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    original_chunk_writer_class = Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter

    compressed_chunk_writer = compressed_chunk_writer_class(
        db_data.image_quality)
    original_chunk_writer = original_chunk_writer_class(100)

    # calculate chunk size if it isn't specified
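    # Heuristic: target roughly 36 Full HD (1920x1080) frames' worth of pixels per chunk, clamped to [2, 72].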
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size(0)
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    if meta_info_file:
                        try:
                            from cvat.apps.engine.prepare import UploadedMeta
                            if os.path.split(meta_info_file[0])[0]:
                                os.replace(
                                    os.path.join(upload_dir,
                                                 meta_info_file[0]),
                                    db_data.get_meta_path())
                            meta_info = UploadedMeta(
                                source_path=os.path.join(
                                    upload_dir, media_files[0]),
                                meta_path=db_data.get_meta_path())
                            meta_info.check_seek_key_frames()
                            meta_info.check_frames_numbers()
                            meta_info.save_meta_info()
                            assert len(meta_info.key_frames) > 0, 'No key frames.'
                        except Exception as ex:
                            base_msg = str(ex) if isinstance(ex, AssertionError) else \
                                'Invalid meta information was uploaded.'
                            job.meta['status'] = '{} Starting to prepare valid meta information.'.format(base_msg)
                            job.save_meta()
                            meta_info, smooth_decoding = prepare_meta(
                                media_file=media_files[0],
                                upload_dir=upload_dir,
                                chunk_size=db_data.chunk_size)
                            assert smooth_decoding, 'Too few keyframes for smooth video decoding.'
                    else:
                        meta_info, smooth_decoding = prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size)
                        assert smooth_decoding, 'Too few keyframes for smooth video decoding.'

                    all_frames = meta_info.get_task_size()
                    video_size = meta_info.frame_sizes

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_meta_path()):
                        os.remove(db_data.get_meta_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creation."
                    job.meta['status'] = "{} The task will be created using the old method".format(base_msg)
                    job.save_meta()
            else:  # images, archive
                db_data.size = len(extractor)

                counter = itertools.count()
                for chunk_number, chunk_frames in itertools.groupby(
                        extractor.frame_range,
                        lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i)
                                   for i in chunk_frames]
                    img_sizes = []
                    with open(db_data.get_dummy_chunk_path(chunk_number),
                              'w') as dummy_chunk:
                        for path, frame_id in chunk_paths:
                            dummy_chunk.write(path + '\n')
                            img_sizes.append(
                                extractor.get_image_size(frame_id))

                    db_images.extend([
                        models.Image(data=db_data,
                                     path=os.path.relpath(path, upload_dir),
                                     frame=frame,
                                     width=w,
                                     height=h)
                        for (path, frame), (w,
                                            h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(
            extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data,
                                                original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(
                chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(
                chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(data=db_data,
                                 path=os.path.relpath(data[1], upload_dir),
                                 frame=data[2],
                                 width=size[0],
                                 height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        db_images = []
    else:
        models.Video.objects.create(data=db_data,
                                    path=os.path.relpath(
                                        video_path, upload_dir),
                                    width=video_size[0],
                                    height=video_size[1])

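    # If no stop frame was requested, derive it from the number of frames actually ingested.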
    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (
            db_data.size - 1) * db_data.get_frame_step()

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(
        db_data.size, db_data.id))
    _save_task_to_db(db_task)
Example #4
File: task.py  Project: z80020100/cvat
def _create_thread(tid, data):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)
    if manifest_file:
        assert settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        else:
            upload_dir = settings.SHARE_ROOT

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths=[os.path.join(upload_dir, f) for f in media_files]
            if media_type in {'archive', 'zip'} and db_data.storage == StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = StorageChoice.LOCAL
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
        validate_dimension.set_path(os.path.split(extractor.get_zip_filename())[0])
        validate_dimension.validate()
        if validate_dimension.dimension == DimensionType.DIM_3D:
            db_task.dimension = DimensionType.DIM_3D

            extractor.reconcile(
                source_files=list(validate_dimension.related_files.keys()),
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
                dimension=DimensionType.DIM_3D,

            )
            extractor.add_files(validate_dimension.converted_files)

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size(0)
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36


    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():

            if not media_files:
                continue

            # replace the manifest file (e.g. 'subdir/manifest.jsonl' was uploaded)
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                    db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                                              manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest['properties']['length']
                            video_size = manifest['properties']['resolution']
                            manifest_is_prepared = True
                        except Exception as ex:
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was uploaded.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Starting to prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Starting to prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size
                        )
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest has been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creation."
                    _update_status("{} The task will be created using the old method".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    if db_task.dimension == DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            data_dir=upload_dir
                        )
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'extension': ext
                            })
                    manifest.create(content)
                manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []

                    for _, frame_id in chunk_paths:
                        properties = manifest[frame_id]
                        if db_task.dimension == DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])

                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        if validate_dimension.dimension == DimensionType.DIM_2D:
            models.Image.objects.bulk_create(db_images)
        else:
            related_file = []
            for image_data in db_images:
                image_model = models.Image(
                    data=image_data.data,
                    path=image_data.path,
                    frame=image_data.frame,
                    width=image_data.width,
                    height=image_data.height
                )

                image_model.save()
                image_data = models.Image.objects.get(id=image_model.id)

                if validate_dimension.related_files.get(image_data.path, None):
                    for related_image_file in validate_dimension.related_files[image_data.path]:
                        related_file.append(
                            RelatedFile(data=db_data, primary_image_id=image_data.id, path=related_image_file))
            RelatedFile.objects.bulk_create(related_file)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
Example #5
File: task.py  Project: aschernov/cvat
def _create_thread(tid, data, isImport=False):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        if db_data.storage != models.StorageChoice.CLOUD_STORAGE:
            data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)
    if manifest_file:
        assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == models.StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        elif db_data.storage == models.StorageChoice.SHARE:
            upload_dir = settings.SHARE_ROOT
        else: # cloud storage
            if not manifest_file: raise Exception('Manifest file not found')
            db_cloud_storage = db_data.cloud_storage
            credentials = Credentials()
            credentials.convert_from_db({
               'type': db_cloud_storage.credentials_type,
               'value': db_cloud_storage.credentials,
            })

            details = {
                'resource': db_cloud_storage.resource,
                'credentials': credentials,
                'specific_attributes': db_cloud_storage.get_specific_attributes()
            }
            cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
            first_sorted_media_image = sorted(media['image'])[0]
            cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))

            # prepare task manifest file from cloud storage manifest file
            manifest = ImageManifestManager(db_data.get_manifest_path())
            cloud_storage_manifest = ImageManifestManager(
                os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0])
            )
            cloud_storage_manifest.set_index()
            media_files = sorted(media['image'])
            content = cloud_storage_manifest.get_subset(media_files)
            manifest.create(content)
            manifest.init_index()

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None
    manifest_index = _get_manifest_frame_indexer()

    # If images and directories are uploaded via server_files,
    # extend the image list with all images found in those directories
    if (data['server_files']) and len(media['directory']) and len(media['image']):
        media['image'].extend(
            [os.path.relpath(image, upload_dir) for image in
                MEDIA_TYPES['directory']['extractor'](
                    source_path=[os.path.join(upload_dir, f) for f in media['directory']],
                ).absolute_source_paths
            ]
        )
        media['directory'] = []

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths=[os.path.join(upload_dir, f) for f in media_files]
            if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = models.StorageChoice.LOCAL
            if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
                manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
                db_data.start_frame = 0
                data['stop_frame'] = None
                db_data.frame_filter = ''

            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )


    validate_dimension = ValidateDimension()
    if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
        extractor.extract()

    if db_data.storage == models.StorageChoice.LOCAL or \
        (db_data.storage == models.StorageChoice.SHARE and \
        isinstance(extractor, MEDIA_TYPES['zip']['extractor'])):
        validate_dimension.set_path(upload_dir)
        validate_dimension.validate()

    if db_task.project is not None and db_task.project.tasks.count() > 1 and db_task.project.tasks.first().dimension != validate_dimension.dimension:
        raise Exception(f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})')

    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        db_task.dimension = models.DimensionType.DIM_3D

        extractor.reconcile(
            source_files=[os.path.join(upload_dir, f) for f in validate_dimension.related_files.keys()],
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            dimension=models.DimensionType.DIM_3D,
        )

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
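        # Drop files under a 'related_images' directory from the main frame list;
        # detect_related_images then maps each primary image to its related files,
        # which are attached as RelatedFile records after the images are created.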
        extractor.filter(lambda x: not re.search(r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths, upload_dir)

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == models.DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == models.DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE):
                w, h = extractor.get_image_size(0)
            else:
                img_properties = manifest[0]
                w, h = img_properties['width'], img_properties['height']
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():

            if not media_files:
                continue

            # replace the manifest file (e.g. 'subdir/manifest.jsonl' was uploaded)
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                    db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                                              manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest.video_length
                            video_size = manifest.video_resolution
                            manifest_is_prepared = True
                        except Exception as ex:
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was uploaded.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Starting to prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Starting to prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size
                        )
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest has been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creation."
                    _update_status("{} The task will be created using the old method".format(base_msg))
            else: # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    if db_task.dimension == models.DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            meta={ k: {'related_images': related_images[k] } for k in related_images },
                            data_dir=upload_dir
                        )
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'meta': { 'related_images': related_images[''.join((name, ext))] },
                                'extension': ext
                            })
                    manifest.create(content)
                manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []

                    for _, frame_id in chunk_paths:
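                        # manifest_index maps the extractor frame number to the matching manifest entry;
                        # it is re-parameterized with the original start frame and step when images are imported from a share (see above)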
                        properties = manifest[manifest_index(frame_id)]
                        if db_task.dimension == models.DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])

                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            models.RelatedFile(data=image.data, primary_image=image, path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        models.RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
Example #6
File: task.py  Project: quuhua911/cvat
def _create_thread(db_task,
                   data,
                   isBackupRestore=False,
                   isDatasetImport=False):
    if isinstance(db_task, int):
        db_task = models.Task.objects.select_for_update().get(pk=db_task)

    slogger.glob.info("create task #{}".format(db_task.id))

    db_data = db_task.data
    upload_dir = db_data.get_upload_dirname()

    if data['remote_files'] and not isDatasetImport:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_files = []
    media = _count_files(data, manifest_files)
    media, task_mode = _validate_data(media, manifest_files)

    if data['server_files']:
        if db_data.storage == models.StorageChoice.LOCAL:
            _copy_data_from_source(data['server_files'], upload_dir,
                                   data.get('server_files_path'))
        elif db_data.storage == models.StorageChoice.SHARE:
            upload_dir = settings.SHARE_ROOT

    manifest_root = None
    if db_data.storage in {
            models.StorageChoice.LOCAL, models.StorageChoice.SHARE
    }:
        manifest_root = upload_dir
    elif db_data.storage == models.StorageChoice.CLOUD_STORAGE:
        manifest_root = db_data.cloud_storage.get_storage_dirname()

    manifest_file = _validate_manifest(manifest_files, manifest_root)
    if manifest_file and (not settings.USE_CACHE or db_data.storage_method !=
                          models.StorageMethodChoice.CACHE):
        raise Exception(
            "File with meta information can be uploaded if 'Use cache' option is also selected"
        )

    if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE:
        if not manifest_file: raise Exception('Manifest file not found')
        db_cloud_storage = db_data.cloud_storage
        credentials = Credentials()
        credentials.convert_from_db({
            'type': db_cloud_storage.credentials_type,
            'value': db_cloud_storage.credentials,
        })

        details = {
            'resource': db_cloud_storage.resource,
            'credentials': credentials,
            'specific_attributes': db_cloud_storage.get_specific_attributes()
        }
        cloud_storage_instance = get_cloud_storage_instance(
            cloud_provider=db_cloud_storage.provider_type, **details)
        sorted_media = sort(media['image'], data['sorting_method'])
        first_sorted_media_image = sorted_media[0]
        cloud_storage_instance.download_file(
            first_sorted_media_image,
            os.path.join(upload_dir, first_sorted_media_image))

        # prepare task manifest file from cloud storage manifest file
        # NOTE we should create the manifest before defining chunk_size
        # FIXME in the future when archive support is implemented
        manifest = ImageManifestManager(db_data.get_manifest_path())
        cloud_storage_manifest = ImageManifestManager(
            os.path.join(db_data.cloud_storage.get_storage_dirname(),
                         manifest_file),
            db_data.cloud_storage.get_storage_dirname())
        cloud_storage_manifest.set_index()
        sequence, content = cloud_storage_manifest.get_subset(sorted_media)
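        # get_subset returns the manifest positions and entries for the requested files;
        # reorder the entries by their position in the cloud storage manifest before writing the task manifest.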
        sorted_content = (i[1] for i in sorted(zip(sequence, content)))
        manifest.create(sorted_content)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None
    manifest_index = _get_manifest_frame_indexer()

    # If images and directories are uploaded via server_files,
    # extend the image list with all images found in those directories
    if (data['server_files']) and len(media['directory']) and len(
            media['image']):
        media['image'].extend([
            os.path.relpath(image, upload_dir)
            for image in MEDIA_TYPES['directory']['extractor'](source_path=[
                os.path.join(upload_dir, f) for f in media['directory']
            ], ).absolute_source_paths
        ])
        media['directory'] = []

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            if (isDatasetImport or isBackupRestore) and media_type == 'image' and \
                    db_data.storage == models.StorageChoice.SHARE:
                manifest_index = _get_manifest_frame_indexer(
                    db_data.start_frame, db_data.get_frame_step())
                db_data.start_frame = 0
                data['stop_frame'] = None
                db_data.frame_filter = ''
            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if manifest_file and not isBackupRestore and \
                    data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
                raise Exception(
                    "It isn't supported to upload manifest file and use random sorting"
                )
            if isBackupRestore and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
                    data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
                raise Exception(
                    "It isn't supported to import the task that was created without cache but with random/predefined sorting"
                )

            details = {
                'source_path': source_paths,
                'step': db_data.get_frame_step(),
                'start': db_data.start_frame,
                'stop': data['stop_frame'],
            }
            if media_type in {
                    'archive', 'zip', 'pdf'
            } and db_data.storage == models.StorageChoice.SHARE:
                details['extract_dir'] = db_data.get_upload_dirname()
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = models.StorageChoice.LOCAL
            if media_type != 'video':
                details['sorting_method'] = data['sorting_method']
            extractor = MEDIA_TYPES[media_type]['extractor'](**details)

    validate_dimension = ValidateDimension()
    if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
        extractor.extract()

    if db_data.storage == models.StorageChoice.LOCAL or \
            (db_data.storage == models.StorageChoice.SHARE and \
            isinstance(extractor, MEDIA_TYPES['zip']['extractor'])):
        validate_dimension.set_path(upload_dir)
        validate_dimension.validate()

    if db_task.project is not None and db_task.project.tasks.count() > 1 and \
            db_task.project.tasks.first().dimension != validate_dimension.dimension:
        raise Exception(
            f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})'
        )

    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        db_task.dimension = models.DimensionType.DIM_3D

        keys_of_related_files = validate_dimension.related_files.keys()
        absolute_keys_of_related_files = [
            os.path.join(upload_dir, f) for f in keys_of_related_files
        ]
        # When a task is created, the sorting method can be random and in this case, reinitialization will be with correct sorting
        # but when a task is restored from a backup, a random sorting is changed to predefined and we need to manually sort files
        # in the correct order.
        source_files = absolute_keys_of_related_files if not isBackupRestore else \
            [item for item in extractor.absolute_source_paths if item in absolute_keys_of_related_files]
        extractor.reconcile(
            source_files=source_files,
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            dimension=models.DimensionType.DIM_3D,
        )

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        extractor.filter(lambda x: not re.search(
            r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths,
                                               upload_dir)

    if isBackupRestore and not isinstance(extractor, MEDIA_TYPES['video']['extractor']) and db_data.storage_method == models.StorageMethodChoice.CACHE and \
            db_data.sorting_method in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED} and validate_dimension.dimension != models.DimensionType.DIM_3D:
        # we should sort media_files according to the manifest content sequence
        # and we should do this in general after validation step for 3D data and after filtering from related_images
        manifest = ImageManifestManager(db_data.get_manifest_path())
        manifest.set_index()
        sorted_media_files = []

        for idx in range(len(extractor.absolute_source_paths)):
            properties = manifest[idx]
            image_name = properties.get('name', None)
            image_extension = properties.get('extension', None)

            full_image_path = os.path.join(
                upload_dir, f"{image_name}{image_extension}"
            ) if image_name and image_extension else None
            if full_image_path and full_image_path in extractor:
                sorted_media_files.append(full_image_path)
        media_files = sorted_media_files.copy()
        del sorted_media_files
        data['sorting_method'] = models.SortingMethod.PREDEFINED
        extractor.reconcile(
            source_files=media_files,
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            sorting_method=data['sorting_method'],
        )

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_message = 'Images are being compressed'
        if not progress:
            status_message = '{} {}'.format(
                status_message,
                progress_animation[update_progress.call_counter])
        job.meta['status'] = status_message
        job.meta['task_progress'] = progress or 0.
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter +
                                        1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == models.DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == models.DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(
        db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE):
                w, h = extractor.get_image_size(0)
            else:
                img_properties = manifest[0]
                w, h = img_properties['width'], img_properties['height']
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():

            if not media_files:
                continue

            # replace the manifest file (e.g. 'subdir/manifest.jsonl' or 'some_manifest.jsonl' was uploaded)
            if manifest_file and not os.path.exists(
                    db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file),
                                db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
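                            # validate the uploaded manifest; if it is unusable, fall back to generating a new one below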
                            manifest = VideoManifestValidator(
                                source_path=os.path.join(
                                    upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest.video_length
                            video_size = manifest.video_resolution
                            manifest_is_prepared = True
                        except Exception as ex:
                            manifest.remove()
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was uploaded.'
                                slogger.glob.warning(str(ex))
                            _update_status(
                                '{} Starting to prepare a valid manifest file.'.
                                format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Starting to prepare a manifest file')
                        manifest = VideoManifestManager(
                            db_data.get_manifest_path())
                        manifest.link(media_file=media_files[0],
                                      upload_dir=upload_dir,
                                      chunk_size=db_data.chunk_size)
                        manifest.create()
                        _update_status('A manifest has been created')

                        all_frames = len(manifest.reader)
                        video_size = manifest.reader.resolution
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
                    manifest.remove()
                    del manifest
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creation."
                    _update_status(
                        "{} The task will be created using the old method.".
                        format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    manifest.link(
                        sources=extractor.absolute_source_paths,
                        meta={
                            k: {
                                'related_images': related_images[k]
                            }
                            for k in related_images
                        },
                        data_dir=upload_dir,
                        DIM_3D=(
                            db_task.dimension == models.DimensionType.DIM_3D),
                    )
                    manifest.create()
                else:
                    manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(
                        extractor.frame_range,
                        lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i)
                                   for i in chunk_frames]
                    img_sizes = []

                    for chunk_path, frame_id in chunk_paths:
                        properties = manifest[manifest_index(frame_id)]

                        # check mapping
                        if not chunk_path.endswith(
                                f"{properties['name']}{properties['extension']}"
                        ):
                            raise Exception(
                                'Incorrect file mapping to manifest content')
                        if db_task.dimension == models.DimensionType.DIM_2D:
                            resolution = (properties['width'],
                                          properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                                     path=os.path.relpath(path, upload_dir),
                                     frame=frame,
                                     width=w,
                                     height=h)
                        for (path, frame), (w,
                                            h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
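        # without the cache (or after falling back above), write every original and compressed chunk to disk now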
        counter = itertools.count()
        generator = itertools.groupby(
            extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data,
                                                original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(
                chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(
                chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(data=db_data,
                                 path=os.path.relpath(data[1], upload_dir),
                                 frame=data[2],
                                 width=size[0],
                                 height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            models.RelatedFile(data=image.data,
                               primary_image=image,
                               path=os.path.join(upload_dir,
                                                 related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        models.RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(data=db_data,
                                    path=os.path.relpath(
                                        video_path, upload_dir),
                                    width=video_size[0],
                                    height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (
            db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(
        db_data.size, db_data.id))
    _save_task_to_db(db_task)
Example #7
def _create_thread(tid, data):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    media = _count_files(data)
    media, task_mode = _validate_data(media)

    if data['server_files']:
        _copy_data_from_share(data['server_files'], upload_dir)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=[os.path.join(upload_dir, f) for f in media_files],
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )
    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data[
        'use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(
                progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter +
                                        1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    original_chunk_writer_class = Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter

    compressed_chunk_writer = compressed_chunk_writer_class(
        db_data.image_quality)
    original_chunk_writer = original_chunk_writer_class(100)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size()
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    counter = itertools.count()
    generator = itertools.groupby(
        extractor, lambda x: next(counter) // db_data.chunk_size)
    for chunk_idx, chunk_data in generator:
        chunk_data = list(chunk_data)
        original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
        original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

        compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
        img_sizes = compressed_chunk_writer.save_as_chunk(
            chunk_data, compressed_chunk_path)

        if db_task.mode == 'annotation':
            db_images.extend([
                models.Image(data=db_data,
                             path=os.path.relpath(data[1], upload_dir),
                             frame=data[2],
                             width=size[0],
                             height=size[1])
                for data, size in zip(chunk_data, img_sizes)
            ])
        else:
            video_size = img_sizes[0]
            video_path = chunk_data[0][1]

        db_data.size += len(chunk_data)
        progress = extractor.get_progress(chunk_data[-1][2])
        update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        db_images = []
    else:
        models.Video.objects.create(data=db_data,
                                    path=os.path.relpath(
                                        video_path, upload_dir),
                                    width=video_size[0],
                                    height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (
            db_data.size - 1) * db_data.get_frame_step()

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Founded frames {} for Data #{}".format(
        db_data.size, db_data.id))
    _save_task_to_db(db_task)
Example #8
def _create_thread(tid, data):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    media = _count_files(data)
    media, task_mode = _validate_data(media)

    if data['server_files']:
        _copy_data_from_share(data['server_files'], upload_dir)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=[os.path.join(upload_dir, f) for f in media_files],
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data[
        'use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(
                progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter +
                                        1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    original_chunk_writer_class = Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter

    compressed_chunk_writer = compressed_chunk_writer_class(
        db_data.image_quality)
    original_chunk_writer = original_chunk_writer_class(100)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size()
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if media_files:
                if task_mode == MEDIA_TYPES['video']['mode']:
                    try:
                        analyzer = AnalyzeVideo(source_path=os.path.join(
                            upload_dir, media_files[0]))
                        analyzer.check_type_first_frame()
                        analyzer.check_video_timestamps_sequences()

                        meta_info = PrepareInfo(
                            source_path=os.path.join(upload_dir,
                                                     media_files[0]),
                            meta_path=os.path.join(upload_dir,
                                                   'meta_info.txt'))
                        meta_info.save_key_frames()
                        meta_info.check_seek_key_frames()
                        meta_info.save_meta_info()

                        all_frames = meta_info.get_task_size()
                        db_data.size = len(
                            range(
                                db_data.start_frame,
                                min(
                                    data['stop_frame'] +
                                    1 if data['stop_frame'] else all_frames,
                                    all_frames), db_data.get_frame_step()))
                        video_path = os.path.join(upload_dir, media_files[0])
                        frame = meta_info.key_frames.get(
                            next(iter(meta_info.key_frames)))
                        video_size = (frame.width, frame.height)

                    except Exception:
                        db_data.storage_method = StorageMethodChoice.FILE_SYSTEM

                else:  # images, archive
                    counter_ = itertools.count()
                    if isinstance(extractor,
                                  MEDIA_TYPES['archive']['extractor']):
                        media_files = [
                            os.path.relpath(path, upload_dir)
                            for path in extractor._source_path
                        ]
                    elif isinstance(extractor,
                                    (MEDIA_TYPES['zip']['extractor'],
                                     MEDIA_TYPES['pdf']['extractor'])):
                        media_files = extractor._source_path

                    numbers_sequence = range(
                        db_data.start_frame,
                        min(
                            data['stop_frame']
                            if data['stop_frame'] else len(media_files),
                            len(media_files)), db_data.get_frame_step())
                    m_paths = [(path, numb)
                               for numb, path in enumerate(sorted(media_files))
                               if numb in numbers_sequence]

                    for chunk_number, media_paths in itertools.groupby(
                            m_paths,
                            lambda x: next(counter_) // db_data.chunk_size):
                        media_paths = list(media_paths)
                        img_sizes = []
                        from PIL import Image
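                        # write only this chunk's image paths into a "dummy" chunk file and collect the image sizes;
                        # with the CACHE storage method the real chunks are assembled on demand later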
                        with open(db_data.get_dummy_chunk_path(chunk_number),
                                  'w') as dummy_chunk:
                            for path, _ in media_paths:
                                dummy_chunk.write(path + '\n')
                                img_sizes += [
                                    Image.open(os.path.join(upload_dir,
                                                            path)).size
                                ]

                        db_data.size += len(media_paths)
                        db_images.extend([
                            models.Image(data=db_data,
                                         path=data[0],
                                         frame=data[1],
                                         width=size[0],
                                         height=size[1])
                            for data, size in zip(media_paths, img_sizes)
                        ])

    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(
            extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data,
                                                original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(
                chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(
                chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(data=db_data,
                                 path=os.path.relpath(data[1], upload_dir),
                                 frame=data[2],
                                 width=size[0],
                                 height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        db_images = []
    else:
        models.Video.objects.create(data=db_data,
                                    path=os.path.relpath(
                                        video_path, upload_dir),
                                    width=video_size[0],
                                    height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (
            db_data.size - 1) * db_data.get_frame_step()

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Founded frames {} for Data #{}".format(
        db_data.size, db_data.id))
    _save_task_to_db(db_task)
Example #9
def create_or_update(dl_model_id, name, model_file, weights_file,
                     labelmap_file, interpretation_file, owner, storage,
                     is_shared):
    def get_abs_path(share_path):
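        # resolve a user-supplied path on the shared storage, rejecting '..' traversal and anything outside SHARE_ROOT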
        if not share_path:
            return share_path
        share_root = settings.SHARE_ROOT
        relpath = os.path.normpath(share_path).lstrip('/')
        if '..' in relpath.split(os.path.sep):
            raise Exception('Permission denied')
        abspath = os.path.abspath(os.path.join(share_root, relpath))
        if os.path.commonprefix([share_root, abspath]) != share_root:
            raise Exception('Bad file path on share: ' + abspath)
        return abspath

    def save_file_as_tmp(data):
        if not data:
            return None
        fd, filename = tempfile.mkstemp()
        with open(filename, 'wb') as tmp_file:
            for chunk in data.chunks():
                tmp_file.write(chunk)
        os.close(fd)
        return filename

    is_create_request = dl_model_id is None
    if is_create_request:
        dl_model_id = create_empty(owner=owner)

    run_tests = bool(model_file or weights_file or labelmap_file
                     or interpretation_file)
    if storage != "local":
        model_file = get_abs_path(model_file)
        weights_file = get_abs_path(weights_file)
        labelmap_file = get_abs_path(labelmap_file)
        interpretation_file = get_abs_path(interpretation_file)
    else:
        model_file = save_file_as_tmp(model_file)
        weights_file = save_file_as_tmp(weights_file)
        labelmap_file = save_file_as_tmp(labelmap_file)
        interpretation_file = save_file_as_tmp(interpretation_file)

    files_to_scan = []
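    # collect every provided file and virus-scan it before scheduling the update job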
    if model_file:
        files_to_scan.append(model_file)
    if weights_file:
        files_to_scan.append(weights_file)
    if labelmap_file:
        files_to_scan.append(labelmap_file)
    if interpretation_file:
        files_to_scan.append(interpretation_file)
    av_scan_paths(*files_to_scan)

    if owner:
        restricted = not has_admin_role(owner)
    else:
        restricted = not has_admin_role(
            AnnotationModel.objects.get(pk=dl_model_id).owner)

    rq_id = "auto_annotation.create.{}".format(dl_model_id)
    queue = django_rq.get_queue("default")
    queue.enqueue_call(func=_update_dl_model_thread,
                       args=(dl_model_id, name, is_shared, model_file,
                             weights_file, labelmap_file, interpretation_file,
                             run_tests, storage == "local", is_create_request,
                             restricted),
                       job_id=rq_id)

    return rq_id
Example #10
def _import_project(filename, user, org_id):
    av_scan_paths(filename)
    project_importer = ProjectImporter(filename, user, org_id)
    db_project = project_importer.import_project()
    return db_project.id