Example #1
def fill_task_meta_data_forward(apps, schema_editor):
    db_alias = schema_editor.connection.alias
    task_model = apps.get_model('engine', 'Task')
    video_model = apps.get_model('engine', 'Video')
    image_model = apps.get_model('engine', 'Image')

    for db_task in task_model.objects.all():
        if db_task.mode == 'interpolation':
            db_video = video_model()
            db_video.task_id = db_task.id
            db_video.start_frame = 0
            db_video.stop_frame = db_task.size
            db_video.step = 1

            video = ""
            for root, _, files in os.walk(_get_upload_dirname(db_task)):
                fullnames = map(lambda f: os.path.join(root, f), files)
                videos = list(
                    filter(lambda x: get_mime(x) == 'video', fullnames))
                if len(videos):
                    video = videos[0]
                    break
            db_video.path = video
            try:
                image = Image.open(_get_frame_path(db_task, 0))
                db_video.width = image.width
                db_video.height = image.height
                image.close()
            except FileNotFoundError:
                db_video.width = 0
                db_video.height = 0

            db_video.save()
        else:
            filenames = []
            for root, _, files in os.walk(_get_upload_dirname(db_task)):
                fullnames = map(lambda f: os.path.join(root, f), files)
                images = filter(lambda x: get_mime(x) == 'image', fullnames)
                filenames.extend(images)
            filenames.sort()

            db_images = []
            for i, image_path in enumerate(filenames):
                db_image = image_model()
                db_image.task_id = db_task.id
                db_image.path = image_path
                db_image.frame = i
                try:
                    image = Image.open(image_path)
                    db_image.width = image.width
                    db_image.height = image.height
                    image.close()
                except FileNotFoundError:
                    db_image.width = 0
                    db_image.height = 0

                db_images.append(db_image)
            image_model.objects.using(db_alias).bulk_create(db_images)
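
The function above is the forward step of a Django data migration. A minimal sketch of how such a function is typically registered with migrations.RunPython; the dependency name and the no-op reverse step are assumptions, not taken from the example:

from django.db import migrations

def fill_task_meta_data_backward(apps, schema_editor):
    # assumed no-op reverse step so the migration stays reversible
    pass

class Migration(migrations.Migration):
    dependencies = [
        ('engine', '0001_initial'),  # hypothetical previous migration
    ]

    operations = [
        migrations.RunPython(fill_task_meta_data_forward,
                             fill_task_meta_data_backward),
    ]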
Example #2
def make_image_meta_cache(db_task):
    with open(db_task.get_image_meta_cache_path(), 'w') as meta_file:
        cache = {
            'original_size': []
        }

        if db_task.mode == 'interpolation':
            image = Image.open(db_task.get_frame_path(0))
            cache['original_size'].append({
                'width': image.size[0],
                'height': image.size[1]
            })
            image.close()
        else:
            filenames = []
            for root, _, files in os.walk(db_task.get_upload_dirname()):
                fullnames = map(lambda f: os.path.join(root, f), files)
                images = filter(lambda x: get_mime(x) == 'image', fullnames)
                filenames.extend(images)
            filenames.sort()

            for image_path in filenames:
                image = Image.open(image_path)
                cache['original_size'].append({
                    'width': image.size[0],
                    'height': image.size[1]
                })
                image.close()

        meta_file.write(str(cache))
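
Because the cache is written with str(cache), the file contains a Python dict literal; a minimal sketch of reading it back with ast.literal_eval, assuming the same get_image_meta_cache_path() accessor:

import ast

def read_image_meta_cache(db_task):
    # the file stores the repr of a dict, so literal_eval can parse it safely
    with open(db_task.get_image_meta_cache_path()) as meta_file:
        cache = ast.literal_eval(meta_file.read())
    # list of {'width': ..., 'height': ...} entries, one per frame
    return cache['original_size']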
Example #3
def count_files(file_mapping, counter):
    for rel_path, full_path in file_mapping.items():
        mime = get_mime(full_path)
        if mime in counter:
            counter[mime].append(rel_path)
        else:
            slogger.glob.warn("Skip '{}' file (its mime type doesn't "
                "correspond to a video or an image file)".format(full_path))
Example #4
def count_files(file_mapping, counter):
    for rel_path, full_path in file_mapping.items():
        mime = get_mime(full_path)
        if mime in counter:
            counter[mime].append(rel_path)
        elif rel_path.endswith('.jsonl'):
            # 'manifest_files' is expected to be defined in the enclosing scope
            manifest_files.append(rel_path)
        else:
            slogger.glob.warn(
                "Skip '{}' file (its mime type doesn't "
                "correspond to a supported MIME type)".format(
                    full_path))
Example #5
def migrate2manifest(apps, schema_editor):
    logger = get_logger(MIGRATION_NAME, MIGRATION_LOG)
    logger.info(
        'The data migration for creating manifest files has started')
    query_set = _get_query_set(apps)
    logger.info('Need to update {} data objects'.format(len(query_set)))
    for db_data in query_set:
        try:
            upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT,
                                            db_data.id)
            logger.info('Migrate data({}), folder - {}'.format(
                db_data.id, upload_dir))
            if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')):
                os.remove(os.path.join(upload_dir, 'meta_info.txt'))
                logger.info(
                    '{}/meta_info.txt has been deleted'.format(upload_dir))
            else:
                for path in glob.glob(f'{upload_dir}/dummy_*.txt'):
                    os.remove(path)
                    logger.info(f"{path} has been deleted")
            # needed when a long data migration is restarted: skip data that already has a manifest
            if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')):
                logger.info('Manifest file already exists')
                continue
            data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
            if hasattr(db_data, 'video'):
                media_file = os.path.join(data_dir, db_data.video.path)
                manifest = VideoManifestManager(manifest_path=upload_dir)
                logger.info(
                    'Preparing video meta information has begun')
                meta_info = manifest.prepare_meta(media_file=media_file,
                                                  force=True)
                logger.info('Manifest creating has begun')
                manifest.create(meta_info)
                logger.info('Index creating has begun')
                manifest.init_index()
            else:
                manifest = ImageManifestManager(manifest_path=upload_dir)
                sources = []
                if db_data.storage == StorageChoice.LOCAL:
                    for (root, _, files) in os.walk(data_dir):
                        sources.extend([
                            os.path.join(root, f) for f in files
                            if get_mime(f) == 'image'
                        ])
                    sources.sort()
                else:
                    # the data is on the share, so the full directory structure
                    # cannot be restored by walking a local folder
                    sources = [
                        os.path.join(data_dir, db_image.path)
                        for db_image in db_data.images.all().order_by('frame')
                    ]
                if any(db_task.dimension == DimensionType.DIM_3D
                       for db_task in db_data.tasks.all()):
                    logger.info(
                        'Preparing 3D image meta information has begun')
                    content = []
                    for source in sources:
                        name, ext = os.path.splitext(
                            os.path.relpath(source, upload_dir))
                        content.append({'name': name, 'extension': ext})
                else:
                    logger.info(
                        'Preparing 2D image meta information has begun')
                    meta_info = manifest.prepare_meta(sources=sources,
                                                      data_dir=data_dir)
                    content = meta_info.content

                if db_data.storage == StorageChoice.SHARE:

                    def _get_frame_step(str_):
                        match = search(r"step\s*=\s*([1-9]\d*)", str_)
                        return int(match.group(1)) if match else 1

                    logger.info(
                        'Data is located on the share, metadata update has been started'
                    )
                    step = _get_frame_step(db_data.frame_filter)
                    start = db_data.start_frame
                    stop = db_data.stop_frame + 1
                    images_range = range(start, stop, step)
                    result_content = []
                    for i in range(stop):
                        item = content.pop(0) if i in images_range else dict()
                        result_content.append(item)
                    content = result_content
                logger.info('Manifest creating has begun')
                manifest.create(content)
                logger.info('Index creating has begun')
                manifest.init_index()
            logger.info('Successful migration for the data({})'.format(
                db_data.id))
        except Exception as ex:
            logger.error(str(ex))
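
The nested _get_frame_step helper parses the frame_filter string (e.g. 'step=3'). A self-contained sketch of the same parsing logic, shown only to illustrate the regex; nothing here beyond the expression above is taken from the source:

import re

def get_frame_step(frame_filter):
    # frame_filter looks like 'step=3'; default to a step of 1 when absent
    match = re.search(r"step\s*=\s*([1-9]\d*)", frame_filter or "")
    return int(match.group(1)) if match else 1

assert get_frame_step('step=3') == 3
assert get_frame_step('') == 1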
Example #6
def migrate_task_data(db_task_id, db_data_id, original_video, original_images, size, start_frame,
    stop_frame, frame_filter, image_quality, chunk_size, return_dict):
    try:
        db_data_dir = os.path.join(settings.MEDIA_DATA_ROOT, str(db_data_id))
        compressed_cache_dir = os.path.join(db_data_dir, 'compressed')
        original_cache_dir = os.path.join(db_data_dir, 'original')
        old_db_task_dir = os.path.join(settings.DATA_ROOT, str(db_task_id))
        old_task_data_dir = os.path.join(old_db_task_dir, 'data')
        if os.path.exists(old_task_data_dir) and size != 0:
            if original_video:
                if os.path.exists(original_video):
                    _stop_frame = stop_frame if stop_frame else None
                    reader = VideoReader([original_video], get_frame_step(frame_filter), start_frame, _stop_frame)
                    original_chunk_writer = Mpeg4ChunkWriter(100)
                    compressed_chunk_writer = ZipCompressedChunkWriter(image_quality)

                    counter = itertools.count()
                    generator = itertools.groupby(reader, lambda x: next(counter) // chunk_size)
                    for chunk_idx, chunk_images in generator:
                        chunk_images = list(chunk_images)
                        original_chunk_path = os.path.join(original_cache_dir, '{}.mp4'.format(chunk_idx))
                        original_chunk_writer.save_as_chunk(chunk_images, original_chunk_path)

                        compressed_chunk_path = os.path.join(compressed_cache_dir, '{}.zip'.format(chunk_idx))
                        compressed_chunk_writer.save_as_chunk(chunk_images, compressed_chunk_path)

                    preview = reader.get_preview()
                    preview.save(os.path.join(db_data_dir, 'preview.jpeg'))
                else:
                    original_chunk_writer = ZipChunkWriter(100)
                    for chunk_idx, chunk_image_ids in enumerate(slice_by_size(range(size), chunk_size)):
                        chunk_images = []
                        for image_id in chunk_image_ids:
                            image_path = get_frame_path(old_task_data_dir, image_id)
                            chunk_images.append((image_path, image_path))

                        original_chunk_path = os.path.join(original_cache_dir, '{}.zip'.format(chunk_idx))
                        original_chunk_writer.save_as_chunk(chunk_images, original_chunk_path)

                        compressed_chunk_path = os.path.join(compressed_cache_dir, '{}.zip'.format(chunk_idx))
                        os.symlink(original_chunk_path, compressed_chunk_path)
                        shutil.copyfile(get_frame_path(old_task_data_dir, image_id), os.path.join(db_data_dir, 'preview.jpeg'))
            else:
                reader = None
                if os.path.exists(original_images[0]): # task created from images
                    reader = ImageListReader(original_images)
                else: # task created from archive or pdf
                    archives = []
                    pdfs = []
                    zips = []
                    for p in glob.iglob(os.path.join(db_data_dir, 'raw', '**', '*'), recursive=True):
                        mime_type = get_mime(p)
                        if mime_type == 'archive':
                            archives.append(p)
                        elif mime_type == 'pdf':
                            pdfs.append(p)
                        elif mime_type == 'zip':
                            zips.append(p)
                    if archives:
                        reader = ArchiveReader(archives)
                    elif zips:
                        reader = ZipReader(zips)
                    elif pdfs:
                        reader = PdfReader(pdfs)

                if not reader:
                    original_chunk_writer = ZipChunkWriter(100)
                    for chunk_idx, chunk_image_ids in enumerate(slice_by_size(range(size), chunk_size)):
                        chunk_images = []
                        for image_id in chunk_image_ids:
                            image_path = get_frame_path(old_task_data_dir, image_id)
                            chunk_images.append((image_path, image_path))

                        original_chunk_path = os.path.join(original_cache_dir, '{}.zip'.format(chunk_idx))
                        original_chunk_writer.save_as_chunk(chunk_images, original_chunk_path)

                        compressed_chunk_path = os.path.join(compressed_cache_dir, '{}.zip'.format(chunk_idx))
                        os.symlink(original_chunk_path, compressed_chunk_path)
                        shutil.copyfile(get_frame_path(old_task_data_dir, image_id), os.path.join(db_data_dir, 'preview.jpeg'))
                else:
                    original_chunk_writer = ZipChunkWriter(100)
                    compressed_chunk_writer = ZipCompressedChunkWriter(image_quality)

                    counter = itertools.count()
                    generator = itertools.groupby(reader, lambda x: next(counter) // chunk_size)
                    for chunk_idx, chunk_images in generator:
                        chunk_images = list(chunk_images)
                        compressed_chunk_path = os.path.join(compressed_cache_dir, '{}.zip'.format(chunk_idx))
                        compressed_chunk_writer.save_as_chunk(chunk_images, compressed_chunk_path)

                        original_chunk_path = os.path.join(original_cache_dir, '{}.zip'.format(chunk_idx))
                        original_chunk_writer.save_as_chunk(chunk_images, original_chunk_path)

                    preview = reader.get_preview()
                    preview.save(os.path.join(db_data_dir, 'preview.jpeg'))
            shutil.rmtree(old_db_task_dir)
        return_dict[db_task_id] = (True, '')
    except Exception as e:
        traceback.print_exc(file=sys.stderr)
        return_dict[db_task_id] = (False, str(e))
    return 0
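
The chunking above pairs itertools.count() with itertools.groupby so that consecutive frames sharing the same index // chunk_size land in one group. A self-contained sketch of that pattern with plain integers standing in for frames:

import itertools

def iter_chunks(iterable, chunk_size):
    # consecutive items share the key index // chunk_size, so groupby yields
    # one group per fixed-size chunk without materializing the whole iterable
    counter = itertools.count()
    for chunk_idx, chunk in itertools.groupby(
            iterable, lambda _: next(counter) // chunk_size):
        yield chunk_idx, list(chunk)

# 7 frames split into chunks of 3 -> [[0, 1, 2], [3, 4, 5], [6]]
print([items for _, items in iter_chunks(range(7), 3)])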
Example #7
def count_files(file_mapping, counter):
    for rel_path, full_path in file_mapping.items():
        mime = get_mime(full_path)
        counter[mime].append(rel_path)
Example #8
def _validate_data(data):
    share_root = settings.SHARE_ROOT
    server_files = {
        'dirs': [],
        'files': [],
    }

    for path in data["server_files"]:
        path = os.path.normpath(path).lstrip('/')
        if '..' in path.split(os.path.sep):
            raise ValueError("Don't use '..' inside file paths")
        full_path = os.path.abspath(os.path.join(share_root, path))
        if 'directory' == get_mime(full_path):
            server_files['dirs'].append(path)
        else:
            server_files['files'].append(path)
        if os.path.commonprefix([share_root, full_path]) != share_root:
            raise ValueError("Bad file path: " + path)

    # Remove directories if other files from them exist in server_files
    data['server_files'] = server_files['files'] + [
        dir_name for dir_name in server_files['dirs'] if not [
            f_name
            for f_name in server_files['files'] if f_name.startswith(dir_name)
        ]
    ]

    def count_files(file_mapping, counter):
        for rel_path, full_path in file_mapping.items():
            mime = get_mime(full_path)
            counter[mime].append(rel_path)

    counter = {media_type: [] for media_type in MEDIA_TYPES.keys()}

    count_files(
        file_mapping={
            f: f
            for f in data['remote_files'] or data['client_files']
        },
        counter=counter,
    )

    count_files(
        file_mapping={
            f: os.path.abspath(os.path.join(share_root, f))
            for f in data['server_files']
        },
        counter=counter,
    )

    unique_entries = 0
    multiple_entries = 0
    for media_type, media_config in MEDIA_TYPES.items():
        if counter[media_type]:
            if media_config['unique']:
                unique_entries += len(counter[media_type])
            else:
                multiple_entries += len(counter[media_type])

    if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1:
        unique_types = ', '.join(
            [k for k, v in MEDIA_TYPES.items() if v['unique']])
        multiply_types = ', '.join(
            [k for k, v in MEDIA_TYPES.items() if not v['unique']])
        count = ', '.join(
            ['{} {}(s)'.format(len(v), k) for k, v in counter.items()])
        raise ValueError('Only one {} or many {} can be used simultaneously, '
            'but {} found.'.format(unique_types, multiply_types, count))

    if unique_entries == 0 and multiple_entries == 0:
        raise ValueError('No media data found')

    return counter
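
A minimal illustration of how _validate_data might be called; the input keys follow the code above, while the file names and the assumption that MEDIA_TYPES treats 'image' as a non-unique type are hypothetical:

# two client-side images, nothing from the share or from remote URLs
data = {
    'server_files': [],
    'remote_files': [],
    'client_files': ['frames/0001.png', 'frames/0002.png'],
}

counter = _validate_data(data)
# counter maps each media type to the matching relative paths, e.g.
# {'image': ['frames/0001.png', 'frames/0002.png'], 'video': [], ...}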