class VideoDatasetManifestReader(FragmentMediaReader):
    def __init__(self, manifest_path, **kwargs):
        self.source_path = kwargs.pop('source_path')
        super().__init__(**kwargs)
        self._manifest = VideoManifestManager(manifest_path)
        self._manifest.init_index()

    def _get_nearest_left_key_frame(self):
        if self._start_chunk_frame_number >= \
                self._manifest[len(self._manifest) - 1].get('number'):
            left_border = len(self._manifest) - 1
        else:
            left_border = 0
            delta = len(self._manifest)
            while delta:
                step = delta // 2
                cur_position = left_border + step
                if self._manifest[cur_position].get('number') < self._start_chunk_frame_number:
                    cur_position += 1
                    left_border = cur_position
                    delta -= step + 1
                else:
                    delta = step
            if self._manifest[cur_position].get('number') > self._start_chunk_frame_number:
                left_border -= 1
        frame_number = self._manifest[left_border].get('number')
        timestamp = self._manifest[left_border].get('pts')
        return frame_number, timestamp

    def __iter__(self):
        start_decode_frame_number, start_decode_timestamp = self._get_nearest_left_key_frame()
        with closing(av.open(self.source_path, mode='r')) as container:
            video_stream = next(stream for stream in container.streams if stream.type == 'video')
            video_stream.thread_type = 'AUTO'

            container.seek(offset=start_decode_timestamp, stream=video_stream)

            frame_number = start_decode_frame_number - 1
            for packet in container.demux(video_stream):
                for frame in packet.decode():
                    frame_number += 1
                    if frame_number in self._frame_range:
                        if video_stream.metadata.get('rotate'):
                            frame = av.VideoFrame().from_ndarray(
                                rotate_image(
                                    frame.to_ndarray(format='bgr24'),
                                    360 - int(container.streams.video[0].metadata.get('rotate'))
                                ),
                                format='bgr24'
                            )
                        yield frame
                    elif frame_number < self._frame_range[-1]:
                        continue
                    else:
                        return
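# The _get_nearest_left_key_frame lookup above is a hand-rolled binary search over key-frame
# records sorted by frame number. A minimal standalone sketch of the same idea, assuming a toy
# manifest represented as a list of {'number': ..., 'pts': ...} dicts (the values are made up):
from bisect import bisect_right

key_frames = [{'number': 0, 'pts': 0}, {'number': 30, 'pts': 15360}, {'number': 60, 'pts': 30720}]

def nearest_left_key_frame(key_frames, start_frame):
    # index of the right-most key frame whose number is <= start_frame
    idx = max(bisect_right([kf['number'] for kf in key_frames], start_frame) - 1, 0)
    return key_frames[idx]['number'], key_frames[idx]['pts']

assert nearest_left_key_frame(key_frames, 45) == (30, 15360)
assert nearest_left_key_frame(key_frames, 60) == (60, 30720)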
def migrate2meta(apps, schema_editor):
    logger = get_logger(MIGRATION_NAME, MIGRATION_LOG)
    query_set = _get_query_set(apps)
    for db_data in query_set:
        try:
            upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
            logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir))
            meta_path = os.path.join(upload_dir, "meta_info.txt")
            if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')):
                os.remove(os.path.join(upload_dir, 'manifest.jsonl'))
                logger.info('A manifest file has been deleted')
            if os.path.exists(os.path.join(upload_dir, 'index.json')):
                os.remove(os.path.join(upload_dir, 'index.json'))
                logger.info('A manifest index file has been deleted')
            data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
            if hasattr(db_data, 'video'):
                if os.path.exists(meta_path):
                    logger.info('A meta_info.txt already exists')
                    continue
                media_file = os.path.join(data_dir, db_data.video.path)
                logger.info('Preparing video meta information has begun')
                meta = VideoManifestManager(manifest_path=upload_dir) \
                    .prepare_meta(media_file=media_file, force=True)
                with open(meta_path, "w") as meta_file:
                    for idx, pts, _ in meta:
                        meta_file.write(f"{idx} {pts}\n")
            else:
                name_format = "dummy_{}.txt"
                sources = [db_image.path for db_image in db_data.images.all().order_by('frame')]
                counter = itertools.count()
                logger.info('Preparing dummy chunks has begun')
                for idx, img_paths in itertools.groupby(sources, lambda x: next(counter) // db_data.chunk_size):
                    if os.path.exists(os.path.join(upload_dir, name_format.format(idx))):
                        logger.info(name_format.format(idx) + " already exists")
                        continue
                    with open(os.path.join(upload_dir, name_format.format(idx)), "w") as dummy_chunk:
                        dummy_chunk.writelines([f"{img_path}\n" for img_path in img_paths])
            logger.info('Successful migration for the data({})'.format(db_data.id))
        except Exception as ex:
            logger.error(str(ex))
def migrate2manifest(apps, schema_editor):
    logger = get_logger(MIGRATION_NAME, MIGRATION_LOG)
    logger.info('The data migration for creating manifest files has been started')
    query_set = _get_query_set(apps)
    logger.info('Need to update {} data objects'.format(len(query_set)))
    for db_data in query_set:
        try:
            upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
            logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir))
            if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')):
                os.remove(os.path.join(upload_dir, 'meta_info.txt'))
                logger.info('{}/meta_info.txt has been deleted'.format(upload_dir))
            else:
                for path in glob.glob(f'{upload_dir}/dummy_*.txt'):
                    os.remove(path)
                    logger.info(f"{path} has been deleted")
            # it's necessary for the case of a long data migration
            if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')):
                logger.info('Manifest file already exists')
                continue
            data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
            if hasattr(db_data, 'video'):
                media_file = os.path.join(data_dir, db_data.video.path)
                manifest = VideoManifestManager(manifest_path=upload_dir)
                logger.info('Preparing video meta information has begun')
                meta_info = manifest.prepare_meta(media_file=media_file, force=True)
                logger.info('Manifest creation has begun')
                manifest.create(meta_info)
                logger.info('Index creation has begun')
                manifest.init_index()
            else:
                manifest = ImageManifestManager(manifest_path=upload_dir)
                sources = []
                if db_data.storage == StorageChoice.LOCAL:
                    for (root, _, files) in os.walk(data_dir):
                        sources.extend([os.path.join(root, f) for f in files if get_mime(f) == 'image'])
                    sources.sort()
                # data is on the share, so we cannot explicitly restore the entire directory structure
                else:
                    sources = [os.path.join(data_dir, db_image.path)
                        for db_image in db_data.images.all().order_by('frame')]
                if any(task.dimension == DimensionType.DIM_3D for task in db_data.tasks.all()):
                    logger.info('Preparing 3D images meta information has begun')
                    content = []
                    for source in sources:
                        name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                        content.append({'name': name, 'extension': ext})
                else:
                    logger.info('Preparing 2D images meta information has begun')
                    meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir)
                    content = meta_info.content
                if db_data.storage == StorageChoice.SHARE:
                    def _get_frame_step(str_):
                        match = search(r"step\s*=\s*([1-9]\d*)", str_)
                        return int(match.group(1)) if match else 1
                    logger.info('Data is located on the share, metadata update has been started')
                    step = _get_frame_step(db_data.frame_filter)
                    start = db_data.start_frame
                    stop = db_data.stop_frame + 1
                    images_range = range(start, stop, step)
                    result_content = []
                    for i in range(stop):
                        item = content.pop(0) if i in images_range else dict()
                        result_content.append(item)
                    content = result_content
                logger.info('Manifest creation has begun')
                manifest.create(content)
                logger.info('Index creation has begun')
                manifest.init_index()
            logger.info('Successful migration for the data({})'.format(db_data.id))
        except Exception as ex:
            logger.error(str(ex))
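# For data stored on the share, the migration above aligns manifest content with the original
# frame filter: frames skipped by 'step' get an empty placeholder entry. A minimal sketch of
# that alignment with made-up values (the helper mirrors _get_frame_step above):
from re import search

def get_frame_step(frame_filter):
    match = search(r"step\s*=\s*([1-9]\d*)", frame_filter)
    return int(match.group(1)) if match else 1

content = [{'name': 'img_0'}, {'name': 'img_2'}, {'name': 'img_4'}]  # entries for kept frames only
start_frame, stop_frame = 0, 5
step = get_frame_step('step=2')  # -> 2

kept = range(start_frame, stop_frame + 1, step)
aligned = [content.pop(0) if i in kept else {} for i in range(stop_frame + 1)]
assert aligned == [{'name': 'img_0'}, {}, {'name': 'img_2'}, {}, {'name': 'img_4'}, {}]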
def _create_thread(tid, data):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)
    if manifest_file:
        assert settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        else:
            upload_dir = settings.SHARE_ROOT

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if media_type in {'archive', 'zip'} and db_data.storage == StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = StorageChoice.LOCAL
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
        validate_dimension.set_path(os.path.split(extractor.get_zip_filename())[0])
        validate_dimension.validate()
        if validate_dimension.dimension == DimensionType.DIM_3D:
            db_task.dimension = DimensionType.DIM_3D
            extractor.reconcile(
                source_files=list(validate_dimension.related_files.keys()),
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
                dimension=DimensionType.DIM_3D,
            )
            extractor.add_files(validate_dimension.converted_files)

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for the 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with only a slight increase in quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size(0)
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            # replace the manifest file (e.g. 'subdir/manifest.jsonl' was uploaded)
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                    db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest['properties']['length']
                            video_size = manifest['properties']['resolution']
                            manifest_is_prepared = True
                        except Exception as ex:
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'An invalid manifest file was uploaded.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Starting to prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Starting to prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size
                        )
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest has been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "The uploaded video does not support a quick way of task creation."
                    _update_status("{} The task will be created using the old method.".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)

                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    if db_task.dimension == DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            data_dir=upload_dir
                        )
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'extension': ext
                            })
                    manifest.create(content)
                    manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []
                    for _, frame_id in chunk_paths:
                        properties = manifest[frame_id]
                        if db_task.dimension == DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        if validate_dimension.dimension == DimensionType.DIM_2D:
            models.Image.objects.bulk_create(db_images)
        else:
            related_file = []
            for image_data in db_images:
                image_model = models.Image(
                    data=image_data.data,
                    path=image_data.path,
                    frame=image_data.frame,
                    width=image_data.width,
                    height=image_data.height
                )
                image_model.save()
                image_data = models.Image.objects.get(id=image_model.id)
                if validate_dimension.related_files.get(image_data.path, None):
                    for related_image_file in validate_dimension.related_files[image_data.path]:
                        related_file.append(
                            RelatedFile(data=db_data, primary_image_id=image_data.id, path=related_image_file))
            RelatedFile.objects.bulk_create(related_file)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
def _create_thread(tid, data, isImport=False):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        if db_data.storage != models.StorageChoice.CLOUD_STORAGE:
            data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)
    if manifest_file:
        assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == models.StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        elif db_data.storage == models.StorageChoice.SHARE:
            upload_dir = settings.SHARE_ROOT
        else:  # cloud storage
            if not manifest_file:
                raise Exception('A manifest file was not found')
            db_cloud_storage = db_data.cloud_storage
            credentials = Credentials()
            credentials.convert_from_db({
                'type': db_cloud_storage.credentials_type,
                'value': db_cloud_storage.credentials,
            })
            details = {
                'resource': db_cloud_storage.resource,
                'credentials': credentials,
                'specific_attributes': db_cloud_storage.get_specific_attributes()
            }
            cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
            first_sorted_media_image = sorted(media['image'])[0]
            cloud_storage_instance.download_file(first_sorted_media_image,
                os.path.join(upload_dir, first_sorted_media_image))

            # prepare the task manifest file from the cloud storage manifest file
            manifest = ImageManifestManager(db_data.get_manifest_path())
            cloud_storage_manifest = ImageManifestManager(
                os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0])
            )
            cloud_storage_manifest.set_index()
            media_files = sorted(media['image'])
            content = cloud_storage_manifest.get_subset(media_files)
            manifest.create(content)
            manifest.init_index()

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None
    manifest_index = _get_manifest_frame_indexer()

    # If images and directories are uploaded via server_files,
    # the image list needs to be extended with all images found in those directories
    if (data['server_files']) and len(media['directory']) and len(media['image']):
        media['image'].extend(
            [os.path.relpath(image, upload_dir) for image in
                MEDIA_TYPES['directory']['extractor'](
                    source_path=[os.path.join(upload_dir, f) for f in media['directory']],
                ).absolute_source_paths
            ]
        )
        media['directory'] = []

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = models.StorageChoice.LOCAL
            if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
                manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
                db_data.start_frame = 0
                data['stop_frame'] = None
                db_data.frame_filter = ''
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
        extractor.extract()

    if db_data.storage == models.StorageChoice.LOCAL or \
            (db_data.storage == models.StorageChoice.SHARE and \
            isinstance(extractor, MEDIA_TYPES['zip']['extractor'])):
        validate_dimension.set_path(upload_dir)
        validate_dimension.validate()

    if db_task.project is not None and db_task.project.tasks.count() > 1 and db_task.project.tasks.first().dimension != validate_dimension.dimension:
        raise Exception(f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})')

    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        db_task.dimension = models.DimensionType.DIM_3D

        extractor.reconcile(
            source_files=[os.path.join(upload_dir, f) for f in validate_dimension.related_files.keys()],
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            dimension=models.DimensionType.DIM_3D,
        )

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        extractor.filter(lambda x: not re.search(r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths, upload_dir)

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == models.DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == models.DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for the 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with only a slight increase in quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE):
                w, h = extractor.get_image_size(0)
            else:
                img_properties = manifest[0]
                w, h = img_properties['width'], img_properties['height']
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            # replace the manifest file (e.g. 'subdir/manifest.jsonl' was uploaded)
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                    db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest.video_length
                            video_size = manifest.video_resolution
                            manifest_is_prepared = True
                        except Exception as ex:
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())

                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'An invalid manifest file was uploaded.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Starting to prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Starting to prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size
                        )
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest has been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "The uploaded video does not support a quick way of task creation."
                    _update_status("{} The task will be created using the old method.".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    if db_task.dimension == models.DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            meta={ k: {'related_images': related_images[k]} for k in related_images },
                            data_dir=upload_dir
                        )
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'meta': { 'related_images': related_images[''.join((name, ext))] },
                                'extension': ext
                            })
                    manifest.create(content)
                    manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []
                    for _, frame_id in chunk_paths:
                        properties = manifest[manifest_index(frame_id)]
                        if db_task.dimension == models.DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            models.RelatedFile(data=image.data, primary_image=image, path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        models.RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
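# The helper _get_manifest_frame_indexer is referenced above but not shown. A minimal sketch of
# what such an indexer could look like, assuming it maps an extractor frame id to the matching
# manifest entry via the original start frame and frame step (this behavior is inferred, not
# taken from the source):
def get_manifest_frame_indexer(start_frame=0, frame_step=1):
    # hypothetical: extractor frames are 0, 1, 2, ..., while the manifest is indexed
    # by the original task frame numbers
    return lambda frame_id: start_frame + frame_id * frame_step

manifest_index = get_manifest_frame_indexer()        # identity mapping for freshly created tasks
imported_index = get_manifest_frame_indexer(10, 2)   # e.g. an imported task with start_frame=10, step=2
assert imported_index(3) == 16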
def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
    if isinstance(db_task, int):
        db_task = models.Task.objects.select_for_update().get(pk=db_task)

    slogger.glob.info("create task #{}".format(db_task.id))

    db_data = db_task.data
    upload_dir = db_data.get_upload_dirname()

    if data['remote_files'] and not isDatasetImport:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_files = []
    media = _count_files(data, manifest_files)
    media, task_mode = _validate_data(media, manifest_files)

    if data['server_files']:
        if db_data.storage == models.StorageChoice.LOCAL:
            _copy_data_from_source(data['server_files'], upload_dir, data.get('server_files_path'))
        elif db_data.storage == models.StorageChoice.SHARE:
            upload_dir = settings.SHARE_ROOT

    manifest_root = None
    if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}:
        manifest_root = upload_dir
    elif db_data.storage == models.StorageChoice.CLOUD_STORAGE:
        manifest_root = db_data.cloud_storage.get_storage_dirname()
    manifest_file = _validate_manifest(manifest_files, manifest_root)
    if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
        raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")

    if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE:
        if not manifest_file:
            raise Exception('A manifest file was not found')
        db_cloud_storage = db_data.cloud_storage
        credentials = Credentials()
        credentials.convert_from_db({
            'type': db_cloud_storage.credentials_type,
            'value': db_cloud_storage.credentials,
        })
        details = {
            'resource': db_cloud_storage.resource,
            'credentials': credentials,
            'specific_attributes': db_cloud_storage.get_specific_attributes()
        }
        cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
        sorted_media = sort(media['image'], data['sorting_method'])
        first_sorted_media_image = sorted_media[0]
        cloud_storage_instance.download_file(first_sorted_media_image,
            os.path.join(upload_dir, first_sorted_media_image))

        # prepare the task manifest file from the cloud storage manifest file
        # NOTE we should create the manifest before defining chunk_size
        # FIXME in the future, when archive support is implemented
        manifest = ImageManifestManager(db_data.get_manifest_path())
        cloud_storage_manifest = ImageManifestManager(
            os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
            db_data.cloud_storage.get_storage_dirname()
        )
        cloud_storage_manifest.set_index()
        sequence, content = cloud_storage_manifest.get_subset(sorted_media)
        sorted_content = (i[1] for i in sorted(zip(sequence, content)))
        manifest.create(sorted_content)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None
    manifest_index = _get_manifest_frame_indexer()

    # If images and directories are uploaded via server_files,
    # the image list needs to be extended with all images found in those directories
    if (data['server_files']) and len(media['directory']) and len(media['image']):
        media['image'].extend(
            [os.path.relpath(image, upload_dir) for image in
                MEDIA_TYPES['directory']['extractor'](
                    source_path=[os.path.join(upload_dir, f) for f in media['directory']],
                ).absolute_source_paths
            ]
        )
        media['directory'] = []

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')

            if (isDatasetImport or isBackupRestore) and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
                manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
                db_data.start_frame = 0
                data['stop_frame'] = None
                db_data.frame_filter = ''

            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if manifest_file and not isBackupRestore and data['sorting_method'] in \
                    {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
                raise Exception("It isn't supported to upload a manifest file and use random sorting")
            if isBackupRestore and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
                    data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
                raise Exception("It isn't supported to import a task that was created without cache but with random/predefined sorting")

            details = {
                'source_path': source_paths,
                'step': db_data.get_frame_step(),
                'start': db_data.start_frame,
                'stop': data['stop_frame'],
            }
            if media_type in {'archive', 'zip', 'pdf'} and db_data.storage == models.StorageChoice.SHARE:
                details['extract_dir'] = db_data.get_upload_dirname()
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = models.StorageChoice.LOCAL
            if media_type != 'video':
                details['sorting_method'] = data['sorting_method']
            extractor = MEDIA_TYPES[media_type]['extractor'](**details)

    validate_dimension = ValidateDimension()
    if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
        extractor.extract()

    if db_data.storage == models.StorageChoice.LOCAL or \
            (db_data.storage == models.StorageChoice.SHARE and \
            isinstance(extractor, MEDIA_TYPES['zip']['extractor'])):
        validate_dimension.set_path(upload_dir)
        validate_dimension.validate()

    if db_task.project is not None and db_task.project.tasks.count() > 1 and db_task.project.tasks.first().dimension != validate_dimension.dimension:
        raise Exception(f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})')

    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        db_task.dimension = models.DimensionType.DIM_3D

        keys_of_related_files = validate_dimension.related_files.keys()
        absolute_keys_of_related_files = [os.path.join(upload_dir, f) for f in keys_of_related_files]
        # When a task is created, the sorting method can be random and in this case reinitialization will use the correct sorting,
        # but when a task is restored from a backup, random sorting is changed to predefined and we need to manually sort files
        # in the correct order.
        source_files = absolute_keys_of_related_files if not isBackupRestore else \
            [item for item in extractor.absolute_source_paths if item in absolute_keys_of_related_files]
        extractor.reconcile(
            source_files=source_files,
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            dimension=models.DimensionType.DIM_3D,
        )

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        extractor.filter(lambda x: not re.search(r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths, upload_dir)

    if isBackupRestore and not isinstance(extractor, MEDIA_TYPES['video']['extractor']) and db_data.storage_method == models.StorageMethodChoice.CACHE and \
            db_data.sorting_method in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED} and validate_dimension.dimension != models.DimensionType.DIM_3D:
        # we should sort media_files according to the manifest content sequence,
        # and in general this must happen after the validation step for 3D data and after filtering out related_images
        manifest = ImageManifestManager(db_data.get_manifest_path())
        manifest.set_index()
        sorted_media_files = []
        for idx in range(len(extractor.absolute_source_paths)):
            properties = manifest[idx]
            image_name = properties.get('name', None)
            image_extension = properties.get('extension', None)

            full_image_path = os.path.join(upload_dir, f"{image_name}{image_extension}") if image_name and image_extension else None
            if full_image_path and full_image_path in extractor:
                sorted_media_files.append(full_image_path)
        media_files = sorted_media_files.copy()
        del sorted_media_files

        data['sorting_method'] = models.SortingMethod.PREDEFINED
        extractor.reconcile(
            source_files=media_files,
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            sorting_method=data['sorting_method'],
        )

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_message = 'Images are being compressed'
        if not progress:
            status_message = '{} {}'.format(status_message, progress_animation[update_progress.call_counter])
        job.meta['status'] = status_message
        job.meta['task_progress'] = progress or 0.
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == models.DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == models.DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for the 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with only a slight increase in quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE):
                w, h = extractor.get_image_size(0)
            else:
                img_properties = manifest[0]
                w, h = img_properties['width'], img_properties['height']
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            # replace the manifest file (e.g. 'subdir/manifest.jsonl' or 'some_manifest.jsonl' was uploaded)
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file),
                    db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(
                                source_path=os.path.join(upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest.video_length
                            video_size = manifest.video_resolution
                            manifest_is_prepared = True
                        except Exception as ex:
                            manifest.remove()
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'An invalid manifest file was uploaded.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Starting to prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Starting to prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        manifest.link(media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size)
                        manifest.create()
                        _update_status('A manifest has been created')

                        all_frames = len(manifest.reader)
                        video_size = manifest.reader.resolution
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
                    manifest.remove()
                    del manifest
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "The uploaded video does not support a quick way of task creation."
                    _update_status("{} The task will be created using the old method.".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    manifest.link(
                        sources=extractor.absolute_source_paths,
                        meta={ k: {'related_images': related_images[k]} for k in related_images },
                        data_dir=upload_dir,
                        DIM_3D=(db_task.dimension == models.DimensionType.DIM_3D),
                    )
                    manifest.create()
                else:
                    manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []
                    for chunk_path, frame_id in chunk_paths:
                        properties = manifest[manifest_index(frame_id)]

                        # check mapping
                        if not chunk_path.endswith(f"{properties['name']}{properties['extension']}"):
                            raise Exception('Incorrect file mapping to manifest content')
                        if db_task.dimension == models.DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            models.RelatedFile(data=image.data, primary_image=image, path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        models.RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
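# In all versions of _create_thread above, the final stop_frame is derived from the number of
# extracted frames as start_frame + (size - 1) * frame_step, and a user-provided stop_frame is
# clamped to that bound. A quick worked example with illustrative numbers:
def resolve_stop_frame(start_frame, size, frame_step, requested_stop_frame=0):
    last_covered = start_frame + (size - 1) * frame_step
    # 0 means "not set": use the computed bound; otherwise clamp the request to it
    return last_covered if requested_stop_frame == 0 else min(requested_stop_frame, last_covered)

assert resolve_stop_frame(start_frame=10, size=5, frame_step=3) == 22  # frames 10, 13, 16, 19, 22
assert resolve_stop_frame(start_frame=10, size=5, frame_step=3, requested_stop_frame=100) == 22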
def migrate_data(apps, schema_editor):
    Data = apps.get_model("engine", "Data")
    query_set = Data.objects.filter(storage_method=StorageMethodChoice.CACHE)
    for db_data in query_set:
        try:
            upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
            if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')):
                os.remove(os.path.join(upload_dir, 'meta_info.txt'))
            else:
                for path in glob.glob(f'{upload_dir}/dummy_*.txt'):
                    os.remove(path)
            # it's necessary for the case of a long data migration
            if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')):
                continue
            data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
            if hasattr(db_data, 'video'):
                media_file = os.path.join(data_dir, db_data.video.path)
                manifest = VideoManifestManager(manifest_path=upload_dir)
                meta_info = manifest.prepare_meta(media_file=media_file)
                manifest.create(meta_info)
                manifest.init_index()
            else:
                manifest = ImageManifestManager(manifest_path=upload_dir)
                sources = []
                if db_data.storage == StorageChoice.LOCAL:
                    for (root, _, files) in os.walk(data_dir):
                        sources.extend([os.path.join(root, f) for f in files])
                    sources.sort()
                # data is on the share, so we cannot explicitly restore the entire directory structure
                else:
                    sources = [os.path.join(data_dir, db_image.path)
                        for db_image in db_data.images.all().order_by('frame')]
                if any(task.dimension == DimensionType.DIM_3D for task in db_data.tasks.all()):
                    content = []
                    for source in sources:
                        name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                        content.append({'name': name, 'extension': ext})
                else:
                    meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir)
                    content = meta_info.content
                if db_data.storage == StorageChoice.SHARE:
                    def _get_frame_step(str_):
                        match = search(r"step\s*=\s*([1-9]\d*)", str_)
                        return int(match.group(1)) if match else 1
                    step = _get_frame_step(db_data.frame_filter)
                    start = db_data.start_frame
                    stop = db_data.stop_frame + 1
                    images_range = range(start, stop, step)
                    result_content = []
                    for i in range(stop):
                        item = content.pop(0) if i in images_range else dict()
                        result_content.append(item)
                    content = result_content
                manifest.create(content)
                manifest.init_index()
        except Exception as ex:
            print(str(ex))