def format_im_fn(meta):
  """Formats an image filename from VCAT image meta.

  :param meta: dict of VCAT image metadata; keys read here:
    'sa_hash' (sha256 or None), 'fn', 'ext' (includes leading dot),
    'uploaded' (bool), 'frame' (keyframe index)
  :returns: filename string, e.g. '<sha256>_<fn><ext>' or '<fn><ext>'
  """
  # FIX: removed dead locals (`sha256`, `sha256_tree` were computed but never
  # used) and the pointless single-argument join(fn), which returned fn as-is
  if meta['uploaded'] or meta['fn'] != 'index':
    # uploaded or non-keyframe image: prefix with sha256 when available
    if meta['sa_hash'] is not None:
      fn = '{}_{}{}'.format(meta['sa_hash'], meta['fn'], meta['ext'])
    else:
      fn = '{}{}'.format(meta['fn'], meta['ext'])
  else:
    # keyframe 'index' image: name by sha256 and frame index
    fn = '{}_{}{}'.format(meta['sa_hash'], meta['frame'], meta['ext'])
  return fn
def load_images(self, dir_media, opt_size, opt_density, opt_drawframes=False):
  """Loads keyframe images for this item from local media storage.

  :param dir_media: root media directory containing the sha256-sharded tree
  :param opt_size: image size enum key into cfg.IMAGE_SIZE_LABELS
  :param opt_density: keyframe density passed to get_keyframes()
  :param opt_drawframes: forwarded to set_keyframes() — TODO confirm semantics
  """
  sha256_tree = file_utils.sha256_tree(self._sha256)
  dir_sha256 = join(dir_media, sha256_tree, self._sha256)
  opt_size_label = cfg.IMAGE_SIZE_LABELS[opt_size]

  # get the keyframe status data to check if images available
  try:
    keyframe_status = self.get_metadata(types.Metadata.KEYFRAME_STATUS)
  except Exception as ex:
    self.log.error('no keyframe metadata. Try: "append -t keyframe_status"')
    return

  keyframes = {}

  # if keyframe images were generated and exist locally
  if keyframe_status and keyframe_status.get_status(opt_size):
    keyframe_metadata = self.get_metadata(types.Metadata.KEYFRAME)
    if not keyframe_metadata:
      self.log.error('no keyframe metadata. Try: "append -t keyframe"')
      return

    # get keyframe indices
    frame_idxs = keyframe_metadata.get_keyframes(opt_density)

    for frame_idx in frame_idxs:
      # get keyframe filepath
      fp_keyframe = join(dir_sha256, file_utils.zpad(frame_idx),
        opt_size_label, 'index.jpg')
      im = cv.imread(fp_keyframe)
      # FIX: replaced bare `except:` around `im.shape` — cv.imread returns
      # None when the file is missing/unreadable, so test that explicitly
      if im is None:
        self.log.warn('file not found: {}'.format(fp_keyframe))
        # don't add to keyframe dict
        continue
      keyframes[frame_idx] = im

  # append metadata to chair_item's mapping item
  self.set_keyframes(keyframes, opt_drawframes)
def format_im_url(url_base, meta, size='lg'):
  """Formats image S3 URL from VCAT image meta.

  :param url_base: base URL prefix joined onto the computed path
  :param meta: dict of VCAT image metadata; keys read here: 'sa_hash',
    'id', 'fn', 'ext', 'uploaded', 'verified', 'frame'
  :param size: size label used in the uploaded-image path (default 'lg')
  :returns: full URL string
  """
  sha256 = meta['sa_hash']  # sha256
  sha256_tree = file_utils.sha256_tree(sha256)
  if meta['uploaded'] or meta['fn'] != 'index':
    # uploaded/non-keyframe images live under media/images/<id>/<fn>/<size><ext>
    fn = join(str(meta['id']), meta['fn'], '{}{}'.format(size, meta['ext']))
    url_path = join('media/images', fn)
  else:
    # keyframe images live under the sha256-sharded keyframe tree
    fn = '{}{}'.format(meta['fn'], meta['ext'])
    # NOTE(review): 'lg' is hardcoded in both keyframe branches while the
    # `size` parameter is only honored for uploaded images — confirm intent
    # NOTE(review): assumes meta['frame'] is already a string (join would
    # raise on an int) — verify against callers
    if meta['verified']:
      url_path = join('v1/media/keyframes', sha256_tree, sha256,
        meta['frame'], 'lg', fn)
    else:
      url_path = join('v1/media/keyframes/unverified', sha256_tree, sha256,
        meta['frame'], 'lg', fn)
  return join(url_base, url_path)
def cli(ctx, opt_fp_neg, opt_dir_project, opt_disk, opt_size):
  """Generates negative training images by symlinking keyframes listed in a CSV.

  For each sha256 in the CSV, symlinks every available keyframe image (verified
  or unverified store) into the project's negatives dir, writes an empty YOLO
  label file per image, and writes the list of negatives to the training file.
  """

  # ------------------------------------------------
  # imports

  import os
  from os.path import join
  from glob import glob
  from pathlib import Path

  from vframe.utils import logger_utils, im_utils, file_utils
  from vframe.settings.paths import Paths

  log = logger_utils.Logger.getLogger()
  log.debug('negative mining')

  # keyframes may live in either the verified or unverified media store
  dir_media_unver = Paths.media_dir(types.Metadata.KEYFRAME, data_store=opt_disk,
    verified=types.Verified.UNVERIFIED)
  dir_media_ver = Paths.media_dir(types.Metadata.KEYFRAME, data_store=opt_disk,
    verified=types.Verified.VERIFIED)
  opt_size_label = cfg.IMAGE_SIZE_LABELS[opt_size]

  # project output paths
  fp_train_neg = join(opt_dir_project, vcat_cfg.FP_TRAIN_NEGATIVES)
  dir_labels_negative = join(opt_dir_project, vcat_cfg.DIR_LABELS_NEGATIVE)
  dir_negative = join(opt_dir_project, vcat_cfg.DIR_IMAGES_NEGATIVE)
  file_utils.mkdirs(dir_negative)
  file_utils.mkdirs(dir_labels_negative)

  negative_list = pd.read_csv(opt_fp_neg)
  # ensure not empty
  negative_list['description'] = negative_list['description'].fillna('')

  neg_training_files = []

  for i, row in negative_list.iterrows():
    sha256 = row['sha256']
    sha256_tree = file_utils.sha256_tree(sha256)
    # gather per-frame directories from both stores
    ver_list = glob(join(dir_media_ver, sha256_tree, sha256, "*"))
    unver_list = glob(join(dir_media_unver, sha256_tree, sha256, "*"))
    dir_frames = ver_list + unver_list
    log.debug('adding {} frames about "{}"'.format(len(dir_frames), row['description']))

    for dir_frame in dir_frames:
      frame_idx = Path(dir_frame).stem
      fp_keyframe_src = join(dir_frame, opt_size_label, 'index.jpg')
      fpp_keyframe_src = Path(fp_keyframe_src)
      if fpp_keyframe_src.exists():
        # create symlinked image
        fpp_keyframe_dst = Path(join(dir_negative, '{}_{}.jpg'.format(sha256, frame_idx)))
        # BUG FIX: the original checked `exists() and is_symlink()`, but
        # Path.exists() follows symlinks, so a dangling symlink reported
        # False and symlink_to() then raised FileExistsError on re-runs;
        # is_symlink() alone detects both live and broken links
        if fpp_keyframe_dst.is_symlink():
          fpp_keyframe_dst.unlink()
        fpp_keyframe_dst.symlink_to(fpp_keyframe_src)
        # create empty label file (negative sample: no annotations)
        fp_label_txt = join(dir_labels_negative, '{}_{}.txt'.format(sha256, frame_idx))
        with open(fp_label_txt, 'w') as fp:
          fp.write('')
        # and, add this file to the training list
        neg_training_files.append(str(fpp_keyframe_dst))

  log.info('writing {} lines to: {}'.format(len(neg_training_files), fp_train_neg))
  file_utils.write_text(neg_training_files, fp_train_neg)

  # add prompt
  log.info('mv labels_negative/*.txt labels/')
  log.info('mv images_negative/*.jpg images/')
def extract(items, dir_out, dir_videos, keyframe_type, threads=1):
  """Extracts keyframes from images

  Decodes the keyframe indices of each item's source video and saves a JPEG
  pyramid (one image per size label) into the sha256-sharded output tree.

  :param items: dict of sha256 -> media item carrying KEYFRAME metadata
  :param dir_out: root output directory for the keyframe image tree
  :param dir_videos: root directory of source videos, sharded by sha256 tree
  :param keyframe_type: key selecting the keyframe index list from metadata
  :param threads: number of worker threads; <= 1 runs the serial path
  """
  task_queue = Queue()
  print_lock = threading.Lock()  # NOTE(review): created but never acquired
  log = logging.getLogger()

  if threads > 1:

    def thread_processor(task_obj):
      # thread-local object keeps per-task state isolated between workers
      tl = threading.local()
      tl.fp_video = task_obj['fp_video']
      tl.idxs = task_obj['idxs']
      tl.dir_out = task_obj['dir_out']
      tl.sha256_tree = task_obj['sha256_tree']
      tl.sha256 = task_obj['sha256']
      try:
        # decode only the requested frame indices from the video
        tl.frame_ims = im_utils.vid2frames(tl.fp_video, idxs=tl.idxs)
      except Exception as ex:
        logging.getLogger().error('Could not read video file')
        logging.getLogger().error('file: {}'.format(tl.fp_video))
        logging.getLogger().error('sha256: {}'.format(tl.sha256))
        return
      tl.labels = cfg.IMAGE_SIZE_LABELS
      tl.sizes = cfg.IMAGE_SIZES
      # iterate sizes largest-first so each pass can downscale the previous
      # result instead of re-decoding (reversed() on a dict requires 3.8+)
      for tl.k_label, tl.k_width in zip(reversed(tl.labels), reversed(tl.sizes)):
        tl.label = tl.labels[tl.k_label]
        tl.width = tl.sizes[tl.k_width]
        # pyramid down frame sizes 1280, 640, 320, 160
        try:
          tl.frame_ims = [im_utils.resize(tl.im, width=tl.width) for tl.im in tl.frame_ims]
        except:
          logging.getLogger().error('')
          logging.getLogger().error('Could not resize. Bad video or missing file')
          logging.getLogger().error(tl.sha256)
          logging.getLogger().error('')
          return
        for tl.idx, tl.im in zip(tl.idxs, tl.frame_ims):
          # ensure path exists
          tl.zpad = file_utils.zpad(tl.idx)
          tl.fp_dst = join(tl.dir_out, tl.sha256_tree, tl.sha256, tl.zpad,
            tl.label, 'index.jpg')
          # convert to PIL
          tl.im_pil = im_utils.ensure_pil(tl.im, bgr2rgb=True)
          file_utils.ensure_path_exists(tl.fp_dst)
          tl.im_pil.save(tl.fp_dst, quality=cfg.JPG_SAVE_QUALITY)

    def process_queue(num_items):
      # TODO: progress bar
      # worker loop runs forever; threads are daemonized so they exit with
      # the process after task_queue.join() returns in the main thread
      while True:
        task_obj = task_queue.get()
        thread_processor(task_obj)
        logging.getLogger().info('process: {:.2f}% {:,}/{:,}'.format(
          (task_queue.qsize() / num_items)*100, num_items-task_queue.qsize(), num_items))
        task_queue.task_done()

    # avoid race conditions by creating dir structure here
    log.info('create directory structure first to avoid race conditions')
    log.info('TODO: this needs to be fixed, thread lock maybe')
    for sha256, item in tqdm(items.items()):
      item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})  # NOTE(review): unused here
      sha256_tree = file_utils.sha256_tree(sha256)
      fp_dst = join(dir_out, sha256_tree)
      file_utils.ensure_path_exists(fp_dst)

    # init threads
    num_items = len(items)
    for i in range(threads):
      t = threading.Thread(target=process_queue, args=(num_items,))
      t.daemon = True
      t.start()

    # process threads: enqueue one task per item with keyframe metadata
    start = time.time()
    for sha256, item in items.items():
      sha256_tree = file_utils.sha256_tree(sha256)
      item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
      if not item_metadata:
        continue
      keyframe_data = item_metadata.metadata
      idxs = keyframe_data.get(keyframe_type)
      fp_video = join(dir_videos, sha256_tree, '{}.{}'.format(sha256, item.ext))
      task_obj = {
        'fp_video': fp_video,
        'idxs': idxs,
        'dir_out': dir_out,
        'sha256': sha256,
        'sha256_tree': sha256_tree
      }
      task_queue.put(task_obj)

    # block until every queued task has been marked done
    task_queue.join()
  else:
    # serial path: same pipeline without the queue/worker machinery
    for sha256, item in tqdm(items.items()):
      item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
      if not item_metadata:
        continue
      sha256_tree = file_utils.sha256_tree(sha256)
      keyframe_data = item_metadata.metadata
      #idxs_basic = keyframe_data.get(KeyframeMetadataType.BASIC)
      #idxs_dense = keyframe_data.get(KeyframeMetadataType.DENSE)
      #idxs_expanded = keyframe_data.get(KeyframeMetadataType.EXPANDED)
      # fetches the metadata by the enum type from the custom click param
      idxs = keyframe_data.get(keyframe_type)
      # get frames from video
      fp_video = join(dir_videos, sha256_tree, '{}.{}'.format(sha256, item.ext))
      frame_ims = im_utils.vid2frames(fp_video, idxs=idxs)
      labels = cfg.IMAGE_SIZE_LABELS
      sizes = cfg.IMAGE_SIZES
      # iterate sizes largest-first; each pass downscales the previous result
      for k_label, k_width in zip(reversed(labels), reversed(sizes)):
        label = labels[k_label]
        width = sizes[k_width]
        # pyramid down frame sizes 1280, 640, 320, 160
        frame_ims = [im_utils.resize(im, width=width) for im in frame_ims]
        for idx, im in zip(idxs, frame_ims):
          # ensure path exists
          zpad = file_utils.zpad(idx)
          fp_dst = join(dir_out, sha256_tree, sha256, zpad, label, 'index.jpg')
          # convert to PIL
          im_pil = im_utils.ensure_pil(im, bgr2rgb=True)
          file_utils.ensure_path_exists(fp_dst)
          im_pil.save(fp_dst, quality=cfg.JPG_SAVE_QUALITY)
def cli(ctx, sink, opt_disk, opt_density):
  """Generates KeyframeStatus metadata"""
  # Recommended: Use Expanded density to check for all keyframes

  # -------------------------------------------------
  # imports

  import os
  from os.path import join
  from pathlib import Path

  from vframe.settings.paths import Paths
  from vframe.settings import vframe_cfg as cfg
  from vframe.utils import file_utils, logger_utils
  from vframe.models.metadata_item import KeyframeStatusMetadataItem

  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()

  # set paths
  media_type = types.Metadata.KEYFRAME
  metadata_type = types.Metadata.KEYFRAME_STATUS
  dir_keyframes = Paths.media_dir(media_type, data_store=opt_disk,
    verified=ctx.opts['verified'])

  # iterate sink
  while True:
    chair_item = yield
    sha256 = chair_item.sha256
    sha256_tree = file_utils.sha256_tree(sha256)
    dir_parent = join(dir_keyframes, sha256_tree, sha256)

    # check if keyframe metadata exists
    keyframe_metadata_item = chair_item.item.get_metadata(types.Metadata.KEYFRAME)
    if not keyframe_metadata_item:
      log.error('no keyframe metadata. try "append -t keyframe", {}'.format(
        keyframe_metadata_item))
      chair_item.item.set_metadata(metadata_type, {})
    else:
      # check if the keyframes images exist
      # NOTE(review): a size label is marked True if ANY keyframe index has
      # an image at that size — presumably "some keyframes exist"; confirm
      status = {k: False for k in cfg.IMAGE_SIZE_LABELS}
      if Path(dir_parent).exists():
        # get keyframe numbers
        idxs = keyframe_metadata_item.get_keyframes(opt_density)
        for idx in idxs:
          # FIX: removed dead `labels = [v for k, v in ...]` list that was
          # rebuilt on every index and never used
          for k, label in cfg.IMAGE_SIZE_LABELS.items():
            fpp_im = Path(dir_parent, file_utils.zpad(idx), label, 'index.jpg')
            if fpp_im.exists():
              status[k] = True

      # append metadata to chair_item's mapping item
      chair_item.item.set_metadata(metadata_type, KeyframeStatusMetadataItem(status))

    # -------------------------------------------------
    # continue processing other items

    sink.send(chair_item)
def cli(ctx, sink, opt_disk, opt_format, opt_metadata_tree_type):
  """Collate deprecated metadata tree files into per-item metadata objects."""

  # -------------------------------------------------
  # imports

  import click
  from pathlib import Path
  from tqdm import tqdm

  from vframe.settings import vframe_cfg as cfg
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils
  from vframe.models.metadata_item import MediainfoMetadataItem, KeyframeMetadataItem
  from cli_vframe import processor

  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()

  # map tree type -> metadata type up front (fixes `metdata_type` typo and
  # fails fast instead of a NameError deep in the item loop for unknown types)
  if opt_metadata_tree_type == types.MetadataTree.MEDIAINFO_TREE:
    metadata_type = types.Metadata.MEDIAINFO
  elif opt_metadata_tree_type == types.MetadataTree.KEYFRAME_TREE:
    metadata_type = types.Metadata.KEYFRAME
  else:
    raise ValueError('{} is not a valid metadata type'.format(opt_metadata_tree_type))

  dir_metadata = Paths.metadata_tree_dir(opt_metadata_tree_type, data_store=opt_disk)

  # accumulate chair items
  chair_items = []
  while True:
    try:
      chair_items.append((yield))
    except GeneratorExit as ex:
      break

  skipped = []
  num_skipped = 0
  found = []
  num_found = 0

  # iterate chair items and gather metadata index.json files
  num_items = len(chair_items)
  for chair_item in tqdm(chair_items):
    item = chair_item.item
    sha256 = item.sha256
    sha256_tree = file_utils.sha256_tree(sha256)
    fpp_metadata = Path(dir_metadata, sha256_tree, sha256, 'index.json')

    # skip if not existing
    metadata = {}
    if fpp_metadata.exists():
      try:
        metadata = file_utils.lazyload(fpp_metadata)
      except Exception as ex:
        log.error('could not read json: {}, ex: {}'.format(str(fpp_metadata), ex))
        continue

    if not metadata:
      # count items skipped
      skipped.append(fpp_metadata)
      num_skipped = len(skipped)
      per = num_skipped / (num_found + num_skipped) * 100
      log.debug('{:.2f}% ({:,}/{:,}) missing'.format(per, num_skipped, (num_found + num_skipped)))
      # BUG FIX: the original stored `metadata_obj` here, which is undefined
      # on the first miss (NameError) and stale from a previous item after
      # that; store empty metadata for missing items instead
      chair_item.item.set_metadata(metadata_type, {})
    else:
      found.append(fpp_metadata)
      num_found = len(found)
      # construct and append metadata (type already validated above)
      if metadata_type == types.Metadata.MEDIAINFO:
        metadata_obj = MediainfoMetadataItem.from_index_json(metadata)
      elif metadata_type == types.Metadata.KEYFRAME:
        metadata_obj = KeyframeMetadataItem.from_index_json(metadata)
      chair_item.item.set_metadata(metadata_type, metadata_obj)

  log.info('skipped: {:,} items'.format(len(skipped)))

  # -------------------------------------------------
  # rebuild

  for chair_item in chair_items:
    sink.send(chair_item)