def cli(ctx, fp_in, opt_format, opt_disk, opt_metadata_types, opt_verified, opt_num_pieces):
  """Add mappings data to chain"""

  # -------------------------------------------------
  # imports

  import os

  from tqdm import tqdm

  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils

  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()
  log.info('opt_format: {}'.format(opt_format))
  log.info('opt_disk: {}'.format(opt_disk))
  log.info('opt_metadata_type(s): {}'.format(opt_metadata_types))

  if not fp_in:
    fp_in = Paths.metadata_index(opt_metadata_types, data_store=opt_disk,
      file_format=opt_format, verified=opt_verified)

  log.info('fp_in: {}'.format(fp_in))

  # load the raw file data
  data = file_utils.lazyload(fp_in)
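# `file_utils.lazyload` is referenced above but defined elsewhere in
# vframe. Based on the JSON/Pickle handling in the convert command below,
# it presumably dispatches on file extension. A minimal sketch of that
# assumed behavior:

import json
import pickle
from pathlib import Path

def lazyload(fp):
  """Loads a serialized index file, choosing the parser by extension."""
  ext = Path(fp).suffix.lower()
  if ext == '.json':
    with open(fp, 'r') as f:
      return json.load(f)
  elif ext == '.pkl':
    with open(fp, 'rb') as f:
      return pickle.load(f)
  raise ValueError('unsupported extension: {}'.format(ext))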
def cli(ctx, sink, opt_dir_media, opt_disk, opt_density, opt_size_type, opt_drawframes):
  """Appends images to ChairItem"""

  # -------------------------------------------------
  # imports

  from os.path import join

  import cv2 as cv

  from vframe.settings import types
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils

  # -------------------------------------------------
  # initialize

  log = logger_utils.Logger.getLogger()
  log.debug('append images to pipeline')

  # resolve keyframes media directory
  if not opt_dir_media:
    dir_media = Paths.media_dir(types.Metadata.KEYFRAME, data_store=opt_disk,
      verified=ctx.opts['verified'])
  else:
    dir_media = opt_dir_media

  # -------------------------------------------------
  # process

  while True:

    chair_item = yield

    if chair_item.chair_type == types.ChairItemType.PHOTO:
      chair_item.load_images(dir_media, opt_size_type, opt_drawframes=opt_drawframes)
    elif chair_item.chair_type == types.ChairItemType.VIDEO:
      pass  # chair_item.load_images(opt_size_type, opt_drawframes=opt_drawframes)
    elif chair_item.chair_type == types.ChairItemType.VIDEO_KEYFRAME:
      chair_item.load_images(opt_size_type, opt_drawframes=opt_drawframes)
    elif chair_item.chair_type == types.ChairItemType.MEDIA_RECORD:
      chair_item.load_images(dir_media, opt_size_type, opt_density,
        opt_drawframes=opt_drawframes)

    # ------------------------------------------------------------
    # send back to generator

    sink.send(chair_item)
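# These commands are coroutine processors: each `yield` receives a
# ChairItem from upstream and `sink.send()` forwards it downstream.
# A self-contained sketch of the pattern (toy names; vframe's actual
# pipeline wiring lives in its Click-based runner, not shown here):

def printer():
  """Terminal sink: consumes and prints items."""
  while True:
    item = yield
    print(item)

def doubler(sink):
  """Mid-pipeline processor: transforms items, then forwards them."""
  while True:
    item = yield
    sink.send(item * 2)

# coroutines must be primed with next() before they accept .send()
tail = printer()
next(tail)
head = doubler(tail)
next(head)
for n in range(3):
  head.send(n)  # prints 0, 2, 4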
def cli(ctx, sink, fp_out, opt_format, opt_disk, opt_metadata_type, opt_minify,
  opt_force, opt_interval):
  """Writes items to disk as JSON or Pickle"""

  from os.path import join
  from pathlib import Path

  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils

  log = logger_utils.Logger.getLogger()

  # construct path
  if not fp_out:
    fp_out = Paths.metadata_index(opt_metadata_type, data_store=opt_disk,
      file_format=opt_format, verified=ctx.opts['verified'])

  # accumulate items
  chair_items = []
  interval_count = 0

  while True:

    chair_item = yield
    chair_items.append(chair_item)

    if len(chair_items) >= opt_interval:

      # serialize accumulated items
      log.debug('chair_items: {}'.format(len(chair_items)))
      mapping_items = file_utils.chair_to_mapping(chair_items)
      log.debug('mapping_items: {}'.format(len(mapping_items)))

      # checkpoint filename, derived from the unmodified fp_out each interval
      fpp_out = Path(fp_out)
      ckpt_suffix = '{}_{}'.format(interval_count * opt_interval,
        (interval_count + 1) * opt_interval)
      fp_out_ckpt = join(str(fpp_out.parent),
        '{}_{}{}'.format(fpp_out.stem, ckpt_suffix, fpp_out.suffix))

      # write to disk
      log.debug('saving checkpoint to: {}'.format(fp_out_ckpt))
      file_utils.write_serialized_items(mapping_items, fp_out_ckpt,
        ensure_path=True, minify=opt_minify)

      # purge written metadata and reset the accumulator
      interval_count += 1
      for ci in chair_items:
        ci.media_record.remove_metadata(opt_metadata_type)
      chair_items = []

    sink.send(chair_item)
def cli(ctx, sink, fp_in, opt_format, opt_disk):
  """Add mappings data to chain"""

  import os
  import logging

  from tqdm import tqdm

  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils
  from vframe.models.chair_item import MediaRecordChairItem

  log = logger_utils.Logger.getLogger()
  log.info('opt_format: {}'.format(opt_format))
  log.info('opt_disk: {}'.format(opt_disk))

  if not fp_in:
    fp_in = Paths.media_record_index(data_store=opt_disk,
      file_format=opt_format, verified=ctx.opts['verified'])

  # load mappings
  # TODO make multithreaded
  log.info('opening: {}'.format(fp_in))
  media_records = file_utils.load_records(fp_in)

  # update ctx variable
  log.debug('set num items: {}'.format(len(media_records)))
  ctx.opts['num_items'] = len(media_records)
  # ctx.opts['chair_type'] = ChairItemType.MEDIA_RECORD

  # begin processing
  if not media_records:
    log.error('no media_records available to process')
    return

  log.info('dispatching {:,} records...'.format(ctx.opts['num_items']))
  for sha256, media_record in tqdm(media_records.items()):
    sink.send(MediaRecordChairItem(ctx, media_record))
def cli(ctx, opt_fp_neg, opt_dir_project, opt_disk, opt_size):
  """Generates negative images"""

  # ------------------------------------------------
  # imports

  import os
  from os.path import join
  from glob import glob
  from pathlib import Path

  import pandas as pd

  from vframe.settings import types, vframe_cfg as cfg
  from vframe.settings import vcat_cfg  # assumed module path for vcat config
  from vframe.settings.paths import Paths
  from vframe.utils import logger_utils, im_utils, file_utils

  log = logger_utils.Logger.getLogger()
  log.debug('negative mining')

  dir_media_unver = Paths.media_dir(types.Metadata.KEYFRAME,
    data_store=opt_disk, verified=types.Verified.UNVERIFIED)
  dir_media_ver = Paths.media_dir(types.Metadata.KEYFRAME,
    data_store=opt_disk, verified=types.Verified.VERIFIED)

  opt_size_label = cfg.IMAGE_SIZE_LABELS[opt_size]

  fp_train_neg = join(opt_dir_project, vcat_cfg.FP_TRAIN_NEGATIVES)
  dir_labels_negative = join(opt_dir_project, vcat_cfg.DIR_LABELS_NEGATIVE)
  dir_negative = join(opt_dir_project, vcat_cfg.DIR_IMAGES_NEGATIVE)
  file_utils.mkdirs(dir_negative)
  file_utils.mkdirs(dir_labels_negative)

  negative_list = pd.read_csv(opt_fp_neg)
  negative_list['description'] = negative_list['description'].fillna('')  # ensure not empty

  neg_training_files = []

  for i, row in negative_list.iterrows():
    sha256 = row['sha256']
    sha256_tree = file_utils.sha256_tree(sha256)
    ver_list = glob(join(dir_media_ver, sha256_tree, sha256, "*"))
    unver_list = glob(join(dir_media_unver, sha256_tree, sha256, "*"))
    dir_frames = ver_list + unver_list
    log.debug('adding {} frames about "{}"'.format(len(dir_frames), row['description']))

    # for each keyframe, add it to the training set if its image exists
    for dir_frame in dir_frames:
      frame_idx = Path(dir_frame).stem
      fp_keyframe_src = join(dir_frame, opt_size_label, 'index.jpg')
      fpp_keyframe_src = Path(fp_keyframe_src)
      if fpp_keyframe_src.exists():
        # create symlinked image
        fpp_keyframe_dst = Path(join(dir_negative, '{}_{}.jpg'.format(sha256, frame_idx)))
        if fpp_keyframe_dst.exists() and fpp_keyframe_dst.is_symlink():
          fpp_keyframe_dst.unlink()
        fpp_keyframe_dst.symlink_to(fpp_keyframe_src)
        # create empty label file
        fp_label_txt = join(dir_labels_negative, '{}_{}.txt'.format(sha256, frame_idx))
        with open(fp_label_txt, 'w') as fp:
          fp.write('')
        # add this file to the training list
        neg_training_files.append(str(fpp_keyframe_dst))

  log.info('writing {} lines to: {}'.format(len(neg_training_files), fp_train_neg))
  file_utils.write_text(neg_training_files, fp_train_neg)

  # prompt the remaining manual steps
  log.info('mv labels_negative/*.txt labels/')
  log.info('mv images_negative/*.jpg images/')
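# `file_utils.sha256_tree` shards a 64-hex digest into nested directory
# segments so no single directory accumulates millions of entries. The
# real implementation lives elsewhere in vframe; a plausible sketch
# (segment width and depth are assumptions):

from os.path import join as path_join

def sha256_tree(sha256, depth=4, width=3):
  """e.g. 'a1b2c3d4e5f6...' -> 'a1b/2c3/d4e/5f6' (depth/width assumed)."""
  return path_join(*[sha256[i * width:(i + 1) * width] for i in range(depth)])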
def cli(ctx, sink, fp_in, opt_format, opt_disk, opt_metadata_types):
  """Appends metadata to media record"""

  # -------------------------------------------------
  # imports

  import os

  from tqdm import tqdm

  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils
  from vframe.models.chair_item import ChairItem

  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()
  log.info('fp_in: {}'.format(fp_in))
  log.info('opt_format: {}'.format(opt_format))
  log.info('opt_disk: {}'.format(opt_disk))
  log.info('opt_metadata_type(s): {}'.format(opt_metadata_types))

  if not fp_in:
    fps_in = [Paths.metadata_index(opt_metadata_type, data_store=opt_disk,
      file_format=opt_format, verified=ctx.opts['verified'])
      for opt_metadata_type in opt_metadata_types]
  else:
    fps_in = [fp_in]  # explicit filepath pairs with the first metadata type

  # accumulate items
  chair_items = []
  while True:
    try:
      chair_items.append((yield))
    except GeneratorExit as ex:
      break

  # ------------------------------------------------------------------------
  # append items

  for opt_metadata_type, fp_in in zip(opt_metadata_types, fps_in):
    log.debug('opening: {}'.format(fp_in))
    media_records = file_utils.load_records(fp_in)
    if not media_records:
      log.error('no metadata items or file. check "-d" / "--disk" and try again')
      return
    log.debug('appending: {}'.format(opt_metadata_type.name.lower()))
    for chair_item in tqdm(chair_items):
      sha256 = chair_item.sha256
      metadata = media_records[sha256].get_metadata(opt_metadata_type)
      chair_item.media_record.set_metadata(opt_metadata_type, metadata)

  # ------------------------------------------------
  # rebuild the generator

  for chair_item in tqdm(chair_items):
    sink.send(chair_item)
def cli(ctx, sink, opt_metadata, opt_disk, opt_stroke_width, opt_stroke_color, opt_text_color):
  """Displays images"""

  # -------------------------------------------------
  # imports

  import os

  import cv2 as cv
  import numpy as np

  from vframe.settings import types
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils, chair_utils, draw_utils

  # -------------------------------------------------
  # init

  log = logger_utils.Logger.getLogger()

  # load class labels
  classes = []  # default so detection-less metadata types don't break below
  if opt_metadata == types.Metadata.COCO:
    opt_net = types.DetectorNet.COCO
    fp_classes = Paths.darknet_classes(data_store=opt_disk, opt_net=opt_net)
    classes = file_utils.load_text(fp_classes)  # returns list in idx order
  elif opt_metadata == types.Metadata.OPENIMAGES:
    opt_net = types.DetectorNet.OPENIMAGES
    fp_classes = Paths.darknet_classes(data_store=opt_disk, opt_net=opt_net)
    classes = file_utils.load_text(fp_classes)  # returns list in idx order
  elif opt_metadata == types.Metadata.SUBMUNITION:
    opt_net = types.DetectorNet.SUBMUNITION
    fp_classes = Paths.darknet_classes(data_store=opt_disk, opt_net=opt_net)
    classes = file_utils.load_text(fp_classes)  # returns list in idx order
  elif opt_metadata == types.Metadata.PLACES365:
    opt_net = types.ClassifyNet.PLACES365
    # TODO add class file
  elif opt_metadata == types.Metadata.TEXT_ROI:
    pass
  elif opt_metadata == types.Metadata.FACE_ROI:
    pass

  # get colors for stroke
  # TODO externalize function
  colors = get_color_map(cmap='autumn', reverse=True, ncolors=len(classes))

  # -------------------------------------------------
  # process

  while True:

    chair_item = yield

    drawframes = {}  # new drawframes

    # ---------------------------------------------------------------
    # draw on images, assume detection results (not classify)

    detection_metadata = chair_item.get_metadata(opt_metadata)

    for frame_idx in chair_item.drawframes.keys():
      drawframe = chair_item.drawframes.get(frame_idx)
      imh, imw = drawframe.shape[:2]

      detection_results = detection_metadata.metadata.get(frame_idx)
      for detection_result in detection_results:

        if opt_metadata in (types.Metadata.COCO, types.Metadata.SUBMUNITION,
            types.Metadata.VOC, types.Metadata.OPENIMAGES):
          # draw object detection boxes and labels
          log.debug(detection_result)
          frame = draw_utils.draw_detection_result(drawframe, classes,
            detection_result, imw, imh, stroke_weight=opt_stroke_width,
            rect_color=colors[detection_result.idx], text_color=opt_text_color)
        elif opt_metadata == types.Metadata.TEXT_ROI:
          frame = draw_utils.draw_roi(drawframe, detection_result, imw, imh,
            text='TEXT', stroke_weight=opt_stroke_width,
            rect_color=opt_stroke_color, text_color=opt_text_color)
        elif opt_metadata == types.Metadata.FACE_ROI:
          frame = draw_utils.draw_roi(drawframe, detection_result, imw, imh,
            text='FACE', stroke_weight=opt_stroke_width,
            rect_color=opt_stroke_color, text_color=opt_text_color)

      # add to current item's drawframes dict
      drawframes[frame_idx] = drawframe

    chair_item.set_drawframes(drawframes)

    # ------------------------------------------------
    # rebuild the generator

    sink.send(chair_item)
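# `get_color_map` is called above but flagged "TODO externalize"; its
# definition is not in this excerpt. A minimal sketch matching the call
# signature, sampling a matplotlib colormap and converting to BGR tuples
# for OpenCV drawing (implementation details assumed):

import numpy as np
from matplotlib import cm

def get_color_map(cmap='autumn', reverse=False, ncolors=10):
  """Samples `ncolors` evenly spaced colors from a matplotlib colormap."""
  colormap = cm.get_cmap(cmap)
  samples = np.linspace(0, 1, ncolors)
  if reverse:
    samples = samples[::-1]
  # matplotlib returns RGBA floats; convert to 0-255 BGR for cv2
  return [tuple(int(c * 255) for c in colormap(s)[:3][::-1]) for s in samples]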
def cli(ctx, fp_in, opt_metadata_type, opt_type, opt_verified, opt_id, opt_disk, opt_format):
  """Searches for info with ID"""

  # -------------------------------------------------
  # imports

  import os
  from os.path import join
  from pathlib import Path

  from vframe.settings import types
  from vframe.utils import file_utils, logger_utils
  from vframe.settings.paths import Paths
  from vframe.settings import vframe_cfg as cfg

  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()
  log.debug('opt_type: {}, opt_metadata_type: {}'.format(opt_type, opt_metadata_type))

  # if not opt_type:
  #   # auto guess
  #   nchars = len(opt_id)
  #   if nchars == 64:
  #     opt_types = [types.SearchParam.SA_ID, types.SearchParam.SHA256]
  #   elif nchars == 32:
  #     opt_type = [types.SearchParam.MD5]
  #   else:
  #     log.error('id not a valid format. use either 32-hex MD5 or 64-hex SHA256')
  #     return

  if not fp_in:
    if opt_metadata_type:
      fp_in = Paths.metadata_index(data_store=opt_disk, file_format=opt_format,
        verified=opt_verified, metadata_type=opt_metadata_type)
    else:
      # use source media_records
      fp_in = Paths.media_record_index(data_store=opt_disk,
        file_format=opt_format, verified=opt_verified)

  log.info('opening: {}'.format(fp_in))
  media_records = file_utils.load_records(fp_in)
  log.info('searching {:,} media records for {}: {}'.format(
    len(media_records), opt_type, opt_id))

  found_items = []
  for sha256, media_record in media_records.items():
    if opt_type == types.SearchParam.SHA256:
      # quick match on sha256
      if opt_id == sha256:
        found_items.append(media_record)
        break
    else:
      # get sugarcube metadata
      sugarcube_metadata = media_record.get_metadata(types.Metadata.SUGARCUBE)
      if not sugarcube_metadata:
        log.error('no sugarcube metadata. Try "append -t sugarcube"')
        return
      # match other params
      if opt_type == types.SearchParam.SA_ID:
        if opt_id == sugarcube_metadata.sa_id:
          found_items.append(media_record)
          break
      elif opt_type == types.SearchParam.MD5:
        if opt_id == sugarcube_metadata.md5:
          found_items.append(media_record)
          break

  if not found_items:
    log.error('No results')
    return

  log.info('{} item(s) found'.format(len(found_items)))
  for media_record in found_items:
    metadata_records = media_record.metadata
    log.debug('sha256: {}'.format(media_record.sha256))
    log.debug('\tformat: {}'.format(media_record.media_format))
    log.debug('\tverified: {}'.format(media_record.verified))
    if opt_metadata_type:
      for metadata_type, metadata_obj in metadata_records.items():
        log.debug('\ttype: {}'.format(metadata_type))
        try:
          log.debug('\tmetadata: {}'.format(metadata_obj.serialize()))
        except Exception as ex:
          log.debug('\tmetadata: {}'.format(metadata_obj.__dict__))
def cli(ctx, fp_in, fp_out, opt_disk, opt_verified, opt_format_in, opt_format_out,
  opt_metadata_type, opt_minify, opt_force):
  """Converts JSON to Pickle"""

  # -------------------------------------------------
  # imports

  import os
  from os.path import join
  from pathlib import Path

  import click_spinner

  from vframe.settings import vframe_cfg as cfg
  from vframe.settings.paths import Paths
  from vframe.settings import types
  from vframe.utils import file_utils, logger_utils

  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()

  if not opt_metadata_type and not fp_in:
    log.error('Error: missing option for either "-t" / "--type" or "-i" / "--input"')
    return

  if not fp_in:
    fp_in = Paths.metadata_index(opt_metadata_type, data_store=opt_disk,
      file_format=opt_format_in, verified=opt_verified)
  if not fp_out:
    fpp_in = Path(fp_in)
    ext = opt_format_out.name.lower()
    fp_out = join(str(fpp_in.parent), '{}.{}'.format(fpp_in.stem, ext))

  # check again
  ext_in, ext_out = (file_utils.get_ext(fp_in), file_utils.get_ext(fp_out))
  if ext_in == ext_out or opt_format_in == opt_format_out:
    ctx.fail('Cannot convert from "{}" to "{}" (same format)'.format(ext_in, ext_out))

  if Path(fp_out).exists() and not opt_force:
    log.error('File exists. Use "-f" / "--force" to overwrite. {}'.format(fp_out))
    return

  with click_spinner.spinner():
    log.info('Converting {} to {}'.format(fp_in, fp_out))
    if ext_out == types.FileExt.PKL.name.lower():
      file_utils.write_pickle(file_utils.load_json(fp_in), fp_out)
    elif ext_out == types.FileExt.JSON.name.lower():
      file_utils.write_json(file_utils.load_pickle(fp_in), fp_out, minify=opt_minify)

  # compare sizes (bytes to MB)
  size_src = os.path.getsize(fp_in) / 1000000
  size_dst = os.path.getsize(fp_out) / 1000000
  per = size_dst / size_src * 100
  txt_verb = 'increased' if size_dst > size_src else 'decreased'
  log.info('Size {} from {:.2f}MB to {:.2f}MB ({:.2f}%)'.format(
    txt_verb, size_src, size_dst, per))
def cli(ctx, sink, opt_disk, opt_density):
  """Generates KeyframeStatus metadata"""

  # Recommended: use EXPANDED density to check for all keyframes

  # -------------------------------------------------
  # imports

  import os
  from os.path import join
  from pathlib import Path

  from vframe.settings import types
  from vframe.settings.paths import Paths
  from vframe.settings import vframe_cfg as cfg
  from vframe.utils import file_utils, logger_utils
  from vframe.models.metadata_item import KeyframeStatusMetadataItem

  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()

  # set paths
  media_type = types.Metadata.KEYFRAME
  metadata_type = types.Metadata.KEYFRAME_STATUS
  dir_keyframes = Paths.media_dir(media_type, data_store=opt_disk,
    verified=ctx.opts['verified'])

  # iterate sink
  while True:

    chair_item = yield

    sha256 = chair_item.sha256
    sha256_tree = file_utils.sha256_tree(sha256)
    dir_parent = join(dir_keyframes, sha256_tree, sha256)

    # check if keyframe metadata exists
    keyframe_metadata_item = chair_item.item.get_metadata(types.Metadata.KEYFRAME)
    if not keyframe_metadata_item:
      log.error('no keyframe metadata. try "append -t keyframe", {}'.format(
        keyframe_metadata_item))
      chair_item.item.set_metadata(metadata_type, {})
    else:
      # check if the keyframe images exist for each size label
      status = {k: False for k in cfg.IMAGE_SIZE_LABELS}
      if Path(dir_parent).exists():
        # get keyframe numbers
        idxs = keyframe_metadata_item.get_keyframes(opt_density)
        for idx in idxs:
          for k, label in cfg.IMAGE_SIZE_LABELS.items():
            fpp_im = Path(dir_parent, file_utils.zpad(idx), label, 'index.jpg')
            if fpp_im.exists():
              status[k] = True

      # append metadata to chair_item's mapping item
      chair_item.item.set_metadata(metadata_type, KeyframeStatusMetadataItem(status))

    # -------------------------------------------------
    # continue processing other items

    sink.send(chair_item)
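# The status check above assumes keyframe images are laid out as
# <dir_keyframes>/<sha256_tree>/<sha256>/<zero-padded idx>/<size label>/index.jpg.
# `file_utils.zpad` is defined elsewhere in vframe; presumably a
# zero-padding helper along these lines (default width is an assumption):

def zpad(n, num_zeros=6):
  """Zero-pads a frame index for stable lexicographic sorting, e.g. 42 -> '000042'."""
  return str(n).zfill(num_zeros)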
def cli(ctx, fp_in, fp_out, opt_media_record_type, opt_client_record_type, opt_disk,
  opt_media_format_type, opt_format, opt_verified, opt_minify, opt_force):
  """Generates dataset records"""

  # -------------------------------------------------
  # imports

  import os
  from os.path import join
  from pathlib import Path

  from tqdm import tqdm

  from vframe.settings import types
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils
  from vframe.settings import vframe_cfg as cfg
  from vframe.models.media_item import MediaRecordItem
  from vframe.models.chair_item import ChairItem

  # -------------------------------------------------
  # process

  metadata_type = types.Metadata.MEDIA_RECORD

  log = logger_utils.Logger.getLogger()

  if not fp_out:
    fp_out = Paths.metadata_index(metadata_type, data_store=opt_disk,
      file_format=opt_format, verified=opt_verified)

  log.debug('fp_in: {}'.format(fp_in))
  log.debug('fp_out: {}'.format(fp_out))
  log.debug('opt_disk: {}'.format(opt_disk))
  log.debug('opt_media_format_type: {}'.format(opt_media_format_type))
  log.debug('opt_media_record_type: {}'.format(opt_media_record_type))
  log.debug('opt_verified: {}'.format(opt_verified))

  # input error handling
  if opt_media_format_type == types.MediaFormat.PHOTO:
    log.error('Option not available: {}'.format(types.MediaFormat.PHOTO))
    return
  if opt_media_record_type != types.MediaRecord.SHA256:
    log.error('Option not available: {}'.format(opt_media_record_type))
    return
  if opt_client_record_type != types.ClientRecord.SUGARCUBE:
    log.error('Option not available: {}'.format(opt_client_record_type))
    return

  # handle different types of input records
  if opt_client_record_type == types.ClientRecord.SUGARCUBE:

    # generate records from Sugarcube client export data
    # CSV header: sa_id,sha256,md5,location,verified
    csv_rows = file_utils.load_csv(fp_in)  # as list

    # remap as sugarcube items
    media_records = {}
    log.debug('mapping {:,} entries to {}'.format(len(csv_rows), opt_media_record_type))
    for row in tqdm(csv_rows):
      sha256 = row.get('sha256', None)
      fp_media = row.get('location', None)
      is_verified = row.get('verified', '').lower() == 'true'
      verified = types.Verified.VERIFIED if is_verified else types.Verified.UNVERIFIED
      if sha256 and fp_media and len(sha256) == 64 and verified == opt_verified:
        ext = file_utils.get_ext(fp_media)
        media_format = file_utils.ext_media_format(ext)  # enums.MediaType
        if media_format == opt_media_format_type:
          media_records[sha256] = MediaRecordItem(sha256, media_format, verified)

    log.debug('filtered: {:,} records'.format(len(media_records)))
    log.debug('fp_out: {}'.format(fp_out))
    file_utils.write_serialized_items(media_records, fp_out,
      ensure_path=True, minify=opt_minify)
def cli(ctx, sink, opt_disk, opt_net, opt_gpu):
  """Generates detection metadata (CV DNN)"""

  # ----------------------------------------------------------------
  # imports

  import os
  from os.path import join
  from pathlib import Path

  import click
  import cv2 as cv
  import numpy as np

  from vframe.settings import types
  from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
  from vframe.models.metadata_item import DetectMetadataItem, DetectResult
  from vframe.settings.paths import Paths

  # ----------------------------------------------------------------
  # init

  log = logger_utils.Logger.getLogger()

  # TODO externalize function

  # initialize dnn
  dnn_clr = (0, 0, 0)  # mean color to subtract
  dnn_scale = 1 / 255  # scale pixel values to [0, 1]
  nms_threshold = 0.4  # non-maximum suppression threshold
  dnn_swap_rb = 1      # swap R/B channels (cv images are BGR)
  dnn_crop = False     # crop or force resize

  # use multiples of 32: 416, 448, 480, 512, 544, 576, 608, 640, 672, 704
  if opt_net == types.DetectorNet.OPENIMAGES:
    metadata_type = types.Metadata.OPENIMAGES
    dnn_size = (608, 608)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.COCO:
    metadata_type = types.Metadata.COCO
    dnn_size = (416, 416)
    dnn_threshold = 0.925
  elif opt_net == types.DetectorNet.COCO_SPP:
    metadata_type = types.Metadata.COCO
    dnn_size = (608, 608)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.VOC:
    metadata_type = types.Metadata.VOC
    dnn_size = (416, 416)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.SUBMUNITION:
    metadata_type = types.Metadata.SUBMUNITION
    dnn_size = (608, 608)
    dnn_threshold = 0.90
  else:
    raise ValueError('{} is not a valid detector network'.format(opt_net))

  # initialize the parameters
  fp_cfg = Paths.darknet_cfg(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_weights = Paths.darknet_weights(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_data = Paths.darknet_data(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_classes = Paths.darknet_classes(opt_net=opt_net, data_store=opt_disk)

  class_names = file_utils.load_text(fp_classes)
  class_idx_lookup = {label: i for i, label in enumerate(class_names)}

  log.debug('fp_cfg: {}'.format(fp_cfg))
  log.debug('fp_weights: {}'.format(fp_weights))
  log.debug('fp_data: {}'.format(fp_data))
  log.debug('fp_classes: {}'.format(fp_classes))

  net = cv.dnn.readNetFromDarknet(fp_cfg, fp_weights)
  net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
  net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

  # ----------------------------------------------------------------
  # process

  # iterate sink
  while True:

    chair_item = yield

    metadata = {}

    for frame_idx, frame in chair_item.keyframes.items():
      frame = im_utils.resize(frame, width=dnn_size[0], height=dnn_size[1])
      blob = cv.dnn.blobFromImage(frame, dnn_scale, dnn_size, dnn_clr,
        dnn_swap_rb, crop=dnn_crop)

      # set the input to the network
      net.setInput(blob)

      # run the forward pass to get output of the output layers
      net_outputs = net.forward(dnn_utils.getOutputsNames(net))
      det_results = dnn_utils.nms_cvdnn(net_outputs, dnn_threshold, nms_threshold)
      metadata[frame_idx] = det_results

    # append metadata to chair_item's mapping item
    chair_item.set_metadata(metadata_type, DetectMetadataItem(metadata))

    # ----------------------------------------------------------------
    # yield back to the processor pipeline

    sink.send(chair_item)
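# `dnn_utils.getOutputsNames` is not shown in this excerpt. It most likely
# mirrors the helper from the OpenCV object-detection samples, which
# resolves the names of the unconnected (output) layers so net.forward()
# returns all YOLO output blobs at once. A minimal sketch of that assumed
# implementation:

import numpy as np

def getOutputsNames(net):
  """Returns the names of the network's output layers (OpenCV-sample style)."""
  layer_names = net.getLayerNames()
  # getUnconnectedOutLayers returns 1-based indices; depending on the
  # OpenCV version they arrive flat or nested, so flatten defensively
  out_idxs = np.array(net.getUnconnectedOutLayers()).flatten()
  return [layer_names[i - 1] for i in out_idxs]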
def cli(ctx, sink, opt_disk, opt_format, opt_metadata_tree_type):
  """Collates deprecated metadata tree files"""

  # -------------------------------------------------
  # imports

  import click
  from pathlib import Path

  from tqdm import tqdm

  from vframe.settings import types
  from vframe.settings import vframe_cfg as cfg
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils
  from vframe.models.metadata_item import MediainfoMetadataItem, KeyframeMetadataItem
  from cli_vframe import processor

  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()

  if opt_metadata_tree_type == types.MetadataTree.MEDIAINFO_TREE:
    metadata_type = types.Metadata.MEDIAINFO
  elif opt_metadata_tree_type == types.MetadataTree.KEYFRAME_TREE:
    metadata_type = types.Metadata.KEYFRAME

  dir_metadata = Paths.metadata_tree_dir(opt_metadata_tree_type, data_store=opt_disk)

  # accumulate chair items
  chair_items = []
  while True:
    try:
      chair_items.append((yield))
    except GeneratorExit as ex:
      break

  skipped = []
  num_skipped = 0
  found = []
  num_found = 0

  # iterate chair items and gather metadata index.json files
  num_items = len(chair_items)
  for chair_item in tqdm(chair_items):

    item = chair_item.item
    sha256 = item.sha256
    sha256_tree = file_utils.sha256_tree(sha256)
    fpp_metadata = Path(dir_metadata, sha256_tree, sha256, 'index.json')

    # skip if the metadata file does not exist or cannot be read
    metadata = {}
    if fpp_metadata.exists():
      try:
        metadata = file_utils.lazyload(fpp_metadata)
      except Exception as ex:
        log.error('could not read json: {}, ex: {}'.format(str(fpp_metadata), ex))
        continue

    if not metadata:
      # count skipped items
      skipped.append(fpp_metadata)
      num_skipped = len(skipped)
      per = num_skipped / (num_found + num_skipped) * 100
      log.debug('{:.2f}% ({:,}/{:,}) missing'.format(
        per, num_skipped, (num_found + num_skipped)))
      chair_item.item.set_metadata(metadata_type, {})
    else:
      found.append(fpp_metadata)
      num_found = len(found)
      # construct and append metadata
      if metadata_type == types.Metadata.MEDIAINFO:
        metadata_obj = MediainfoMetadataItem.from_index_json(metadata)
      elif metadata_type == types.Metadata.KEYFRAME:
        metadata_obj = KeyframeMetadataItem.from_index_json(metadata)
      else:
        raise ValueError('{} is not a valid metadata type'.format(metadata_type))
      chair_item.item.set_metadata(metadata_type, metadata_obj)

  log.info('skipped: {:,} items'.format(len(skipped)))

  # -------------------------------------------------
  # rebuild the generator

  for chair_item in chair_items:
    sink.send(chair_item)
def cli(ctx, sink, fp_out, opt_format, opt_disk, opt_metadata_type, opt_minify,
  opt_force, opt_suffix, opt_ckpt_size, opt_purge):
  """Writes items to disk as JSON or Pickle"""

  # ------------------------------------------------------
  # imports

  import sys
  from os.path import join
  from pathlib import Path
  from collections import OrderedDict
  import gc
  import copy

  import numpy as np

  from vframe.settings import vframe_cfg as cfg
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, click_utils
  from vframe.utils import logger_utils
  from vframe.models.chair_item import MediaRecordChairItem

  # --------------------------------------------------------
  # init

  log = logger_utils.Logger.getLogger()

  if not fp_out:
    fp_out = Paths.metadata_index(opt_metadata_type, data_store=opt_disk,
      file_format=opt_format, verified=ctx.opts['verified'])

  fpp_out = Path(fp_out)
  if opt_suffix:
    fp_out = join(str(fpp_out.parent),
      '{}_{}{}'.format(fpp_out.stem, opt_suffix, fpp_out.suffix))

  def create_ckpt_fpaths(num_items, opt_ckpt_size):
    """Builds the list of checkpoint filepaths, one per interval"""
    fpaths = []
    ckpts = list(range(0, num_items, opt_ckpt_size))
    if np.max(np.array(ckpts)) < num_items:
      ckpts.append(num_items)
    for i, ckpt in enumerate(ckpts[:-1]):
      n_start = file_utils.zpad(ckpt, num_zeros=cfg.CKPT_ZERO_PADDING)
      n_end = file_utils.zpad(ckpts[i + 1], num_zeros=cfg.CKPT_ZERO_PADDING)
      ckpt_suffix = 'ckpt_{}_{}{}'.format(n_start, n_end, fpp_out.suffix)  # e.g. ckpt_0_10.pkl
      fpaths.append(join(str(fpp_out.parent), '{}_{}'.format(fpp_out.stem, ckpt_suffix)))
    return fpaths

  # --------------------------------------------------------
  # checkpoint interval saving

  if opt_ckpt_size:

    # save items every N iterations
    yield_count = 0
    ckpt_iter_num = 0
    chair_items = []
    ckpt_fpaths = []

    while True:

      chair_item = yield
      yield_count += 1

      # ctx variables can only be accessed after the processor starts
      # hack: set filepaths after the while/yield loop starts
      if not ckpt_fpaths:
        num_items = ctx.opts['num_items']
        ckpt_fpaths = create_ckpt_fpaths(num_items, opt_ckpt_size)
        log.debug('{}'.format(ckpt_fpaths))
        # ensure no checkpoint file already exists
        for fp in ckpt_fpaths:
          if Path(fp).exists() and not opt_force:
            log.error('File "{}" exists. Use "-f" to override'.format(fp))
            log.error('This error occurs late because it needs variables from the processor context')
            return

      # accumulate chair items
      chair_items.append(chair_item)

      if (yield_count > 0 and yield_count % opt_ckpt_size == 0) or yield_count >= num_items:
        fp_out_ckpt = ckpt_fpaths[ckpt_iter_num]

        # convert chair items to media records
        log.debug('chair_items: {}'.format(len(chair_items)))
        mapping_items = file_utils.chair_to_mapping(chair_items)

        # write to disk
        log.debug('fp_out: {}'.format(fp_out_ckpt))
        file_utils.write_serialized_items(mapping_items, fp_out_ckpt,
          ensure_path=True, minify=opt_minify)

        # TODO improve this
        # purge written metadata and reset accumulators
        for ci in chair_items:
          ci.purge_metadata()
        chair_items = []
        mapping_items = []
        ckpt_iter_num += 1

        # collect garbage
        gc.collect()

      # continue chair processors
      sink.send(chair_item)

  else:

    # --------------------------------------------------------
    # save all items at once

    # exit if file exists
    if Path(fp_out).exists() and not opt_force:
      log.error('File "{}" exists. Use "-f" to override'.format(fp_out))
      return

    # accumulate items
    chair_items = []
    while True:
      try:
        chair_items.append((yield))
      except GeneratorExit as ex:
        break

    if not chair_items:
      log.error('no items to save')
      return

    # convert chair items to media records
    log.debug('chair_items: {}'.format(len(chair_items)))
    mapping_items = file_utils.chair_to_mapping(chair_items)
    log.debug('mapping_items: {}'.format(len(mapping_items)))

    # write to disk
    log.debug('fp_out: {}'.format(fp_out))
    file_utils.write_serialized_items(mapping_items, fp_out,
      ensure_path=True, minify=opt_minify)

    # rebuild the generator
    for chair_item in chair_items:
      sink.send(chair_item)