def detect_gid_list(ibs, gid_list, tree_path_list, downsample=True, **kwargs):
    """
    Args:
        gid_list (list of int): the list of IBEIS image_rowids that need detection
        tree_path_list (list of str): the list of trees to load for detection
        downsample (bool, optional): a flag to indicate if the original image
            sizes should be used; defaults to True
                True:  ibs.get_image_detectpaths() is used
                False: ibs.get_image_paths() is used

    Kwargs (optional):
        refer to the PyRF documentation for configuration settings

    Yields:
        results (list of dict)
    """
    # Get new gpaths if downsampling
    if downsample:
        gpath_list = ibs.get_image_detectpaths(gid_list)
        neww_list = [vt.open_image_size(gpath)[0] for gpath in gpath_list]
        oldw_list = [oldw for (oldw, oldh) in ibs.get_image_sizes(gid_list)]
        downsample_list = [oldw / neww for oldw, neww in zip(oldw_list, neww_list)]
    else:
        gpath_list = ibs.get_image_paths(gid_list)
        downsample_list = [None] * len(gpath_list)
    # Run detection
    results_iter = detect(ibs, gpath_list, tree_path_list, **kwargs)
    # Upscale the results
    for gid, downsample, (gpath, result_list) in zip(gid_list, downsample_list, results_iter):
        # Upscale the results back up to the original image size
        if downsample is not None and downsample != 1.0:
            for result in result_list:
                for key in ['centerx', 'centery', 'xtl', 'ytl', 'width', 'height']:
                    result[key] = int(result[key] * downsample)
        yield gid, gpath, result_list
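# A minimal usage sketch for the PyRF detect_gid_list generator above,
# assuming an IBEIS controller opened via ibeis.opendb as in the doctests
# further below; the database name and tree_path_list are placeholders.
import ibeis

ibs = ibeis.opendb('testdb1')
gid_list = ibs.get_valid_gids()
tree_path_list = ['/path/to/trees']  # placeholder
for gid, gpath, result_list in detect_gid_list(ibs, gid_list, tree_path_list):
    print('gid=%r -> %d detections' % (gid, len(result_list)))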
def thumb_getter(id_, thumbsize=128):
    """ Thumb getters must conform to thumbtup structure """
    if id_ not in imgname_list:
        return {
            'fpath': id_ + '.jpg',
            'thread_func': thread_func,
            'main_func': lambda: (id_,),
        }
    # print(id_)
    if id_ == 'doesnotexist.jpg':
        return None
        # (unreachable: the early return above exits first)
        img_path = None
        img_size = (100, 100)
    else:
        img_path = ut.grab_test_imgpath(id_, verbose=False)
        img_size = vt.open_image_size(img_path)
    thumb_path = join(guitool_test_thumbdir, ut.hashstr(str(img_path)) + '.jpg')
    if id_ == 'carl.jpg':
        bbox_list = [(10, 10, 200, 200)]
        theta_list = [0]
    elif id_ == 'lena.png':
        # bbox_list = [(10, 10, 200, 200)]
        bbox_list = [None]
        theta_list = [None]
    else:
        bbox_list = []
        theta_list = []
    interest_list = [False]
    thumbtup = (thumb_path, img_path, img_size, bbox_list, theta_list, interest_list)
    # print('thumbtup = %r' % (thumbtup,))
    return thumbtup
def detect_gid_list(ibs, gid_list, downsample=False, **kwargs):
    """
    Args:
        gid_list (list of int): the list of IBEIS image_rowids that need detection
        downsample (bool, optional): a flag to indicate if the original image
            sizes should be used; defaults to False
                True:  ibs.get_image_detectpaths() is used
                False: ibs.get_image_paths() is used

    Kwargs (optional):
        refer to the PyDarknet documentation for configuration settings

    Yields:
        results (list of dict)
    """
    # Get new gpaths if downsampling
    if downsample:
        gpath_list = ibs.get_image_detectpaths(gid_list)
        neww_list = [vt.open_image_size(gpath)[0] for gpath in gpath_list]
        oldw_list = [oldw for (oldw, oldh) in ibs.get_image_sizes(gid_list)]
        downsample_list = [oldw / neww for oldw, neww in zip(oldw_list, neww_list)]
        orient_list = [1] * len(gid_list)
    else:
        gpath_list = ibs.get_image_paths(gid_list)
        downsample_list = [None] * len(gpath_list)
        orient_list = ibs.get_image_orientation(gid_list)
    # Run detection
    results_iter = detect(gpath_list, **kwargs)
    # Upscale the results
    for downsample, gid, orient, (gpath, result_list) in zip(downsample_list, gid_list, orient_list, results_iter):
        # Upscale the results back up to the original image size
        for result in result_list:
            if downsample is not None and downsample != 1.0:
                for key in ['xtl', 'ytl', 'width', 'height']:
                    result[key] = int(result[key] * downsample)
            bbox = (result['xtl'], result['ytl'], result['width'], result['height'])
            bbox_list = [bbox]
            bbox_list = ibs.fix_horizontal_bounding_boxes_to_orient(gid, bbox_list)
            bbox = bbox_list[0]
            result['xtl'], result['ytl'], result['width'], result['height'] = bbox
        yield (gid, gpath, result_list)
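# Worked example of the upscaling step in detect_gid_list above: if the
# original image is 2000 px wide and the downsampled detection copy is
# 500 px wide, then downsample = 2000 / 500 = 4.0, so a detected xtl of 25
# maps back to int(25 * 4.0) = 100 in original-image coordinates.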
def resize_imagelist_to_sqrtarea(gpath_list, new_gpath_list=None,
                                 sqrt_area=800, output_dir=None,
                                 checkexists=True, **kwargs):
    """ Resizes images and yields results asynchronously """
    import vtool as vt
    target_area = sqrt_area ** 2
    # Read image sizes
    gsize_list = [vt.open_image_size(gpath) for gpath in gpath_list]
    # Compute new sizes which preserve aspect ratio
    newsize_list = [vt.ScaleStrat.area(target_area, wh) for wh in gsize_list]
    if new_gpath_list is None:
        # Compute names for the new images if not given
        if output_dir is None:
            # Create an output directory if not specified
            output_dir = 'resized_sqrtarea%r' % sqrt_area
        ut.ensuredir(output_dir)
        size_suffixs = ['_' + repr(newsize).replace(' ', '') for newsize in newsize_list]
        from os.path import basename
        old_gnames = [basename(p) for p in gpath_list]
        new_gname_list = [ut.augpath(p, suffix=s)
                          for p, s in zip(old_gnames, size_suffixs)]
        new_gpath_list = [join(output_dir, gname) for gname in new_gname_list]
        new_gpath_list = list(map(ut.unixpath, new_gpath_list))
    assert len(new_gpath_list) == len(gpath_list), 'unequal len'
    assert len(newsize_list) == len(gpath_list), 'unequal len'
    # Evaluate generator
    if checkexists:
        exists_list = list(map(exists, new_gpath_list))
        gpath_list_ = ut.filterfalse_items(gpath_list, exists_list)
        new_gpath_list_ = ut.filterfalse_items(new_gpath_list, exists_list)
        newsize_list_ = ut.filterfalse_items(newsize_list, exists_list)
    else:
        gpath_list_ = gpath_list
        new_gpath_list_ = new_gpath_list
        newsize_list_ = newsize_list
    generator = resize_imagelist_generator(gpath_list_, new_gpath_list_,
                                           newsize_list_, **kwargs)
    for res in generator:
        pass
    # return [res for res in generator]
    return new_gpath_list
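# A minimal usage sketch for resize_imagelist_to_sqrtarea, assuming the
# default naming scheme: resize images so each has roughly sqrt_area**2
# pixels, writing into output_dir. The input paths are placeholders.
gpath_list = ['/path/to/img1.jpg', '/path/to/img2.jpg']  # placeholders
new_gpath_list = resize_imagelist_to_sqrtarea(gpath_list, sqrt_area=800,
                                              output_dir='resized',
                                              checkexists=True)
print(new_gpath_list)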
def check_image_sizes(data_uri_order, all_kpts, offset_list):
    """
    Check if any keypoints go out of bounds wrt their associated images
    """
    import vtool as vt
    from os.path import join
    imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images')
    gpath_list = [join(imgdir, imgid + '.jpg') for imgid in data_uri_order]
    imgsize_list = [vt.open_image_size(gpath) for gpath in gpath_list]
    kpts_list = [all_kpts[l:r] for l, r in ut.itertwo(offset_list)]
    kpts_extent = [
        vt.get_kpts_image_extent(kpts, outer=False, only_xy=False)
        for kpts in ut.ProgIter(kpts_list, 'kpts extent')
    ]
    for i, (size, extent) in enumerate(zip(imgsize_list, kpts_extent)):
        w, h = size
        _, maxx, _, maxy = extent
        assert np.isnan(maxx) or maxx < w
        assert np.isnan(maxy) or maxy < h
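# A small sketch contrasting vt.open_image_size with a full image read
# (the full-read variant mirrors the code commented out in read_thumb_size
# below). Assumption: open_image_size returns (width, height) from the image
# header without decoding pixels, consistent with its use above. The path is
# a placeholder.
import vtool as vt

gpath = '/path/to/image.jpg'  # placeholder
w, h = vt.open_image_size(gpath)   # header-only size read -> (width, height)
npimg = vt.imread(gpath)           # full image decode
assert (npimg.shape[1], npimg.shape[0]) == (w, h)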
def thumb_getter(id_, thumbsize=128):
    """ Thumb getters must conform to thumbtup structure """
    # print(id_)
    if id_ == 'doesnotexist.jpg':
        return None
        img_path = None
        img_size = (100, 100)
    else:
        img_path = ut.grab_test_imgpath(id_, verbose=False)
        img_size = vt.open_image_size(img_path)
    thumb_path = join(guitool_test_thumbdir, ut.hashstr(str(img_path)) + '.jpg')
    if id_ == 'carl.jpg':
        bbox_list = [(10, 10, 200, 200)]
        theta_list = [0]
    elif id_ == 'lena.png':
        # bbox_list = [(10, 10, 200, 200)]
        bbox_list = [None]
        theta_list = [None]
    else:
        bbox_list = []
        theta_list = []
    thumbtup = (thumb_path, img_path, img_size, bbox_list, theta_list)
    # print('thumbtup = %r' % (thumbtup,))
    return thumbtup
def read_thumb_size(thumb_path):
    import vtool as vt
    if VERBOSE_THUMB:
        print('[ThumbDelegate] Reading thumb size')
    # npimg = vt.imread(thumb_path, delete_if_corrupted=True)
    # (height, width) = npimg.shape[0:2]
    # del npimg
    try:
        width, height = vt.open_image_size(thumb_path)
    except IOError as ex:
        if ut.checkpath(thumb_path, verbose=True):
            ut.printex(
                ex,
                'image=%r seems corrupted. Needs deletion' % (thumb_path,),
                iswarning=True,
            )
            ut.delete(thumb_path)
        else:
            ut.printex(ex, 'image=%r does not exist', (thumb_path,), iswarning=True)
        raise
    return width, height
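# A minimal caller sketch for read_thumb_size: the function re-raises IOError
# after deleting a corrupted thumb, so a caller may catch it and regenerate
# the thumbnail. The path is a placeholder.
thumb_path = '/path/to/thumb.jpg'  # placeholder
try:
    width, height = read_thumb_size(thumb_path)
except IOError:
    # thumb missing or corrupted (and deleted); regenerate it here
    width, height = None, None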
def download_sharks(XMLdata, number):
    """
    cd ~/work/WS_ALL
    python -m ibeis.scripts.getshark

    >>> from ibeis.scripts.getshark import *  # NOQA
    >>> url = 'www.whaleshark.org/listImages.jsp'
    >>> XMLdata = ut.url_read(url)
    >>> number = None
    """
    # Prepare the output directory for writing, if it doesn't exist
    output_dir = 'sharkimages'
    ut.ensuredir(output_dir)
    dom = parseString(XMLdata)

    # Download files
    if number:
        maxCount = min(number, len(dom.getElementsByTagName('img')))
    else:
        maxCount = len(dom.getElementsByTagName('img'))
    parsed_info = dict(
        img_url_list=[],
        localid_list=[],
        nameid_list=[],
        orig_fname_list=[],
        new_fname_list=[],
    )
    print('Preparing to fetch %i files...' % maxCount)
    for shark in dom.getElementsByTagName('shark'):
        localCount = 0
        for imageset in shark.getElementsByTagName('imageset'):
            for img in imageset.getElementsByTagName('img'):
                localCount += 1
                img_url = img.getAttribute('href')
                orig_fname = split(img_url)[1]
                ext = splitext(orig_fname)[1].lower()
                nameid = shark.getAttribute('number')
                new_fname = '%s-%i%s' % (nameid, localCount, ext)
                parsed_info['img_url_list'].append(img_url)
                parsed_info['nameid_list'].append(nameid)
                parsed_info['localid_list'].append(localCount)
                parsed_info['orig_fname_list'].append(orig_fname)
                parsed_info['new_fname_list'].append(new_fname)
                print('Parsed %i / %i files.' % (len(parsed_info['orig_fname_list']), maxCount))
                if number is not None and len(parsed_info['orig_fname_list']) == number:
                    break
    parsed_info['new_fpath_list'] = [join(output_dir, _fname)
                                     for _fname in parsed_info['new_fname_list']]

    print('Filtering parsed images')
    # Filter based on image type (keep only jpgs)
    ext_flags = [_fname.endswith('.jpg') for _fname in parsed_info['new_fname_list']]
    parsed_info = {key: ut.compress(list_, ext_flags)
                   for key, list_ in parsed_info.items()}

    # Filter to only images matching the appropriate tags
    from ibeis import tag_funcs
    parsed_info['tags_list'] = parse_shark_tags(parsed_info['orig_fname_list'])
    tag_flags = tag_funcs.filterflags_general_tags(
        parsed_info['tags_list'],
        has_any=['view-left'],
        none_match=['qual.*', 'view-top', 'part-.*', 'cropped'],
    )
    parsed_info = {key: ut.compress(list_, tag_flags)
                   for key, list_ in parsed_info.items()}
    print('Tags in chosen images:')
    print(ut.dict_hist(ut.flatten(parsed_info['tags_list'])))

    # Download selected subset
    print('Downloading selected subset')
    _iter = list(zip(parsed_info['img_url_list'], parsed_info['new_fpath_list']))
    _iter = ut.ProgressIter(_iter, lbl='downloading sharks')
    for img_url, new_fpath in _iter:
        if not exists(new_fpath):
            ut.download_url(img_url, new_fpath)

    # Remove corrupted or ill-formatted images
    print('Checking for corrupted images')
    import vtool as vt
    noncorrupt_flags = vt.filterflags_valid_images(parsed_info['new_fpath_list'])
    parsed_info = {key: ut.compress(list_, noncorrupt_flags)
                   for key, list_ in parsed_info.items()}

    print('Removing small images')
    import numpy as np
    imgsize_list = np.array([vt.open_image_size(gpath)
                             for gpath in parsed_info['new_fpath_list']])
    sqrt_area_list = np.sqrt(np.prod(imgsize_list, axis=1))
    areq_flags_list = sqrt_area_list >= 750
    parsed_info = {key: ut.compress(list_, areq_flags_list)
                   for key, list_ in parsed_info.items()}

    # Keep only names with at least two images
    grouped_idxs = ut.group_items(list(range(len(parsed_info['nameid_list']))),
                                  parsed_info['nameid_list'])
    keep_idxs = sorted(ut.flatten([idxs for key, idxs in grouped_idxs.items()
                                   if len(idxs) >= 2]))
    parsed_info = {key: ut.take(list_, keep_idxs)
                   for key, list_ in parsed_info.items()}

    print('Moving images to secondary directory')
    named_outputdir = 'named-left-sharkimages'
    # Build names
    parsed_info['namedir_fpath_list'] = [
        join(named_outputdir, _nameid, _fname)
        for _fname, _nameid in zip(parsed_info['new_fname_list'],
                                   parsed_info['nameid_list'])]
    # Create directories
    ut.ensuredir(named_outputdir)
    named_dirs = ut.unique_ordered(list(map(dirname, parsed_info['namedir_fpath_list'])))
    for dir_ in named_dirs:
        ut.ensuredir(dir_)
    # Copy
    ut.copy_files_to(src_fpath_list=parsed_info['new_fpath_list'],
                     dst_fpath_list=parsed_info['namedir_fpath_list'])
def detect_gid_list(ibs, gid_list, downsample=True, verbose=VERBOSE_SS, **kwargs):
    """
    Args:
        gid_list (list of int): the list of IBEIS image_rowids that need detection
        downsample (bool, optional): a flag to indicate if the original image
            sizes should be used; defaults to True
                True:  ibs.get_image_detectpaths() is used
                False: ibs.get_image_paths() is used

    Kwargs (optional):
        refer to the SSD documentation for configuration settings

    Args:
        ibs (ibeis.IBEISController): image analysis api
        gid_list (list of int): the list of IBEIS image_rowids that need detection
        downsample (bool, optional): a flag to indicate if the original image
            sizes should be used; defaults to True

    Kwargs:
        detector, config_filepath, weights_filepath, verbose

    Yields:
        tuple: (gid, gpath, result_list)

    CommandLine:
        python -m ibeis.algo.detect.ssd detect_gid_list --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.detect.ssd import *  # NOQA
        >>> from ibeis.core_images import LocalizerConfig
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> gid_list = ibs.get_valid_gids()
        >>> config = {'verbose': True}
        >>> downsample = False
        >>> results_list = detect_gid_list(ibs, gid_list, downsample, **config)
        >>> results_list = list(results_list)
        >>> print('result lens = %r' % (map(len, list(results_list))))
        >>> print('result[0] = %r' % (len(list(results_list[0][2]))))
        >>> config = {'verbose': True}
        >>> downsample = False
        >>> results_list = detect_gid_list(ibs, gid_list, downsample, **config)
        >>> results_list = list(results_list)
        >>> print('result lens = %r' % (map(len, list(results_list))))
        >>> print('result[0] = %r' % (len(list(results_list[0][2]))))
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()

    Yields:
        results (list of dict)
    """
    # Get new gpaths if downsampling
    if downsample:
        gpath_list = ibs.get_image_detectpaths(gid_list)
        neww_list = [vt.open_image_size(gpath)[0] for gpath in gpath_list]
        oldw_list = [oldw for (oldw, oldh) in ibs.get_image_sizes(gid_list)]
        downsample_list = [oldw / neww for oldw, neww in zip(oldw_list, neww_list)]
        orient_list = [1] * len(gid_list)
    else:
        gpath_list = ibs.get_image_paths(gid_list)
        downsample_list = [None] * len(gpath_list)
        orient_list = ibs.get_image_orientation(gid_list)
    # Run detection
    results_iter = detect(gpath_list, verbose=verbose, **kwargs)
    # Upscale the results
    _iter = zip(downsample_list, gid_list, orient_list, results_iter)
    for downsample, gid, orient, (gpath, result_list) in _iter:
        # Upscale the results back up to the original image size
        for result in result_list:
            if downsample is not None and downsample != 1.0:
                for key in ['xtl', 'ytl', 'width', 'height']:
                    result[key] = int(result[key] * downsample)
            bbox = (result['xtl'], result['ytl'], result['width'], result['height'])
            bbox_list = [bbox]
            bbox = bbox_list[0]
            result['xtl'], result['ytl'], result['width'], result['height'] = bbox
        yield (gid, gpath, result_list)
def detect_gid_list(ibs, gid_list, downsample=False, **kwargs):
    """
    Args:
        gid_list (list of int): the list of IBEIS image_rowids that need detection
        downsample (bool, optional): a flag to indicate if the original image
            sizes should be used; defaults to False
                True:  ibs.get_image_detectpaths() is used
                False: ibs.get_image_paths() is used

    Kwargs (optional):
        refer to the PyDarknet documentation for configuration settings

    Args:
        ibs (ibeis.IBEISController): image analysis api
        gid_list (list of int): the list of IBEIS image_rowids that need detection
        downsample (bool, optional): a flag to indicate if the original image
            sizes should be used; defaults to False

    Kwargs:
        detector, config_filepath, weights_filepath, verbose

    Yields:
        tuple: (gid, gpath, result_list)

    CommandLine:
        python -m ibeis.algo.detect.yolo detect_gid_list --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.detect.yolo import *  # NOQA
        >>> from ibeis.core_images import LocalizerConfig
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='WS_ALL')
        >>> gid_list = ibs.images()._rowids[0:1]
        >>> kwargs = config = LocalizerConfig(**{
        >>>     'weights_filepath': '/media/raid/work/WS_ALL/localizer_backup/detect.yolo.2.39000.weights',
        >>>     'config_filepath': '/media/raid/work/WS_ALL/localizer_backup/detect.yolo.2.cfg',
        >>> })
        >>> exec(ut.execstr_dict(config), globals())
        >>> # classes_fpath = '/media/raid/work/WS_ALL/localizer_backup/detect.yolo.2.cfg.classes'
        >>> downsample = False
        >>> (gid, gpath, result_list) = detect_gid_list(ibs, gid_list, downsample, **config)
        >>> result = ('(gid, gpath, result_list) = %s' % (ut.repr2((gid, gpath, result_list)),))
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()

    Yields:
        results (list of dict)
    """
    # Get new gpaths if downsampling
    if downsample:
        gpath_list = ibs.get_image_detectpaths(gid_list)
        neww_list = [vt.open_image_size(gpath)[0] for gpath in gpath_list]
        oldw_list = [oldw for (oldw, oldh) in ibs.get_image_sizes(gid_list)]
        downsample_list = [oldw / neww for oldw, neww in zip(oldw_list, neww_list)]
        orient_list = [1] * len(gid_list)
    else:
        gpath_list = ibs.get_image_paths(gid_list)
        downsample_list = [None] * len(gpath_list)
        orient_list = ibs.get_image_orientation(gid_list)
    # Run detection
    results_iter = detect(gpath_list, **kwargs)
    # Upscale the results
    _iter = zip(downsample_list, gid_list, orient_list, results_iter)
    for downsample, gid, orient, (gpath, result_list) in _iter:
        # Upscale the results back up to the original image size
        for result in result_list:
            if downsample is not None and downsample != 1.0:
                for key in ['xtl', 'ytl', 'width', 'height']:
                    result[key] = int(result[key] * downsample)
            bbox = (result['xtl'], result['ytl'], result['width'], result['height'])
            bbox_list = [bbox]
            bbox = bbox_list[0]
            result['xtl'], result['ytl'], result['width'], result['height'] = bbox
        yield (gid, gpath, result_list)