def _get_models(ibs, species, modeldir="default", cfg_override=True, verbose=VERBOSE_RF): r""" Args: ibs (IBEISController): ibeis controller object species (?): modeldir (str): (default = 'default') cfg_override (bool): (default = True) verbose (bool): verbosity flag(default = False) Returns: ?: fpath_list CommandLine: python -m ibeis.algo.detect.randomforest --test-_get_models Example: >>> # ENABLE_DOCTEST >>> from ibeis.algo.detect.randomforest import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='testdb1') >>> species = ibeis.const.TEST_SPECIES.ZEB_PLAIN >>> modeldir = 'default' >>> cfg_override = True >>> verbose = False >>> fpath_list = _get_models(ibs, species, modeldir, cfg_override, verbose) >>> result = ('fpath_list = %s' % (str(fpath_list),)) >>> print(result) """ # with ut.embed_on_exception_context: if cfg_override and len(ibs.cfg.detect_cfg.trees_path) > 0: trees_path = ibs.cfg.detect_cfg.trees_path else: # Ensure all models downloaded and accounted for assert species is not None, "[_get_models] Cannot detect without specifying a species" grabmodels.ensure_models(modeldir=modeldir, verbose=verbose) trees_path = grabmodels.get_species_trees_paths(species, modeldir=modeldir) # Load tree paths if ut.checkpath(trees_path, verbose=verbose): fpath_list = ut.ls(trees_path, "*.txt") # direct = Directory(trees_path, include_extensions=['txt']) # files = direct.files() else: # If the models do not exist, return None fpath_list = None if fpath_list is None or len(fpath_list) == 0: msg = ( ut.codeblock( """ [_get_models] Error loading trees, either directory or fpath_list not found * trees_path = %r * fpath_list = %r * species = %r * model_dir = %r * cfg_override = %r """ ) % (trees_path, fpath_list, species, modeldir, cfg_override) ) raise AssertionError(msg) return fpath_list
def init_console2():
    assert ut.WIN32, 'win32 only script'
    url = 'http://downloads.sourceforge.net/project/console/console-devel/2.00/Console-2.00b148-Beta_32bit.zip'
    unzipped_fpath = ut.grab_zipped_url(url)  # FIXME: bugged
    unzipped_fpath2 = join(dirname(unzipped_fpath), 'Console2')
    win32_bin = ut.truepath('~/local/PATH')
    ut.copy(ut.ls(unzipped_fpath2), win32_bin)
def list_published_distinctivness():
    r"""
    CommandLine:
        python -m ibeis.algo.hots.distinctiveness_normalizer --test-list_published_distinctivness

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.distinctiveness_normalizer import *  # NOQA
        >>> published_fpaths = list_published_distinctivness()
        >>> print(ut.list_str(published_fpaths))
    """
    published_fpaths = ut.ls(PUBLISH_DIR)
    return published_fpaths
def list_published_distinctivness():
    r"""
    CommandLine:
        python -m wbia.algo.hots.distinctiveness_normalizer --test-list_published_distinctivness

    Example:
        >>> # SLOW_DOCTEST
        >>> from wbia.algo.hots.distinctiveness_normalizer import *  # NOQA
        >>> published_fpaths = list_published_distinctivness()
        >>> print(ut.repr2(published_fpaths))
    """
    published_fpaths = ut.ls(PUBLISH_DIR)
    return published_fpaths
def load_splitsets(dataset):
    import parse
    fpath_dict = {}
    fmtstr = dataset.get_split_fmtstr(forward=False)
    for fpath in ut.ls(dataset.split_dpath):
        parsed = parse.parse(fmtstr, basename(fpath))
        if parsed is None:
            print('WARNING: invalid filename %r' % (fpath,))
            continue
        key = parsed['key']
        type_ = parsed['type_']
        splitset = fpath_dict.get(key, {})
        splitset[type_] = fpath
        fpath_dict[key] = splitset
    # check validity of loaded data
    for key, val in fpath_dict.items():
        assert 'data' in val, 'subset missing data'
    dataset.fpath_dict.update(**fpath_dict)
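# Hedged illustration (not part of the original module): load_splitsets recovers
# the split files by matching a format string against each filename with the
# third-party `parse` library. A minimal, self-contained sketch of that round
# trip; the format string with 'key' and 'type_' fields is an assumption about
# what get_split_fmtstr(forward=False) returns.
import parse

fmtstr = '{key}_{type_}.hdf5'       # hypothetical split filename format
fname = 'fold0_data.hdf5'
parsed = parse.parse(fmtstr, fname)
if parsed is not None:
    print(parsed['key'], parsed['type_'])   # -> fold0 data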
def list_distinctivness_cache():
    global_distinctdir = sysres.get_global_distinctiveness_modeldir()
    print(ut.list_str(ut.ls(global_distinctdir)))
def test_pyrf(): r""" CommandLine: python run_tests.py --test-test_pyrf Example: >>> # ENABLE_DOCTEST >>> from run_tests import * # NOQA >>> result = test_pyrf() >>> print(result) """ #================================= # Initialization #================================= category = 'zebra_plains' #detect_config = { # 'save_detection_images': True, # 'percentage_top': 0.40, #} testdata_dir = ut.unixpath('~/code/pyrf/results') # assert ut.checkpath(testdata_dir) if ut.get_argflag('--vd'): print(ut.ls(testdata_dir)) # Create detector detector = Random_Forest_Detector() test_path = ut.grab_zipped_url(TEST_DATA_DETECT_URL, appname='utool') models_path = ut.grab_zipped_url(TEST_DATA_MODEL_URL, appname='utool') trees_path = join(models_path, category) detect_path = join(test_path, category, 'detect') ut.ensuredir(detect_path) ut.ensuredir(test_path) ut.ensuredir(trees_path) #================================= # Load Input Images #================================= # Get input images big_gpath_list = ut.list_images(test_path, fullpath=True, recursive=False) print(big_gpath_list) # Resize images to standard size if ut.get_argflag('--small'): big_gpath_list = big_gpath_list[0:8] #big_gpath_list = big_gpath_list[0:8] output_dir = join(test_path, 'resized') std_gpath_list = resize_imagelist_to_sqrtarea(big_gpath_list, sqrt_area=800, output_dir=output_dir, checkexists=True) dst_gpath_list = [join(detect_path, split(gpath)[1]) for gpath in std_gpath_list] #ut.view_directory(test_path) #ut.view_directory('.') print(std_gpath_list) num_images = len(std_gpath_list) #assert num_images == 16, 'the test has diverged!' print('Testing on %r images' % num_images) #================================= # Load Pretrained Forests #================================= # Load forest, so we don't have to reload every time trees_fpath_list = ut.ls(trees_path, '*.txt') #forest = detector.load(trees_path, category + '-') forest = detector.forest(trees_fpath_list) #detector.set_detect_params(**detect_config) results_list1 = [] #================================= # Detect using Random Forest #================================= with ut.Timer('[test_pyrf] for loop detector.detect') as t1: if not ut.get_argflag('--skip1'): results_list1 = detector.detect(forest, std_gpath_list, output_gpath_list=dst_gpath_list) #for ix, (img_fpath, dst_fpath) in enumerate(zip(std_gpath_list, dst_gpath_list)): # #img_fname = split(img_fpath)[1] # #dst_fpath = join(detect_path, img_fname) # #print(' * img_fpath = %r' % img_fpath) # #print(' * dst_fpath = %r' % dst_fpath) # with ut.Timer('[test_pyrf] detector.detect ix=%r' % (ix,)): # results = detector.detect(forest, img_fpath, dst_fpath) # results_list1.append(results) # print('num results = %r' % len(results)) #else: # print('...skipped') #with ut.Timer('[test_pyrf] detector.detect_many') as t2: # results_list2 = detector.detect_many(forest, std_gpath_list, # dst_gpath_list, use_openmp=True) detector.free_forest(forest) print('') print('+ --------------') print('| total time1: %r' % t1.ellapsed) #print('| total time2: %r' % t2.ellapsed) print('|') print('| num results1 = %r' % (list(map(len, results_list1)))) #print('| num results2 = %r' % (list(map(len, results_list2)))) #assert results_list2 == results_list1 return locals()
def load_oxford_2007():
    """
    Loads data from
    http://www.robots.ox.ac.uk:5000/~vgg/publications/2007/Philbin07/philbin07.pdf

    >>> from wbia.algo.smk.script_smk import *  # NOQA
    """
    from os.path import join, basename, splitext
    import pandas as pd
    import vtool as vt

    dbdir = ut.truepath('/raid/work/Oxford/')
    data_fpath0 = join(dbdir, 'data_2007.pkl')

    if ut.checkpath(data_fpath0):
        data = ut.load_data(data_fpath0)
        return data
    else:
        word_dpath = join(dbdir, 'word_oxc1_hesaff_sift_16M_1M')
        _word_fpath_list = ut.ls(word_dpath)
        imgid_to_word_fpath = {
            splitext(basename(word_fpath))[0]: word_fpath
            for word_fpath in _word_fpath_list
        }
        readme_fpath = join(dbdir, 'README2.txt')
        imgid_order = ut.readfrom(readme_fpath).split('\n')[20:-1]

        imgid_order = imgid_order
        data_uri_order = [x.replace('oxc1_', '') for x in imgid_order]

        imgid_to_df = {}
        for imgid in ut.ProgIter(imgid_order, label='reading kpts'):
            word_fpath = imgid_to_word_fpath[imgid]
            row_gen = (
                map(float, line.strip('\n').split(' '))
                for line in ut.read_lines_from(word_fpath)[2:]
            )
            rows = [
                (int(word_id), x, y, e11, e12, e22)
                for (word_id, x, y, e11, e12, e22) in row_gen
            ]
            df = pd.DataFrame(rows, columns=['word_id', 'x', 'y', 'e11', 'e12', 'e22'])
            imgid_to_df[imgid] = df

        df_list = ut.take(imgid_to_df, imgid_order)

        nfeat_list = [len(df_) for df_ in df_list]
        offset_list = [0] + ut.cumsum(nfeat_list)
        shape = (offset_list[-1], 128)
        # shape = (16334970, 128)
        sift_fpath = join(dbdir, 'OxfordSIFTDescriptors', 'feat_oxc1_hesaff_sift.bin')
        try:
            file_ = open(sift_fpath, 'rb')
            with ut.Timer('Reading SIFT binary file'):
                nbytes = np.prod(shape)
                all_vecs = np.fromstring(file_.read(nbytes), dtype=np.uint8)
            all_vecs = all_vecs.reshape(shape)
        finally:
            file_.close()

        kpts_list = [
            df_.loc[:, ('x', 'y', 'e11', 'e12', 'e22')].values for df_ in df_list
        ]
        wordid_list = [df_.loc[:, 'word_id'].values for df_ in df_list]
        kpts_Z = np.vstack(kpts_list)
        idx_to_wx = np.hstack(wordid_list)
        # assert len(np.unique(idx_to_wx)) == 1E6

        # Reqd standard query order
        query_files = sorted(ut.glob(dbdir + '/oxford_groundtruth', '*_query.txt'))
        query_uri_order = []
        for qpath in query_files:
            text = ut.readfrom(qpath, verbose=0)
            query_uri = text.split(' ')[0].replace('oxc1_', '')
            query_uri_order.append(query_uri)

        logger.info('converting to invV')
        all_kpts = vt.convert_kptsZ_to_kpts(kpts_Z)

        data = {
            'offset_list': offset_list,
            'all_kpts': all_kpts,
            'all_vecs': all_vecs,
            'idx_to_wx': idx_to_wx,
            'data_uri_order': data_uri_order,
            'query_uri_order': query_uri_order,
        }
        ut.save_data(data_fpath0, data)
    return data
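# Aside (not part of the original function): np.fromstring on binary data is
# deprecated in modern NumPy; the packed uint8 SIFT descriptors above can be
# read equivalently with np.frombuffer. A minimal sketch, assuming the same
# (nrows, 128) shape used in load_oxford_2007:
import numpy as np


def read_packed_uint8(fpath, shape):
    """Read a flat binary file of uint8 values and reshape it."""
    nbytes = int(np.prod(shape))
    with open(fpath, 'rb') as file_:
        all_vecs = np.frombuffer(file_.read(nbytes), dtype=np.uint8)
    return all_vecs.reshape(shape)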
def build_linux_zip_binaries():
    fpath_list = ut.ls('dist/ibeis')
    archive_fpath = 'dist/ibeis-linux-binary.zip'
    ut.archive_files(archive_fpath, fpath_list)
    return archive_fpath
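# Hedged sketch (not the utool implementation): roughly what the ut.archive_files
# call above amounts to, written with only the standard library. It assumes the
# intent is simply to pack the files directly inside the dist directory into a
# zip; ut.archive_files may differ in details such as recursion or arcnames.
import os
import zipfile


def zip_directory(dpath, archive_fpath):
    """Write every regular file directly inside dpath into a zip archive."""
    with zipfile.ZipFile(archive_fpath, 'w', zipfile.ZIP_DEFLATED) as zfile:
        for fname in os.listdir(dpath):
            fpath = os.path.join(dpath, fname)
            if os.path.isfile(fpath):
                zfile.write(fpath, arcname=fname)
    return archive_fpath

# example: zip_directory('dist/ibeis', 'dist/ibeis-linux-binary.zip')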
def ls(self):
    return ut.ls(self.abspath)
def train_gid_list(ibs, gid_list, trees_path=None, species=None, setup=True,
                   teardown=False, **kwargs):
    """
    Args:
        gid_list (list of int): the list of IBEIS image_rowids that need detection
        trees_path (str): the path that the trees will be saved into (along
            with temporary training inventory folders that are deleted once
            training is finished)
        species (str): the species that should be used to assign to the newly
            trained trees

    Kwargs (optional):
        refer to the PyRF documentation for configuration settings

    Returns:
        None
    """
    print("[randomforest.train()] training with %d gids and species=%r" % (
        len(gid_list), species, ))
    if trees_path is None and species is not None:
        trees_path = join(ibs.get_cachedir(), 'trees', species)

    # Get positive chip paths
    if species is None:
        aids_list = ibs.get_image_aids(gid_list)
    else:
        aids_list = ibs.get_image_aids_of_species(gid_list, species)

    # ##### TEMP #####
    # gid_list_ = []
    # aids_list_ = []
    # for gid, aid_list in zip(gid_list, aids_list):
    #     if len(aid_list) > 1:
    #         gid_list_.append(gid)
    #         aids_list_.append(aid_list)
    #     elif len(aid_list) == 1:
    #         (xtl, ytl, width, height) = ibs.get_annot_bboxes(aid_list)[0]
    #         if xtl > 5 and ytl > 5:
    #             gid_list_.append(gid)
    #             aids_list_.append(aid_list)
    # gid_list = gid_list_
    # aids_list = aids_list_
    # kwargs['trees_max_patches'] = 100000
    # ##### TEMP #####

    aid_list = ut.flatten(aids_list)
    train_pos_cpath_list = ibs.get_annot_chip_fpath(aid_list)

    # Ensure directories for negatives
    negatives_cache = join(ibs.get_cachedir(), 'pyrf_train_negatives')
    if (setup and not exists(negatives_cache)) or setup == 'force':
        # Force Check
        if exists(negatives_cache):
            ut.remove_dirs(negatives_cache)
        ut.ensuredir(negatives_cache)
        # Get negative chip paths
        print("[randomforest.train()] Mining %d negative patches" %
              (len(train_pos_cpath_list), ))
        train_neg_cpath_list = []
        while len(train_neg_cpath_list) < len(train_pos_cpath_list):
            sample = random.randint(0, len(gid_list) - 1)
            gid = gid_list[sample]
            img_width, img_height = ibs.get_image_sizes(gid)
            size = min(img_width, img_height)
            if species is None:
                aid_list = ibs.get_image_aids(gid)
            else:
                aid_list = ibs.get_image_aids_of_species(gid, species)
            annot_bbox_list = ibs.get_annot_bboxes(aid_list)
            # Find square patches
            square = random.randint(int(size / 4), int(size / 2))
            xmin = random.randint(0, img_width - square)
            xmax = xmin + square
            ymin = random.randint(0, img_height - square)
            ymax = ymin + square
            if _valid_candidate((xmin, xmax, ymin, ymax), annot_bbox_list):
                if VERBOSE_RF:
                    print(
                        "[%d / %d] MINING NEGATIVE PATCH (%04d, %04d, %04d, %04d) FROM GID %d"
                        % (len(train_neg_cpath_list), len(train_pos_cpath_list),
                           xmin, xmax, ymin, ymax, gid, ))
                img = ibs.get_images(gid)
                img_path = join(negatives_cache,
                                "neg_%07d.JPEG" % (len(train_neg_cpath_list), ))
                img = img[ymin:ymax, xmin:xmax]
                cv2.imwrite(img_path, img)
                train_neg_cpath_list.append(img_path)
    else:
        train_neg_cpath_list = ut.ls(negatives_cache, '*.JPEG')
        #direct = Directory(negatives_cache, include_extensions=['JPEG'])
        #train_neg_cpath_list = direct.files()

    # Train trees
    train_gpath_list(ibs, train_pos_cpath_list, train_neg_cpath_list,
                     trees_path=trees_path, species=species, **kwargs)

    # Remove cached negatives directory
    if teardown:
        ut.remove_dirs(negatives_cache)
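# Hedged sketch (the real _valid_candidate lives elsewhere in the module and is
# not shown here): a plausible implementation of the check used above, accepting
# a candidate square only if it does not overlap any annotated bounding box.
# The name, signature, and exact overlap policy are assumptions for illustration.
def _valid_candidate_sketch(candidate, annot_bbox_list):
    """candidate is (xmin, xmax, ymin, ymax); bboxes are (xtl, ytl, width, height)."""
    xmin, xmax, ymin, ymax = candidate
    for (xtl, ytl, width, height) in annot_bbox_list:
        # reject the candidate if it intersects this annotation
        overlaps_x = xmin < xtl + width and xmax > xtl
        overlaps_y = ymin < ytl + height and ymax > ytl
        if overlaps_x and overlaps_y:
            return False
    return True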
def batch_move(r, search, repl, force=False):
    r"""
    This function has not yet been successfully implemented. It's a start though.

    rob batch_move train_patchmetric\(.*\) patchmatch\1 False

    ut.named_field('rest', '.*')
    ut.backref_field('rest')
    search = 'train_patchmetric(?P<rest>.*)'
    repl = 'patchmatch\\g<rest>'
    """
    force = rutil.cast(force, bool)
    # rob batch_move '\(*\)util.py' 'util_\1.py'
    print('Batch Move')
    print('force = %r' % force)
    print('search = %r' % search)
    print('repl = %r' % repl)
    dpath_list = [os.getcwd()]
    spec_open = ['\\(', '\(']
    spec_close = ['\\)', '\)']
    special_repl_strs = ['\1', '\\1']
    print('special_repl_strs = %r' % special_repl_strs)
    print('special_search_strs = %r' % ((spec_open, spec_close,),))
    search_pat = ut.extend_regex(search)
    #for spec in spec_open + spec_close:
    #    search_pat = search_pat.replace(spec, '')
    print('search_pat=%r' % search_pat)
    include_patterns = [search_pat]
    import utool as ut
    import re
    fpath_list = ut.ls('.')
    matching_fpaths = [fpath for fpath in fpath_list
                       if re.search(search_pat, basename(fpath))]
    repl_fpaths = [re.sub(search_pat, repl, fpath) for fpath in matching_fpaths]
    ut.rrrr()
    for fpath1, fpath2 in zip(matching_fpaths, repl_fpaths):
        ut.util_path.copy(fpath1, fpath2, deeplink=False, dryrun=False)
    #for fpath in rob_nav._matching_fnames(dpath_list, include_patterns, recursive=False):
    #    print(fpath)
    return

    parse_str = search
    for spec in spec_open:
        parse_str = parse_str.replace(spec, '{')
    for spec in spec_close:
        parse_str = parse_str.replace(spec, '}')
    parse_str = parse_str.replace('{*}', '{}')
    print('parse_str = %r' % parse_str)
    for fpath in rob_nav._matching_fnames(dpath_list, include_patterns, recursive=False):
        dpath, fname = split(fpath)
        name, ext = splitext(fname)
        # Hard coded parsing
        parsed = parse.parse(parse_str, fname)
        repl1 = parsed[0]
        #print(fname)
        newfname = 'util_' + repl1 + ext
        newfpath = join(dpath, newfname)
        print('move')
        print(fpath)
        print(newfpath)
        if force is True:
            shutil.move(fpath, newfpath)
            print('real run')
        else:
            print('dry run')
            pass
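# Hedged sketch (not the rob tool itself): the regex rename that batch_move is
# reaching for, reduced to the standard library. It runs as a dry run unless
# force=True; the search/repl semantics follow re.sub. Function name and
# behavior are assumptions for illustration only.
import os
import re
import shutil


def regex_rename(dpath, search, repl, force=False):
    """Rename files in dpath whose basenames match `search`, via re.sub."""
    for fname in os.listdir(dpath):
        if re.search(search, fname) is None:
            continue
        newfname = re.sub(search, repl, fname)
        src = os.path.join(dpath, fname)
        dst = os.path.join(dpath, newfname)
        print('%s -> %s' % (src, dst))
        if force:
            shutil.move(src, dst)

# example: regex_rename('.', r'train_patchmetric(.*)', r'patchmatch\1', force=False)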
def list_distinctivness_cache():
    global_distinctdir = sysres.get_global_distinctiveness_modeldir()
    logger.info(ut.repr2(ut.ls(global_distinctdir)))
def test_pyrf():
    category = 'zebra_plains'

    detect_config = {
        'save_detection_images': True,
        'save_scales': True,
        'percentage_top': 0.40,
    }

    #=================================
    # Train / Detect Initialization
    #=================================

    testdata_dir = utool.unixpath('~/code/pyrf/results')
    # assert utool.checkpath(testdata_dir)
    if utool.get_argflag('--vd'):
        print(utool.ls(testdata_dir))

    # Create detector
    detector = Random_Forest_Detector()

    test_path = utool.grab_zipped_url(TEST_DATA_DETECT_URL, appname='utool')
    models_path = utool.grab_zipped_url(TEST_DATA_MODEL_URL, appname='utool')
    trees_path = join(models_path, category)

    results_path = join(utool.unixpath('~/code/pyrf/results'), category)
    # detect_path = join(results_path, 'detect')
    trees_path = join(results_path, 'trees')

    detect_path = join(test_path, category, 'detect')
    utool.ensuredir(detect_path)
    utool.ensuredir(test_path)
    utool.ensuredir(trees_path)

    #=================================
    # Detect using Random Forest
    #=================================

    # Get input images
    from vtool import image
    big_gpath_list = utool.list_images(test_path, fullpath=True, recursive=False)
    print(big_gpath_list)
    # Resize images to standard size
    if utool.get_argflag('--small'):
        big_gpath_list = big_gpath_list[0:8]
    #big_gpath_list = big_gpath_list[0:8]
    output_dir = join(test_path, 'resized')
    std_gpath_list = image.resize_imagelist_to_sqrtarea(big_gpath_list,
                                                        sqrt_area=800,
                                                        output_dir=output_dir,
                                                        checkexists=True)
    dst_gpath_list = [join(detect_path, split(gpath)[1]) for gpath in std_gpath_list]
    #utool.view_directory(test_path)
    #utool.view_directory('.')
    print(std_gpath_list)
    num_images = len(std_gpath_list)
    #assert num_images == 16, 'the test has diverged!'
    print('Testing on %r images' % num_images)

    # Load forest, so we don't have to reload every time
    forest = detector.load(trees_path, category + '-', num_trees=25)
    detector.set_detect_params(**detect_config)
    results_list1 = []

    with utool.Timer('[test_pyrf] for loop detector.detect') as t1:
        if not utool.get_argflag('--skip1'):
            for ix, (img_fpath, dst_fpath) in enumerate(zip(std_gpath_list, dst_gpath_list)):
                #img_fname = split(img_fpath)[1]
                #dst_fpath = join(detect_path, img_fname)
                #print(' * img_fpath = %r' % img_fpath)
                #print(' * dst_fpath = %r' % dst_fpath)
                with utool.Timer('[test_pyrf] detector.detect ix=%r' % (ix,)):
                    results = detector.detect(forest, img_fpath, dst_fpath)
                results_list1.append(results)
                print('num results = %r' % len(results))
        else:
            print('...skipped')

    # with utool.Timer('[test_pyrf] detector.detect_many') as t2:
    #     results_list2 = detector.detect_many(forest, std_gpath_list,
    #                                          dst_gpath_list, use_openmp=True)

    print('')
    print('+ --------------')
    print('| total time1: %r' % t1.ellapsed)
    # print('| total time2: %r' % t2.ellapsed)
    print('|')
    print('| num results1 = %r' % (list(map(len, results_list1))))
    # print('| num results2 = %r' % (list(map(len, results_list2))))
    #assert results_list2 == results_list1
    return locals()
def make_wordfigures(ibs, metrics, invindex, figdir, wx_sample, wx2_dpath):
    """
    Builds mosaics of patches assigned to words in the sample
    and outputs them to disk
    """
    from plottool import draw_func2 as df2
    import vtool as vt
    import parse

    vocabdir = join(figdir, 'vocab_patches2')
    ut.ensuredir(vocabdir)
    dump_word_patches(ibs, vocabdir, invindex, wx_sample, metrics)

    # COLLECTING PART --- collects patches in word folders
    #vocabdir

    seldpath = vocabdir + '_selected'
    ut.ensurepath(seldpath)
    # stack for show
    for wx, dpath in ut.progiter(six.iteritems(wx2_dpath),
                                 lbl='Dumping Word Images:',
                                 num=len(wx2_dpath), freq=1, backspace=False):
        #df2.rrr()
        fpath_list = ut.ls(dpath)
        fname_list = [basename(fpath_) for fpath_ in fpath_list]
        patch_list = [gtool.imread(fpath_) for fpath_ in fpath_list]
        # color each patch by nid
        nid_list = [int(parse.parse('{}_nid={nid}_{}', fname)['nid'])
                    for fname in fname_list]
        nid_set = set(nid_list)
        nid_list = np.array(nid_list)
        if len(nid_list) == len(nid_set):
            # no duplicate names
            newpatch_list = patch_list
        else:
            # duplicate names. do coloring
            sortx = nid_list.argsort()
            patch_list = np.array(patch_list, dtype=object)[sortx]
            fname_list = np.array(fname_list, dtype=object)[sortx]
            nid_list = nid_list[sortx]
            colors = (255 * np.array(df2.distinct_colors(len(nid_set)))).astype(np.int32)
            color_dict = dict(zip(nid_set, colors))
            wpad, hpad = 3, 3
            newshape_list = [tuple((np.array(patch.shape) + (wpad * 2, hpad * 2, 0)).tolist())
                             for patch in patch_list]
            color_list = [color_dict[nid_] for nid_ in nid_list]
            newpatch_list = [np.zeros(shape) + color[None, None]
                             for shape, color in zip(newshape_list, color_list)]
            for patch, newpatch in zip(patch_list, newpatch_list):
                newpatch[wpad:-wpad, hpad:-hpad, :] = patch
        #img_list = patch_list
        #bigpatch = vt.stack_image_recurse(patch_list)
        #bigpatch = vt.stack_image_list(patch_list, vert=False)
        bigpatch = vt.stack_square_images(newpatch_list)
        bigpatch_fpath = join(seldpath, basename(dpath) + '_patches.png')

        #
        def _dictstr(dict_):
            str_ = ut.dict_str(dict_, newlines=False)
            str_ = str_.replace('\'', '').replace(': ', '=').strip('{},')
            return str_

        figtitle = '\n'.join([
            'wx=%r' % wx,
            'stat(pdist): %s' % _dictstr(metrics.wx2_pdist_stats[wx]),
            'stat(wdist): %s' % _dictstr(metrics.wx2_wdist_stats[wx]),
        ])
        metrics.wx2_nMembers[wx]

        df2.figure(fnum=1, doclf=True, docla=True)
        fig, ax = df2.imshow(bigpatch, figtitle=figtitle)
        #fig.show()
        df2.set_figtitle(figtitle)
        df2.adjust_subplots(top=.878, bottom=0)
        df2.save_figure(1, bigpatch_fpath)
def make_wordfigures(ibs, metrics, invindex, figdir, wx_sample, wx2_dpath):
    """
    Builds mosaics of patches assigned to words in the sample
    and outputs them to disk
    """
    from plottool import draw_func2 as df2
    import vtool as vt
    import parse

    vocabdir = join(figdir, 'vocab_patches2')
    ut.ensuredir(vocabdir)
    dump_word_patches(ibs, vocabdir, invindex, wx_sample, metrics)

    # COLLECTING PART --- collects patches in word folders
    #vocabdir

    seldpath = vocabdir + '_selected'
    ut.ensurepath(seldpath)
    # stack for show
    for wx, dpath in ut.progiter(six.iteritems(wx2_dpath),
                                 lbl='Dumping Word Images:',
                                 num=len(wx2_dpath), freq=1, backspace=False):
        #df2.rrr()
        fpath_list = ut.ls(dpath)
        fname_list = [basename(fpath_) for fpath_ in fpath_list]
        patch_list = [vt.imread(fpath_) for fpath_ in fpath_list]
        # color each patch by nid
        nid_list = [
            int(parse.parse('{}_nid={nid}_{}', fname)['nid']) for fname in fname_list
        ]
        nid_set = set(nid_list)
        nid_list = np.array(nid_list)
        if len(nid_list) == len(nid_set):
            # no duplicate names
            newpatch_list = patch_list
        else:
            # duplicate names. do coloring
            sortx = nid_list.argsort()
            patch_list = np.array(patch_list, dtype=object)[sortx]
            fname_list = np.array(fname_list, dtype=object)[sortx]
            nid_list = nid_list[sortx]
            colors = (255 * np.array(df2.distinct_colors(len(nid_set)))).astype(
                np.int32)
            color_dict = dict(zip(nid_set, colors))
            wpad, hpad = 3, 3
            newshape_list = [
                tuple((np.array(patch.shape) + (wpad * 2, hpad * 2, 0)).tolist())
                for patch in patch_list
            ]
            color_list = [color_dict[nid_] for nid_ in nid_list]
            newpatch_list = [
                np.zeros(shape) + color[None, None]
                for shape, color in zip(newshape_list, color_list)
            ]
            for patch, newpatch in zip(patch_list, newpatch_list):
                newpatch[wpad:-wpad, hpad:-hpad, :] = patch
        #img_list = patch_list
        #bigpatch = vt.stack_image_recurse(patch_list)
        #bigpatch = vt.stack_image_list(patch_list, vert=False)
        bigpatch = vt.stack_square_images(newpatch_list)
        bigpatch_fpath = join(seldpath, basename(dpath) + '_patches.png')

        #
        def _dictstr(dict_):
            str_ = ut.dict_str(dict_, newlines=False)
            str_ = str_.replace('\'', '').replace(': ', '=').strip('{},')
            return str_

        figtitle = '\n'.join([
            'wx=%r' % wx,
            'stat(pdist): %s' % _dictstr(metrics.wx2_pdist_stats[wx]),
            'stat(wdist): %s' % _dictstr(metrics.wx2_wdist_stats[wx]),
        ])
        metrics.wx2_nMembers[wx]

        df2.figure(fnum=1, doclf=True, docla=True)
        fig, ax = df2.imshow(bigpatch, figtitle=figtitle)
        #fig.show()
        df2.set_figtitle(figtitle)
        df2.adjust_subplots(top=.878, bottom=0)
        df2.save_figure(1, bigpatch_fpath)
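# Hedged aside (illustration only, not part of make_wordfigures): the name id is
# pulled out of each patch filename with the `parse` library using the
# '{}_nid={nid}_{}' pattern seen above. A self-contained check with a made-up
# filename, since the real patch filenames are produced elsewhere:
import parse

fname = 'wx=12_nid=42_aid=7.png'    # hypothetical patch filename
nid = int(parse.parse('{}_nid={nid}_{}', fname)['nid'])
print(nid)  # -> 42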
        if fpaths:
            cmd_to_fpaths[cmd].extend(fpaths)

    for key in cmd_to_fpaths.keys():
        cmd = key.lstrip('\\')
        if not root.find_descendant_type(cmd):
            print(key)

    from os.path import abspath, dirname
    used_fpaths = ut.flatten(cmd_to_fpaths.values())
    used_fpaths = set(ut.emap(abspath, used_fpaths))
    all_fpaths = set(ut.emap(abspath, ut.glob('.', ['*.png', '*.jpg'], recursive=True)))
    unused = list(all_fpaths - used_fpaths)
    unuse_dirs = ut.group_items(unused, ut.emap(dirname, unused))

    semi_used = {}
    for dpath, fpaths in unuse_dirs.items():
        used_in_dpath = set(ut.ls(dpath)) - set(fpaths)
        if len(used_in_dpath) == 0:
            # completely unused directories
            print(dpath)
        else:
            semi_used[dpath] = fpaths

    print(ut.repr4(list(semi_used.keys())))
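# Hedged sketch (not from the original module): the grouping step above, which
# finds directories whose images are all unused, expressed with only the
# standard library instead of the utool helpers. Function name and return shape
# are assumptions for illustration.
import os
from collections import defaultdict


def group_unused_by_dir(all_fpaths, used_fpaths):
    """Return {directory: [unused files]}, printing fully unused directories."""
    unused = set(all_fpaths) - set(used_fpaths)
    by_dir = defaultdict(list)
    for fpath in unused:
        by_dir[os.path.dirname(fpath)].append(fpath)
    semi_used = {}
    for dpath, fpaths in by_dir.items():
        still_used = set(os.listdir(dpath)) - {os.path.basename(f) for f in fpaths}
        if not still_used:
            print(dpath)  # every file in this directory is unused
        else:
            semi_used[dpath] = fpaths
    return semi_used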