def sanity_checks(offset_list, Y_list, query_annots, ibs):
    nfeat_list = np.diff(offset_list)
    for Y, nfeat in ut.ProgIter(zip(Y_list, nfeat_list), 'checking'):
        assert nfeat == sum(ut.lmap(len, Y.fxs_list))

    if False:
        # Visualize queries
        # Look at the standard query images here
        # http://www.robots.ox.ac.uk:5000/~vgg/publications/2007/Philbin07/philbin07.pdf
        from wbia.viz import viz_chip
        import wbia.plottool as pt

        pt.qt4ensure()
        fnum = 1
        pnum_ = pt.make_pnum_nextgen(len(query_annots.aids) // 5, 5)
        for aid in ut.ProgIter(query_annots.aids):
            pnum = pnum_()
            viz_chip.show_chip(
                ibs,
                aid,
                in_image=True,
                annote=False,
                notitle=True,
                draw_lbls=False,
                fnum=fnum,
                pnum=pnum,
            )
def update_registry(drive):
    print('Updating registered files in %r' % (drive,))
    # Update existing files
    fpath_exists_list = list(map(
        exists, ut.ProgIter(drive.fpath_list, 'checkexist fpath', freq=1000)))
    dpath_exists_list = list(map(
        exists, ut.ProgIter(drive.dpath_list, 'checkexist dpath', freq=1000)))
    if all(fpath_exists_list):
        print('No change in file structure')
    else:
        print('%d/%d files no longer exist' % (
            len(drive.fpath_list) - sum(fpath_exists_list),
            len(drive.fpath_list)))
        removed_fpaths = ut.compress(drive.fpath_list,
                                     ut.not_list(fpath_exists_list))
        print('removed_fpaths = %s' % (ut.repr2(removed_fpaths),))
    if all(dpath_exists_list):
        print('No change in dpath structure')
    else:
        print('%d/%d dirs no longer exist' % (
            len(drive.dpath_list) - sum(dpath_exists_list),
            len(drive.dpath_list)))
        removed_dpaths = ut.compress(drive.dpath_list,
                                     ut.not_list(dpath_exists_list))
        print('removed_dpaths = %s' % (ut.repr2(removed_dpaths),))
    drive.fpath_list = ut.compress(drive.fpath_list, fpath_exists_list)
    drive.dpath_list = ut.compress(drive.dpath_list, dpath_exists_list)
    drive.cache.save('fpath_list', drive.fpath_list)
    drive.cache.save('dpath_list', drive.dpath_list)
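# Tiny illustration (made-up values) of the prune step used above:
# ut.compress keeps items whose flag is True, and ut.not_list negates the
# flag list, so the two calls partition the registry into kept and removed.
# fpaths = ['/d/a.txt', '/d/b.txt', '/d/c.txt']
# flags = [True, False, True]
# ut.compress(fpaths, flags)               -> ['/d/a.txt', '/d/c.txt']
# ut.compress(fpaths, ut.not_list(flags))  -> ['/d/b.txt']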
def dpath_similarity(index, dpath1, dpath2):
    d1 = index[dpath1]
    d2 = index[dpath2]
    set1 = {f.hashid for f in ut.ProgIter(d1.files)}
    set2 = {f.hashid for f in ut.ProgIter(d2.files)}
    # n_isect = len(set1.intersection(set2))
    size1, size2 = map(len, (set1, set2))
    # minsize = min(size1, size2)
    # sim_measures = (n_isect, n_isect / minsize)
    return ut.set_overlaps(set1, set2)
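# Illustration with plain sets (made-up hash ids) of the overlap statistics
# the comparison above is after; ut.set_overlaps returns a small summary of
# such counts, and the commented sim_measures lines compute the same idea
# by hand.
# set1 = {'h1', 'h2', 'h3'}
# set2 = {'h2', 'h3', 'h4', 'h5'}
# len(set1 & set2)                               -> 2
# len(set1 & set2) / min(len(set1), len(set2))   -> 0.666...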
def _enriched_pairwise_matches(extr, edges, prog_hook=None):
    """
    Adds extra domain specific local and global properties that the match
    object (feature correspondences) doesn't directly provide.

    Example:
        >>> # xdoctest: +REQUIRES(module:wbia_cnn)
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.verif.pairfeat import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb('testdb1')
        >>> match_config = {
        >>>     'K': 1, 'Knorm': 3, 'affine_invariance': True,
        >>>     'augment_orientation': True, 'checks': 20, 'ratio_thresh': 0.8,
        >>>     'refine_method': 'homog', 'sv_on': True, 'sver_xy_thresh': 0.01,
        >>>     'symmetric': True, 'weight': 'fgweights'
        >>> }
        >>> global_keys = ['gps', 'qual', 'time']
        >>> extr = PairwiseFeatureExtractor(ibs, match_config=match_config,
        >>>                                 global_keys=global_keys)
        >>> assert extr.global_keys == global_keys
        >>> edges = [(1, 2), (2, 3)]
        >>> prog_hook = None
        >>> match_list = extr._enriched_pairwise_matches(edges)
        >>> match1, match2 = match_list
        >>> assert match1.annot2 is match2.annot1
        >>> assert match1.annot1 is not match2.annot2
        >>> print('match1.global_measures = {!r}'.format(match1.global_measures))
        >>> assert len(match1.global_measures) == 3, 'global measures'
    """
    # logger.info('extr.global_keys = {!r}'.format(extr.global_keys))
    if extr.global_keys is None:
        raise ValueError('specify global keys')
    # global_keys = ['view_int', 'qual', 'gps', 'time']
    # global_keys = ['view', 'qual', 'gps', 'time']
    matches = extr._exec_pairwise_match(edges, prog_hook=prog_hook)
    if extr.need_lnbnn:
        extr._enrich_matches_lnbnn(matches, inplace=True)
    if extr.verbose:
        logger.info('[extr] enriching match attributes')
    # Ensure matches know about relevant metadata
    for match in matches:
        vt.matching.ensure_metadata_normxy(match.annot1)
        vt.matching.ensure_metadata_normxy(match.annot2)
    for match in ut.ProgIter(matches, label='setup globals'):
        match.add_global_measures(extr.global_keys)
    for match in ut.ProgIter(matches, label='setup locals'):
        match.add_local_measures()
    return matches
def find_pos_redun_candidate_edges(infr, k=None, verbose=False):
    r"""
    Searches for augmenting edges that would make PCCs k-positive redundant

    Doctest:
        >>> from wbia.algo.graph.mixin_matching import *  # NOQA
        >>> from wbia.algo.graph import demo
        >>> infr = demo.demodata_infr(ccs=[(1, 2, 3, 4, 5), (7, 8, 9, 10)])
        >>> infr.add_feedback((2, 5), 'match')
        >>> infr.add_feedback((1, 5), 'notcomp')
        >>> infr.params['redun.pos'] = 2
        >>> candidate_edges = list(infr.find_pos_redun_candidate_edges())
        >>> result = ('candidate_edges = ' + ut.repr2(candidate_edges))
        >>> print(result)
        candidate_edges = []
    """
    # Add random edges between existing non-redundant PCCs
    if k is None:
        k = infr.params['redun.pos']
    # infr.find_non_pos_redundant_pccs(k=k, relax=True)
    pcc_gen = list(infr.positive_components())
    prog = ut.ProgIter(pcc_gen, enabled=verbose, freq=1, adjust=False)
    for pcc in prog:
        if not infr.is_pos_redundant(pcc, k=k, relax=True,
                                     assume_connected=True):
            for edge in infr.find_pos_augment_edges(pcc, k=k):
                print()
                yield nxu.e_(*edge)
def _load_singles(qreq_):
    # Find existing cached chip matches
    # Try loading as many as possible
    fpath_list = qreq_.get_chipmatch_fpaths(qreq_.qaids)
    exists_flags = [exists(fpath) for fpath in fpath_list]
    qaids_hit = ut.compress(qreq_.qaids, exists_flags)
    fpaths_hit = ut.compress(fpath_list, exists_flags)
    # First, try a fast reload assuming no errors
    fpath_iter = ut.ProgIter(
        fpaths_hit,
        length=len(fpaths_hit),
        enabled=len(fpaths_hit) > 1,
        label='loading cache hits',
        adjust=True,
        freq=1,
    )
    try:
        qaid_to_hit = {
            qaid: chip_match.ChipMatch.load_from_fpath(fpath, verbose=False)
            for qaid, fpath in zip(qaids_hit, fpath_iter)
        }
    except chip_match.NeedRecomputeError as ex:
        # Fallback to a slow reload
        ut.printex(ex, 'Some cached results need to recompute', iswarning=True)
        qaid_to_hit = _load_singles_fallback(fpaths_hit)
    return qaid_to_hit
def render_vocab(vocab):
    """
    Renders the average patch of each word.
    This is a quick visualization of the entire vocabulary.

    CommandLine:
        python -m wbia.algo.smk.vocab_indexer render_vocab --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> vocab = testdata_vocab('PZ_MTEST', num_words=64)
        >>> all_words = vocab.render_vocab()
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> pt.qt4ensure()
        >>> pt.imshow(all_words)
        >>> ut.show_if_requested()
    """
    import wbia.plottool as pt

    wx_list = list(range(len(vocab)))
    # wx_list = ut.strided_sample(wx_list, 64)
    wx_list = ut.strided_sample(wx_list, 64)

    word_patch_list = []
    for wx in ut.ProgIter(wx_list, bs=True, lbl='building patches'):
        word = vocab.wx_to_word[wx]
        word_patch = vt.inverted_sift_patch(word, 64)
        word_patch = pt.render_sift_on_patch(word_patch, word)
        word_patch_list.append(word_patch)

    all_words = vt.stack_square_images(word_patch_list)
    return all_words
def find_connecting_edges(infr):
    """
    Searches for a small set of edges, which if reviewed as positive would
    ensure that each PCC is k-connected. Note that in some cases this is
    not possible.
    """
    label = 'name_label'
    node_to_label = infr.get_node_attrs(label)
    label_to_nodes = ut.group_items(node_to_label.keys(),
                                    node_to_label.values())

    # k = infr.params['redun.pos']
    k = 1
    new_edges = []
    prog = ut.ProgIter(
        list(label_to_nodes.keys()),
        label='finding connecting edges',
        enabled=infr.verbose > 0,
    )
    for nid in prog:
        nodes = set(label_to_nodes[nid])
        G = infr.pos_graph.subgraph(nodes, dynamic=False)
        impossible = nxu.edges_inside(infr.neg_graph, nodes)
        impossible |= nxu.edges_inside(infr.incomp_graph, nodes)

        candidates = set(nx.complement(G).edges())
        candidates.difference_update(impossible)

        aug_edges = nxu.k_edge_augmentation(G, k=k, avail=candidates)
        new_edges += aug_edges
    prog.ensure_newline()
    return new_edges
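# A minimal, self-contained illustration of the k-edge-augmentation step used
# above, calling networkx directly instead of the nxu wrapper (assumes
# networkx >= 2.2, where k_edge_augmentation is available). Two disconnected
# positive components get bridged by one edge drawn from the candidate pool.
import networkx as nx
from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation

G = nx.Graph([(1, 2), (3, 4)])           # two disconnected components
avail = set(nx.complement(G).edges())     # candidate edges we may add
aug_edges = list(k_edge_augmentation(G, k=1, avail=avail))
# aug_edges contains a single bridging edge, e.g. [(2, 3)]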
def get_patches(invassign, wx):
    ax_list = invassign.wx2_axs[wx]
    fx_list = invassign.wx2_fxs[wx]
    config = invassign.fstack.config
    ibs = invassign.fstack.ibs

    unique_axs, groupxs = vt.group_indices(ax_list)
    fxs_groups = vt.apply_grouping(fx_list, groupxs)

    unique_aids = ut.take(invassign.fstack.ax2_aid, unique_axs)
    all_kpts_list = ibs.depc.d.get_feat_kpts(unique_aids, config=config)
    sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)

    chip_list = ibs.depc_annot.d.get_chips_img(unique_aids)
    # convert to appropriate colorspace
    # if colorspace is not None:
    #     chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
    # ut.print_object_size(chip_list, 'chip_list')
    patch_size = 64

    grouped_patches_list = [
        vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
        for chip, kpts in ut.ProgIter(zip(chip_list, sub_kpts_list),
                                      nTotal=len(unique_aids),
                                      lbl='warping patches')
    ]
    # Make it correspond with original fx_list and ax_list
    word_patches = vt.invert_apply_grouping(grouped_patches_list, groupxs)
    return word_patches
def preproc_has_tips(depc, aid_list, config=None):
    r"""
    HACK TO FIND ONLY ANNOTS THAT HAVE TIPS

    Args:
        depc (DependencyCache):
        aid_list (list): list of annotation rowids
        config (dict): (default = {})

    Yields:
        tuple: (np.ndarray, np.ndarray, np.ndarray)

    CommandLine:
        python -m ibeis_flukematch.plugin --exec-preproc_has_tips --db testdb1
        python -m ibeis_flukematch.plugin --exec-preproc_has_tips --dbdir /home/zach/data/IBEIS/humpbacks --no-cnn
        python -m ibeis_flukematch.plugin --exec-preproc_has_tips --dbdir /home/zach/data/IBEIS/humpbacks --no-cnn --clear-all-depcache
        python -m ibeis_flukematch.plugin --exec-preproc_has_tips --db humpbacks --no-cnn
        python -m ibeis_flukematch.plugin --exec-preproc_has_tips --db humpbacks --no-cnn --clear-all-depcache

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_flukematch.plugin import *  # NOQA
        >>> ibs = ibeis.opendb(defaultdb='humpbacks')
        >>> aid_list = ibs.get_valid_aids()
        >>> config = {}
        >>> propgen = preproc_has_tips(ibs.depc, aid_list, config)
        >>> result = list(propgen)
        >>> hasnotch_list = ut.take_column(result, 0)
        >>> num_with = sum(hasnotch_list)
        >>> valid_aids = ut.compress(aid_list, hasnotch_list)
        >>> ibs.append_annot_case_tags(valid_aids, ['hasnotch'] * len(valid_aids))
        >>> print(ibs.get_annot_info(valid_aids[2], default=True))
        >>> print('%r / %r annots have notches' % (num_with, len(aid_list)))
    """
    print('Preprocess Has_Notch')
    print(config)
    config = config.copy()
    ibs = depc.controller
    fn = join(ibs.get_dbdir(), 'fluke_image_points.pkl')
    if not exists(fn):
        print('[fluke-module] ERROR: Could not find image points file')
        raise NotImplementedError('Could not find image points file')

    # this is a dict of img: dict of left/right/notch to the xy-point
    img_points_map = ut.load_cPkl(fn)

    img_names = ibs.get_annot_image_names(aid_list)

    for imgn in ut.ProgIter(img_names, lbl='Checking Has_Notch'):
        try:
            (
                img_points_map[imgn]['notch'],
                img_points_map[imgn]['left'],
                img_points_map[imgn]['right'],
            )
        except KeyError:
            yield (False,)
        else:
            yield (True,)
def extract_patches(ibs, aid_list, fxs_list=None, patch_size=None,
                    colorspace=None):
    """
    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.ingest_ibeis import *  # NOQA
        >>> ut.show_if_requested()
    """
    depc = ibs.depc
    kpts_list = depc.d.get_feat_kpts(aid_list)
    if fxs_list is None:
        fxs_list = [slice(None)] * len(kpts_list)
    kpts_list_ = ut.ziptake(kpts_list, fxs_list)
    chip_list = depc.d.get_chips_img(aid_list)
    # convert to appropriate colorspace
    if colorspace is not None:
        chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
    # ut.print_object_size(chip_list, 'chip_list')
    patch_size = 64

    patches_list = [
        vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
        for chip, kpts in ut.ProgIter(zip(chip_list, kpts_list_),
                                      nTotal=len(aid_list),
                                      lbl='warping patches')
    ]
    return patches_list
def register_files(drive):
    print('Loading registered files in %r' % (drive,))
    try:
        fpath_list = drive.cache.load('fpath_registry')
        dpath_list = drive.cache.load('dpath_registry')
    except ut.CacheMissException:
        print('Recomputing registry')
        fpath_gen_list = []
        dpath_gen_list = []
        for root, dname_list, fname_list in ut.ProgIter(
                os.walk(drive.root_dpath), 'walking', freq=1000):
            # Ignore hidden directories
            dname_list[:] = [d for d in dname_list if not d.startswith('.')]
            fpath_gen_list.append((root, fname_list))
            dpath_gen_list.append((root, dname_list))
        fpath_list = [join(root, f) for root, fs in fpath_gen_list for f in fs]
        dpath_list = [join(root, d) for root, ds in dpath_gen_list for d in ds]
        dpath_list = [drive.root_dpath] + dpath_list
        print('Registering %d files and %d directories' % (
            len(fpath_list), len(dpath_list)))
        drive.cache.save('fpath_registry', fpath_list)
        drive.cache.save('dpath_registry', dpath_list)
    print('Loaded %d files and %d directories' % (
        len(fpath_list), len(dpath_list)))
    drive.fpath_list_ = fpath_list
    drive.dpath_list_ = dpath_list
def oracle_review(sim):
    queue_params = {
        'pos_diameter': None,
        'neg_diameter': None,
    }
    infr = sim.infr
    prev = infr.verbose
    infr.verbose = 0
    # rng = np.random.RandomState(0)
    infr = sim.infr
    primary_truth = sim.primary_truth
    review_edges = infr.generate_reviews(**queue_params)
    max_reviews = 1000
    for count, (aid1, aid2) in enumerate(ut.ProgIter(review_edges)):
        state = primary_truth.loc[(aid1, aid2)].idxmax()
        tags = []
        infr.add_feedback(aid1, aid2, state, tags, apply=True,
                          rectify=False, user_id='oracle',
                          confidence='absolutely_sure')
        if count > max_reviews:
            break
    infr.verbose = prev

    sim.results['max_reviews'] = max_reviews

    n_clusters, n_inconsistent = infr.relabel_using_reviews(rectify=False)
    assert n_inconsistent == 0, 'should not create any inconsistencies'
    sim.results['n_user_clusters'] = n_clusters
    # infr.apply_review_inference()

    curr_decisions = infr.edge_attr_df('decision')
    curr_truth = primary_truth.loc[curr_decisions.index].idxmax(axis=1)
    n_user_mistakes = curr_decisions != curr_truth
    sim.results['n_user_mistakes'] = sum(n_user_mistakes)

    gt_clusters = ut.group_pairs(infr.gen_node_attrs('orig_name_label'))
    curr_clusters = ut.group_pairs(infr.gen_node_attrs('name_label'))
    compare_results = compare_groups(list(gt_clusters.values()),
                                     list(curr_clusters.values()))
    sim.results.update(ut.map_vals(len, compare_results))

    common_per_num = ut.group_items(compare_results['common'],
                                    map(len, compare_results['common']))
    sumafter = 3
    greater = [i for i in common_per_num.keys() if i > sumafter]
    common_per_num['>%s' % sumafter] = ut.flatten(
        ut.take(common_per_num, greater))
    ut.delete_keys(common_per_num, greater)
    for k, v in common_per_num.items():
        sim.results['common@' + str(k)] = len(v)

    sim.results['n_names_common'] = len(compare_results['common'])
def get_annotmatch_rowids_between_groups(ibs, aids1_list, aids2_list):
    ams_list = []
    lbl = 'loading between group am rowids'
    for aids1, aids2 in ut.ProgIter(list(zip(aids1_list, aids2_list)), lbl=lbl):
        ams = get_annotmatch_rowids_between(ibs, aids1, aids2)
        ams_list.append(ams)
    return ams_list
def main():
    if True:
        import pandas as pd
        pd.options.display.max_rows = 1000
        pd.options.display.width = 1000
        basis = {
            # 'n_clusters': [10, 100, 1000, 2000][::-1],
            # 'n_features': [4, 32, 128, 512][::-1],
            # 'per_cluster': [1, 10, 100, 200][::-1],
            'n_clusters': [10, 100, 500][::-1],
            'n_features': [32, 128][::-1],
            'per_cluster': [1, 10, 20][::-1],
            'asint': [True, False],
        }
        vals = []
        for kw in ut.ProgIter(ut.all_dict_combinations(basis),
                              lbl='gridsearch', bs=False, adjust=False,
                              freq=1):
            print('kw = ' + ut.repr2(kw))
            exec(ut.execstr_dict(kw))
            centers1, new_speed = test_kmeans_plus_plus_speed(fix=True, **kw)
            centers2, old_speed = test_kmeans_plus_plus_speed(fix=False, **kw)
            import utool
            with utool.embed_on_exception_context:
                assert np.all(centers1 == centers2), 'new code disagrees'
            kw['new_speed'] = new_speed
            kw['old_speed'] = old_speed
            vals.append(kw)
            print('---------')

        df = pd.DataFrame.from_dict(vals)
        df['percent_change'] = 100 * (
            df['old_speed'] - df['new_speed']) / df['old_speed']
        df = df.reindex(columns=list(basis.keys()) +
                        ['new_speed', 'old_speed', 'percent_change'])
        df['absolute_change'] = (df['old_speed'] - df['new_speed'])
        print(df.sort_values('absolute_change', ascending=False))
        # print(df)

        print(df['percent_change'][df['absolute_change'] > .1].mean())
        # print(df.loc[df['percent_change'].argsort()[::-1]])
    else:
        new_speed = test_kmeans_plus_plus_speed()
    try:
        profile.dump_stats('out.lprof')
        profile.print_stats(stripzeros=True)
    except Exception:
        pass
    print('new_speed = %r' % (new_speed,))
def random_case_set():
    r"""
    Returns:
        tuple: (labels, pairwise_feats)

    CommandLine:
        python -m ibeis.algo.hots.testem random_case_set --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.testem import *  # NOQA
        >>> (labels, pairwise_feats) = random_case_set()
        >>> result = ('(labels, pairwise_feats) = %s' % (ut.repr2((labels, pairwise_feats)),))
        >>> print(result)
    """
    rng = np.random.RandomState(0)
    case_params = dict(num_names=5, rng=rng)
    num_annots = 600
    test_cases = [
        random_test_annot(**case_params)
        for _ in ut.ProgIter(range(num_annots), bs=1)
    ]
    pairxs = list(ut.product_nonsame(range(num_annots), range(num_annots)))
    import utool
    utool.embed()

    test_pairs = list(ut.unflat_take(test_cases, pairxs))
    cases1 = ut.instancelist(ut.take_column(test_pairs, 0), check=False)
    cases2 = ut.instancelist(ut.take_column(test_pairs, 1), check=False)
    # FIXME
    labels = labels1 = make_test_pairwise_labels2(cases1, cases2)  # NOQA

    # labels = np.array([make_test_pairwise_labels(case1, case2)
    #                    for case1, case2 in ut.ProgIter(test_pairs, bs=1)])

    pairwise_feats_ = [
        make_test_pairwise_fetaures(case1, case2, label, rng)
        for label, (case1, case2) in ut.ProgIter(list(zip(labels, test_pairs)),
                                                 bs=1)
    ]
    pairwise_feats = np.vstack(pairwise_feats_)
    print(ut.dict_hist(labels))
    return labels, pairwise_feats
def preproc_block_curvature(depc, te_rowids, config):
    r"""
    Args:
        depc (DependencyCache):
        aid_list (list): list of annotation rowids
        config (dict): (default = {'sizes': [5, 10, 15, 20]})

    Yields:
        list: [np.ndarray]

    CommandLine:
        python -m ibeis_flukematch.plugin --exec-preproc_block_curvature --dbdir /home/zach/data/IBEIS/humpbacks --no-cnn
        python -m ibeis_flukematch.plugin --exec-preproc_block_curvature --db humpbacks --no-cnn

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_flukematch.plugin import *  # NOQA
        >>> ibs = ibeis.opendb(defaultdb='humpbacks')
        >>> all_aids = ibs.get_valid_aids()
        >>> isvalid = ibs.depc.get('Has_Notch', all_aids, 'flag', _debug=True)
        >>> aid_list = ut.compress(all_aids, isvalid)[0:4]
        >>> print('\n!!![test] aid_list = %r' % (aid_list,))
        >>> depc = ibs.depc
        >>> config = {'sizes': [5, 10, 15, 20]}
        >>> te_rowids = depc.get_rowids('Trailing_Edge', aid_list, config)
        >>> print('te_rowids = %r' % (te_rowids,))
        >>> propgen = preproc_block_curvature(depc, te_rowids, config)
        >>> curve_arr_list = list(propgen)
        >>> result = ut.depth_profile(curve_arr_list)
        >>> print(result)
    """
    print('Preprocess Block_Curvature')
    print(config)
    ibs = depc.controller
    # NOTE: Need to use get_native_property because they take the type
    # of the parent (trailing edge) ids, not the root (annot) ids.
    # get the trailing edges
    # NOTE: Can specify a single column, so unpacking is done automatically
    tedges = ibs.depc.get_native_property('Trailing_Edge', te_rowids, 'edge')
    # FIXME: CONFIG
    sizes = list(range(config['csize_min'], config['csize_max'] + 1,
                       config['csize_step']))
    # Use a list (not a lazy map) so the sizes are not exhausted after the
    # first trailing edge
    sizes = [float(x) / 100 for x in sizes]

    # call flukematch.block_integral_curvatures_cpp
    progiter = ut.ProgIter(tedges, lbl='compute Block_Curvature')
    for tedge in progiter:
        if tedge is None:
            yield None
        else:
            curve_arr = block_integral_curvatures_cpp(sizes, tedge)
            yield (curve_arr,)
def get_annotmatch_rowids_in_cliques(ibs, aids_list):
    # Equivalent call:
    # ibs.get_annotmatch_rowids_between_groups(ibs, aids_list, aids_list)
    import itertools
    ams_list = [
        ibs.get_annotmatch_rowid_from_undirected_superkey(
            *zip(*itertools.combinations(aids, 2)))
        for aids in ut.ProgIter(aids_list, lbl='loading clique am rowids')
    ]
    ams_list = [[] if ams is None else ut.filter_Nones(ams)
                for ams in ams_list]
    return ams_list
def _ratio_thresh(y_true, match_list):
    # Try to find the optimal ratio threshold
    # NOTE: `grid` (a sequence of cfgdicts to evaluate) is expected to be
    # defined in the enclosing scope of this helper.
    auc_list = []
    for cfgdict in ut.ProgIter(grid, lbl='gridsearch'):
        y_score = [
            match.fs.compress(match.ratio_test_flags(cfgdict)).sum()
            for match in match_list
        ]
        auc = sklearn.metrics.roc_auc_score(y_true, y_score)
        auc_list.append(auc)
    auc_list = np.array(auc_list)
    return auc_list
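# A hedged sketch (not from the source) of how the `grid` of cfgdicts might be
# built before calling _ratio_thresh; the exact keys accepted by
# match.ratio_test_flags are an assumption here.
# basis = {'ratio_thresh': np.linspace(0.6, 0.9, 7).tolist()}
# grid = ut.all_dict_combinations(basis)
# auc_list = _ratio_thresh(y_true, match_list)
# best_cfg = grid[auc_list.argmax()]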
def find_duplicates(index):
    # fpaths = list(index.files.keys())
    files = list(index.files.values())
    print('Grouping {} files'.format(len(files)))
    grouped = ut.group_items(files, [f.nbytes for f in files])
    print('Found {} groups'.format(len(grouped)))
    potential_dups = {k: v for k, v in grouped.items() if len(v) > 1}
    print('Found {} potential dups by nbytes'.format(len(potential_dups)))

    GB = 2 ** 30  # NOQA
    MB = 2 ** 20  # NOQA
    max_bytes = 10 * MB
    min_bytes = 64 * MB
    # NOTE: as written, min_bytes (64 MB) exceeds max_bytes (10 MB), so the
    # size-gated hash check below never fires.

    duplicates = []
    for k, fs in ut.ProgIter(potential_dups.items(), freq=1):
        names = [f.n for f in fs]
        if ut.allsame(names):
            # Don't do big files yet
            if k < max_bytes and k > min_bytes:
                if ut.allsame([f.hashid for f in fs]):
                    duplicates.extend(fs)
                    for f1, f2 in ut.combinations(fs, 2):
                        f1.duplicates.add(f2)
                        f2.duplicates.add(f1)

    def dpath_similarity(index, dpath1, dpath2):
        d1 = index[dpath1]
        d2 = index[dpath2]
        set1 = {f.hashid for f in ut.ProgIter(d1.files)}
        set2 = {f.hashid for f in ut.ProgIter(d2.files)}
        # n_isect = len(set1.intersection(set2))
        size1, size2 = map(len, (set1, set2))
        # minsize = min(size1, size2)
        # sim_measures = (n_isect, n_isect / minsize)
        return ut.set_overlaps(set1, set2)
        # return sim_measures

    similarities = {}
    r_to_dup = ut.group_items(duplicates, [p.r for p in duplicates])
    for dpath, dups in r_to_dup.items():
        # Check to see if the duplicates all point to the same dir
        f = dups[0]  # NOQA
        common_dpath = set.intersection(
            *[{_.r for _ in f.duplicates} for f in dups])

        for other in common_dpath:
            sim_measures = dpath_similarity(index, dpath, other)
            similarities[(dpath, other)] = sim_measures

    print(ut.repr4(similarities, si=True, nl=2))
def generate_class_images(dream, target_labels):
    """
    import plottool as pt
    fnum = None
    kw = dict(init='gauss', niters=500, update_rate=.05, weight_decay=1e-4)
    target_labels = list(range(model.output_dims))
    dream = draw_net.Dream(model, **kw)
    target_labels = 8
    images = list(dream.generate_class_images(target_labels))

    vid = vt.make_video(images, 'dynimg.pimj', fps=1, is_color=False, format='PIM1')
    vid = vt.make_video2(images, 'dynimg')

    import matplotlib.pyplot as plt
    ims = []
    for img in imgs:
        im = plt.imshow(img[:, :, 0], interpolation='nearest', cmap='gray')
        ims.append([im])

    import matplotlib.animation as animation
    fig = plt.figure()
    ani = animation.ArtistAnimation(fig, ims, interval=50, blit=True,
                                    repeat_delay=1000)
    ani.save('dynamic_images.mp4')
    ut.startfile('dynamic_images.mp4')
    plt.show()
    """
    import ibeis_cnn.__THEANO__ as theano
    from ibeis_cnn.__THEANO__ import tensor as T  # NOQA
    import utool as ut

    input_shape = dream.model.input_shape
    b, c, w, h = input_shape
    was_scalar = not ut.isiterable(target_labels)
    target_labels = ut.ensure_iterable(target_labels)
    assert len(target_labels) <= b, 'batch size too small'

    initial_state = dream._make_init_state()
    shared_images = theano.shared(initial_state.astype(np.float32))
    step_fn = dream._make_objective(shared_images, target_labels)
    out = dream._postprocess_class_image(shared_images, target_labels,
                                         was_scalar)
    yield out

    for _ in ut.ProgIter(range(dream.niters), lbl='class dream', bs=True):
        step_fn()
        # objective = step_fn()
        # print('objective = %r' % (objective,))
        out = dream._postprocess_class_image(shared_images, target_labels,
                                             was_scalar)
        yield out
def batch_knn(indexer, vecs, K, chunksize=4096, label='batch knn'):
    """
    Works like `indexer.knn` but the input is split into batches and progress
    is reported to give an estimated time remaining.
    """
    # Preallocate output
    idxs = np.empty((vecs.shape[0], K), dtype=np.int32)
    dists = np.empty((vecs.shape[0], K), dtype=np.float32)
    # Generate chunk slices
    num_chunks = ut.get_num_chunks(vecs.shape[0], chunksize)
    iter_ = ut.ichunk_slices(vecs.shape[0], chunksize)
    prog = ut.ProgIter(iter_, length=num_chunks, label=label)
    for sl_ in prog:
        idxs[sl_], dists[sl_] = indexer.knn(vecs[sl_], K=K)
    return idxs, dists
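# A minimal usage sketch for batch_knn. The `indexer` is assumed to be any
# object exposing knn(vecs, K) that returns (idxs, dists); the brute-force
# NumPy stand-in below is illustrative, not the wbia NeighborIndex API.
class _BruteForceIndexer(object):
    def __init__(self, data):
        self.data = data

    def knn(self, vecs, K):
        # Pairwise squared distances between query and database vectors
        d2 = ((vecs[:, None, :] - self.data[None, :, :]) ** 2).sum(axis=2)
        idxs = np.argsort(d2, axis=1)[:, :K].astype(np.int32)
        dists = np.take_along_axis(d2, idxs, axis=1).astype(np.float32)
        return idxs, dists

# data = np.random.rand(10000, 8).astype(np.float32)
# query = np.random.rand(500, 8).astype(np.float32)
# idxs, dists = batch_knn(_BruteForceIndexer(data), query, K=3, chunksize=128)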
def sight_resight_prob(N_range, nvisit1, nvisit2, resight):
    """
    https://en.wikipedia.org/wiki/Talk:Mark_and_recapture#Statistical_treatment
    http://stackoverflow.com/questions/31439875/infinite-summation-in-python/31442749
    """
    k, K, n = resight, nvisit1, nvisit2
    from scipy.special import comb

    N_range = np.array(N_range)

    def integers(start, blk_size=10000, pos=True, neg=False):
        x = np.arange(start, start + blk_size)
        while True:
            if pos:
                yield x
            if neg:
                yield -x - 1
            x += blk_size

    def converge_inf_sum(func, x_strm, eps=1e-5, axis=0):
        # Can still be very slow
        total = np.sum(func(next(x_strm)), axis=axis)
        # for x_blk in ut.ProgIter(x_strm, lbl='converging'):
        for x_blk in x_strm:
            diff = np.sum(func(x_blk), axis=axis)
            total += diff
            # error = abs(np.linalg.norm(diff))
            # logger.info('error = %r' % (error,))
            if np.sqrt(diff.ravel().dot(diff.ravel())) <= eps:
                # Converged
                break
        return total

    numers = comb(N_range - K, n - k) / comb(N_range, n)

    @ut.memoize
    def func(N_):
        return comb(N_ - K, n - k) / comb(N_, n)

    denoms = []
    for N in ut.ProgIter(N_range, lbl='denoms'):
        x_strm = integers(start=(N + n - k), blk_size=100)
        denom = converge_inf_sum(func, x_strm, eps=1e-3)
        denoms.append(denom)
        # denom = sum([func(N_) for N_ in range(N_start, N_start * 2)])

    probs = numers / np.array(denoms)
    return probs
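# A small cross-check sketch (assumed values, not from the source): with
# K = nvisit1 animals seen on the first visit, n = nvisit2 on the second, and
# k = resight seen both times, the classic Lincoln-Petersen point estimate is
# N_hat = K * n / k. The distribution returned by sight_resight_prob should
# peak in the same neighborhood.
# K, n, k = 50, 40, 10
# N_range = np.arange(80, 400)
# probs = sight_resight_prob(N_range, nvisit1=K, nvisit2=n, resight=k)
# print('Lincoln-Petersen N_hat = %r' % (K * n / k,))        # 200.0
# print('argmax of probs       = %r' % (N_range[np.argmax(probs)],))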
def build_dpath_to_fidx(fpath_list, fidx_list, root_dpath):
    dpath_to_fidx = ut.ddict(list)
    nTotal = len(fpath_list)
    _iter = zip(fidx_list, fpath_list)
    dpath_to_fidx = ut.ddict(list)
    for fidx, fpath in ut.ProgIter(_iter, 'making dpath fidx map',
                                   freq=50000, nTotal=nTotal):
        current_path = fpath
        while True:
            current_path = dirname(current_path)
            dpath_to_fidx[current_path].append(fidx)
            if current_path == root_dpath:
                break
    return dpath_to_fidx
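# A tiny illustrative call (hypothetical paths): every ancestor directory up
# to the root accumulates the indices of the files beneath it.
# fpaths = ['/data/a/x.txt', '/data/b/y.txt']
# build_dpath_to_fidx(fpaths, [0, 1], '/data')
# -> {'/data/a': [0], '/data': [0, 1], '/data/b': [1]}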
def find_opt_ratio(pblm):
    """
    script to help find the correct value for the ratio threshold

    >>> from wbia.algo.verif.vsone import *  # NOQA
    >>> pblm = OneVsOneProblem.from_empty('PZ_PB_RF_TRAIN')
    >>> pblm = OneVsOneProblem.from_empty('GZ_Master1')
    """
    # Find best ratio threshold
    pblm.load_samples()
    infr = pblm.infr
    edges = ut.emap(tuple, pblm.samples.aid_pairs.tolist())
    task = pblm.samples['match_state']
    pos_idx = task.class_names.tolist().index(POSTV)

    config = {'ratio_thresh': 1.0, 'sv_on': False}
    matches = infr._exec_pairwise_match(edges, config)

    import wbia.plottool as pt
    import sklearn.metrics

    pt.qtensure()
    thresholds = np.linspace(0, 1.0, 100)
    pos_truth = task.y_bin.T[pos_idx]
    ratio_fs = [m.local_measures['ratio'] for m in matches]

    aucs = []
    # Given the current correspondences: Find the optimal
    # correspondence threshold.
    for thresh in ut.ProgIter(thresholds, 'computing thresh'):
        scores = np.array([fs[fs < thresh].sum() for fs in ratio_fs])
        roc = sklearn.metrics.roc_auc_score(pos_truth, scores)
        aucs.append(roc)
    aucs = np.array(aucs)
    opt_auc = aucs.max()
    opt_thresh = thresholds[aucs.argmax()]

    if True:
        pt.plt.plot(thresholds, aucs, 'r-', label='')
        pt.plt.plot(opt_thresh, opt_auc, 'ro',
                    label='L opt=%r' % (opt_thresh,))
        pt.set_ylabel('auc')
        pt.set_xlabel('ratio threshold')
        pt.legend()
def build_fpath_hashes(drive):
    try:
        fpath_hashX_list = drive.cache.load('fpath_hashX_list')
    except ut.CacheMissException:
        fpath_hashX_list = [None] * len(drive.fpath_list)
        assert len(drive.fpath_bytes_list) == len(drive.fpath_list)

        tier_windows = drive.get_tier_windows()
        tier_flags = drive.get_tier_flags()
        tier_fpaths = [
            ut.compress(drive.fpath_list, flags) for flags in tier_flags
        ]
        # for tier, fpaths in enumerate(tier_fpaths):
        # chosen_tiers = [6, 5, 4, 3, 2, 1, 0]
        chosen_tiers = list(range(len(tier_windows)))[::-1]

        for tier in chosen_tiers:
            window = np.array(tier_windows[tier])
            minbytes = window[np.isfinite(window)].min()
            # stride = max(1, minbytes // (2 ** 20))
            stride = max(1, minbytes // (2 ** 20))
            print('%s tier %d stride = %r' % (drive.root_dpath, tier, stride))

            fpaths = tier_fpaths[tier]
            print('# fpaths = %r' % (len(fpaths),))

            tier_hashes = [
                tryhash(fpath, stride)
                for fpath in ut.ProgIter(fpaths,
                                         'tier=%r hashes' % (tier,),
                                         freq=100)
            ]
            # import register_files
            # tier_hashes = list(ut.buffered_generator((
            #     register_files.tryhash(fpath, stride) for fpath in
            #     ut.ProgIter(fpaths, 'tier=%r hashes' % (tier,), freq=100)
            # )))
            tier_idxs = np.where(tier_flags[tier])[0]
            for idx, hash_ in zip(tier_idxs, tier_hashes):
                fpath_hashX_list[idx] = hash_

        drive.cache.save('fpath_hashX_list', fpath_hashX_list)
    return fpath_hashX_list
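# Hypothetical sketch of a strided hasher in the spirit of `tryhash` above
# (the real helper lives elsewhere in this codebase and may differ): hash
# every `stride`-th block so very large files get a cheap, if weaker,
# fingerprint instead of a full read.
import hashlib
from os.path import getsize

def strided_hash(fpath, stride, blocksize=2 ** 20):
    try:
        hasher = hashlib.sha1()
        with open(fpath, 'rb') as file_:
            nblocks = getsize(fpath) // blocksize + 1
            for blk_idx in range(0, nblocks, int(stride)):
                file_.seek(blk_idx * blocksize)
                hasher.update(file_.read(blocksize))
        return hasher.hexdigest()
    except EnvironmentError:
        # Unreadable files simply get no hash
        return None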
def _load_singles_fallback(fpaths_hit):
    fpath_iter = ut.ProgIter(
        fpaths_hit,
        enabled=len(fpaths_hit) > 1,
        label='checking chipmatch cache',
        adjust=True,
        freq=1,
    )
    # Recompute those that fail loading
    qaid_to_hit = {}
    for fpath in fpath_iter:
        try:
            cm = chip_match.ChipMatch.load_from_fpath(fpath, verbose=False)
        except chip_match.NeedRecomputeError:
            pass
        else:
            qaid_to_hit[cm.qaid] = cm
    print('%d / %d cached matches need to be recomputed' % (
        len(fpaths_hit) - len(qaid_to_hit), len(fpaths_hit)))
    return qaid_to_hit
def make_tree_structure(valid_fpaths):
    root = {}

    def dict_getitem_default(dict_, key, type_):
        try:
            val = dict_[key]
        except KeyError:
            val = type_()
            dict_[key] = val
        return val

    for fpath in ut.ProgIter(valid_fpaths, 'building tree', freq=30000):
        path_components = ut.dirsplit(fpath)
        current = root
        for comp in path_components[:-1]:
            current = dict_getitem_default(current, comp, dict)
        contents = dict_getitem_default(current, '.', list)
        contents.append(path_components[-1])
    return root
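# Illustrative output shape (hypothetical relative paths, assuming ut.dirsplit
# splits on path separators): leaf file names collect under the '.' key of
# their directory's nested dict.
# make_tree_structure(['a/b/c.txt', 'a/b/d.txt', 'a/e.txt'])
# -> {'a': {'b': {'.': ['c.txt', 'd.txt']}, '.': ['e.txt']}}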
def get_fpath_bytes_list(drive):
    print('Building fpath bytes for %r' % (drive,))
    try:
        fpath_bytes_list = drive.cache.load('fpath_bytes_list')
        assert len(fpath_bytes_list) == len(drive.fpath_list), 'bad length'
    except ut.CacheMissException:
        def tryread_nbytes(fpath):
            try:
                return ut.file_bytes(fpath)
            except SystemErrors:
                return np.nan
        fpath_bytes_list = [
            tryread_nbytes(fpath)
            for fpath in ut.ProgIter(drive.fpath_list, 'reading size',
                                     freq=1000)
        ]
        assert len(fpath_bytes_list) == len(drive.fpath_list)
        drive.cache.save('fpath_bytes_list', fpath_bytes_list)
    return fpath_bytes_list
def dump_vectors(qreq_):
    """
    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.smk_pipeline import *  # NOQA
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids(defaultdb='PZ_MTEST', a='default:mingt=2,pername=2')
        >>> qaids = aid_list[0:2]
        >>> daids = aid_list[:]
        >>> config = {'nAssign': 1, 'num_words': 8000,
        >>>           'sv_on': True}
        >>> qreq_ = SMKRequest(ibs, qaids, daids, config)
        >>> qreq_.ensure_data()
    """
    inva = qreq_.dinva
    X = qreq_.dinva.get_annot(qreq_.daids[0])
    n_words = inva.wx_list[-1] + 1
    n_dims = X.agg_rvecs.shape[1]
    n_annots = len(qreq_.daids)
    X.agg_rvecs.dtype
    vlads = np.zeros((n_annots, n_words, n_dims), dtype=np.float32)
    ids_ = list(zip(qreq_.dnids, qreq_.daids))
    for count, (nid, aid) in enumerate(ut.ProgIter(ids_, label='vlad')):
        # X.rrr()
        X = qreq_.dinva.get_annot(aid)
        out = vlads[count]
        out[X.wx_list] = X.agg_rvecs
        # X.to_dense(out=out)
    # Flatten out
    vlads.shape = (n_annots, n_words * n_dims)
    ut.print_object_size(vlads)
    fname = 'vlad_%d_d%d_%s' % (n_annots, n_words * n_dims,
                                qreq_.ibs.get_dbname())
    fpath = ut.truepath('~/' + fname + '.mat')
    import scipy.io
    mdict = {
        'vlads': vlads,
        'nids': qreq_.dnids,
    }
    scipy.io.savemat(fpath, mdict)