def accumulate_input_ids(edge_list):
    """
    python -m dtool.example_depcache2 testdata_depc4 --show
    """
    edge_data = ut.take_column(edge_list, 3)
    # We are accumulating local input ids
    toaccum_list_ = ut.dict_take_column(edge_data, 'local_input_id')
    if BIG_HACK and True:
        v_list = ut.take_column(edge_list, 1)
        # show the local_input_ids at the entire level
        pred_ids = ([
            [x['local_input_id']
             for x in list(graph.pred[node].values())[0].values()]
            if len(graph.pred[node]) else []
            for node in v_list
        ])
        toaccum_list = [x + ':' + ';'.join(y)
                        for x, y in zip(toaccum_list_, pred_ids)]
    else:
        toaccum_list = toaccum_list_
    # Default dumb accumulation
    accum_ids_ = ut.cumsum(zip(toaccum_list), tuple())
    accum_ids = ut.lmap(condense_accum_ids, accum_ids_)
    if BIG_HACK:
        accum_ids = ut.lmap(condense_accum_ids_stars, accum_ids)
        accum_ids = [('t',) + x for x in accum_ids]
    ut.dict_set_column(edge_data, 'accum_id', accum_ids)
    return accum_ids
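# The accumulation step above (ut.cumsum(zip(toaccum_list), tuple())) builds, for each
# edge, the tuple of every local input id seen so far. A minimal standard-library sketch
# of that idea follows; the id strings are made-up placeholders, and itertools.accumulate
# stands in for ut.cumsum here, which is an assumption about its behavior, not a drop-in.
from itertools import accumulate

toaccum_list = ['1', '2', '2:1', '3']  # hypothetical per-edge local input ids
accum_ids = list(accumulate(((x,) for x in toaccum_list),
                            lambda acc, nxt: acc + nxt))
print(accum_ids)
# [('1',), ('1', '2'), ('1', '2', '2:1'), ('1', '2', '2:1', '3')]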
def _assert_self(inva, qreq_):
    ibs = qreq_.ibs
    assert len(inva.aids) == len(inva.wx_lists)
    assert len(inva.aids) == len(inva.fxs_lists)
    assert len(inva.aids) == len(inva.maws_lists)
    assert len(inva.aids) == len(inva.agg_rvecs)
    assert len(inva.aids) == len(inva.agg_flags)
    nfeat_list1 = ibs.get_annot_num_feats(inva.aids, config2_=qreq_.qparams)
    nfeat_list2 = [sum(ut.lmap(len, fx_list)) for fx_list in inva.fxs_lists]
    nfeat_list3 = [sum(ut.lmap(len, maws)) for maws in inva.maws_lists]
    ut.assert_lists_eq(nfeat_list1, nfeat_list2)
    ut.assert_lists_eq(nfeat_list1, nfeat_list3)
def sanity_checks(offset_list, Y_list, query_annots, ibs):
    nfeat_list = np.diff(offset_list)
    for Y, nfeat in ut.ProgIter(zip(Y_list, nfeat_list), 'checking'):
        assert nfeat == sum(ut.lmap(len, Y.fxs_list))

    if False:
        # Visualize queries
        # Look at the standard query images here
        # http://www.robots.ox.ac.uk:5000/~vgg/publications/2007/Philbin07/philbin07.pdf
        from wbia.viz import viz_chip
        import wbia.plottool as pt
        pt.qt4ensure()
        fnum = 1
        pnum_ = pt.make_pnum_nextgen(len(query_annots.aids) // 5, 5)
        for aid in ut.ProgIter(query_annots.aids):
            pnum = pnum_()
            viz_chip.show_chip(
                ibs,
                aid,
                in_image=True,
                annote=False,
                notitle=True,
                draw_lbls=False,
                fnum=fnum,
                pnum=pnum,
            )
def get_patches(inva, wx, ibs, verbose=True):
    """
    Loads the patches assigned to a particular word in this stack

    >>> inva.wx_to_aids = inva.compute_inverted_list()
    >>> verbose=True
    """
    config = inva.config
    aid_list = inva.wx_to_aids[wx]
    X_list = [inva.get_annot(aid) for aid in aid_list]
    fxs_groups = [X.fxs(wx) for X in X_list]
    all_kpts_list = ibs.depc.d.get_feat_kpts(aid_list, config=config)
    sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)
    total_patches = sum(ut.lmap(len, fxs_groups))

    chip_list = ibs.depc_annot.d.get_chips_img(aid_list, config=config)
    # convert to appropriate colorspace
    #if colorspace is not None:
    #    chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
    # ut.print_object_size(chip_list, 'chip_list')

    patch_size = 64
    shape = (total_patches, patch_size, patch_size, 3)
    _prog = ut.ProgPartial(enabled=verbose, lbl='warping patches', bs=True)
    _patchiter = ut.iflatten([
        vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
        #vt.get_warped_patches(chip, kpts, patch_size=patch_size, use_cpp=True)[0]
        for chip, kpts in _prog(zip(chip_list, sub_kpts_list),
                                length=len(aid_list))
    ])
    word_patches = vt.fromiter_nd(_patchiter, shape, dtype=np.uint8)
    return word_patches
def phis_flags_list(X, idxs):
    """ get subset of non-aggregated residual vectors """
    phis_list = ut.take(X.rvecs_list, idxs)
    flags_list = ut.take(X.flags_list, idxs)
    if X.int_rvec:
        phis_list = ut.lmap(smk_funcs.uncast_residual_integer, phis_list)
    return phis_list, flags_list
def nx_from_matrix(weight_matrix, nodes=None, remove_self=True):
    import networkx as nx
    import utool as ut
    import numpy as np
    if nodes is None:
        nodes = list(range(len(weight_matrix)))
    weight_list = weight_matrix.ravel()
    flat_idxs_ = np.arange(weight_matrix.size)
    multi_idxs_ = np.unravel_index(flat_idxs_, weight_matrix.shape)

    # Remove 0 weight edges
    flags = np.logical_not(np.isclose(weight_list, 0))
    weight_list = ut.compress(weight_list, flags)
    multi_idxs = ut.compress(list(zip(*multi_idxs_)), flags)
    edge_list = ut.lmap(tuple, ut.unflat_take(nodes, multi_idxs))

    if remove_self:
        flags = [e1 != e2 for e1, e2 in edge_list]
        edge_list = ut.compress(edge_list, flags)
        weight_list = ut.compress(weight_list, flags)

    graph = nx.Graph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edge_list)
    label_list = ['%.2f' % w for w in weight_list]
    nx.set_edge_attributes(graph, 'weight', dict(zip(edge_list, weight_list)))
    nx.set_edge_attributes(graph, 'label', dict(zip(edge_list, label_list)))
    return graph
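# A self-contained sketch of the same matrix-to-graph idea using only numpy and
# networkx (no utool). It attaches 'weight'/'label' attributes directly in add_edge
# rather than via nx.set_edge_attributes, whose argument order changed between
# networkx 1.x (used by the function above) and 2.x. The example matrix is made up.
import numpy as np
import networkx as nx

def nx_from_matrix_sketch(weight_matrix, nodes=None, remove_self=True):
    """Build an undirected graph whose edges are the nonzero matrix entries."""
    weight_matrix = np.asarray(weight_matrix)
    if nodes is None:
        nodes = list(range(len(weight_matrix)))
    graph = nx.Graph()
    graph.add_nodes_from(nodes)
    rows, cols = np.nonzero(~np.isclose(weight_matrix, 0))
    for i, j in zip(rows, cols):
        if remove_self and i == j:
            continue
        w = float(weight_matrix[i, j])
        graph.add_edge(nodes[i], nodes[j], weight=w, label='%.2f' % w)
    return graph

W = np.array([[0.0, 0.5, 0.0],
              [0.5, 0.0, 0.9],
              [0.0, 0.9, 0.1]])
print(sorted(nx_from_matrix_sketch(W, nodes=['a', 'b', 'c']).edges(data='weight')))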
def make_adj_matrix(G):
    import utool as ut
    import numpy as np
    # node ordering and index lookup (as in nx_make_adj_matrix below)
    nodes = list(G.nodes())
    node2_idx = ut.make_index_lookup(nodes)
    edges = list(G.edges())
    edge2_idx = ut.partial(ut.dict_take, node2_idx)
    uv_list = ut.lmap(edge2_idx, edges)
    A = np.zeros((len(nodes), len(nodes)))
    A[tuple(np.array(uv_list).T)] = 1
    return A
def _recombine_labels(chunk_labels):
    """
    Ensure each group has different indices

    chunk_labels = grouped_labels
    """
    import utool as ut
    labels = ut.take_column(chunk_labels, 0)
    idxs = ut.take_column(chunk_labels, 1)
    # nunique_list = [len(np.unique(a)) for a in labels]
    chunksizes = ut.lmap(len, idxs)
    cumsum = np.cumsum(chunksizes).tolist()
    combined_idxs = np.hstack(idxs)
    combined_labels = np.hstack(labels)
    offset = 0
    # Ensure each chunk has unique labels
    for start, stop in zip([0] + cumsum, cumsum):
        combined_labels[start:stop] += offset
        offset += len(np.unique(combined_labels[start:stop]))
    # Ungroup
    X_labels = np.empty(combined_idxs.max() + 1, dtype=int)
    # new_labels[:] = -1
    X_labels[combined_idxs] = combined_labels
    return X_labels
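# A sketch of the label-offset trick above with plain numpy: cluster labels computed
# independently per chunk are shifted so they do not collide, then scattered back to
# their original indices. The chunk data here is fabricated for illustration.
import numpy as np

chunk_labels = [
    (np.array([0, 0, 1]), np.array([0, 1, 2])),  # chunk 1: two clusters
    (np.array([0, 1, 1]), np.array([3, 4, 5])),  # chunk 2: two clusters
]

offset = 0
combined = []
for labels, idxs in chunk_labels:
    combined.append((labels + offset, idxs))
    offset += len(np.unique(labels))  # shift the next chunk past these labels

X_labels = np.empty(max(idxs.max() for _, idxs in combined) + 1, dtype=int)
for labels, idxs in combined:
    X_labels[idxs] = labels
print(X_labels)  # [0 0 1 2 3 3]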
def cheetah_stats(ibs):
    filters = [
        dict(view=['right', 'frontright', 'backright'], minqual='good'),
        dict(view=['right', 'frontright', 'backright']),
    ]
    for filtkw in filters:
        annots = ibs.annots(ibs.filter_annots_general(**filtkw))
        unique_nids, grouped_annots = annots.group(annots.nids)
        annots_per_name = ut.lmap(len, grouped_annots)
        annots_per_name_freq = ut.dict_hist(annots_per_name)

        def bin_mapper(num):
            if num < 5:
                return (num, num + 1)
            else:
                for bin, mod in [(20, 5), (50, 10)]:
                    if num < bin:
                        low = (num // mod) * mod
                        high = low + mod
                        return (low, high)
                if num >= bin:
                    return (bin, None)
                else:
                    assert False, str(num)

        hist = ut.ddict(lambda: 0)
        for num in annots_per_name:
            hist[bin_mapper(num)] += 1
        hist = ut.sort_dict(hist)

        print('------------')
        print('filters = %s' % ut.repr4(filtkw))
        print('num_annots = %r' % (len(annots)))
        print('num_names = %r' % (len(unique_nids)))
        print('annots_per_name_freq = %s' % (ut.repr4(annots_per_name_freq)))
        print('annots_per_name_freq (ranges) = %s' % (ut.repr4(hist)))
        assert sum(hist.values()) == len(unique_nids)
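# The interesting piece above is bin_mapper: exact bins below 5 annots per name,
# 5-wide bins up to 20, 10-wide bins up to 50, and one open-ended bin after that.
# A standard-library sketch with fabricated per-name counts:
from collections import Counter

def bin_mapper_sketch(num):
    if num < 5:
        return (num, num + 1)
    for upper, width in [(20, 5), (50, 10)]:
        if num < upper:
            low = (num // width) * width
            return (low, low + width)
    return (50, None)  # everything at or above the last boundary

annots_per_name = [1, 1, 2, 3, 6, 7, 12, 23, 55]  # made-up counts
hist = Counter(bin_mapper_sketch(n) for n in annots_per_name)
for rng in sorted(hist, key=lambda r: r[0]):
    print(rng, hist[rng])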
def read_csv(fpath):
    """ reads csv in unicode """
    import csv
    import utool as ut
    #csvfile = open(fpath, 'rb')
    with open(fpath, 'rb') as csvfile:
        row_iter = csv.reader(csvfile, delimiter=str(','), quotechar=str('|'))
        row_list = [ut.lmap(ut.ensure_unicode, row) for row in row_iter]
    return row_list
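# The 'rb' mode above matches the Python 2 csv module; on Python 3 the csv module
# expects a text-mode file opened with an explicit encoding and newline=''. A minimal
# Python 3 sketch (standard library only, same delimiter/quotechar as above):
import csv

def read_csv_py3(fpath, encoding='utf-8'):
    """Read a CSV file into a list of rows of unicode strings."""
    with open(fpath, 'r', encoding=encoding, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        return [list(row) for row in reader]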
def nx_make_adj_matrix(G):
    import utool as ut
    nodes = list(G.nodes())
    node2_idx = ut.make_index_lookup(nodes)
    edges = list(G.edges())
    edge2_idx = ut.partial(ut.dict_take, node2_idx)
    uv_list = ut.lmap(edge2_idx, edges)
    A = np.zeros((len(nodes), len(nodes)))
    A[tuple(np.array(uv_list).T)] = 1
    return A
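# A utool-free sketch of the same adjacency-matrix construction. Like the function
# above it marks each edge once, in the (u, v) order returned by G.edges(); networkx's
# own nx.to_numpy_array(G) is the built-in alternative when a symmetric weighted
# matrix is wanted.
import numpy as np
import networkx as nx

def make_adj_matrix_sketch(G):
    nodes = list(G.nodes())
    node2_idx = {node: idx for idx, node in enumerate(nodes)}
    A = np.zeros((len(nodes), len(nodes)))
    for u, v in G.edges():
        A[node2_idx[u], node2_idx[v]] = 1
    return A

print(make_adj_matrix_sketch(nx.Graph([('a', 'b'), ('b', 'c')])))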
def __nice__(self):
    import numpy as np
    len_list = ut.lmap(len, self.annots_list)
    num = len(self.annots_list)
    mean = np.mean(len_list)
    std = np.std(len_list)
    if six.PY3:
        nice = '(n=%r, μ=%.1f, σ=%.1f)' % (num, mean, std)
    else:
        nice = '(n=%r, m=%.1f, s=%.1f)' % (num, mean, std)
    return nice
def __debug_win_msvcr():
    import utool as ut
    fname = 'msvcr*.dll'
    key_list = ['PATH']
    found = ut.search_env_paths(fname, key_list)
    fpaths = ut.unique(ut.flatten(found.values()))
    fpaths = ut.lmap(ut.ensure_unixslash, fpaths)
    from os.path import basename
    dllnames = [basename(x) for x in fpaths]
    grouped = dict(ut.group_items(fpaths, dllnames))
    print(ut.repr4(grouped, nl=4))

    keytoid = {}
    for key, vals in grouped.items():
        infos = ut.lmap(ut.get_file_nBytes, vals)
        #infos = ut.lmap(ut.get_file_uuid, vals)
        #uuids = [ut.get_file_uuid(val) for val in vals]
        keytoid[key] = list(zip(infos, vals))
    ut.print_dict(keytoid, nl=2)
def _exec_pairwise_match(extr, edges, prog_hook=None):
    """
    Performs one-vs-one matching between pairs of annotations.
    This establishes the feature correspondences.

    CommandLine:
        python -m wbia.algo.verif.pairfeat _exec_pairwise_match --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.verif.pairfeat import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb('testdb1')
        >>> match_config = dict(histeq=True)
        >>> extr = PairwiseFeatureExtractor(ibs, match_config=match_config)
        >>> edges = [(1, 2), (2, 3)]
        >>> prog_hook = None
        >>> match_list = extr._exec_pairwise_match(edges)
        >>> match1, match2 = match_list
        >>> assert match1.annot2 is match2.annot1
        >>> assert match1.annot1 is not match2.annot2
        >>> ut.quit_if_noshow()
        >>> match2.show()
        >>> ut.show_if_requested()
    """
    if extr.verbose:
        logger.info('[extr] executing pairwise one-vs-one matching')
    ibs = extr.ibs
    match_config = extr.match_config
    edges = ut.lmap(tuple, ut.aslist(edges))
    qaids = ut.take_column(edges, 0)
    daids = ut.take_column(edges, 1)
    # The depcache does the pairwise matching procedure
    match_list = ibs.depc.get('pairwise_match', (qaids, daids), 'match',
                              config=match_config)
    # Hack: Postprocess matches to re-add wbia annotation info
    # in lazy-dict format
    from wbia import core_annots
    config = ut.hashdict(match_config)
    qannot_cfg = dannot_cfg = config
    preload = True
    configured_lazy_annots = core_annots.make_configured_annots(
        ibs, qaids, daids, qannot_cfg, dannot_cfg, preload=preload)
    for qaid, daid, match in zip(qaids, daids, match_list):
        match.annot1 = configured_lazy_annots[config][qaid]
        match.annot2 = configured_lazy_annots[config][daid]
        match.config = config
    return match_list
def dataset_id(dataset):
    shape_str = 'x'.join(ut.lmap(str, dataset._info['data_shape']))
    num_data = dataset._info['num_data']
    parts = []
    if dataset.name is not None:
        parts.append(dataset.name)
    if num_data is not None:
        parts.append(str(num_data))
    parts.append(shape_str)
    if dataset.hashid:
        parts.append(dataset.hashid)
    dsid = '_'.join(parts)
    return dsid
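# A quick illustration of the identifier this builds, using a made-up dataset
# description (the name, size, shape, and hash are all placeholders):
name, num_data, data_shape, hashid = 'mnist', 60000, (28, 28, 1), 'abc123'
parts = [name, str(num_data), 'x'.join(str(d) for d in data_shape), hashid]
print('_'.join(parts))  # mnist_60000_28x28x1_abc123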
def fix_splits_interaction(ibs):
    """
    python -m wbia fix_splits_interaction --show

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import wbia.guitool as gt
        >>> gt.ensure_qtapp()
        >>> win = fix_splits_interaction(ibs)
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> gt.qtapp_loop(qwin=win)
    """
    split_props = {'splitcase', 'photobomb'}
    all_annot_groups = ibs._annot_groups(
        ibs.group_annots_by_name(ibs.get_valid_aids())[0])
    all_has_split = [
        len(split_props.intersection(ut.flatten(tags))) > 0
        for tags in all_annot_groups.match_tags
    ]
    tosplit_annots = ut.compress(all_annot_groups.annots_list, all_has_split)

    tosplit_annots = ut.take(tosplit_annots,
                             ut.argsort(ut.lmap(len, tosplit_annots)))[::-1]
    if ut.get_argflag('--reverse'):
        tosplit_annots = tosplit_annots[::-1]
    logger.info('len(tosplit_annots) = %r' % (len(tosplit_annots),))
    aids_list = [a.aids for a in tosplit_annots]

    from wbia.algo.graph import graph_iden
    from wbia.viz import viz_graph2
    import wbia.guitool as gt
    import wbia.plottool as pt
    pt.qt4ensure()
    gt.ensure_qtapp()

    for aids in ut.InteractiveIter(aids_list):
        infr = graph_iden.AnnotInference(ibs, aids)
        infr.initialize_graph()
        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode='rereview')
        win.populate_edge_model()
        win.show()
    return win
def get_homog_list_nbytes_nested(list_nested):
    if list_nested is None:
        return 0
    if len(list_nested) == 0:
        return 0
    else:
        val = list_nested[0]
        if isinstance(val, np.ndarray):
            nbytes = sum(sys.getsizeof(v) for v in list_nested)
            #item_nbytes = sum(v.nbytes for v in list_nested)
        else:
            nest_nbytes = sys.getsizeof(val) * len(list_nested)
            totals = sum(ut.lmap(len, list_nested))
            item_nbytes = sys.getsizeof(val[0]) * totals
            nbytes = nest_nbytes + item_nbytes
        return nbytes
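# A standard-library sketch of the non-ndarray branch above: estimate the memory of a
# homogeneous list of lists from one representative container and one representative item.
import sys

def nested_list_nbytes_sketch(list_nested):
    if not list_nested:
        return 0
    val = list_nested[0]
    nest_nbytes = sys.getsizeof(val) * len(list_nested)    # the inner containers
    total_items = sum(len(sub) for sub in list_nested)
    item_nbytes = sys.getsizeof(val[0]) * total_items      # assumes items are same-sized
    return nest_nbytes + item_nbytes

print(nested_list_nbytes_sketch([[1, 2, 3], [4, 5], [6]]))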
def edge_attr_df(infr, key, edges=None, default=ut.NoParam):
    """ constructs DataFrame using current predictions """
    edge_states = infr.gen_edge_attrs(key, edges=edges, default=default)
    edge_states = list(edge_states)
    if isinstance(edges, pd.MultiIndex):
        index = edges
    else:
        if edges is None:
            edges_ = ut.take_column(edge_states, 0)
        else:
            edges_ = ut.lmap(tuple, ut.aslist(edges))
        index = pd.MultiIndex.from_tuples(edges_, names=('aid1', 'aid2'))
    records = ut.itake_column(edge_states, 1)
    edge_df = pd.Series.from_array(records)
    edge_df.name = key
    edge_df.index = index
    return edge_df
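# pd.Series.from_array was removed in pandas 1.0; the plain pd.Series constructor with a
# MultiIndex does the same job. A sketch with fabricated edge/attribute records:
import pandas as pd

edge_states = [((1, 2), 'match'), ((2, 3), 'nomatch'), ((3, 4), 'match')]
index = pd.MultiIndex.from_tuples([e for e, _ in edge_states], names=('aid1', 'aid2'))
edge_df = pd.Series([s for _, s in edge_states], index=index, name='decision')
print(edge_df)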
def load_feat_scores(qreq_, qaids):
    import wbia  # NOQA
    from os.path import dirname, join  # NOQA

    # HACKY CACHE
    cfgstr = qreq_.get_cfgstr(with_input=True)
    cache_dir = join(dirname(dirname(wbia.__file__)), 'TMP_FEATSCORE_CACHE')

    namemode = ut.get_argval('--namemode', default=True)
    fsvx = ut.get_argval('--fsvx', type_='fuzzy_subset',
                         default=slice(None, None, None))
    threshx = ut.get_argval('--threshx', type_=int, default=None)
    thresh = ut.get_argval('--thresh', type_=float, default=0.9)
    num = ut.get_argval('--num', type_=int, default=1)
    cfg_components = [cfgstr, disttype, namemode, fsvx, threshx, thresh, f, num]
    cache_cfgstr = ','.join(ut.lmap(six.text_type, cfg_components))
    cache_hashid = ut.hashstr27(cache_cfgstr + '_v1')
    cache_name = 'get_cfgx_feat_scores_' + cache_hashid

    @ut.cached_func(cache_name, cache_dir=cache_dir, key_argx=[], use_cache=True)
    def get_cfgx_feat_scores(qreq_, qaids):
        from wbia.algo.hots import scorenorm
        cm_list = qreq_.execute(qaids)
        # logger.info('Done loading cached chipmatches')
        tup = scorenorm.get_training_featscores(qreq_, cm_list, disttype,
                                                namemode, fsvx, threshx,
                                                thresh, num=num)
        # logger.info(ut.depth_profile(tup))
        tp_scores, tn_scores, scorecfg = tup
        return tp_scores, tn_scores, scorecfg

    tp_scores, tn_scores, scorecfg = get_cfgx_feat_scores(qreq_, qaids)
    return tp_scores, tn_scores, scorecfg
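# ut.hashstr27 is a utool-specific hash; a rough standard-library analogue of the
# cache-key construction above is sketched below. The sha1 truncation is an assumption
# standing in for the utool hash, and the config components are placeholders.
import hashlib

cfg_components = ['featscore_cfgstr', 'L2', True, slice(None), None, 0.9, 1]
cache_cfgstr = ','.join(str(c) for c in cfg_components)
cache_hashid = hashlib.sha1((cache_cfgstr + '_v1').encode('utf-8')).hexdigest()[:16]
print('get_cfgx_feat_scores_' + cache_hashid)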
def inplace_filter_results(self, filter_pat):
    import utool as ut
    self.filter_pats.append(filter_pat)
    # Get zipflags
    flags_list = self.pattern_filterflags(filter_pat)
    # Check to see if there are any survivors
    flags = ut.lmap(any, flags_list)
    #
    found_lines_list = ut.zipcompress(self.found_lines_list, flags_list)
    found_lxs_list = ut.zipcompress(self.found_lxs_list, flags_list)
    #
    found_fpath_list = ut.compress(self.found_fpath_list, flags)
    found_lines_list = ut.compress(found_lines_list, flags)
    found_lxs_list = ut.compress(found_lxs_list, flags)
    # In place modification
    self.found_fpath_list = found_fpath_list
    self.found_lines_list = found_lines_list
    self.found_lxs_list = found_lxs_list
def purge_ensure_one_annot_per_images(ibs):
    """
    pip install Pipe
    """
    # Purge all but one annotation
    images = ibs.images()
    # images.aids
    groups = images._annot_groups
    import numpy as np

    # Take all but the largest annotations per image
    large_masks = [
        ut.index_to_boolmask([np.argmax(x)], len(x)) for x in groups.bbox_area
    ]
    small_masks = ut.lmap(ut.not_list, large_masks)
    # Remove all but the largest annotation
    small_aids = ut.zipcompress(groups.aid, small_masks)
    small_aids = ut.flatten(small_aids)

    # Fix any empty images
    images = ibs.images()
    empty_images = ut.where(np.array(images.num_annotations) == 0)
    logger.info('empty_images = %r' % (empty_images,))

    # list(map(basename, map(dirname, images.uris_original)))

    def VecPipe(func):
        import pipe

        @pipe.Pipe
        def wrapped(sequence):
            return map(func, sequence)
            # return (None if item is None else func(item) for item in sequence)

        return wrapped

    name_list = list(images.uris_original | VecPipe(dirname) | VecPipe(basename))
    aids_list = images.aids
    ut.assert_all_eq(list(aids_list | VecPipe(len)))
    annots = ibs.annots(ut.flatten(aids_list))
    annots.names = name_list
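# The core of the purge above is a per-image mask that keeps only the largest
# annotation. A numpy sketch with fabricated bounding-box areas and annotation ids:
import numpy as np

bbox_area_per_image = [[10.0, 250.0, 40.0], [99.0], [5.0, 7.0]]
aids_per_image = [[1, 2, 3], [4], [5, 6]]

small_aids = []
for areas, aids in zip(bbox_area_per_image, aids_per_image):
    keep_idx = int(np.argmax(areas))  # the largest annotation in this image stays
    small_aids.extend(aid for i, aid in enumerate(aids) if i != keep_idx)

print(small_aids)  # [1, 3, 5] -- everything except the largest per image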
def split_analysis(ibs): """ CommandLine: python -m ibeis.other.dbinfo split_analysis --show python -m ibeis split_analysis --show python -m ibeis split_analysis --show --good Ignore: # mount sshfs -o idmap=user lev:/ ~/lev # unmount fusermount -u ~/lev Example: >>> # DISABLE_DOCTEST GGR >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> dbdir = '/media/danger/GGR/GGR-IBEIS' >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS') >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False) >>> import guitool_ibeis as gt >>> gt.ensure_qtapp() >>> win = split_analysis(ibs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> gt.qtapp_loop(qwin=win) >>> #ut.show_if_requested() """ #nid_list = ibs.get_valid_nids(filter_empty=True) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) filter_kw = { 'multiple': None, #'view': ['right'], #'minqual': 'good', 'is_known': True, 'min_pername': 1, } aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)), }) ) aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }) ) all_aids = aids1 + aids2 all_annots = ibs.annots(all_aids) print('%d annots on day 1' % (len(aids1)) ) print('%d annots on day 2' % (len(aids2)) ) print('%d annots overall' % (len(all_annots)) ) print('%d names overall' % (len(ut.unique(all_annots.nids))) ) nid_list, annots_list = all_annots.group(all_annots.nids) REVIEWED_EDGES = True if REVIEWED_EDGES: aids_list = [annots.aids for annots in annots_list] #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list] # Slower aid_pairs = ibs.get_unflat_am_aidpairs(aids_list) # Faster else: # ALL EDGES aid_pairs = [annots.get_aidpairs() for annots in annots_list] speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs) import vtool_ibeis as vt max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list]) nan_idx = np.where(np.isnan(max_speeds))[0] inf_idx = np.where(np.isinf(max_speeds))[0] bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx]))) ok_idx = ut.index_complement(bad_idx, len(max_speeds)) print('#nan_idx = %r' % (len(nan_idx),)) print('#inf_idx = %r' % (len(inf_idx),)) print('#ok_idx = %r' % (len(ok_idx),)) ok_speeds = max_speeds[ok_idx] ok_nids = ut.take(nid_list, ok_idx) ok_annots = ut.take(annots_list, ok_idx) sortx = np.argsort(ok_speeds)[::-1] sorted_speeds = np.array(ut.take(ok_speeds, sortx)) sorted_annots = np.array(ut.take(ok_annots, sortx)) sorted_nids = np.array(ut.take(ok_nids, sortx)) # NOQA sorted_speeds = np.clip(sorted_speeds, 0, 100) #idx = vt.find_elbow_point(sorted_speeds) #EXCESSIVE_SPEED = sorted_speeds[idx] # http://www.infoplease.com/ipa/A0004737.html # http://www.speedofanimals.com/animals/zebra #ZEBRA_SPEED_MAX = 64 # km/h #ZEBRA_SPEED_RUN = 50 # km/h ZEBRA_SPEED_SLOW_RUN = 20 # km/h #ZEBRA_SPEED_FAST_WALK = 10 # km/h #ZEBRA_SPEED_WALK = 7 # km/h MAX_SPEED = ZEBRA_SPEED_SLOW_RUN #MAX_SPEED = ZEBRA_SPEED_WALK #MAX_SPEED = EXCESSIVE_SPEED flags = sorted_speeds > MAX_SPEED flagged_ok_annots = ut.compress(sorted_annots, flags) inf_annots = ut.take(annots_list, inf_idx) flagged_annots = inf_annots + flagged_ok_annots print('MAX_SPEED = %r km/h' % (MAX_SPEED,)) print('%d annots 
with infinite speed' % (len(inf_annots),)) print('%d annots with large speed' % (len(flagged_ok_annots),)) print('Marking all pairs of annots above the threshold as non-matching') from ibeis.algo.graph import graph_iden import networkx as nx progkw = dict(freq=1, bs=True, est_window=len(flagged_annots)) bad_edges_list = [] good_edges_list = [] for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw): edge_to_speeds = annots.get_speeds() bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED] bad_edges_list.append(bad_edges) good_edges_list.append(good_edges) all_bad_edges = ut.flatten(bad_edges_list) good_edges_list = ut.flatten(good_edges_list) print('num_bad_edges = %r' % (len(ut.flatten(bad_edges_list)),)) print('num_bad_edges = %r' % (len(ut.flatten(good_edges_list)),)) if 1: from ibeis.viz import viz_graph2 import guitool_ibeis as gt gt.ensure_qtapp() if ut.get_argflag('--good'): print('Looking at GOOD (no speed problems) edges') aid_pairs = good_edges_list else: print('Looking at BAD (speed problems) edges') aid_pairs = all_bad_edges aids = sorted(list(set(ut.flatten(aid_pairs)))) infr = graph_iden.AnnotInference(ibs, aids, verbose=False) infr.initialize_graph() # Use random scores to randomize sort order rng = np.random.RandomState(0) scores = (-rng.rand(len(aid_pairs)) * 10).tolist() infr.graph.add_edges_from(aid_pairs) if True: edge_sample_size = 250 pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs)))) sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size] sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0)) sample_size = len(ut.unique(sorted_nids)) am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs)) flags = ut.not_list(ut.flag_None_items(am_rowids)) #am_rowids = ut.compress(am_rowids, flags) positive_tags = ['SplitCase', 'Photobomb'] flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0) for tag in positive_tags] print('edge_case_hist: ' + ut.repr3( ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)])) is_positive = ut.or_lists(*flags_list) num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values())) pop = len(pop_nids) print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),)) print('--- Sampling wrt edges ---') print('edge_sample_size = %r' % (edge_sample_size,)) print('edge_population_size = %r' % (len(aid_pairs),)) print('num_positive_edges = %r' % (sum(is_positive))) print('--- Sampling wrt names ---') print('name_population_size = %r' % (pop,)) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95) nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores))) win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False, init_mode=None) win.populate_edge_model() win.show() return win # Make review interface for only bad edges infr_list = [] iter_ = list(zip(flagged_annots, bad_edges_list)) for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw): aids = annots.aids nids = [1] * len(aids) infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False) infr.initialize_graph() infr.reset_feedback() infr_list.append(infr) # Check which ones are user defined as incorrect #num_positive = 0 #for infr in infr_list: # flag = np.any(infr.get_feedback_probs()[0] == 0) # num_positive += flag 
#print('num_positive = %r' % (num_positive,)) #pop = len(infr_list) #print('pop = %r' % (pop,)) iter_ = list(zip(infr_list, bad_edges_list)) for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw): flipped_edges = [] for aid1, aid2 in bad_edges: if infr.graph.has_edge(aid1, aid2): flipped_edges.append((aid1, aid2)) infr.add_feedback((aid1, aid2), NEGTV) nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig') nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges}) nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges}) #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw): # annots = ibs.annots(infr.aids) # edge_to_speeds = annots.get_speeds() # bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] def inference_stats(infr_list_): relabel_stats = [] for infr in infr_list_: num_ccs, num_inconsistent = infr.relabel_using_reviews() state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values()) if POSTV not in state_hist: state_hist[POSTV] = 0 hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values()) subgraphs = infr.positive_connected_compoments() subgraph_sizes = [len(g) for g in subgraphs] info = ut.odict([ ('num_nonmatch_edges', state_hist[NEGTV]), ('num_match_edges', state_hist[POSTV]), ('frac_nonmatch_edges', state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])), ('num_inconsistent', num_inconsistent), ('num_ccs', num_ccs), ('edges_flipped', hist.get('flip', 0)), ('edges_unchanged', hist.get('orig', 0)), ('bad_unreviewed_edges', hist.get('new', 0)), ('orig_size', len(infr.graph)), ('new_sizes', subgraph_sizes), ]) relabel_stats.append(info) return relabel_stats relabel_stats = inference_stats(infr_list) print('\nAll Split Info:') lines = [] for key in relabel_stats[0].keys(): data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent')) can_split_flags = num_incon_list == 0 print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags))) splittable_infrs = ut.compress(infr_list, can_split_flags) relabel_stats = inference_stats(splittable_infrs) print('\nTrival Split Info:') lines = [] for key in relabel_stats[0].keys(): if key in ['num_inconsistent']: continue data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % ( key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges')) num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges')) flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3) reasonable_infr = ut.compress(splittable_infrs, flags1) new_sizes_list = ut.take_column(relabel_stats, 'new_sizes') flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3 for sizes in new_sizes_list] reasonable_infr = ut.compress(splittable_infrs, flags2) print('#reasonable_infr = %r' % (len(reasonable_infr),)) for infr in ut.InteractiveIter(reasonable_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' 
% (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) rest = ~np.logical_or(flags1, flags2) nonreasonable_infr = ut.compress(splittable_infrs, rest) rng = np.random.RandomState(0) random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng) random_infr = ut.take(nonreasonable_infr, random_idx) for infr in ut.InteractiveIter(random_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' % (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) #import scipy.stats as st #conf_interval = .95 #st.norm.cdf(conf_interval) # view-source:http://www.surveysystem.com/sscalc.htm #zval = 1.96 # 95 percent confidence #zValC = 3.8416 # #zValC = 6.6564 #import statsmodels.stats.api as sms #es = sms.proportion_effectsize(0.5, 0.75) #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1) pop = 279 num_positive = 3 sample_size = 15 conf_level = .95 #conf_level = .99 vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95) pop = 279 #err_frac = .05 # 5% err_frac = .10 # 10% conf_level = .95 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) pop = 675 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1) vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2) vt.calc_sample_from_error_bars(.10, pop, conf_level=.68) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
def num_uncolored(self):
    return sum(ut.lmap(int, self.type2_manas.get('uncolored', [])))
ut.set_project_repos(IBEIS_REPO_URLS, IBEIS_REPO_DIRS)
ut.gg_command('{pythoncmd} setup.py develop'.format(**locals()),
              sudo=not ut.in_virtual_env())

if GET_ARGFLAG('--install'):
    # Dont use this if you are a developer. Use develop instead.
    ut.set_project_repos(IBEIS_REPO_URLS, IBEIS_REPO_DIRS)
    ut.gg_command('python setup.py install'.format(**locals()))

if GET_ARGFLAG('--test'):
    failures = []
    for repo_dpath in IBEIS_REPO_DIRS:
        # ut.getp_
        mod_dpaths = ut.get_submodules_from_dpath(repo_dpath, recursive=False,
                                                  only_packages=True)
        modname_list = ut.lmap(ut.get_modname_from_modpath, mod_dpaths)
        print('Checking modules = %r' % (modname_list,))

        for modname in modname_list:
            try:
                ut.import_modname(modname)
                print(modname + ' success')
            except ImportError as ex:
                failures += [modname]
                print(modname + ' failure')

    print('failures = %s' % (ut.repr3(failures),))
    # print('repo_dpath = %r' % (repo_dpath,))
    # print('modules = %r' % (modules,))

# import ibeis
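# ut.import_modname wraps standard dynamic-import machinery; the same import smoke test
# can be written with importlib alone. The module names below are placeholders.
import importlib

modname_list = ['json', 'csv', 'not_a_real_module']
failures = []
for modname in modname_list:
    try:
        importlib.import_module(modname)
        print(modname + ' success')
    except ImportError:
        failures.append(modname)
        print(modname + ' failure')
print('failures = %r' % (failures,))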
def update_bindings(): r""" Returns: dict: matchtups CommandLine: python ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings utprof.py ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings Example: >>> # DISABLE_DOCTEST >>> from autogen_bindings import * # NOQA >>> import sys >>> import utool as ut >>> sys.path.append(ut.truepath('~/local/build_scripts/flannscripts')) >>> matchtups = update_bindings() >>> result = ('matchtups = %s' % (ut.repr2(matchtups),)) >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> ut.show_if_requested() """ from os.path import basename import difflib import numpy as np import re binding_names = [ 'build_index', 'used_memory', 'add_points', 'remove_point', 'compute_cluster_centers', 'load_index', 'save_index', 'find_nearest_neighbors', 'radius_search', 'remove_points', 'free_index', 'find_nearest_neighbors_index', # 'size', # 'veclen', # 'get_point', # 'flann_get_distance_order', # 'flann_get_distance_type', # 'flann_log_verbosity', # 'clean_removed_points', ] _places = [ '~/code/flann/src/cpp/flann/flann.cpp', '~/code/flann/src/cpp/flann/flann.h', '~/code/flann/src/python/pyflann/flann_ctypes.py', '~/code/flann/src/python/pyflann/index.py', ] eof_sentinals = { # 'flann_ctypes.py': '# END DEFINE BINDINGS', 'flann_ctypes.py': 'def ensure_2d_array(arr', # 'flann.h': '// END DEFINE BINDINGS', 'flann.h': '#ifdef __cplusplus', 'flann.cpp': None, 'index.py': None, } block_sentinals = { 'flann.h': re.escape('/**'), 'flann.cpp': 'template *<typename Distance>', # 'flann_ctypes.py': '\n', 'flann_ctypes.py': 'flann\.[a-z_.]* =', # 'index.py': ' def .*', 'index.py': ' [^ ].*', } places = { basename(fpath): fpath for fpath in ut.lmap(ut.truepath, _places) } text_dict = ut.map_dict_vals(ut.readfrom, places) lines_dict = {key: val.split('\n') for key, val in text_dict.items()} orig_texts = text_dict.copy() # NOQA binding_defs = {} named_blocks = {} print('binding_names = %r' % (binding_names, )) for binding_name in binding_names: blocks, defs = autogen_parts(binding_name) binding_defs[binding_name] = defs named_blocks[binding_name] = blocks for binding_name in ut.ProgIter(binding_names): ut.colorprint('+--- GENERATE BINDING %s -----' % (binding_name, ), 'yellow') blocks_dict = named_blocks[binding_name] for key in places.keys(): ut.colorprint( '---- generating %s for %s -----' % ( binding_name, key, ), 'yellow') # key = 'flann_ctypes.py' # print(text_dict[key]) old_text = text_dict[key] line_list = lines_dict[key] #text = old_text block = blocks_dict[key] debug = ut.get_argflag('--debug') # debug = True # if debug: # print(ut.highlight_code(block, splitext(key)[1])) # Find a place in the code that already exists searchblock = block if key.endswith('.cpp') or key.endswith('.h'): searchblock = re.sub(ut.REGEX_C_COMMENT, '', searchblock, flags=re.MULTILINE | re.DOTALL) searchblock = '\n'.join(searchblock.splitlines()[0:3]) # @ut.cached_func(verbose=False) def cached_match(old_text, searchblock): def isjunk(x): return False return x in ' \t,*()' def isjunk2(x): return x in ' \t,*()' # Not sure why the first one just doesnt find it # isjunk = None sm = difflib.SequenceMatcher(isjunk, old_text, searchblock, autojunk=False) sm0 = difflib.SequenceMatcher(isjunk, old_text, searchblock, autojunk=True) sm1 = difflib.SequenceMatcher(isjunk2, old_text, searchblock, autojunk=False) sm2 = difflib.SequenceMatcher(isjunk2, old_text, searchblock, autojunk=True) matchtups = (sm.get_matching_blocks() + sm0.get_matching_blocks() 
+ sm1.get_matching_blocks() + sm2.get_matching_blocks()) return matchtups matchtups = cached_match(old_text, searchblock) # Find a reasonable match in matchtups found = False if debug: # print('searchblock =\n%s' % (searchblock,)) print('searchblock = %r' % (searchblock, )) for (a, b, size) in matchtups: matchtext = old_text[a:a + size] pybind = binding_defs[binding_name]['py_binding_name'] if re.search(binding_name + '\\b', matchtext) or re.search( pybind + '\\b', matchtext): found = True pos = a + size if debug: print('MATCHING TEXT') print(matchtext) break else: if debug and 0: print('Not matching') print('matchtext = %r' % (matchtext, )) matchtext2 = old_text[a - 10:a + size + 20] print('matchtext2 = %r' % (matchtext2, )) if found: linelens = np.array(ut.lmap(len, line_list)) + 1 sumlen = np.cumsum(linelens) row = np.where(sumlen < pos)[0][-1] + 1 #print(line_list[row]) # Search for extents of the block to overwrite block_sentinal = block_sentinals[key] row1 = ut.find_block_end(row, line_list, block_sentinal, -1) - 1 row2 = ut.find_block_end(row + 1, line_list, block_sentinal, +1) eof_sentinal = eof_sentinals[key] if eof_sentinal is not None: print('eof_sentinal = %r' % (eof_sentinal, )) row2 = min([ count for count, line in enumerate(line_list) if line.startswith(eof_sentinal) ][-1], row2) nr = len((block + '\n\n').splitlines()) new_line_list = ut.insert_block_between_lines( block + '\n', row1, row2, line_list) rtext1 = '\n'.join(line_list[row1:row2]) rtext2 = '\n'.join(new_line_list[row1:row1 + nr]) if debug: print('-----') ut.colorprint('FOUND AND REPLACING %s' % (binding_name, ), 'yellow') print(ut.highlight_code(rtext1)) if debug: print('-----') ut.colorprint( 'FOUND AND REPLACED WITH %s' % (binding_name, ), 'yellow') print(ut.highlight_code(rtext2)) if not ut.get_argflag('--diff') and not debug: print( ut.color_diff_text( ut.difftext(rtext1, rtext2, num_context_lines=7, ignore_whitespace=True))) else: # Append to end of the file eof_sentinal = eof_sentinals[key] if eof_sentinal is None: row2 = len(line_list) - 1 else: row2_choice = [ count for count, line in enumerate(line_list) if line.startswith(eof_sentinal) ] if len(row2_choice) == 0: row2 = len(line_list) - 1 assert False else: row2 = row2_choice[-1] - 1 # row1 = row2 - 1 # row2 = row2 - 1 row1 = row2 new_line_list = ut.insert_block_between_lines( block + '\n', row1, row2, line_list) # block + '\n\n\n', row1, row2, line_list) rtext1 = '\n'.join(line_list[row1:row2]) nr = len((block + '\n\n').splitlines()) rtext2 = '\n'.join(new_line_list[row1:row1 + nr]) if debug: print('-----') ut.colorprint( 'NOT FOUND AND REPLACING %s' % (binding_name, ), 'yellow') print(ut.highlight_code(rtext1)) if debug: print('-----') ut.colorprint( 'NOT FOUND AND REPLACED WITH %s' % (binding_name, ), 'yellow') print(ut.highlight_code(rtext2)) if not ut.get_argflag('--diff') and not debug: print( ut.color_diff_text( ut.difftext(rtext1, rtext2, num_context_lines=7, ignore_whitespace=True))) text_dict[key] = '\n'.join(new_line_list) lines_dict[key] = new_line_list ut.colorprint('L___ GENERATED BINDING %s ___' % (binding_name, ), 'yellow') for key in places: new_text = '\n'.join(lines_dict[key]) #ut.writeto(ut.augpath(places[key], '.new'), new_text) ut.writeto(ut.augpath(places[key]), new_text) for key in places: if ut.get_argflag('--diff'): difftext = ut.get_textdiff(orig_texts[key], new_text, num_context_lines=7, ignore_whitespace=True) difftext = ut.color_diff_text(difftext) print(difftext)
def testdata_depc(fname=None): """ Example of local registration """ import dtool import vtool as vt gpath_list = ut.lmap(ut.grab_test_imgpath, ut.get_valid_test_imgkeys(), verbose=False) dummy_root = 'dummy_annot' def get_root_uuid(aid_list): return ut.lmap(ut.hashable_to_uuid, aid_list) # put the test cache in the dtool repo dtool_repo = dirname(ut.get_module_dir(dtool)) cache_dpath = join(dtool_repo, 'DEPCACHE') depc = dtool.DependencyCache( root_tablename=dummy_root, default_fname=fname, cache_dpath=cache_dpath, get_root_uuid=get_root_uuid, #root_asobject=root_asobject, use_globals=False) @depc.register_preproc(tablename='chip', parents=[dummy_root], colnames=['size', 'chip'], coltypes=[(int, int), ('extern', vt.imread, vt.imwrite)], configclass=DummyChipConfig) def dummy_preproc_chip(depc, annot_rowid_list, config=None): """ TODO: Infer properties from docstr? Args: depc (dtool.DependencyCache): annot_rowid_list (list): list of annot rowids config (dict): config dictionary Returns: tuple : ((int, int), ('extern', vt.imread)) """ if config is None: config = {} # Demonstates using asobject to get input to function as a dictionary # of properties #for annot in annot_list: #print('[preproc] Computing chips of aid=%r' % (aid,)) print('[preproc] Computing chips') for aid in annot_rowid_list: #aid = annot['aid'] #chip_fpath = annot['gpath'] chip_fpath = gpath_list[aid] #w, h = vt.image.open_image_size(chip_fpath) chip = vt.imread(chip_fpath) size = vt.get_size(chip) #size = (w, h) #print('* chip_fpath = %r' % (chip_fpath,)) #print('* size = %r' % (size,)) #yield size, chip_fpath yield size, chip @depc.register_preproc( 'probchip', [dummy_root], ['size', 'probchip'], coltypes=[(int, int), ('extern', vt.imread, vt.imwrite, '.png')], configclass=ProbchipConfig, ) def dummy_preproc_probchip(depc, root_rowids, config): print('[preproc] Computing probchip') for rowid in root_rowids: if config['testerror']: if rowid % 2 == 0: # Test error yeilds None on even rowids yield None continue rng = np.random.RandomState(rowid) probchip = rng.randint(0, 255, size=(64, 64)) #probchip = np.zeros((64, 64)) size = (rowid, rowid) yield size, probchip @depc.register_preproc( 'keypoint', ['chip'], ['kpts', 'num'], [np.ndarray, int], #default_onthefly=True, configclass=DummyKptsConfig, docstr='Used to store individual chip features (ellipses)',) def dummy_preproc_kpts(depc, chip_rowids, config=None): if config is None: config = {} print('config = %r' % (config,)) adapt_shape = config['adapt_shape'] print('[preproc] Computing kpts') for rowid in chip_rowids: if adapt_shape: kpts = np.zeros((7 + rowid, 6)) + rowid else: kpts = np.ones((7 + rowid, 6)) + rowid num = len(kpts) yield kpts, num @depc.register_preproc('descriptor', ['keypoint'], ['vecs'], [np.ndarray],) def dummy_preproc_vecs(depc, kp_rowid, config=None): if config is None: config = {} print('[preproc] Computing vecs') for rowid in kp_rowid: yield np.ones((7 + rowid, 8), dtype=np.uint8) + rowid, @depc.register_preproc('fgweight', ['keypoint', 'probchip'], ['fgweight'], [np.ndarray],) def dummy_preproc_fgweight(depc, kpts_rowid, probchip_rowid, config=None): if config is None: config = {} print('[preproc] Computing fgweight') for rowid1, rowid2 in zip(kpts_rowid, probchip_rowid): yield np.ones(7 + rowid1), @depc.register_preproc( tablename='vsmany', colnames='annotmatch', coltypes=DummyAnnotMatch, requestclass=DummyVsManyRequest, configclass=DummyVsManyConfig) def vsmany_matching(depc, qaids, config=None): """ CommandLine: python -m dtool.base 
--exec-VsManySimilarityRequest """ print('RUNNING DUMMY VSMANY ALGO') daids = config.daids qaids = qaids sver_on = config.dummy_sver_cfg['sver_on'] kpts_list = depc.get_property('keypoint', qaids) # NOQA #dummy_preproc_kpts for qaid in qaids: dnid_list = [1, 1, 2, 2] unique_nids = [1, 2] if sver_on: annot_score_list = [.2, .2, .4, .5] name_score_list = [.2, .5] else: annot_score_list = [.3, .3, .6, .9] name_score_list = [.1, .7] annot_match = DummyAnnotMatch(qaid, daids, dnid_list, annot_score_list, unique_nids, name_score_list) yield annot_match SIMPLE = 0 if not SIMPLE: @depc.register_preproc( tablename='chipmask', parents=[dummy_root], colnames=['size', 'mask'], coltypes=[(int, int), ('extern', vt.imread, vt.imwrite)]) def dummy_manual_chipmask(depc, parent_rowids, config=None): import vtool as vt from plottool import interact_impaint mask_dpath = join(depc.cache_dpath, 'ManualChipMask') ut.ensuredir(mask_dpath) if config is None: config = {} print('Requesting user defined chip mask') for rowid in parent_rowids: img = vt.imread(gpath_list[rowid]) mask = interact_impaint.impaint_mask2(img) mask_fpath = join(mask_dpath, 'mask%d.png' % (rowid,)) vt.imwrite(mask_fpath, mask) w, h = vt.get_size(mask) yield (w, h), mask_fpath @depc.register_preproc('notch', [dummy_root], ['notchdata'], [np.ndarray],) def dummy_preproc_notch(depc, parent_rowids, config=None): if config is None: config = {} print('[preproc] Computing notch') for rowid in parent_rowids: yield np.empty(5 + rowid), @depc.register_preproc( 'spam', ['fgweight', 'chip', 'keypoint'], ['spam', 'eggs', 'size', 'uuid', 'vector', 'textdata'], [str, int, (int, int), uuid.UUID, np.ndarray, ('extern', ut.readfrom)], docstr='I dont like spam',) def dummy_preproc_spam(depc, *args, **kwargs): config = kwargs.get('config', None) if config is None: config = {} print('[preproc] Computing spam') ut.writeto('tmp.txt', ut.lorium_ipsum()) for x in zip(*args): size = (42, 21) uuid = ut.get_zero_uuid() vector = np.ones(3) yield ('spam', 3665, size, uuid, vector, 'tmp.txt') @depc.register_preproc( 'nnindexer', ['keypoint*'], ['flann'], [str], # [('extern', ut.load_data)], configclass=DummyIndexerConfig, ) def dummy_preproc_indexer(depc, parent_rowids_list, config=None): print('COMPUTING DUMMY INDEXER') #assert len(parent_rowids_list) == 1, 'handles only one indexer' for parent_rowids in parent_rowids_list: yield ('really cool flann object' + str(config.get_cfgstr()) + ' ' + str(parent_rowids),) @depc.register_preproc( 'notchpair', ['notch', 'notch'], ['pairscore'], [int], # [('extern', ut.load_data)], #configclass=DummyIndexerConfig, ) def dummy_notchpair(depc, n1, n2, config=None): print('COMPUTING MULTITEST 1 ') #assert len(parent_rowids_list) == 1, 'handles only one indexer' for nn1, nn2 in zip(n1, n2): yield (nn1 + nn2,) @depc.register_preproc( 'multitest', ['keypoint', 'notch', 'notch', 'fgweight*', 'notchpair*', 'notchpair*', 'notchpair', 'nnindexer'], ['foo'], [str], # [('extern', ut.load_data)], #configclass=DummyIndexerConfig, ) def dummy_multitest(depc, *args, **kwargs): print('COMPUTING MULTITEST 1 ') #assert len(parent_rowids_list) == 1, 'handles only one indexer' for x in zip(args): yield ('cool multi object' + str(kwargs) + ' ' + str(x),) # TEST MULTISET DEPENDENCIES @depc.register_preproc( 'multitest_score', ['multitest'], ['score'], [int], # [('extern', ut.load_data)], #configclass=DummyIndexerConfig, ) def dummy_multitest_score(depc, parent_rowids, config=None): print('COMPUTING DEPENDENCY OF MULTITEST 1 ') #assert 
len(parent_rowids_list) == 1, 'handles only one indexer' for parent_rowids in zip(parent_rowids): yield (parent_rowids,) # TEST MULTISET DEPENDENCIES @depc.register_preproc( 'multitest_score_x', ['multitest_score', 'multitest_score'], ['score'], [int], # [('extern', ut.load_data)], #configclass=DummyIndexerConfig, ) def multitest_score_x(depc, *args, **kwargs): raise NotImplementedError('hack') # REGISTER MATCHING ALGORITHMS @depc.register_preproc(tablename='neighbs', colnames=['qx2_idx', 'qx2_dist'], coltypes=[np.ndarray, np.ndarray], parents=['keypoint', 'fgweight', 'nnindexer', 'nnindexer']) def neighbs(depc, *args, **kwargs): """ CommandLine: python -m dtool.base --exec-VsManySimilarityRequest """ #dummy_preproc_kpts for qaid in zip(args): yield np.array([qaid]), np.array([qaid]) @depc.register_preproc(tablename='neighbs_score', colnames=['qx2_dist'], coltypes=[np.ndarray], parents=['neighbs']) def neighbs_score(depc, *args, **kwargs): """ CommandLine: python -m dtool.base --exec-VsManySimilarityRequest """ raise NotImplementedError('hack') @depc.register_preproc( 'vsone', [dummy_root, dummy_root], ['score', 'match_obj', 'fm'], [float, DummyVsOneMatch, np.ndarray], requestclass=DummyVsOneRequest, configclass=DummyVsOneConfig, chunksize=2 ) def vsone_matching(depc, qaids, daids, config): """ CommandLine: python -m dtool.base --exec-VsOneSimilarityRequest """ print('RUNNING DUMMY VSONE ALGO') for qaid, daid in zip(qaids, daids): match = DummyVsOneMatch() match.qaid = qaid match.daid = daid match.fm = np.array([[1, 2], [3, 4]]) score = match.score = qaid + daid yield (score, match, match.fm) # table = depc['spam'] # print(ut.repr2(table.get_addtable_kw(), nl=2)) depc.initialize() # table.print_schemadef() # print(table.db.get_schema_current_autogeneration_str()) return depc
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True): r""" CommandLine: python -m wbia.algo.hots.bayes --exec-try_query --show Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.hots.bayes import * # NOQA >>> verbose = True >>> other_evidence = {} >>> name_evidence = [1, None, 0, None] >>> score_evidence = ['high', 'low', 'low'] >>> query_vars = None >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1) >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence) >>> interest_ttypes = ['name'] >>> infr = pgmpy.inference.BeliefPropagation(model) >>> evidence = infr._ensure_internal_evidence(evidence, model) >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose) >>> result = ('query_results = %s' % (str(query_results),)) >>> ut.quit_if_noshow() >>> show_model(model, show_prior=True, **query_results) >>> ut.show_if_requested() Ignore: query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys())) probs = infr.query(query_vars, evidence) map_assignment = infr.map_query(query_vars, evidence) """ infr = pgmpy.inference.VariableElimination(model) # infr = pgmpy.inference.BeliefPropagation(model) if True: return bruteforce(model, query_vars=None, evidence=evidence) else: import vtool as vt query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys())) # hack query_vars = ut.setdiff_ordered( query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable')) if verbose: evidence_str = ', '.join(model.pretty_evidence(evidence)) logger.info('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ') # Compute MAP joints # There is a bug here. # map_assign = infr.map_query(query_vars, evidence) # (probably an invalid thing to do) # joint_factor = pgmpy.factors.factor_product(*factor_list) # Brute force MAP name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable') query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys())) # TODO: incorporate case where Na is assigned to Fred # evidence_h = ut.delete_keys(evidence.copy(), ['Na']) joint = model.joint_distribution() joint.evidence_based_reduction(query_name_vars, evidence, inplace=True) # Find static row labels in the evidence given_name_vars = [var for var in name_vars if var in evidence] given_name_idx = ut.dict_take(evidence, given_name_vars) given_name_val = [ joint.statename_dict[var][idx] for var, idx in zip(given_name_vars, given_name_idx) ] new_vals = joint.values.ravel() # Add static evidence variables to the relabeled name states new_vars = given_name_vars + joint.variables new_rows = [tuple(given_name_val) + row for row in joint._row_labels()] # Relabel rows based on the knowledge that # everything is the same, only the names have changed. 
temp_basis = [i for i in range(model.num_names)] def relabel_names(names, temp_basis=temp_basis): names = list(map(six.text_type, names)) mapping = {} for n in names: if n not in mapping: mapping[n] = len(mapping) new_names = tuple([temp_basis[mapping[n]] for n in names]) return new_names relabeled_rows = list(map(relabel_names, new_rows)) # Combine probability of rows with the same (new) label data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows)) unique_ids, groupxs = vt.group_indices(data_ids) reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0)) reduced_row_lbls = list(map(list, reduced_row_lbls)) reduced_values = np.array( [g.sum() for g in vt.apply_grouping(new_vals, groupxs)]) # Relabel the rows one more time to agree with initial constraints used_ = [] replaced = [] for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)): # All columns must be the same for this labeling alias = reduced_row_lbls[0][colx] reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val) replaced.append(alias) used_.append(val) basis = model.ttype2_cpds['name'][0]._template_.basis find_remain_ = ut.setdiff_ordered(temp_basis, replaced) repl_remain_ = ut.setdiff_ordered(basis, used_) for find, repl in zip(find_remain_, repl_remain_): reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl) # Now find the most likely state sortx = reduced_values.argsort()[::-1] sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist()) sort_reduced_values = reduced_values[sortx] # Remove evidence based labels new_vars_ = new_vars[len(given_name_vars):] sort_reduced_row_lbls_ = ut.get_list_column( sort_reduced_row_lbls, slice(len(given_name_vars), None)) sort_reduced_row_lbls_[0] # hack into a new joint factor var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_)) statename_dict = dict(zip(new_vars, var_states)) cardinality = ut.lmap(len, var_states) val_lookup = dict( zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values)) values = np.zeros(np.prod(cardinality)) for idx, state in enumerate(ut.iprod(*var_states)): if state in val_lookup: values[idx] = val_lookup[state] joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values, statename_dict=statename_dict) logger.info(joint2) max_marginals = {} for i, var in enumerate(query_name_vars): one_out = query_name_vars[:i] + query_name_vars[i + 1:] max_marginals[var] = joint2.marginalize(one_out, inplace=False) # max_marginals[var] = joint2.maximize(one_out, inplace=False) logger.info(joint2.marginalize(['Nb', 'Nc'], inplace=False)) factor_list = max_marginals.values() # Better map assignment based on knowledge of labels map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0])) sort_reduced_rowstr_lbls = [ ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True, strvals=True) for lbls in sort_reduced_row_lbls_ ] top_assignments = list( zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values)) if len(sort_reduced_values) > 3: top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))] # import utool # utool.embed() # Compute all marginals # probs = infr.query(query_vars, evidence) # probs = infr.query(query_vars, evidence) # factor_list = probs.values() ## Marginalize over non-query, non-evidence # irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars) # joint.marginalize(irrelevant_vars) # joint.normalize() # new_rows = joint._row_labels() # new_vals = joint.values.ravel() # map_vals = new_rows[new_vals.argmax()] # map_assign = 
dict(zip(joint.variables, map_vals)) # Compute Marginalized MAP joints # marginalized_joints = {} # for ttype in interest_ttypes: # other_vars = [v for v in joint_factor.scope() # if model.var2_cpd[v].ttype != ttype] # marginal = joint_factor.marginalize(other_vars, inplace=False) # marginalized_joints[ttype] = marginal query_results = { 'factor_list': factor_list, 'top_assignments': top_assignments, 'map_assign': map_assign, 'marginalized_joints': None, } return query_results
def wildbook_signal_annot_name_changes(ibs, aid_list=None, wb_target=None,
                                       dryrun=False):
    r"""
    Args:
        aid_list (int): list of annotation ids (default = None)
        tomcat_dpath (None): (default = None)
        wb_target (None): (default = None)
        dryrun (bool): (default = False)

    CommandLine:
        python -m ibeis wildbook_signal_annot_name_changes:0 --dryrun
        python -m ibeis wildbook_signal_annot_name_changes:1 --dryrun
        python -m ibeis wildbook_signal_annot_name_changes:1
        python -m ibeis wildbook_signal_annot_name_changes:2

    Setup:
        >>> wb_target = None
        >>> dryrun = ut.get_argflag('--dryrun')

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.control.manual_wildbook_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> #gid_list = ibs.get_valid_gids()[0:10]
        >>> gid_list = ibs.get_valid_gids()[3:5]
        >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list))
        >>> # Test case where some names change, some do not. There are no new names.
        >>> old_nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> new_nid_list = ut.list_roll(old_nid_list, 1)
        >>> ibs.set_annot_name_rowids(aid_list, new_nid_list)
        >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun)
        >>> ibs.set_annot_name_rowids(aid_list, old_nid_list)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.control.manual_wildbook_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> #gid_list = ibs.get_valid_gids()[0:10]
        >>> gid_list = ibs.get_valid_gids()[3:5]
        >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list))
        >>> # Test case where all names change to one known name
        >>> #old_nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> #new_nid_list = [old_nid_list[0]] * len(old_nid_list)
        >>> old_nid_list = [1, 2]
        >>> new_nid_list = [1, 1]
        >>> print('old_nid_list = %r' % (old_nid_list,))
        >>> print('new_nid_list = %r' % (new_nid_list,))
        >>> ibs.set_annot_name_rowids(aid_list, new_nid_list)
        >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun)
        >>> # Undo changes here (not undone in wildbook)
        >>> #ibs.set_annot_name_rowids(aid_list, old_nid_list)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.control.manual_wildbook_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> gid_list = ibs.get_valid_gids()[3:5]
        >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list))
        >>> old_nid_list = [1, 2]
        >>> ibs.set_annot_name_rowids(aid_list, old_nid_list)
        >>> # Signal what currently exists (should put them back to normal)
        >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun)
    """
    print('[ibs.wildbook_signal_imgsetid_list] signaling annot name changes to wildbook')
    wb_url = ibs.get_wildbook_base_url(wb_target)
    try:
        ibs.assert_ia_available_for_wb(wb_target)
    except Exception:
        pass
    if aid_list is None:
        aid_list = ibs.get_valid_aids(is_known=True)
    annot_uuid_list = ibs.get_annot_uuids(aid_list)
    annot_name_text_list = ibs.get_annot_name_texts(aid_list)
    grouped_uuids = ut.group_items(annot_uuid_list, annot_name_text_list)
    url = wb_url + '/ia'
    payloads = [
        {'resolver': {'assignNameToAnnotations': {
            'name': new_name,
            'annotationIds': ut.lmap(str, annot_uuids),
        }}}
        for new_name, annot_uuids in grouped_uuids.items()
    ]
    status_list = []
    for json_payload in ut.ProgressIter(payloads, lbl='submitting URL', freq=1):
        print('[_send] URL=%r with json_payload=%r' % (url, json_payload))
        if dryrun:
            status = False
        else:
            response = requests.post(url, json=json_payload)
            status = response.status_code == 200
            if not status:
                print('Failed to push new names')
                print(response.text)
        status_list.append(status)
    return status_list
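# The payload construction above groups annotation UUIDs by their new name and wraps each
# group in Wildbook's assignNameToAnnotations resolver. A standard-library sketch with
# fabricated UUIDs and names (no network call is made here):
from collections import defaultdict

annot_uuid_list = ['uuid-1', 'uuid-2', 'uuid-3']
annot_name_text_list = ['zebra_a', 'zebra_b', 'zebra_a']

grouped_uuids = defaultdict(list)
for uuid_, name in zip(annot_uuid_list, annot_name_text_list):
    grouped_uuids[name].append(uuid_)

payloads = [
    {'resolver': {'assignNameToAnnotations': {
        'name': new_name,
        'annotationIds': [str(u) for u in annot_uuids],
    }}}
    for new_name, annot_uuids in grouped_uuids.items()
]
for payload in payloads:
    print(payload)  # what would be POSTed to <wb_url>/ia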
def ggr_random_name_splits(): """ CommandLine: python -m wbia.viz.viz_graph2 ggr_random_name_splits --show Ignore: sshfs -o idmap=user lev:/ ~/lev Example: >>> # DISABLE_DOCTEST >>> from wbia.viz.viz_graph2 import * # NOQA >>> ggr_random_name_splits() """ import wbia.guitool as gt gt.ensure_qtapp() # nid_list = ibs.get_valid_nids(filter_empty=True) import wbia dbdir = '/media/danger/GGR/GGR-IBEIS' dbdir = (dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')) ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) orig_filter_kw = { 'multiple': None, # 'view': ['right'], # 'minqual': 'good', 'is_known': True, 'min_pername': 2, } orig_aids = ibs.filter_annots_general(filter_kw=ut.dict_union( orig_filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }, )) orig_all_annots = ibs.annots(orig_aids) orig_unique_nids, orig_grouped_annots_ = orig_all_annots.group( orig_all_annots.nids) # Ensure we get everything orig_grouped_annots = [ ibs.annots(aids_) for aids_ in ibs.get_name_aids(orig_unique_nids) ] # pip install quantumrandom if False: import quantumrandom data = quantumrandom.uint16() seed = data.sum() print('seed = %r' % (seed, )) # import Crypto.Random # from Crypto import Random # quantumrandom.get_data() # StrongRandom = Crypto.Random.random.StrongRandom # aes.reseed(3340258) # chars = [str(chr(x)) for x in data.view(np.uint8)] # aes_seed = str('').join(chars) # aes = Crypto.Random.Fortuna.FortunaGenerator.AESGenerator() # aes.reseed(aes_seed) # aes.pseudo_random_data(10) orig_rand_idxs = ut.random_indexes(len(orig_grouped_annots), seed=3340258) orig_sample_size = 75 random_annot_groups = ut.take(orig_grouped_annots, orig_rand_idxs) orig_annot_sample = random_annot_groups[:orig_sample_size] # OOOPS MADE ERROR REDO ---- filter_kw = { 'multiple': None, 'view': ['right'], 'minqual': 'good', 'is_known': True, 'min_pername': 2, } filter_kw_ = ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }, ) refiltered_sample = [ ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_) for annot in orig_annot_sample ] is_ok = np.array(ut.lmap(len, refiltered_sample)) >= 2 ok_part_orig_sample = ut.compress(orig_annot_sample, is_ok) ok_part_orig_nids = [x.nids[0] for x in ok_part_orig_sample] # Now compute real sample aids = ibs.filter_annots_general(filter_kw=filter_kw_) all_annots = ibs.annots(aids) unique_nids, grouped_annots_ = all_annots.group(all_annots.nids) grouped_annots = grouped_annots_ # Ensure we get everything # grouped_annots = [ibs.annots(aids_) for aids_ in ibs.get_name_aids(unique_nids)] pop = len(grouped_annots) pername_list = ut.lmap(len, grouped_annots) groups = wbia.annots.AnnotGroups(grouped_annots, ibs) match_tags = [ut.unique(ut.flatten(t)) for t in groups.match_tags] tag_case_hist = ut.dict_hist(ut.flatten(match_tags)) print('name_pop = %r' % (pop, )) print('Annots per Multiton Name' + ut.repr3(ut.get_stats(pername_list, use_median=True))) print('Name Tag Hist ' + ut.repr3(tag_case_hist)) print('Percent Photobomb: %.2f%%' % (tag_case_hist['photobomb'] / pop * 100)) print('Percent Split: %.2f%%' % (tag_case_hist['splitcase'] / pop * 100)) # Remove the ok part from this sample remain_unique_nids = ut.setdiff(unique_nids, ok_part_orig_nids) 
remain_grouped_annots = [ ibs.annots(aids_) for aids_ in ibs.get_name_aids(remain_unique_nids) ] sample_size = 75 import vtool as vt vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.05) remain_rand_idxs = ut.random_indexes(len(remain_grouped_annots), seed=3340258) remain_sample_size = sample_size - len(ok_part_orig_nids) remain_random_annot_groups = ut.take(remain_grouped_annots, remain_rand_idxs) remain_annot_sample = remain_random_annot_groups[:remain_sample_size] annot_sample_nofilter = ok_part_orig_sample + remain_annot_sample # Filter out all bad parts annot_sample_filter = [ ibs.annots(ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_)) for annot in annot_sample_nofilter ] annot_sample = annot_sample_filter win = None from wbia.viz import viz_graph2 for annots in ut.InteractiveIter(annot_sample): if win is not None: win.close() win = viz_graph2.make_qt_graph_interface(ibs, aids=annots.aids, init_mode='rereview') print(win) sample_groups = wbia.annots.AnnotGroups(annot_sample, ibs) flat_tags = [ut.unique(ut.flatten(t)) for t in sample_groups.match_tags] print('Using Split and Photobomb') is_positive = ['photobomb' in t or 'splitcase' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95) print('Only Photobomb') is_positive = ['photobomb' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95) print('Only SplitCase') is_positive = ['splitcase' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95)
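# The sampling above reports positive rates from a fixed-size sample of name
# groups and hands the interval math to vt.calc_error_bars_from_sample. As a
# rough stand-alone sketch (not the vtool implementation), a similar bound can
# be approximated with a normal-approximation binomial interval plus a
# finite-population correction; the 75/9/600 numbers below are made up.
import numpy as np

def approx_error_bars(sample_size, num_positive, population, z=1.96):
    # z = 1.96 corresponds to a two-sided 95% interval
    phat = float(num_positive) / sample_size
    # shrink the standard error because we sample without replacement
    fpc = np.sqrt((population - sample_size) / (population - 1.0))
    se = np.sqrt(phat * (1.0 - phat) / sample_size) * fpc
    return phat - z * se, phat + z * se

low, high = approx_error_bars(sample_size=75, num_positive=9, population=600)
print('estimated rate is in [%.1f%%, %.1f%%]' % (low * 100, high * 100))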
def wildbook_signal_annot_name_changes(ibs, aid_list=None, wb_target=None, dryrun=False): r""" Args: aid_list (int): list of annotation ids(default = None) tomcat_dpath (None): (default = None) wb_target (None): (default = None) dryrun (bool): (default = False) CommandLine: python -m ibeis wildbook_signal_annot_name_changes:0 --dryrun python -m ibeis wildbook_signal_annot_name_changes:1 --dryrun python -m ibeis wildbook_signal_annot_name_changes:1 python -m ibeis wildbook_signal_annot_name_changes:2 Setup: >>> wb_target = None >>> dryrun = ut.get_argflag('--dryrun') Example: >>> # DISABLE_DOCTEST >>> from ibeis.control.manual_wildbook_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST') >>> #gid_list = ibs.get_valid_gids()[0:10] >>> gid_list = ibs.get_valid_gids()[3:5] >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list)) >>> # Test case where some names change, some do not. There are no new names. >>> old_nid_list = ibs.get_annot_name_rowids(aid_list) >>> new_nid_list = ut.list_roll(old_nid_list, 1) >>> ibs.set_annot_name_rowids(aid_list, new_nid_list) >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun) >>> ibs.set_annot_name_rowids(aid_list, old_nid_list) Example: >>> # DISABLE_DOCTEST >>> from ibeis.control.manual_wildbook_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST') >>> #gid_list = ibs.get_valid_gids()[0:10] >>> gid_list = ibs.get_valid_gids()[3:5] >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list)) >>> # Test case where all names change to one known name >>> #old_nid_list = ibs.get_annot_name_rowids(aid_list) >>> #new_nid_list = [old_nid_list[0]] * len(old_nid_list) >>> old_nid_list = [1, 2] >>> new_nid_list = [1, 1] >>> print('old_nid_list = %r' % (old_nid_list,)) >>> print('new_nid_list = %r' % (new_nid_list,)) >>> ibs.set_annot_name_rowids(aid_list, new_nid_list) >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun) >>> # Undo changes here (not undone in wildbook) >>> #ibs.set_annot_name_rowids(aid_list, old_nid_list) Example: >>> # DISABLE_DOCTEST >>> from ibeis.control.manual_wildbook_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST') >>> gid_list = ibs.get_valid_gids()[3:5] >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list)) >>> old_nid_list = [1, 2] >>> ibs.set_annot_name_rowids(aid_list, old_nid_list) >>> # Signal what currently exists (should put them back to normal) >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun) """ print('[ibs.wildbook_signal_imgsetid_list] signaling annot name changes to wildbook') wb_url = ibs.get_wildbook_base_url(wb_target) try: ibs.assert_ia_available_for_wb(wb_target) except Exception: pass if aid_list is None: aid_list = ibs.get_valid_aids(is_known=True) annot_uuid_list = ibs.get_annot_uuids(aid_list) annot_name_text_list = ibs.get_annot_name_texts(aid_list) grouped_uuids = ut.group_items(annot_uuid_list, annot_name_text_list) url = wb_url + '/ia' payloads = [ {'resolver': {'assignNameToAnnotations': { 'name': new_name, 'annotationIds' : ut.lmap(str, annot_uuids), }}} for new_name, annot_uuids in grouped_uuids.items() ] status_list = [] for json_payload in ut.ProgressIter(payloads, lbl='submitting URL', freq=1): print('[_send] URL=%r with json_payload=%r' % (url, json_payload)) if dryrun: status = False else: response = requests.post(url, json=json_payload) status = response.status_code == 200 if not status: print('Failed to push new names') 
print(response.text) status_list.append(status) return status_list
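# The loop above builds one Wildbook resolver payload per name by grouping
# annotation UUIDs on their name text. Below is a minimal stand-alone sketch
# of that grouping with stdlib collections instead of ut.group_items; the
# names and UUID strings are placeholders, only the payload shape mirrors the
# code above.
from collections import defaultdict

annot_uuid_list = ['fake-uuid-1', 'fake-uuid-2', 'fake-uuid-3']
annot_name_text_list = ['PZ_0001', 'PZ_0001', 'PZ_0007']

grouped_uuids = defaultdict(list)
for annot_uuid, name_text in zip(annot_uuid_list, annot_name_text_list):
    grouped_uuids[name_text].append(annot_uuid)

payloads = [
    {'resolver': {'assignNameToAnnotations': {
        'name': new_name,
        'annotationIds': [str(u) for u in annot_uuids],
    }}}
    for new_name, annot_uuids in grouped_uuids.items()
]
# Each payload is then POSTed to <wildbook base url>/ia and any non-200
# response is counted as a failure, as in the loop above.
print(payloads)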
def get_root_uuid(aid_list): return ut.lmap(ut.hashable_to_uuid, aid_list)
def testdata_depc(fname=None): """ Example of local registration """ import dtool import vtool as vt gpath_list = ut.lmap(ut.grab_test_imgpath, ut.get_valid_test_imgkeys(), verbose=False) dummy_root = 'dummy_annot' def get_root_uuid(aid_list): return ut.lmap(ut.hashable_to_uuid, aid_list) # put the test cache in the dtool repo dtool_repo = dirname(ut.get_module_dir(dtool)) cache_dpath = join(dtool_repo, 'DEPCACHE') depc = dtool.DependencyCache( root_tablename=dummy_root, default_fname=fname, cache_dpath=cache_dpath, get_root_uuid=get_root_uuid, #root_asobject=root_asobject, use_globals=False) @depc.register_preproc(tablename='chip', parents=[dummy_root], colnames=['size', 'chip'], coltypes=[(int, int), ('extern', vt.imread, vt.imwrite)], configclass=DummyChipConfig) def dummy_preproc_chip(depc, annot_rowid_list, config=None): """ TODO: Infer properties from docstr? Args: depc (dtool.DependencyCache): annot_rowid_list (list): list of annot rowids config (dict): config dictionary Returns: tuple : ((int, int), ('extern', vt.imread)) """ if config is None: config = {} # Demonstates using asobject to get input to function as a dictionary # of properties #for annot in annot_list: #print('[preproc] Computing chips of aid=%r' % (aid,)) print('[preproc] Computing chips') for aid in annot_rowid_list: #aid = annot['aid'] #chip_fpath = annot['gpath'] chip_fpath = gpath_list[aid] #w, h = vt.image.open_image_size(chip_fpath) chip = vt.imread(chip_fpath) size = vt.get_size(chip) #size = (w, h) print('Dummpy preproc chip yeilds') print('* chip_fpath = %r' % (chip_fpath, )) print('* size = %r' % (size, )) #yield size, chip_fpath yield size, chip @depc.register_preproc( 'probchip', [dummy_root], ['size', 'probchip'], coltypes=[(int, int), ('extern', vt.imread, vt.imwrite, '.png')], configclass=ProbchipConfig, ) def dummy_preproc_probchip(depc, root_rowids, config): print('[preproc] Computing probchip') for rowid in root_rowids: if config['testerror']: if rowid % 2 == 0: # Test error yeilds None on even rowids yield None continue rng = np.random.RandomState(rowid) probchip = rng.randint(0, 255, size=(64, 64)) #probchip = np.zeros((64, 64)) size = (rowid, rowid) yield size, probchip @depc.register_preproc( 'keypoint', ['chip'], ['kpts', 'num'], [np.ndarray, int], #default_onthefly=True, configclass=DummyKptsConfig, docstr='Used to store individual chip features (ellipses)', ) def dummy_preproc_kpts(depc, chip_rowids, config=None): if config is None: config = {} print('config = %r' % (config, )) adapt_shape = config['adapt_shape'] print('[preproc] Computing kpts') ut.assert_all_not_None(chip_rowids, 'chip_rowids') # This is in here to attempt to trigger a failure of the chips dont # exist and the feature cache is called. 
chip_fpath_list = depc.get_native('chip', chip_rowids, 'chip', read_extern=False) print('computing featurse from chip_fpath_list = %r' % (chip_fpath_list, )) for rowid in chip_rowids: if adapt_shape: kpts = np.zeros((7 + rowid, 6)) + rowid else: kpts = np.ones((7 + rowid, 6)) + rowid num = len(kpts) yield kpts, num @depc.register_preproc( 'descriptor', ['keypoint'], ['vecs'], [np.ndarray], ) def dummy_preproc_vecs(depc, kp_rowid, config=None): if config is None: config = {} print('[preproc] Computing vecs') for rowid in kp_rowid: yield np.ones((7 + rowid, 8), dtype=np.uint8) + rowid, @depc.register_preproc( 'fgweight', ['keypoint', 'probchip'], ['fgweight'], [np.ndarray], ) def dummy_preproc_fgweight(depc, kpts_rowid, probchip_rowid, config=None): if config is None: config = {} print('[preproc] Computing fgweight') for rowid1, rowid2 in zip(kpts_rowid, probchip_rowid): yield np.ones(7 + rowid1), @depc.register_preproc(tablename='vsmany', colnames='annotmatch', coltypes=DummyAnnotMatch, requestclass=DummyVsManyRequest, configclass=DummyVsManyConfig) def vsmany_matching(depc, qaids, config=None): """ CommandLine: python -m dtool.base --exec-VsManySimilarityRequest """ print('RUNNING DUMMY VSMANY ALGO') daids = config.daids qaids = qaids sver_on = config.dummy_sver_cfg['sver_on'] kpts_list = depc.get_property('keypoint', list(qaids)) # NOQA #dummy_preproc_kpts for qaid in qaids: dnid_list = [1, 1, 2, 2] unique_nids = [1, 2] if sver_on: annot_score_list = [.2, .2, .4, .5] name_score_list = [.2, .5] else: annot_score_list = [.3, .3, .6, .9] name_score_list = [.1, .7] annot_match = DummyAnnotMatch(qaid, daids, dnid_list, annot_score_list, unique_nids, name_score_list) yield annot_match SIMPLE = 0 if not SIMPLE: @depc.register_preproc(tablename='chipmask', parents=[dummy_root], colnames=['size', 'mask'], coltypes=[(int, int), ('extern', vt.imread, vt.imwrite)]) def dummy_manual_chipmask(depc, parent_rowids, config=None): import vtool as vt from plottool import interact_impaint mask_dpath = join(depc.cache_dpath, 'ManualChipMask') ut.ensuredir(mask_dpath) if config is None: config = {} print('Requesting user defined chip mask') for rowid in parent_rowids: img = vt.imread(gpath_list[rowid]) mask = interact_impaint.impaint_mask2(img) mask_fpath = join(mask_dpath, 'mask%d.png' % (rowid, )) vt.imwrite(mask_fpath, mask) w, h = vt.get_size(mask) yield (w, h), mask_fpath @depc.register_preproc( 'notch', [dummy_root], ['notchdata'], [np.ndarray], ) def dummy_preproc_notch(depc, parent_rowids, config=None): if config is None: config = {} print('[preproc] Computing notch') for rowid in parent_rowids: yield np.empty(5 + rowid), @depc.register_preproc( 'spam', ['fgweight', 'chip', 'keypoint'], ['spam', 'eggs', 'size', 'uuid', 'vector', 'textdata'], [ str, int, (int, int), uuid.UUID, np.ndarray, ('extern', ut.readfrom) ], docstr='I dont like spam', ) def dummy_preproc_spam(depc, *args, **kwargs): config = kwargs.get('config', None) if config is None: config = {} print('[preproc] Computing spam') ut.writeto('tmp.txt', ut.lorium_ipsum()) for x in zip(*args): size = (42, 21) uuid = ut.get_zero_uuid() vector = np.ones(3) yield ('spam', 3665, size, uuid, vector, 'tmp.txt') @depc.register_preproc( 'nnindexer', ['keypoint*'], ['flann'], [str], # [('extern', ut.load_data)], configclass=DummyIndexerConfig, ) def dummy_preproc_indexer(depc, parent_rowids_list, config=None): print('COMPUTING DUMMY INDEXER') #assert len(parent_rowids_list) == 1, 'handles only one indexer' for parent_rowids in parent_rowids_list: yield 
('really cool flann object' + str(config.get_cfgstr()) + ' ' + str(parent_rowids), ) @depc.register_preproc( 'notchpair', ['notch', 'notch'], ['pairscore'], [int], # [('extern', ut.load_data)], #configclass=DummyIndexerConfig, ) def dummy_notchpair(depc, n1, n2, config=None): print('COMPUTING MULTITEST 1 ') #assert len(parent_rowids_list) == 1, 'handles only one indexer' for nn1, nn2 in zip(n1, n2): yield (nn1 + nn2, ) @depc.register_preproc( 'multitest', [ 'keypoint', 'notch', 'notch', 'fgweight*', 'notchpair*', 'notchpair*', 'notchpair', 'nnindexer' ], ['foo'], [str], # [('extern', ut.load_data)], #configclass=DummyIndexerConfig, ) def dummy_multitest(depc, *args, **kwargs): print('COMPUTING MULTITEST 1 ') #assert len(parent_rowids_list) == 1, 'handles only one indexer' for x in zip(args): yield ('cool multi object' + str(kwargs) + ' ' + str(x), ) # TEST MULTISET DEPENDENCIES @depc.register_preproc( 'multitest_score', ['multitest'], ['score'], [int], # [('extern', ut.load_data)], #configclass=DummyIndexerConfig, ) def dummy_multitest_score(depc, parent_rowids, config=None): print('COMPUTING DEPENDENCY OF MULTITEST 1 ') #assert len(parent_rowids_list) == 1, 'handles only one indexer' for parent_rowids in zip(parent_rowids): yield (parent_rowids, ) # TEST MULTISET DEPENDENCIES @depc.register_preproc( 'multitest_score_x', ['multitest_score', 'multitest_score'], ['score'], [int], # [('extern', ut.load_data)], #configclass=DummyIndexerConfig, ) def multitest_score_x(depc, *args, **kwargs): raise NotImplementedError('hack') # REGISTER MATCHING ALGORITHMS @depc.register_preproc( tablename='neighbs', colnames=['qx2_idx', 'qx2_dist'], coltypes=[np.ndarray, np.ndarray], parents=['keypoint', 'fgweight', 'nnindexer', 'nnindexer']) def neighbs(depc, *args, **kwargs): """ CommandLine: python -m dtool.base --exec-VsManySimilarityRequest """ #dummy_preproc_kpts for qaid in zip(args): yield np.array([qaid]), np.array([qaid]) @depc.register_preproc(tablename='neighbs_score', colnames=['qx2_dist'], coltypes=[np.ndarray], parents=['neighbs']) def neighbs_score(depc, *args, **kwargs): """ CommandLine: python -m dtool.base --exec-VsManySimilarityRequest """ raise NotImplementedError('hack') @depc.register_preproc('vsone', [dummy_root, dummy_root], ['score', 'match_obj', 'fm'], [float, DummyVsOneMatch, np.ndarray], requestclass=DummyVsOneRequest, configclass=DummyVsOneConfig, chunksize=2) def compute_vsone_matching(depc, qaids, daids, config): """ CommandLine: python -m dtool.base --exec-VsOneSimilarityRequest """ print('RUNNING DUMMY VSONE ALGO') for qaid, daid in zip(qaids, daids): match = DummyVsOneMatch() match.qaid = qaid match.daid = daid match.fm = np.array([[1, 2], [3, 4]]) score = match.score = qaid + daid yield (score, match, match.fm) # table = depc['spam'] # print(ut.repr2(table.get_addtable_kw(), nl=2)) depc.initialize() # table.print_schemadef() # print(table.db.get_schema_current_autogeneration_str()) return depc
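# The registrations above all follow the same decorator pattern: declare a
# table name, its parent tables, column names and column types, and supply a
# generator that yields one output tuple per parent rowid. A toy registry
# (a hypothetical stand-in, not the real dtool.DependencyCache API) makes the
# mechanics explicit:
class ToyDepCache(object):
    def __init__(self):
        self.tables = {}

    def register_preproc(self, tablename, parents, colnames, coltypes):
        def _wrap(func):
            # capture the table metadata together with the compute function
            self.tables[tablename] = dict(parents=parents, colnames=colnames,
                                          coltypes=coltypes, func=func)
            return func
        return _wrap

    def compute(self, tablename, rowids):
        # run the registered generator and materialize one row per rowid
        return list(self.tables[tablename]['func'](self, rowids))

toy = ToyDepCache()

@toy.register_preproc('toy_chip', ['toy_annot'], ['size'], [(int, int)])
def toy_preproc_chip(depc, rowids, config=None):
    for rowid in rowids:
        yield ((64 + rowid, 64 + rowid),)

print(toy.compute('toy_chip', [1, 2, 3]))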
def get_default_cell_template_list(ibs): """ Defines the order of ipython notebook cells """ cells = notebook_cells noexample = not ut.get_argflag('--examples') asreport = ut.get_argflag('--asreport') withtags = ut.get_argflag('--withtags') cell_template_list = [] info_cells = [ cells.pipe_config_info, cells.annot_config_info, # cells.per_encounter_stats, cells.timestamp_distribution, ] dev_analysis = [ cells.config_overlap, #cells.dbsize_expt, # None if ibs.get_dbname() == 'humpbacks' else cells.feat_score_sep, cells.all_annot_scoresep, cells.success_annot_scoresep, ] cell_template_list += [ cells.introduction if asreport else None, cells.nb_init, cells.db_init, None if ibs.get_dbname() != 'humpbacks' else cells.fluke_select, ] if not asreport: cell_template_list += info_cells if not noexample: cell_template_list += [ cells.example_annotations, cells.example_names, ] cell_template_list += [ cells.per_annotation_accuracy, cells.per_name_accuracy, cells.easy_success_cases, cells.hard_success_cases, cells.failure_type1_cases, cells.failure_type2_cases, cells.total_failure_cases, cells.timedelta_distribution, ] if withtags: cell_template_list += [ cells.investigate_specific_case, cells.view_intereseting_tags, ] if asreport: # Append our debug stuff at the bottom cell_template_list += [cells.IGNOREAFTER] cell_template_list += info_cells cell_template_list += dev_analysis cell_template_list += [ cells.config_disagree_cases, ] cell_template_list = ut.filter_Nones(cell_template_list) cell_template_list = ut.lmap(ut.normalize_cells, cell_template_list) if not asreport: # Remove all of the extra fluff cell_template_list = [(header.split('\n')[0], code, None) for (header, code, footer) in cell_template_list] return cell_template_list
def update_bindings(): r""" Returns: dict: matchtups CommandLine: python ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings utprof.py ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings Example: >>> # DISABLE_DOCTEST >>> from autogen_bindings import * # NOQA >>> import sys >>> import utool as ut >>> sys.path.append(ut.truepath('~/local/build_scripts/flannscripts')) >>> matchtups = update_bindings() >>> result = ('matchtups = %s' % (ut.repr2(matchtups),)) >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> ut.show_if_requested() """ from os.path import basename import difflib import numpy as np import re binding_names = [ 'build_index', 'used_memory', 'add_points', 'remove_point', 'compute_cluster_centers', 'load_index', 'save_index', 'find_nearest_neighbors', 'radius_search', 'remove_points', 'free_index', 'find_nearest_neighbors_index', # 'size', # 'veclen', # 'get_point', # 'flann_get_distance_order', # 'flann_get_distance_type', # 'flann_log_verbosity', # 'clean_removed_points', ] _places = [ '~/code/flann/src/cpp/flann/flann.cpp', '~/code/flann/src/cpp/flann/flann.h', '~/code/flann/src/python/pyflann/flann_ctypes.py', '~/code/flann/src/python/pyflann/index.py', ] eof_sentinals = { # 'flann_ctypes.py': '# END DEFINE BINDINGS', 'flann_ctypes.py': 'def ensure_2d_array(arr', # 'flann.h': '// END DEFINE BINDINGS', 'flann.h': '#ifdef __cplusplus', 'flann.cpp': None, 'index.py': None, } block_sentinals = { 'flann.h': re.escape('/**'), 'flann.cpp': 'template *<typename Distance>', # 'flann_ctypes.py': '\n', 'flann_ctypes.py': 'flann\.[a-z_.]* =', # 'index.py': ' def .*', 'index.py': ' [^ ].*', } places = {basename(fpath): fpath for fpath in ut.lmap(ut.truepath, _places)} text_dict = ut.map_dict_vals(ut.readfrom, places) lines_dict = {key: val.split('\n') for key, val in text_dict.items()} orig_texts = text_dict.copy() # NOQA binding_defs = {} named_blocks = {} print('binding_names = %r' % (binding_names,)) for binding_name in binding_names: blocks, defs = autogen_parts(binding_name) binding_defs[binding_name] = defs named_blocks[binding_name] = blocks for binding_name in ut.ProgIter(binding_names): ut.colorprint('+--- GENERATE BINDING %s -----' % (binding_name,), 'yellow') blocks_dict = named_blocks[binding_name] for key in places.keys(): ut.colorprint('---- generating %s for %s -----' % (binding_name, key,), 'yellow') # key = 'flann_ctypes.py' # print(text_dict[key]) old_text = text_dict[key] line_list = lines_dict[key] #text = old_text block = blocks_dict[key] debug = ut.get_argflag('--debug') # debug = True # if debug: # print(ut.highlight_code(block, splitext(key)[1])) # Find a place in the code that already exists searchblock = block if key.endswith('.cpp') or key.endswith('.h'): searchblock = re.sub(ut.REGEX_C_COMMENT, '', searchblock, flags=re.MULTILINE | re.DOTALL) searchblock = '\n'.join(searchblock.splitlines()[0:3]) # @ut.cached_func(verbose=False) def cached_match(old_text, searchblock): def isjunk(x): return False return x in ' \t,*()' def isjunk2(x): return x in ' \t,*()' # Not sure why the first one just doesnt find it # isjunk = None sm = difflib.SequenceMatcher(isjunk, old_text, searchblock, autojunk=False) sm0 = difflib.SequenceMatcher(isjunk, old_text, searchblock, autojunk=True) sm1 = difflib.SequenceMatcher(isjunk2, old_text, searchblock, autojunk=False) sm2 = difflib.SequenceMatcher(isjunk2, old_text, searchblock, autojunk=True) matchtups = (sm.get_matching_blocks() + sm0.get_matching_blocks() + 
sm1.get_matching_blocks() + sm2.get_matching_blocks()) return matchtups matchtups = cached_match(old_text, searchblock) # Find a reasonable match in matchtups found = False if debug: # print('searchblock =\n%s' % (searchblock,)) print('searchblock = %r' % (searchblock,)) for (a, b, size) in matchtups: matchtext = old_text[a: a + size] pybind = binding_defs[binding_name]['py_binding_name'] if re.search(binding_name + '\\b', matchtext) or re.search(pybind + '\\b', matchtext): found = True pos = a + size if debug: print('MATCHING TEXT') print(matchtext) break else: if debug and 0: print('Not matching') print('matchtext = %r' % (matchtext,)) matchtext2 = old_text[a - 10: a + size + 20] print('matchtext2 = %r' % (matchtext2,)) if found: linelens = np.array(ut.lmap(len, line_list)) + 1 sumlen = np.cumsum(linelens) row = np.where(sumlen < pos)[0][-1] + 1 #print(line_list[row]) # Search for extents of the block to overwrite block_sentinal = block_sentinals[key] row1 = ut.find_block_end(row, line_list, block_sentinal, -1) - 1 row2 = ut.find_block_end(row + 1, line_list, block_sentinal, +1) eof_sentinal = eof_sentinals[key] if eof_sentinal is not None: print('eof_sentinal = %r' % (eof_sentinal,)) row2 = min([count for count, line in enumerate(line_list) if line.startswith(eof_sentinal)][-1], row2) nr = len((block + '\n\n').splitlines()) new_line_list = ut.insert_block_between_lines( block + '\n', row1, row2, line_list) rtext1 = '\n'.join(line_list[row1:row2]) rtext2 = '\n'.join(new_line_list[row1:row1 + nr]) if debug: print('-----') ut.colorprint('FOUND AND REPLACING %s' % (binding_name,), 'yellow') print(ut.highlight_code(rtext1)) if debug: print('-----') ut.colorprint('FOUND AND REPLACED WITH %s' % (binding_name,), 'yellow') print(ut.highlight_code(rtext2)) if not ut.get_argflag('--diff') and not debug: print(ut.color_diff_text(ut.difftext(rtext1, rtext2, num_context_lines=7, ignore_whitespace=True))) else: # Append to end of the file eof_sentinal = eof_sentinals[key] if eof_sentinal is None: row2 = len(line_list) - 1 else: row2_choice = [count for count, line in enumerate(line_list) if line.startswith(eof_sentinal)] if len(row2_choice) == 0: row2 = len(line_list) - 1 assert False else: row2 = row2_choice[-1] - 1 # row1 = row2 - 1 # row2 = row2 - 1 row1 = row2 new_line_list = ut.insert_block_between_lines( block + '\n', row1, row2, line_list) # block + '\n\n\n', row1, row2, line_list) rtext1 = '\n'.join(line_list[row1:row2]) nr = len((block + '\n\n').splitlines()) rtext2 = '\n'.join(new_line_list[row1:row1 + nr]) if debug: print('-----') ut.colorprint('NOT FOUND AND REPLACING %s' % (binding_name,), 'yellow') print(ut.highlight_code(rtext1)) if debug: print('-----') ut.colorprint('NOT FOUND AND REPLACED WITH %s' % (binding_name,), 'yellow') print(ut.highlight_code(rtext2)) if not ut.get_argflag('--diff') and not debug: print(ut.color_diff_text(ut.difftext(rtext1, rtext2, num_context_lines=7, ignore_whitespace=True))) text_dict[key] = '\n'.join(new_line_list) lines_dict[key] = new_line_list ut.colorprint('L___ GENERATED BINDING %s ___' % (binding_name,), 'yellow') for key in places: new_text = '\n'.join(lines_dict[key]) #ut.writeto(ut.augpath(places[key], '.new'), new_text) ut.writeto(ut.augpath(places[key]), new_text) for key in places: if ut.get_argflag('--diff'): difftext = ut.get_textdiff(orig_texts[key], new_text, num_context_lines=7, ignore_whitespace=True) difftext = ut.color_diff_text(difftext) print(difftext)
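# The "found" branch above maps a character offset returned by SequenceMatcher
# back to a line index by cumulatively summing line lengths (+1 per stripped
# newline). A small self-contained sketch of that mapping, written with
# np.searchsorted so the boundary case is handled explicitly; the text and
# offset below are made up.
import numpy as np

text = 'alpha\nbeta\ngamma\ndelta'
line_list = text.split('\n')
pos = text.index('amma')  # a character offset that falls inside line 2

# cumulative end-offsets of each line within the joined text (+1 per '\n')
linelens = np.array([len(line) for line in line_list]) + 1
sumlen = np.cumsum(linelens)
row = int(np.searchsorted(sumlen, pos, side='right'))
assert line_list[row] == 'gamma'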
def __nice__(sample): denc_pername = ut.lmap(len, sample.dname_encs) n_denc_pername = np.mean(denc_pername) return 'nQaids={}, nDEncPerName={}, nConfu={}'.format( len(sample.qaids), n_denc_pername, len(sample.confusor_pool))
'git branch --set-upstream-to=origin/{upstream_branch} {upstream_branch}' .format(**locals())) upstream_push = GET_ARGVAL('--upstream-push', type_=str, default=None) if upstream_push is not None: ibeis_rman.issue( 'git push --set-upstream origin {upstream_push}'.format(**locals())) if GET_ARGFLAG('--test'): failures = [] for repo_dpath in ibeis_rman.repo_dirs: # ut.getp_ mod_dpaths = ut.get_submodules_from_dpath(repo_dpath, recursive=False, only_packages=True) modname_list = ut.lmap(ut.get_modname_from_modpath, mod_dpaths) print('Checking modules = %r' % (modname_list, )) for modname in modname_list: try: ut.import_modname(modname) print(modname + ' success') except ImportError as ex: failures += [modname] print(modname + ' failure') print('failures = %s' % (ut.repr3(failures), )) # print('repo_dpath = %r' % (repo_dpath,)) # print('modules = %r' % (modules,)) # import ibeis # print('found ibeis=%r' % (ibeis,))
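# The --test branch above imports every discovered module and records which
# ones fail. A stdlib-only sketch of the same smoke test using importlib
# instead of ut.import_modname; the module names below are placeholders, not
# the list actually discovered from the repositories.
import importlib

modname_list = ['os', 'json', 'not_a_real_module_xyz']
failures = []
for modname in modname_list:
    try:
        importlib.import_module(modname)
        print(modname + ' success')
    except ImportError:
        failures.append(modname)
        print(modname + ' failure')
print('failures = %r' % (failures,))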
def expand(sample, denc_per_name=[1], extra_dbsize_fracs=[0]): # Vary the number of database encounters in each sample target_daids_list = [] target_info_list_ = [] for num in denc_per_name: dname_encs_ = ut.take_column(sample.dname_encs, slice(0, num)) dnames_ = ut.lmap(ut.flatten, dname_encs_) daids_ = ut.total_flatten(dname_encs_) target_daids_list.append(daids_) name_lens = ut.lmap(len, dnames_) dpername = name_lens[0] if ut.allsame(name_lens) else np.mean( name_lens) target_info_list_.append( ut.odict([ ('qsize', len(sample.qaids)), ('t_n_names', len(dname_encs_)), ('t_dpername', dpername), ('t_denc_pername', num), ('t_dsize', len(daids_)), ])) # Append confusors to maintain a constant dbsize in each base sample dbsize_list = ut.lmap(len, target_daids_list) max_dsize = max(dbsize_list) n_need = max_dsize - min(dbsize_list) n_extra_avail = len(sample.confusor_pool) - n_need assert len(sample.confusor_pool) > n_need, 'not enough confusors' padded_daids_list = [] padded_info_list_ = [] for daids_, info_ in zip(target_daids_list, target_info_list_): num_take = max_dsize - len(daids_) pad_aids = sample.confusor_pool[:num_take] new_aids = daids_ + pad_aids info_ = info_.copy() info_['n_pad'] = len(pad_aids) info_['pad_dsize'] = len(new_aids) padded_info_list_.append(info_) padded_daids_list.append(new_aids) # Vary the dbsize by appending extra confusors if extra_dbsize_fracs is None: extra_dbsize_fracs = [1.0] extra_fracs = np.array(extra_dbsize_fracs) n_extra_list = np.unique(extra_fracs * n_extra_avail).astype(np.int) daids_list = [] info_list = [] for n in n_extra_list: for daids_, info_ in zip(padded_daids_list, padded_info_list_): extra_aids = sample.confusor_pool[len(sample.confusor_pool) - n:] daids = sorted(daids_ + extra_aids) daids_list.append(daids) info = info_.copy() info['n_extra'] = len(extra_aids) info['dsize'] = len(daids) info_list.append(info) import pandas as pd verbose = 0 if verbose: logger.info(pd.DataFrame.from_records(info_list)) logger.info('#qaids = %r' % (len(sample.qaids), )) logger.info('num_need = %r' % (n_need, )) logger.info('max_dsize = %r' % (max_dsize, )) return sample.qaids, daids_list, info_list
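# expand() first pads every per-sample database list up to the size of the
# largest one using confusor annotations, and only then grows the database by
# fractions of the leftover confusors. A toy run of the padding step with
# made-up integer aids makes the bookkeeping concrete:
target_daids_list = [[1, 2], [1, 2, 3, 4]]       # per-sample database aids
confusor_pool = [100, 101, 102, 103, 104, 105]   # distractor aids

max_dsize = max(len(daids_) for daids_ in target_daids_list)
padded_daids_list = []
for daids_ in target_daids_list:
    num_take = max_dsize - len(daids_)
    padded_daids_list.append(daids_ + confusor_pool[:num_take])

# every padded sample now has the same database size
assert len(set(len(d) for d in padded_daids_list)) == 1
print(padded_daids_list)  # [[1, 2, 100, 101], [1, 2, 3, 4]]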
def run_asmk_script(): with ut.embed_on_exception_context: # NOQA """ >>> from wbia.algo.smk.script_smk import * """ # NOQA # ============================================== # PREPROCESSING CONFIGURATION # ============================================== config = { # 'data_year': 2013, 'data_year': None, 'dtype': 'float32', # 'root_sift': True, 'root_sift': False, # 'centering': True, 'centering': False, 'num_words': 2**16, # 'num_words': 1E6 # 'num_words': 8000, 'kmeans_impl': 'sklearn.mini', 'extern_words': False, 'extern_assign': False, 'assign_algo': 'kdtree', 'checks': 1024, 'int_rvec': True, 'only_xy': False, } # Define which params are relevant for which operations relevance = {} relevance['feats'] = ['dtype', 'root_sift', 'centering', 'data_year'] relevance['words'] = relevance['feats'] + [ 'num_words', 'extern_words', 'kmeans_impl', ] relevance['assign'] = relevance['words'] + [ 'checks', 'extern_assign', 'assign_algo', ] # relevance['ydata'] = relevance['assign'] + ['int_rvec'] # relevance['xdata'] = relevance['assign'] + ['only_xy', 'int_rvec'] nAssign = 1 class SMKCacher(ut.Cacher): def __init__(self, fname, ext='.cPkl'): relevant_params = relevance[fname] relevant_cfg = ut.dict_subset(config, relevant_params) cfgstr = ut.get_cfg_lbl(relevant_cfg) dbdir = ut.truepath('/raid/work/Oxford/') super(SMKCacher, self).__init__(fname, cfgstr, cache_dir=dbdir, ext=ext) # ============================================== # LOAD DATASET, EXTRACT AND POSTPROCESS FEATURES # ============================================== if config['data_year'] == 2007: data = load_oxford_2007() elif config['data_year'] == 2013: data = load_oxford_2013() elif config['data_year'] is None: data = load_oxford_wbia() offset_list = data['offset_list'] all_kpts = data['all_kpts'] raw_vecs = data['all_vecs'] query_uri_order = data['query_uri_order'] data_uri_order = data['data_uri_order'] # del data # ================ # PRE-PROCESS # ================ import vtool as vt # Alias names to avoid errors in interactive sessions proc_vecs = raw_vecs del raw_vecs feats_cacher = SMKCacher('feats', ext='.npy') all_vecs = feats_cacher.tryload() if all_vecs is None: if config['dtype'] == 'float32': logger.info('Converting vecs to float32') proc_vecs = proc_vecs.astype(np.float32) else: proc_vecs = proc_vecs raise NotImplementedError('other dtype') if config['root_sift']: with ut.Timer('Apply root sift'): np.sqrt(proc_vecs, out=proc_vecs) vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs) if config['centering']: with ut.Timer('Apply centering'): mean_vec = np.mean(proc_vecs, axis=0) # Center and then re-normalize np.subtract(proc_vecs, mean_vec[None, :], out=proc_vecs) vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs) if config['dtype'] == 'int8': smk_funcs all_vecs = proc_vecs feats_cacher.save(all_vecs) del proc_vecs # ===================================== # BUILD VISUAL VOCABULARY # ===================================== if config['extern_words']: words = data['words'] assert config['num_words'] is None or len( words) == config['num_words'] else: word_cacher = SMKCacher('words') words = word_cacher.tryload() if words is None: with ut.embed_on_exception_context: if config['kmeans_impl'] == 'sklearn.mini': import sklearn.cluster rng = np.random.RandomState(13421421) # init_size = int(config['num_words'] * 8) init_size = int(config['num_words'] * 4) # converged after 26043 iterations clusterer = sklearn.cluster.MiniBatchKMeans( config['num_words'], init_size=init_size, batch_size=1000, compute_labels=False, max_iter=20, 
random_state=rng, n_init=1, verbose=1, ) clusterer.fit(all_vecs) words = clusterer.cluster_centers_ elif config['kmeans_impl'] == 'yael': from yael import ynumpy centroids, qerr, dis, assign, nassign = ynumpy.kmeans( all_vecs, config['num_words'], init='kmeans++', verbose=True, output='all', ) words = centroids word_cacher.save(words) # ===================================== # ASSIGN EACH VECTOR TO ITS NEAREST WORD # ===================================== if config['extern_assign']: assert config[ 'extern_words'], 'need extern cluster to extern assign' idx_to_wxs = vt.atleast_nd(data['idx_to_wx'], 2) idx_to_maws = np.ones(idx_to_wxs.shape, dtype=np.float32) idx_to_wxs = np.ma.array(idx_to_wxs) idx_to_maws = np.ma.array(idx_to_maws) else: from wbia.algo.smk import vocab_indexer vocab = vocab_indexer.VisualVocab(words) dassign_cacher = SMKCacher('assign') assign_tup = dassign_cacher.tryload() if assign_tup is None: vocab.flann_params['algorithm'] = config['assign_algo'] vocab.build() # Takes 12 minutes to assign jegous vecs to 2**16 vocab with ut.Timer('assign vocab neighbors'): _idx_to_wx, _idx_to_wdist = vocab.nn_index( all_vecs, nAssign, checks=config['checks']) if nAssign > 1: idx_to_wxs, idx_to_maws = smk_funcs.weight_multi_assigns( _idx_to_wx, _idx_to_wdist, massign_alpha=1.2, massign_sigma=80.0, massign_equal_weights=True, ) else: idx_to_wxs = np.ma.masked_array(_idx_to_wx, fill_value=-1) idx_to_maws = np.ma.ones(idx_to_wxs.shape, fill_value=-1, dtype=np.float32) idx_to_maws.mask = idx_to_wxs.mask assign_tup = (idx_to_wxs, idx_to_maws) dassign_cacher.save(assign_tup) idx_to_wxs, idx_to_maws = assign_tup # Breakup vectors, keypoints, and word assignments by annotation wx_lists = [ idx_to_wxs[left:right] for left, right in ut.itertwo(offset_list) ] maw_lists = [ idx_to_maws[left:right] for left, right in ut.itertwo(offset_list) ] vecs_list = [ all_vecs[left:right] for left, right in ut.itertwo(offset_list) ] kpts_list = [ all_kpts[left:right] for left, right in ut.itertwo(offset_list) ] # ======================= # FIND QUERY SUBREGIONS # ======================= ibs, query_annots, data_annots, qx_to_dx = load_ordered_annots( data_uri_order, query_uri_order) daids = data_annots.aids qaids = query_annots.aids query_super_kpts = ut.take(kpts_list, qx_to_dx) query_super_vecs = ut.take(vecs_list, qx_to_dx) query_super_wxs = ut.take(wx_lists, qx_to_dx) query_super_maws = ut.take(maw_lists, qx_to_dx) # Mark which keypoints are within the bbox of the query query_flags_list = [] only_xy = config['only_xy'] for kpts_, bbox in zip(query_super_kpts, query_annots.bboxes): flags = kpts_inside_bbox(kpts_, bbox, only_xy=only_xy) query_flags_list.append(flags) logger.info('Queries are crops of existing database images.') logger.info('Looking at average percents') percent_list = [ flags_.sum() / flags_.shape[0] for flags_ in query_flags_list ] percent_stats = ut.get_stats(percent_list) logger.info('percent_stats = %s' % (ut.repr4(percent_stats), )) import vtool as vt query_kpts = vt.zipcompress(query_super_kpts, query_flags_list, axis=0) query_vecs = vt.zipcompress(query_super_vecs, query_flags_list, axis=0) query_wxs = vt.zipcompress(query_super_wxs, query_flags_list, axis=0) query_maws = vt.zipcompress(query_super_maws, query_flags_list, axis=0) # ======================= # CONSTRUCT QUERY / DATABASE REPR # ======================= # int_rvec = not config['dtype'].startswith('float') int_rvec = config['int_rvec'] X_list = [] _prog = ut.ProgPartial(length=len(qaids), label='new X', bs=True, adjust=True) for 
aid, fx_to_wxs, fx_to_maws in _prog( zip(qaids, query_wxs, query_maws)): X = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec) X_list.append(X) # ydata_cacher = SMKCacher('ydata') # Y_list = ydata_cacher.tryload() # if Y_list is None: Y_list = [] _prog = ut.ProgPartial(length=len(daids), label='new Y', bs=True, adjust=True) for aid, fx_to_wxs, fx_to_maws in _prog(zip(daids, wx_lists, maw_lists)): Y = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec) Y_list.append(Y) # ydata_cacher.save(Y_list) # ====================== # Add in some groundtruth logger.info('Add in some groundtruth') for Y, nid in zip(Y_list, ibs.get_annot_nids(daids)): Y.nid = nid for X, nid in zip(X_list, ibs.get_annot_nids(qaids)): X.nid = nid for Y, qual in zip(Y_list, ibs.get_annot_quality_texts(daids)): Y.qual = qual # ====================== # Add in other properties for Y, vecs, kpts in zip(Y_list, vecs_list, kpts_list): Y.vecs = vecs Y.kpts = kpts imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images') for Y, imgid in zip(Y_list, data_uri_order): gpath = ut.unixjoin(imgdir, imgid + '.jpg') Y.gpath = gpath for X, vecs, kpts in zip(X_list, query_vecs, query_kpts): X.kpts = kpts X.vecs = vecs # ====================== logger.info('Building inverted list') daids = [Y.aid for Y in Y_list] # wx_list = sorted(ut.list_union(*[Y.wx_list for Y in Y_list])) wx_list = sorted(set.union(*[Y.wx_set for Y in Y_list])) assert daids == data_annots.aids assert len(wx_list) <= config['num_words'] wx_to_aids = smk_funcs.invert_lists(daids, [Y.wx_list for Y in Y_list], all_wxs=wx_list) # Compute IDF weights logger.info('Compute IDF weights') ndocs_total = len(daids) # Use only the unique number of words ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list]) logger.info('ndocs_perword stats: ' + ut.repr4(ut.get_stats(ndocs_per_word))) idf_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word) wx_to_weight = dict(zip(wx_list, idf_per_word)) logger.info('idf stats: ' + ut.repr4(ut.get_stats(wx_to_weight.values()))) # Filter junk Y_list_ = [Y for Y in Y_list if Y.qual != 'junk'] # ======================= # CHOOSE QUERY KERNEL # ======================= params = { 'asmk': dict(alpha=3.0, thresh=0.0), 'bow': dict(), 'bow2': dict(), } # method = 'bow' method = 'bow2' method = 'asmk' smk = SMK(wx_to_weight, method=method, **params[method]) # Specific info for the type of query if method == 'asmk': # Make residual vectors if True: # The stacked way is 50x faster # TODO: extend for multi-assignment and record fxs flat_query_vecs = np.vstack(query_vecs) flat_query_wxs = np.vstack(query_wxs) flat_query_offsets = np.array( [0] + ut.cumsum(ut.lmap(len, query_wxs))) flat_wxs_assign = flat_query_wxs flat_offsets = flat_query_offsets flat_vecs = flat_query_vecs tup = smk_funcs.compute_stacked_agg_rvecs( words, flat_wxs_assign, flat_vecs, flat_offsets) all_agg_vecs, all_error_flags, agg_offset_list = tup if int_rvec: all_agg_vecs = smk_funcs.cast_residual_integer( all_agg_vecs) agg_rvecs_list = [ all_agg_vecs[left:right] for left, right in ut.itertwo(agg_offset_list) ] agg_flags_list = [ all_error_flags[left:right] for left, right in ut.itertwo(agg_offset_list) ] for X, agg_rvecs, agg_flags in zip(X_list, agg_rvecs_list, agg_flags_list): X.agg_rvecs = agg_rvecs X.agg_flags = agg_flags[:, None] flat_wxs_assign = idx_to_wxs flat_offsets = offset_list flat_vecs = all_vecs tup = smk_funcs.compute_stacked_agg_rvecs( words, flat_wxs_assign, flat_vecs, flat_offsets) all_agg_vecs, all_error_flags, agg_offset_list = tup if 
int_rvec: all_agg_vecs = smk_funcs.cast_residual_integer( all_agg_vecs) agg_rvecs_list = [ all_agg_vecs[left:right] for left, right in ut.itertwo(agg_offset_list) ] agg_flags_list = [ all_error_flags[left:right] for left, right in ut.itertwo(agg_offset_list) ] for Y, agg_rvecs, agg_flags in zip(Y_list, agg_rvecs_list, agg_flags_list): Y.agg_rvecs = agg_rvecs Y.agg_flags = agg_flags[:, None] else: # This non-stacked way is about 500x slower _prog = ut.ProgPartial(label='agg Y rvecs', bs=True, adjust=True) for Y in _prog(Y_list_): make_agg_vecs(Y, words, Y.vecs) _prog = ut.ProgPartial(label='agg X rvecs', bs=True, adjust=True) for X in _prog(X_list): make_agg_vecs(X, words, X.vecs) elif method == 'bow2': # Hack for orig tf-idf bow vector nwords = len(words) for X in ut.ProgIter(X_list, label='make bow vector'): ensure_tf(X) bow_vector(X, wx_to_weight, nwords) for Y in ut.ProgIter(Y_list_, label='make bow vector'): ensure_tf(Y) bow_vector(Y, wx_to_weight, nwords) if method != 'bow2': for X in ut.ProgIter(X_list, 'compute X gamma'): X.gamma = smk.gamma(X) for Y in ut.ProgIter(Y_list_, 'compute Y gamma'): Y.gamma = smk.gamma(Y) # Execute matches (could go faster by enumerating candidates) scores_list = [] for X in ut.ProgIter(X_list, label='query %s' % (smk, )): scores = [smk.kernel(X, Y) for Y in Y_list_] scores = np.array(scores) scores = np.nan_to_num(scores) scores_list.append(scores) import sklearn.metrics avep_list = [] _iter = list(zip(scores_list, X_list)) _iter = ut.ProgIter(_iter, label='evaluate %s' % (smk, )) for scores, X in _iter: truth = [X.nid == Y.nid for Y in Y_list_] avep = sklearn.metrics.average_precision_score(truth, scores) avep_list.append(avep) avep_list = np.array(avep_list) mAP = np.mean(avep_list) logger.info('mAP = %r' % (mAP, ))
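# The evaluation loop above scores each query against every database annot and
# reports mAP as the mean of per-query average precision. A minimal
# stand-alone version of that final step, with made-up truth/score vectors:
import numpy as np
import sklearn.metrics

scores_list = [np.array([0.9, 0.2, 0.8, 0.1]),
               np.array([0.3, 0.7, 0.8, 0.6])]
truth_list = [np.array([1, 0, 1, 0]),
              np.array([0, 1, 0, 1])]

avep_list = [
    sklearn.metrics.average_precision_score(truth, scores)
    for truth, scores in zip(truth_list, scores_list)
]
mAP = np.mean(avep_list)
print('mAP = %r' % (mAP,))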
def testdata_depc(fname=None): import vtool as vt gpath_list = ut.lmap(ut.grab_test_imgpath, ut.get_valid_test_imgkeys(), verbose=False) dummy_root = 'dummy_annot' def root_asobject(aid): """ Convinience for writing preproc funcs """ gpath = gpath_list[aid] root_obj = ut.LazyDict({ 'aid': aid, 'gpath': gpath, 'image': lambda: vt.imread(gpath) }) return root_obj depc = DependencyCache(root_tablename=dummy_root, default_fname=fname, root_asobject=root_asobject, use_globals=False) _register_preproc = depc.register_preproc @_register_preproc( tablename='chipmask', parents=[dummy_root], colnames=['size', 'mask'], coltypes=[(int, int), ('extern', vt.imread, vt.imwrite)]) def dummy_manual_chipmask(depc, parent_rowids, config=None): import vtool as vt from plottool import interact_impaint mask_dpath = ut.unixjoin(depc.cache_dpath, 'ManualChipMask') ut.ensuredir(mask_dpath) if config is None: config = {} print('Requesting user defined chip mask') for rowid in parent_rowids: img = vt.imread(gpath_list[rowid]) mask = interact_impaint.impaint_mask2(img) mask_fpath = ut.unixjoin(mask_dpath, 'mask%d.png' % (rowid,)) vt.imwrite(mask_fpath, mask) w, h = vt.get_size(mask) yield (w, h), mask_fpath @_register_preproc( tablename='chip', parents=[dummy_root], colnames=['size', 'chip'], coltypes=[(int, int), vt.imread], asobject=True) def dummy_preproc_chip(depc, annot_list, config=None): """ TODO: Infer properties from docstr Args: annot_list (list): list of annot objects config (dict): config dictionary Returns: tuple : ((int, int), ('extern', vt.imread)) """ if config is None: config = {} # Demonstates using asobject to get input to function as a dictionary # of properties for annot in annot_list: print('Computing chips of annot=%r' % (annot,)) chip_fpath = annot['gpath'] w, h = vt.image.open_image_size(chip_fpath) size = (w, h) print('* chip_fpath = %r' % (chip_fpath,)) print('* size = %r' % (size,)) yield size, chip_fpath @_register_preproc( 'probchip', [dummy_root], ['size', 'probchip'], coltypes=[(int, int), ('extern', vt.imread)]) def dummy_preproc_probchip(depc, parent_rowids, config=None): if config is None: config = {} print('Computing probchip') for rowid in parent_rowids: yield (rowid, rowid), 'probchip.jpg' @_register_preproc( 'keypoint', ['chip'], ['kpts', 'num'], [np.ndarray, int], docstr='Used to store individual chip features (ellipses)',) def dummy_preproc_kpts(depc, parent_rowids, config=None): if config is None: config = {} print('Computing kpts') for rowid in parent_rowids: yield np.ones((7 + rowid, 6)) + rowid, 7 + rowid @_register_preproc('descriptor', ['keypoint'], ['vecs'], [np.ndarray],) def dummy_preproc_vecs(depc, parent_rowids, config=None): if config is None: config = {} print('Computing vecs') for rowid in parent_rowids: yield np.ones((7 + rowid, 8), dtype=np.uint8) + rowid, @_register_preproc('fgweight', ['keypoint', 'probchip'], ['fgweight'], [np.ndarray],) def dummy_preproc_fgweight(depc, kpts_rowid, probchip_rowid, config=None): if config is None: config = {} print('Computing fgweight') for rowid1, rowid2 in zip(kpts_rowid, probchip_rowid): yield np.ones(7 + rowid1), @_register_preproc('notch', [dummy_root], ['notchdata'],) def dummy_preproc_notch(depc, parent_rowids, config=None): if config is None: config = {} print('Computing notch') for rowid in parent_rowids: yield np.empty(5 + rowid), @_register_preproc('spam', ['fgweight', 'chip', 'keypoint'], ['spam', 'eggs', 'size', 'uuid', 'vector', 'textdata'], [str, int, (int, int), uuid.UUID, np.ndarray, ('extern', ut.readfrom)], 
docstr='I dont like spam',) def dummy_preproc_spam(depc, *args, **kwargs): config = kwargs.get('config', None) if config is None: config = {} print('Computing spam') ut.writeto('tmp.txt', ut.lorium_ipsum()) for x in zip(*args): size = (42, 21) uuid = ut.get_zero_uuid() vector = np.ones(3) yield ('spam', 3665, size, uuid, vector, 'tmp.txt') # table = depc['spam'] # print(ut.repr2(table.get_addtable_kw(), nl=2)) depc.initialize() # table.print_schemadef() # print(table.db.get_schema_current_autogeneration_str()) return depc
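# The chip preproc above is registered with asobject=True, so it receives
# lazily constructed annot objects built by root_asobject, where the image is
# only read when it is actually accessed. A tiny sketch of that lazy-property
# idea with a plain class instead of ut.LazyDict; the path and loader are
# placeholders.
class LazyAnnot(object):
    def __init__(self, aid, gpath, loader):
        self._vals = {'aid': aid, 'gpath': gpath}
        self._lazy = {'image': lambda: loader(gpath)}

    def __getitem__(self, key):
        if key not in self._vals and key in self._lazy:
            # compute on first access, then cache
            self._vals[key] = self._lazy[key]()
        return self._vals[key]

annot = LazyAnnot(1, '/tmp/fake_chip.png', loader=lambda p: 'pixels of ' + p)
print(annot['aid'], annot['gpath'])  # cheap fields, no image read
print(annot['image'])                # loader runs only on first access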