def build_dpath_to_fidx(fpath_list, fidx_list, root_dpath):
    dpath_to_fidx = ut.ddict(list)
    nTotal = len(fpath_list)
    _iter = zip(fidx_list, fpath_list)
    for fidx, fpath in ut.ProgIter(_iter, 'making dpath fidx map',
                                   freq=50000, nTotal=nTotal):
        current_path = fpath
        while True:
            current_path = dirname(current_path)
            dpath_to_fidx[current_path].append(fidx)
            if current_path == root_dpath:
                break
    return dpath_to_fidx

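# A minimal standalone sketch of the ancestor-directory mapping above, assuming
# ``ut.ddict`` is an alias for ``collections.defaultdict``. The paths below are
# hypothetical, and every path is assumed to live under ``root``.
from collections import defaultdict
from os.path import dirname


def map_ancestors_to_indices(fpaths, root):
    dpath_to_fidx = defaultdict(list)
    for fidx, fpath in enumerate(fpaths):
        current = fpath
        while True:
            current = dirname(current)
            dpath_to_fidx[current].append(fidx)
            if current == root:
                break
    return dpath_to_fidx


print(dict(map_ancestors_to_indices(['/data/a/x.jpg', '/data/b/y.jpg'], '/data')))
# {'/data/a': [0], '/data': [0, 1], '/data/b': [1]}
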
def _get_cm_edge_data(infr, edges, cm_list=None):
    symmetric = True
    if cm_list is None:
        cm_list = infr.cm_list
    # Find scores for the edges that exist in the graph
    edge_to_data = ut.ddict(dict)
    aid_to_cm = {cm.qaid: cm for cm in cm_list}
    for u, v in edges:
        if symmetric:
            u, v = e_(u, v)
        cm1 = aid_to_cm.get(u, None)
        cm2 = aid_to_cm.get(v, None)
        scores = []
        ranks = []
        for cm in ut.filter_Nones([cm1, cm2]):
            for aid in [u, v]:
                idx = cm.daid2_idx.get(aid, None)
                if idx is None:
                    continue
                score = cm.annot_score_list[idx]
                rank = cm.get_annot_ranks([aid])[0]
                scores.append(score)
                ranks.append(rank)
        if len(scores) == 0:
            score = None
            rank = None
        else:
            # Choose whichever one gave the best score
            idx = vt.safe_argmax(scores, nans=False)
            score = scores[idx]
            rank = ranks[idx]
        edge_to_data[(u, v)]['score'] = score
        edge_to_data[(u, v)]['rank'] = rank
    return edge_to_data

def convert_multigraph_to_graph(G):
    """
    For each duplicate edge make a dummy node.
    TODO: preserve data, keys, and directedness
    """
    import utool as ut
    edge_list = list(G.edges())
    node_list = list(G.nodes())
    dupitem_to_idx = ut.find_duplicate_items(edge_list)
    node_to_freq = ut.ddict(lambda: 0)
    remove_idxs = ut.flatten(dupitem_to_idx.values())
    ut.delete_items_by_index(edge_list, remove_idxs)

    for dup_edge in dupitem_to_idx.keys():
        freq = len(dupitem_to_idx[dup_edge])
        u, v = dup_edge[0:2]
        pair_node = dup_edge
        pair_nodes = [pair_node + tuple([count]) for count in range(freq)]
        for pair_node in pair_nodes:
            node_list.append(pair_node)
            for node in dup_edge:
                node_to_freq[node] += freq
            edge_list.append((u, pair_node))
            edge_list.append((pair_node, v))

    import networkx as nx
    G2 = nx.DiGraph()
    G2.add_edges_from(edge_list)
    G2.add_nodes_from(node_list)
    return G2

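# Toy sketch of the duplicate-edge bookkeeping that convert_multigraph_to_graph
# relies on, using only networkx and the standard library (this is not the
# author's full conversion; the edge labels are hypothetical).
import networkx as nx
from collections import Counter

G = nx.MultiGraph()
G.add_edges_from([('a', 'b'), ('a', 'b'), ('b', 'c')])
edge_freq = Counter(tuple(sorted(e)) for e in G.edges())
dup_edges = {e: n for e, n in edge_freq.items() if n > 1}
print(dup_edges)  # {('a', 'b'): 2} -> each duplicated edge would get a dummy node
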
def nx_to_adj_dict(graph):
    import utool as ut
    adj_dict = ut.ddict(list)
    for u, edges in graph.adjacency():
        adj_dict[u].extend(list(edges.keys()))
    adj_dict = dict(adj_dict)
    return adj_dict

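# Usage sketch for nx_to_adj_dict (assumes networkx >= 2.0, where
# ``graph.adjacency()`` yields ``(node, neighbor_dict)`` pairs, and that utool
# is installed).
import networkx as nx

graph = nx.Graph([(1, 2), (2, 3)])
print(nx_to_adj_dict(graph))  # {1: [2], 2: [1, 3], 3: [2]}
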
def mcc_hack():
    sample_weight = np.ones(len(self.samples), dtype=int)
    task_mccs = ut.ddict(dict)
    # Determine threshold levels per score type
    score_to_order = {}
    for scoretype in score_dict.keys():
        y_score = score_dict[scoretype].values
        sortx = np.argsort(y_score, kind='mergesort')[::-1]
        y_score = y_score[sortx]
        distinct_value_indices = np.where(np.diff(y_score))[0]
        threshold_idxs = np.r_[distinct_value_indices, y_score.size - 1]
        thresh = y_score[threshold_idxs]
        score_to_order[scoretype] = (sortx, y_score, thresh)

    classes_ = np.array([0, 1], dtype=int)
    for task in task_list:
        labels = self.samples.subtasks[task]
        for sublabels in labels.gen_one_vs_rest_labels():
            for scoretype in score_dict.keys():
                sortx, y_score, thresh = score_to_order[scoretype]
                y_true = sublabels.y_enc[sortx]
                mcc = -np.inf
                for t in thresh:
                    y_pred = (y_score > t).astype(int)
                    C1 = quick_cm(y_true, y_pred, classes_, sample_weight)
                    mcc1 = quick_mcc(C1)
                    if mcc1 < 0:
                        C2 = quick_cm(y_true, 1 - y_pred, classes_, sample_weight)
                        mcc1 = quick_mcc(C2)
                    mcc = max(mcc1, mcc)
                # logger.info('mcc = %r' % (mcc,))
                task_mccs[sublabels.task_name][scoretype] = mcc
    return task_mccs

def cheetah_stats(ibs):
    filters = [
        dict(view=['right', 'frontright', 'backright'], minqual='good'),
        dict(view=['right', 'frontright', 'backright']),
    ]
    for filtkw in filters:
        annots = ibs.annots(ibs.filter_annots_general(**filtkw))
        unique_nids, grouped_annots = annots.group(annots.nids)
        annots_per_name = ut.lmap(len, grouped_annots)
        annots_per_name_freq = ut.dict_hist(annots_per_name)

        def bin_mapper(num):
            if num < 5:
                return (num, num + 1)
            else:
                for bin, mod in [(20, 5), (50, 10)]:
                    if num < bin:
                        low = (num // mod) * mod
                        high = low + mod
                        return (low, high)
                if num >= bin:
                    return (bin, None)
                else:
                    assert False, str(num)

        hist = ut.ddict(lambda: 0)
        for num in annots_per_name:
            hist[bin_mapper(num)] += 1
        hist = ut.sort_dict(hist)

        print('------------')
        print('filters = %s' % ut.repr4(filtkw))
        print('num_annots = %r' % (len(annots)))
        print('num_names = %r' % (len(unique_nids)))
        print('annots_per_name_freq = %s' % (ut.repr4(annots_per_name_freq)))
        print('annots_per_name_freq (ranges) = %s' % (ut.repr4(hist)))
        assert sum(hist.values()) == len(unique_nids)

def trnps_(dict_list):
    """ transpose dict list """
    list_dict = ut.ddict(list)
    for dict_ in dict_list:
        for key, val in dict_.items():
            list_dict[key + '_list'].append(val)
    return list_dict

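# Usage sketch for trnps_ with toy input (assumes utool is importable as ``ut``).
rows = [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}]
print(dict(trnps_(rows)))  # {'x_list': [1, 3], 'y_list': [2, 4]}
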
def clean_line_profile_text(text):
    """
    Sorts the output from line profile by execution time
    Removes entries which were not run
    """
    #
    # Split the file into blocks along delimiters and put delimiters back in the list
    delim = 'Total time: '
    #delim = 'File: '
    list_ = utool.regex_split('^' + delim, text)
    for ix in range(1, len(list_)):
        list_[ix] = delim + list_[ix]
    #
    # Build a map from times to line_profile blocks
    prefix_list = []
    timemap = utool.ddict(list)
    for ix in range(len(list_)):
        block = list_[ix]
        total_time = get_block_totaltime(block)
        # Blocks without time go at the front of sorted output
        if total_time is None:
            prefix_list.append(block)
        # Blocks that are not run are not appended to output
        elif total_time != 0:
            timemap[total_time].append(block)
    # Sort the blocks by time
    sorted_lists = sorted(six.iteritems(timemap), key=operator.itemgetter(0))
    newlist = prefix_list[:]
    for key, val in sorted_lists:
        newlist.extend(val)
    # Rejoin output text
    output_text = '\n'.join(newlist)
    return output_text

def make_header(tblname):
    """
    Args:
        table_name - the internal table name
    """
    tblnice = TABLE_NICE[tblname]
    colnames = TABLE_COLNAMES[tblname]
    editset = TABLE_EDITSET[tblname]
    tblgetters = getters[tblname]
    tblsetters = setters[tblname]
    # if levels aren't found, we're not dealing with a tree, so everything is at level 0
    collevel_dict = TABLE_TREE_LEVELS.get(tblname, ut.ddict(lambda: 0))
    collevels = [collevel_dict[colname] for colname in colnames]
    hiddencols = TABLE_HIDDEN_LIST.get(tblname, [False for _ in range(len(colnames))])
    numstripes = TABLE_STRIPE_LIST.get(tblname, 1)
    colwidths_dict = widths.get(tblname, {})
    colwidths = [colwidths_dict.get(colname, 100) for colname in colnames]

    def get_column_data(colname):
        try:
            coldef_tup = COL_DEF[colname]
            coltype, colnice = coldef_tup
        except KeyError as ex:
            strict = False
            ut.printex(ex, 'Need to add type info for colname=%r to COL_DEF' % colname,
                       iswarning=not strict)
            if strict:
                raise
            else:
                # default coldef to give a string type and nice=colname
                coltype, colnice = (str, colname)
        coledit = colname in editset
        colgetter = tblgetters[colname]
        colsetter = None if not coledit else tblsetters.get(colname, None)
        return (coltype, colnice, coledit, colgetter, colsetter)

    try:
        _tuplist = list(zip(*list(map(get_column_data, colnames))))
        (coltypes, colnices, coledits, colgetters, colsetters) = _tuplist
    except KeyError as ex:
        ut.printex(ex, key_list=['tblname', 'colnames'])
        raise

    header = {
        'name'            : tblname,
        'nice'            : tblnice,
        'iders'           : iders[tblname],
        'col_name_list'   : colnames,
        'col_type_list'   : coltypes,
        'col_nice_list'   : colnices,
        'col_edit_list'   : coledits,
        'col_getter_list' : colgetters,
        'col_setter_list' : colsetters,
        'col_level_list'  : collevels,
        'col_hidden_list' : hiddencols,
        'num_duplicates'  : numstripes,
        'get_thumb_size'  : lambda: ibs.cfg.other_cfg.thumb_size,
        'col_width_list'  : colwidths,  # TODO
    }
    return header

def init_tablecache():
    r"""
    Returns:
        defaultdict: tablecache

    CommandLine:
        python -m ibeis.control.accessor_decors --test-init_tablecache

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.accessor_decors import *  # NOQA
        >>> result = init_tablecache()
        >>> print(result)
    """
    # 4 levels of dictionaries
    # tablename, colname, kwargs, and then rowids
    tablecache = ut.ddict(lambda: ut.ddict(lambda: ut.ddict(dict)))
    return tablecache

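# Standalone sketch of the same 4-level cache shape, assuming ``ut.ddict`` is an
# alias for ``collections.defaultdict``; the keys below are hypothetical.
from collections import defaultdict

tablecache = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
tablecache['annot']['name']['cfg1'][42] = 'cached-value'
print(tablecache['annot']['name']['cfg1'][42])   # cached-value
print(42 in tablecache['annot']['name']['cfg2'])  # False (innermost level is a plain dict)
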
def build_multindex(list_):
    """ Creates mapping from unique items to indices at which they appear """
    multiindex_dict_ = ut.ddict(list)
    for item, index in zip(list_, range(len(list_))):
        if item is not None:
            multiindex_dict_[item].append(index)
    return multiindex_dict_

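# Usage sketch for build_multindex (assumes utool is importable as ``ut``).
print(dict(build_multindex(['a', 'b', 'a', None, 'b'])))
# {'a': [0, 2], 'b': [1, 4]}
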
def accumulate_scores(dscores_list, daids_list):
    """ helper to accumulate grouped scores for database annotations """
    daid2_aggscore = utool.ddict(lambda: 0)
    ### Weirdly iflatten was slower here
    for dscores, daids in zip(dscores_list, daids_list):
        for daid, score in zip(daids, dscores):
            daid2_aggscore[daid] += score
    daid_agg_keys = np.array(list(daid2_aggscore.keys()))
    daid_agg_scores = np.array(list(daid2_aggscore.values()))
    return daid_agg_keys, daid_agg_scores

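# Usage sketch for accumulate_scores with toy scores (assumes utool and numpy
# are importable as ``utool`` and ``np``).
keys, scores = accumulate_scores([[0.5, 1.0], [2.0]], [[7, 8], [7]])
print(dict(zip(keys.tolist(), scores.tolist())))  # {7: 2.5, 8: 1.0}
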
def filter_duplicate_acfgs(expanded_aids_list, acfg_list, acfg_name_list, verbose=None):
    """
    Removes configs with the same expanded aids list

    CommandLine:
        # The following will trigger this function:
        wbia -m wbia get_annotcfg_list:0 -a timectrl timectrl:view=left --db PZ_MTEST
    """
    from wbia.expt import annotation_configs

    if verbose is None:
        verbose = ut.VERBOSE

    acfg_list_ = []
    expanded_aids_list_ = []
    seen_ = ut.ddict(list)
    for acfg, (qaids, daids) in zip(acfg_list, expanded_aids_list):
        key = (ut.hashstr_arr27(qaids, 'qaids'), ut.hashstr_arr27(daids, 'daids'))
        if key in seen_:
            seen_[key].append(acfg)
            continue
        else:
            seen_[key].append(acfg)
            expanded_aids_list_.append((qaids, daids))
            acfg_list_.append(acfg)
    if verbose:
        duplicate_configs = dict(
            [(key_, val_) for key_, val_ in seen_.items() if len(val_) > 1]
        )
        if len(duplicate_configs) > 0:
            logger.info('The following configs produced duplicate annotation configs')
            for key, val in duplicate_configs.items():
                # Print the difference between the duplicate configs
                _tup = annotation_configs.compress_acfg_list_for_printing(val)
                nonvaried_compressed_dict, varied_compressed_dict_list = _tup
                logger.info('+--')
                logger.info('key = %r' % (key,))
                logger.info(
                    'duplicate_varied_cfgs = %s'
                    % (ut.repr2(varied_compressed_dict_list),)
                )
                logger.info(
                    'duplicate_nonvaried_cfgs = %s'
                    % (ut.repr2(nonvaried_compressed_dict),)
                )
                logger.info('L__')

    if verbose >= 1:
        logger.info(
            '[harn.help] parsed %d / %d unique annot configs'
            % (len(acfg_list_), len(acfg_list))
        )
    if verbose > 2:
        logger.info('[harn.help] parsed from: %r' % (acfg_name_list,))
    return expanded_aids_list_, acfg_list_

def __init__(self, rowids, obj1d, cache=None):
    self._rowids = list(rowids)
    self._obj1d = obj1d
    self._unique_rowids = set(self._rowids)
    self._unique_inverse = ut.list_alignment(self._unique_rowids, self._rowids)
    if cache is None:
        self._cache = ut.ddict(dict)
    else:
        self._cache = cache
    # Views always cache data for now
    self._caching = True

def sub(self, other):
    """
    CommandLine:
        python -m mtgmonte.mtgobjs --exec-ManaSet.sub:0
        python -m mtgmonte.mtgobjs --exec-ManaSet.sub:1

    Example:
        >>> # ENABLE_DOCTEST
        >>> from mtgmonte.mtgobjs import *
        >>> from mtgmonte import mtgobjs
        >>> self = mtgobjs.ManaSet('RRRUC')
        >>> other = mtgobjs.ManaSet('RRU')
        >>> mana = self - other
        >>> result = ('mana = %s' % (mana,))
        >>> print(result)
        mana = {RC}

    Example:
        >>> # ENABLE_DOCTEST
        >>> from mtgmonte.mtgobjs import *  # NOQA
        >>> self = ManaSet(['WWURC'])
        >>> other = ManaCost([('W', 'colored'), ('W', 'colored'), ('U', 'colored'), ('1', 'uncolored')])
        >>> mana = self - other
        >>> result = ('mana = %s' % (mana,))
        >>> print(result)
        mana = {R}
    """
    if isinstance(other, ManaCost):
        colored_cost = other.colored.to_manaset()
        remainder1 = self.sub(colored_cost)
        color2_remain = remainder1.get_colordict()
        uncolored_need = other.num_uncolored
        # TODO: value different colors differently for payment
        if uncolored_need > 0:
            for color in list(color2_remain.keys()):
                using = min(uncolored_need, color2_remain[color])
                color2_remain[color] -= using
                uncolored_need -= using
            if uncolored_need > 0:
                raise NotEnoughManaError('Cannot subtract more mana from less')
        # Todo hybrid / phyrexian
    else:
        color2_need = ut.dict_hist(other._manas)
        color2_remain = ut.ddict(lambda: 0, ut.dict_hist(self._manas))
        for color, num_need in color2_need.items():
            num_have = color2_remain[color]
            if num_have < num_need:
                raise NotEnoughManaError('Cannot subtract more mana from less')
            color2_remain[color] -= num_need
    color2_remain = delete_dict_zeros(color2_remain)
    remainder = ManaSet(color2_remain)
    return remainder

def search_env_paths(fname, key_list=None, verbose=None):
    r"""
    Searches your PATH to see if fname exists

    Args:
        fname (str): file name to search for (can be glob pattern)

    CommandLine:
        python -m utool search_env_paths --fname msvcr*.dll
        python -m utool search_env_paths --fname '*flann*'

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_cplat import *  # NOQA
        >>> import utool as ut
        >>> fname = 'opencv2/highgui/libopencv_highgui.so'
        >>> fname = ut.get_argval('--fname', default='*')
        >>> print('fname = %r' % (fname,))
        >>> key_list = None  # ['PATH']
        >>> found = search_env_paths(fname, key_list)
        >>> print(ut.repr4(found, nl=True, strvals=True))

    Ignore:
        OpenCV_DIR:PATH={share_opencv}
        OpenCV_CONFIG_PATH:FILEPATH={share_opencv}
    """
    import utool as ut
    # from os.path import join
    if key_list is None:
        key_list = [key for key in os.environ if key.find('PATH') > -1]
        print('key_list = %r' % (key_list,))
    found = ut.ddict(list)
    for key in key_list:
        dpath_list = os.environ[key].split(os.pathsep)
        for dpath in dpath_list:
            #if verbose:
            #    print('dpath = %r' % (dpath,))
            # testname = join(dpath, fname)
            matches = ut.glob(dpath, fname)
            found[key].extend(matches)
            #import fnmatch
            #import utool
            #utool.embed()
            #if ut.checkpath(testname, verbose=False):
            #    if verbose:
            #        print('Found in key=%r' % (key,))
            #    ut.checkpath(testname, verbose=True, info=True)
            #    found += [testname]
    return dict(found)

def _get_cm_agg_aid_ranking(infr, cc):
    aid_to_cm = {cm.qaid: cm for cm in infr.cm_list}
    all_scores = ut.ddict(list)
    for qaid in cc:
        cm = aid_to_cm[qaid]
        # should we be doing nids?
        for daid, score in zip(cm.get_top_aids(), cm.get_top_scores()):
            all_scores[daid].append(score)

    max_scores = sorted(
        (max(scores), aid) for aid, scores in all_scores.items()
    )[::-1]
    ranked_aids = ut.take_column(max_scores, 1)
    return ranked_aids

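# Standalone sketch of the "rank by best accumulated score" pattern above,
# assuming ``ut.ddict`` is ``collections.defaultdict`` (toy aid/score pairs).
from collections import defaultdict

all_scores = defaultdict(list)
for aid, score in [(3, 0.2), (5, 0.9), (3, 0.7)]:
    all_scores[aid].append(score)
ranked = [aid for _, aid in sorted(((max(s), a) for a, s in all_scores.items()), reverse=True)]
print(ranked)  # [5, 3]
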
def find_pretrained(self):
    import glob
    import parse
    fname_fmt = self.fname_fmtstr + '.cPkl'
    task_clf_candidates = ut.ddict(list)
    globstr = self.fname_parts[0] + '.*.cPkl'
    for fpath in glob.iglob(join(self.dpath, globstr)):
        fname = basename(fpath)
        result = parse.parse(fname_fmt, fname)
        if result:
            task_key = result.named['task_key']
            task_clf_candidates[task_key].append(fpath)
    return task_clf_candidates

def get_photobomber_map(ibs, aids, aid_to_nid=None):
    """
    Builds a map of which names photobomb other names.

    python -m wbia.gui.id_review_api --test-test_review_widget --show --db PZ_MTEST -a default:qindex=0

    >>> import wbia
    >>> dbdir = ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
    >>> ibs = wbia.opendb(dbdir='/home/joncrall/lev/media/danger/GGR/GGR-IBEIS')
    >>> filter_kw = {
    >>>     'multiple': False,
    >>>     'minqual': 'good',
    >>>     'is_known': True,
    >>>     'min_pername': 2,
    >>>     'view': ['right'],
    >>> }
    >>> aids = ibs.filter_annots_general(ibs.get_valid_aids(), filter_kw=filter_kw)
    """
    ams_list = ibs.get_annotmatch_rowids_from_aid(aids)
    flags_list = ibs.unflat_map(
        ut.partial(ibs.get_annotmatch_prop, 'Photobomb'), ams_list)
    pb_ams = ut.zipcompress(ams_list, flags_list)
    has_pb_ams = [len(ams) > 0 for ams in pb_ams]
    pb_ams_ = ut.compress(pb_ams, has_pb_ams)
    # aids_ = ut.compress(aids, has_pb_ams)
    pb_ams_flat = ut.flatten(pb_ams_)
    pb_aids1_ = ibs.get_annotmatch_aid1(pb_ams_flat)
    pb_aids2_ = ibs.get_annotmatch_aid2(pb_ams_flat)
    pb_aid_pairs_ = list(zip(pb_aids1_, pb_aids2_))
    if aid_to_nid is None:
        pb_nid_pairs_ = ibs.unflat_map(ibs.get_annot_nids, pb_aid_pairs_)
    else:
        pb_nid_pairs_ = ibs.unflat_map(ut.partial(ut.take, aid_to_nid), pb_aid_pairs_)
    # invalid_aid_map = ut.ddict(set)
    # for aid1, aid2 in pb_aid_pairs_:
    #     if aid1 != aid2:
    #         invalid_aid_map[aid1].add(aid2)
    #         invalid_aid_map[aid2].add(aid1)
    invalid_nid_map = ut.ddict(set)
    for nid1, nid2 in pb_nid_pairs_:
        if nid1 != nid2:
            invalid_nid_map[nid1].add(nid2)
            invalid_nid_map[nid2].add(nid1)
    return invalid_nid_map

def search_env_paths(fname, key_list=None, verbose=None):
    r"""
    Searches your PATH to see if fname exists

    Args:
        fname (str): file name to search for (can be glob pattern)

    CommandLine:
        python -m utool search_env_paths --fname msvcr*.dll

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_cplat import *  # NOQA
        >>> import utool as ut
        >>> fname = 'opencv2/highgui/libopencv_highgui.so'
        >>> fname = ut.get_argval('--fname', default='*')
        >>> key_list = ['PATH']
        >>> found = search_env_paths(fname, key_list)
        >>> print(ut.dict_str(found, nl=True, strvals=True))

    Ignore:
        OpenCV_DIR:PATH={share_opencv}
        OpenCV_CONFIG_PATH:FILEPATH={share_opencv}
    """
    import utool as ut
    # from os.path import join
    if key_list is None:
        key_list = [key for key in os.environ if key.find('PATH') > -1]
    found = ut.ddict(list)
    for key in key_list:
        dpath_list = os.environ[key].split(os.pathsep)
        for dpath in dpath_list:
            #if verbose:
            #    print('dpath = %r' % (dpath,))
            # testname = join(dpath, fname)
            matches = ut.glob(dpath, fname)
            found[key].extend(matches)
            #import fnmatch
            #import utool
            #utool.embed()
            #if ut.checkpath(testname, verbose=False):
            #    if verbose:
            #        print('Found in key=%r' % (key,))
            #    ut.checkpath(testname, verbose=True, info=True)
            #    found += [testname]
    return dict(found)

def get_annot_age_stats(aid_list):
    annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
    annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
    age_dict = ut.ddict((lambda: 0))
    for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
        if (min_age is None or min_age < 12) and max_age < 12:
            age_dict['Infant'] += 1
        elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
            age_dict['Juvenile'] += 1
        elif 36 <= min_age and (36 <= max_age or max_age is None):
            age_dict['Adult'] += 1
        else:
            print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, ))
            age_dict['UNKNOWN'] += 1
    return age_dict

def compute_data_gamma_(invindex, use_cache=True):
    """
    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> with_internals = True
    >>> invindex = index_data_annots(annots_df, daids, words, with_internals)
    >>> daid2_gamma = compute_data_gamma_(invindex, use_cache=True)
    """
    cache_key = utool.hashstr(invindex.get_cfgstr())
    if use_cache:
        try:
            daid2_gamma = utool.global_cache_read(cache_key, appname='smk')
            #print('gamma_dbg cache hit')
            return daid2_gamma
        except Exception:
            pass
    # Grouping by aid and words
    mark, end_ = utool.log_progress(('gamma grouping %s ' % (cache_key,)),
                                    invindex.wx2_drvecs.shape[0], flushfreq=100)
    daid2_wx2_drvecs = utool.ddict(dict)
    for count, wx in enumerate(invindex.wx2_drvecs.index):
        if count % 100 == 0:
            mark(wx)
        group = invindex.wx2_drvecs[wx].groupby(invindex.idx2_daid)
        for daid, vecs in group:
            daid2_wx2_drvecs[daid][wx] = vecs.values
    end_()
    # Summation over words for each aid
    mark, end_ = utool.log_progress('gamma summation ', len(daid2_wx2_drvecs),
                                    flushfreq=100)
    daid2_gamma = pd.Series(np.zeros(invindex.daids.shape[0]),
                            index=invindex.daids, name='gamma')
    wx2_weight = invindex.wx2_weight
    for count, (daid, wx2_drvecs) in enumerate(six.iteritems(daid2_wx2_drvecs)):
        if count % 100 == 0:
            mark(count)
        wx2_rvecs = wx2_drvecs
        daid2_gamma[daid] = gamma_summation(wx2_rvecs, wx2_weight)
    utool.global_cache_write(cache_key, daid2_gamma, appname='smk')
    return daid2_gamma

def parse_timemap_from_blocks(profile_block_list):
    """
    Build a map from times to line_profile blocks
    """
    prefix_list = []
    timemap = ut.ddict(list)
    for ix in range(len(profile_block_list)):
        block = profile_block_list[ix]
        total_time = get_block_totaltime(block)
        # Blocks without time go at the front of sorted output
        if total_time is None:
            prefix_list.append(block)
        # Blocks that are not run are not appended to output
        elif total_time != 0:
            timemap[total_time].append(block)
    return prefix_list, timemap

def analyze_internal_duplicats(self):
    multis = self.find_internal_duplicates()
    unique_dnames = set([])
    associations = ut.ddict(lambda: 0)
    # diag_dups = []
    # other_dups = []
    for sub in multis.group_items('hash').values():
        dnames = sub['dname']
        unique_dnames.update(dnames)
        for dn1, dn2 in ut.combinations(dnames, 2):
            # if dn1 == dn2:
            #     diag_dups[dn1] += 1
            key = tuple(sorted([dn1, dn2]))
            associations[key] += 1
        print(sub['dname'])

def make_header(tblname):
    """
    Input:
        table_name - the internal table name
    """
    tblnice = TABLE_NICE[tblname]
    colnames = TABLE_COLNAMES[tblname]
    editset = TABLE_EDITSET[tblname]
    tblgetters = getters[tblname]
    tblsetters = setters[tblname]
    # if levels aren't found, we're not dealing with a tree, so everything is at level 0
    collevel_dict = TABLE_TREE_LEVELS.get(tblname, utool.ddict(lambda: 0))
    collevels = [collevel_dict[colname] for colname in colnames]
    hiddencols = TABLE_HIDDEN_LIST.get(tblname, [False for _ in xrange(len(colnames))])
    numstripes = TABLE_STRIPE_LIST.get(tblname, 1)

    def get_column_data(colname):
        coltype = COL_DEF[colname][0]
        colnice = COL_DEF[colname][1]
        coledit = colname in editset
        colgetter = tblgetters[colname]
        colsetter = None if not coledit else tblsetters.get(colname, None)
        return (coltype, colnice, coledit, colgetter, colsetter)

    try:
        (coltypes, colnices, coledits, colgetters, colsetters) = zip(*map(get_column_data, colnames))
    except KeyError as ex:
        utool.printex(ex, key_list=['tblname', 'colnames'])
        raise

    header = {
        'name': tblname,
        'nice': tblnice,
        'iders': iders[tblname],
        'col_name_list': colnames,
        'col_type_list': coltypes,
        'col_nice_list': colnices,
        'col_edit_list': coledits,
        'col_getter_list': colgetters,
        'col_setter_list': colsetters,
        'col_level_list': collevels,
        'col_hidden_list': hiddencols,
        'num_duplicates': numstripes,
    }
    return header

def find_used_citations(tex_fpath_list, return_inverse=False):
    """
    fpaths = get_thesis_tex_fpaths()
    """
    citekey_list = []
    inverse = ut.ddict(list)
    for tex_fpath in tex_fpath_list:
        text = ut.read_from(tex_fpath)
        #print('\n\n+-----')
        local_cites = find_citations(text)
        citekey_list.extend(local_cites)
        for key in local_cites:
            inverse[key].append(tex_fpath)
    citekey_list = sorted(set(citekey_list))
    if return_inverse:
        return citekey_list, inverse
    else:
        return citekey_list

def compute_idf_label1(aids_list, daid2_label):
    """
    One of our idf extensions

    Example:
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> wx_series = np.arange(len(invindex.words))
        >>> idx2_aid = invindex.idx2_daid
        >>> daid2_label = invindex.daid2_label
        >>> _ = helper_idf_wordgroup(wx2_idxs, idx2_aid, wx_series)
        >>> idxs_list, aids_list = _
        >>> wx2_idf = compute_idf_label1(wx_series, wx2_idxs, idx2_aid, daids)
    """
    nWords = len(aids_list)
    # Computes our novel label idf weight
    lblindex_list = np.array(ut.tuples_to_unique_scalars(daid2_label.values()))
    #daid2_lblindex = dict(zip(daid_list, lblindex_list))
    unique_lblindexes, groupxs = clustertool.group_indices(lblindex_list)
    daid_list = np.array(daid2_label.keys())
    daids_list = [daid_list.take(xs) for xs in groupxs]
    daid2_wxs = ut.ddict(list)
    for wx, daids in enumerate(aids_list):
        for daid in daids:
            daid2_wxs[daid].append(wx)
    lblindex2_daids = list(zip(unique_lblindexes, daids_list))
    nLabels = len(unique_lblindexes)
    pcntLblsWithWord = np.zeros(nWords, np.float64)
    # Get num times word appears for each label
    for lblindex, daids in lblindex2_daids:
        nWordsWithLabel = np.zeros(nWords)
        for daid in daids:
            wxs = daid2_wxs[daid]
            nWordsWithLabel[wxs] += 1
        pcntLblsWithWord += (1 - nWordsWithLabel.astype(np.float64) / len(daids))
    # Labels for each word
    idf_list = np.log(np.divide(nLabels, np.add(pcntLblsWithWord, 1),
                                dtype=hstypes.FLOAT_TYPE),
                      dtype=hstypes.FLOAT_TYPE)
    return idf_list

def filter_duplicate_acfgs(expanded_aids_list, acfg_list, acfg_name_list, verbose=ut.NOT_QUIET):
    """
    Removes configs with the same expanded aids list

    CommandLine:
        # The following will trigger this function:
        ibeis -e print_acfg -a timectrl timectrl:view=left --db PZ_MTEST
    """
    from ibeis.expt import annotation_configs
    acfg_list_ = []
    expanded_aids_list_ = []
    seen_ = ut.ddict(list)
    for acfg, (qaids, daids) in zip(acfg_list, expanded_aids_list):
        key = (ut.hashstr_arr27(qaids, 'qaids'), ut.hashstr_arr27(daids, 'daids'))
        if key in seen_:
            seen_[key].append(acfg)
            continue
        else:
            seen_[key].append(acfg)
            expanded_aids_list_.append((qaids, daids))
            acfg_list_.append(acfg)
    if verbose:
        duplicate_configs = dict(
            [(key_, val_) for key_, val_ in seen_.items() if len(val_) > 1])
        if len(duplicate_configs) > 0:
            print('The following configs produced duplicate annotation configs')
            for key, val in duplicate_configs.items():
                # Print the semantic difference between the duplicate configs
                _tup = annotation_configs.compress_acfg_list_for_printing(val)
                nonvaried_compressed_dict, varied_compressed_dict_list = _tup
                print('+--')
                print('key = %r' % (key,))
                print('duplicate_varied_cfgs = %s' % (
                    ut.list_str(varied_compressed_dict_list),))
                print('duplicate_nonvaried_cfgs = %s' % (
                    ut.dict_str(nonvaried_compressed_dict),))
                print('L__')
        print('[harn.help] parsed %d / %d unique annot configs from: %r' % (
            len(acfg_list_), len(acfg_list), acfg_name_list))
    return expanded_aids_list_, acfg_list_

def __init__(back, ibs=None):
    """ Creates GUIBackend object """
    QtCore.QObject.__init__(back)
    print('[back] MainWindowBackend.__init__()')
    back.ibs = None
    back.cfg = None
    # State variables
    back.sel_aids = []
    back.sel_nids = []
    back.sel_gids = []
    back.sel_qres = []
    back.active_enc = 0
    back.query_mode = 'intra_encounter'
    back.encounter_query_results = utool.ddict(dict)
    # Create GUIFrontend object
    back.mainwin = newgui.IBEISMainWindow(back=back, ibs=ibs)
    back.front = back.mainwin.ibswgt
    back.ibswgt = back.front  # Alias
    # connect signals and other objects
    fig_presenter.register_qt4_win(back.mainwin)

def invert_lists(aids, wx_lists, all_wxs=None):
    """
    takes corresponding lists of (aids, wxs) and maps wxs to aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> aids = [1, 2, 3]
        >>> wx_lists = [[0, 1], [20, 0, 1], [3]]
        >>> wx_to_aids = invert_lists(aids, wx_lists)
        >>> result = ('wx_to_aids = %s' % (ut.repr2(wx_to_aids),))
        >>> print(result)
        wx_to_aids = {0: [1, 2], 1: [1, 2], 3: [3], 20: [2]}
    """
    if all_wxs is None:
        wx_to_aids = ut.ddict(list)
    else:
        wx_to_aids = {wx: [] for wx in all_wxs}
    for aid, wxs in zip(aids, wx_lists):
        for wx in wxs:
            wx_to_aids[wx].append(aid)
    return wx_to_aids

def prepare_annot_pairs(ibs, qaids, daids, qconfig2_, dconfig2_):
    # Prepare lazy attributes for annotations
    qannot_cfg = ibs.depc.stacked_config(None, 'featweight', qconfig2_)
    dannot_cfg = ibs.depc.stacked_config(None, 'featweight', dconfig2_)
    unique_qaids = set(qaids)
    unique_daids = set(daids)

    # Determine a unique set of annots per config
    configured_aids = ut.ddict(set)
    configured_aids[qannot_cfg].update(unique_qaids)
    configured_aids[dannot_cfg].update(unique_daids)

    # Make efficient annot-object representation
    configured_obj_annots = {}
    for config, aids in configured_aids.items():
        annots = ibs.annots(sorted(list(aids)), config=config)
        configured_obj_annots[config] = annots.view()

    # These annot views behave like annot objects
    # but they use the same internal cache
    annots1 = configured_obj_annots[qannot_cfg].view(qaids)
    annots2 = configured_obj_annots[dannot_cfg].view(daids)
    return annots1, annots2

def match_kernel(wx2_qrvecs, wx2_qfxs, invindex, qaid): """ >>> from ibeis.model.hots.smk.smk import * # NOQA >>> ibs, annots_df, taids, daids, qaids, nWords = testdata() >>> words = learn_visual_words(annots_df, taids, nWords) >>> invindex = index_data_annots(annots_df, daids, words) >>> qaid = qaids[0] >>> wx2_qfxs, wx2_qrvecs = compute_query_repr(annots_df, qaid, invindex) >>> daid2_totalscore = match_kernel(wx2_qrvecs, wx2_qfxs, invindex, qaid) """ _daids = invindex.daids idx2_daid = invindex.idx2_daid wx2_drvecs = invindex.wx2_drvecs wx2_weight = invindex.wx2_weight daid2_gamma = invindex.daid2_gamma wx2_rvecs = wx2_qrvecs query_gamma = gamma_summation(wx2_rvecs, wx2_weight) # Accumulate scores over the entire database daid2_aggscore = pd.Series(np.zeros(len(_daids)), index=_daids, name='total_score') common_wxs = set(wx2_qrvecs.keys()).intersection(set(wx2_drvecs.keys())) daid2_wx2_scoremat = utool.ddict(lambda: utool.ddict(list)) # for each word compute the pairwise scores between matches mark, end = utool.log_progress('query word: ', len(common_wxs), flushfreq=100) for count, wx in enumerate(common_wxs): if count % 100 == 0: mark(count) # Query and database vectors for wx-th word qrvecs = wx2_qrvecs[wx] drvecs = wx2_drvecs[wx] # Word Weight weight = wx2_weight[wx] # Compute score matrix qfx2_wscore = Match_N(qrvecs, drvecs) qfx2_wscore.groupby(idx2_daid) # Group scores by database annotation ids group = qfx2_wscore.groupby(idx2_daid, axis=1) for daid, scoremat in group: daid2_wx2_scoremat[daid][wx] = scoremat #qfx2_wscore = pd.DataFrame(qfx2_wscore_, index=qfxs, columns=_idxs) daid2_wscore = weight * qfx2_wscore.sum(axis=0).groupby(idx2_daid).sum() daid2_aggscore = daid2_aggscore.add(daid2_wscore, fill_value=0) daid2_totalscore = daid2_aggscore * daid2_gamma * query_gamma end() daid_fm = {} daid_fs = {} daid_fk = {} mark, end = utool.log_progress('accumulating match info: ', len(daid2_wx2_scoremat), flushfreq=100) for count, item in enumerate(daid2_wx2_scoremat.items()): daid, wx2_scoremat = item if count % 25 == 0: mark(count) fm_accum = [] fs_accum = [] fk_accum = [] for wx, scoremat in wx2_scoremat.iteritems(): qfxs = scoremat.index dfxs = invindex.idx2_dfx[scoremat.columns] fm_ = np.vstack(np.dstack(np.meshgrid(qfxs, dfxs, indexing='ij'))) fs_ = scoremat.values.flatten() lower_thresh = 0.01 valid = [fs_ > lower_thresh] fm = fm_[valid] fs = fs_[valid] fk = np.ones(len(fm), dtype=np.int32) fm_accum.append(fm) fs_accum.append(fs) fk_accum.append(fk) daid_fm[daid] = np.vstack(fm_accum) daid_fs[daid] = np.hstack(fs_accum).T daid_fk[daid] = np.hstack(fk_accum).T chipmatch = (daid_fm, daid_fs, daid_fk,) daid2_totalscore.sort(axis=1, ascending=False) return daid2_totalscore, chipmatch
def find_latest_remote(self): """ Used to update the published dict CommandLine: python -m wbia.algo.verif.vsone find_latest_remote Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.verif.vsone import * # NOQA >>> self = Deployer() >>> task_clf_names = self.find_latest_remote() """ base_url = 'https://{remote}/public/models/pairclf'.format( **self.publish_info) import requests import bs4 resp = requests.get(base_url) soup = bs4.BeautifulSoup(resp.text, 'html.parser') table = soup.findAll('table')[0] def parse_bs_table(table): n_columns = 0 n_rows = 0 column_names = [] # Find number of rows and columns # we also find the column titles if we can for row in table.find_all('tr'): td_tags = row.find_all('td') if len(td_tags) > 0: n_rows += 1 if n_columns == 0: n_columns = len(td_tags) # Handle column names if we find them th_tags = row.find_all('th') if len(th_tags) > 0 and len(column_names) == 0: for th in th_tags: column_names.append(th.get_text()) # Safeguard on Column Titles if len(column_names) > 0 and len(column_names) != n_columns: raise Exception( 'Column titles do not match the number of columns') columns = column_names if len(column_names) > 0 else range( 0, n_columns) import pandas as pd df = pd.DataFrame(columns=columns, index=list(range(0, n_rows))) row_marker = 0 for row in table.find_all('tr'): column_marker = 0 columns = row.find_all('td') for column in columns: df.iat[row_marker, column_marker] = column.get_text().strip() column_marker += 1 if len(columns) > 0: row_marker += 1 return df df = parse_bs_table(table) # Find all available models df = df[df['Name'].map(lambda x: x.endswith('.cPkl'))] # df = df[df['Last modified'].map(len) > 0] fname_fmt = self.fname_fmtstr + '.cPkl' task_clf_candidates = ut.ddict(list) import parse for idx, row in df.iterrows(): fname = basename(row['Name']) result = parse.parse(fname_fmt, fname) if result: task_key = result.named['task_key'] species = result.named['species'] task_clf_candidates[(species, task_key)].append(idx) task_clf_fnames = ut.ddict(dict) for key, idxs in task_clf_candidates.items(): species, task_key = key # Find the classifier most recently created max_idx = ut.argmax(df.loc[idxs]['Last modified'].tolist()) fname = df.loc[idxs[max_idx]]['Name'] task_clf_fnames[species][task_key] = fname logger.info('published = ' + ut.repr2(task_clf_fnames, nl=2)) return task_clf_fnames
def latex_dbstats(ibs_list, **kwargs): r""" Args: ibs (IBEISController): ibeis controller object CommandLine: python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist testdb1 python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist testdb1 --show python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 testdb1 --show python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 PZ_MTEST GZ_ALL --show python -m ibeis.other.dbinfo --test-latex_dbstats --dblist GZ_ALL NNP_MasterGIRM_core --show Example: >>> # DISABLE_DOCTEST >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> db_list = ut.get_argval('--dblist', type_=list, default=['testdb1']) >>> ibs_list = [ibeis.opendb(db=db) for db in db_list] >>> tabular_str = latex_dbstats(ibs_list) >>> tabular_cmd = ut.latex_newcommand(ut.latex_sanitize_command_name('DatabaseInfo'), tabular_str) >>> ut.copy_text_to_clipboard(tabular_cmd) >>> write_fpath = ut.get_argval('--write', type_=str, default=None) >>> if write_fpath is not None: >>> fpath = ut.truepath(write_fpath) >>> text = ut.readfrom(fpath) >>> new_text = ut.replace_between_tags(text, tabular_cmd, '% <DBINFO>', '% </DBINFO>') >>> ut.writeto(fpath, new_text) >>> ut.print_code(tabular_cmd, 'latex') >>> ut.quit_if_noshow() >>> ut.render_latex_text('\\noindent \n' + tabular_str) """ import ibeis # Parse for aids test data aids_list = [ibeis.testdata_aids(ibs=ibs) for ibs in ibs_list] #dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False) for ibs in ibs_list] dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False, aid_list=aids) for ibs, aids in zip(ibs_list, aids_list)] #title = db_name + ' database statistics' title = 'Database statistics' stat_title = '# Annotations per name (multiton)' #col_lbls = [ # 'multiton', # #'singleton', # 'total', # 'multiton', # 'singleton', # 'total', #] key_to_col_lbls = { 'num_names_multiton': 'multiton', 'num_names_singleton': 'singleton', 'num_names': 'total', 'num_multiton_annots': 'multiton', 'num_singleton_annots': 'singleton', 'num_unknown_annots': 'unknown', 'num_annots': 'total', } # Structure of columns / multicolumns multi_col_keys = [ ('# Names', ( 'num_names_multiton', #'num_names_singleton', 'num_names', )), ('# Annots', ( 'num_multiton_annots', 'num_singleton_annots', #'num_unknown_annots', 'num_annots')), ] #multicol_lbls = [('# Names', 3), ('# Annots', 3)] multicol_lbls = [(mcolname, len(mcols)) for mcolname, mcols in multi_col_keys] # Flatten column labels col_keys = ut.flatten(ut.get_list_column(multi_col_keys, 1)) col_lbls = ut.dict_take(key_to_col_lbls, col_keys) row_lbls = [] row_values = [] #stat_col_lbls = ['max', 'min', 'mean', 'std', 'nMin', 'nMax'] stat_col_lbls = ['max', 'min', 'mean', 'std', 'med'] #stat_row_lbls = ['# Annot per Name (multiton)'] stat_row_lbls = [] stat_row_values = [] SINGLE_TABLE = False EXTRA = True for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list): row_ = ut.dict_take(dbinfo_locals, col_keys) dbname = ibs.get_dbname_alias() row_lbls.append(dbname) multiton_annot_stats = ut.get_stats(dbinfo_locals['multiton_nid2_nannots'], use_median=True) stat_rows = ut.dict_take(multiton_annot_stats, stat_col_lbls) if SINGLE_TABLE: row_.extend(stat_rows) else: stat_row_lbls.append(dbname) stat_row_values.append(stat_rows) row_values.append(row_) CENTERLINE = False AS_TABLE = True tablekw = dict( astable=AS_TABLE, centerline=CENTERLINE, FORCE_INT=False, precision=2, col_sep='', multicol_sep='|', **kwargs) if EXTRA: extra_keys = [ #'species2_nAids', 'qualtext2_nAnnots', 
'yawtext2_nAnnots', ] extra_titles = { 'species2_nAids': 'Annotations per species.', 'qualtext2_nAnnots': 'Annotations per quality.', 'yawtext2_nAnnots': 'Annotations per viewpoint.', } extra_collbls = ut.ddict(list) extra_rowvalues = ut.ddict(list) extra_tables = ut.ddict(list) for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list): for key in extra_keys: extra_collbls[key] = ut.unique_ordered(extra_collbls[key] + list(dbinfo_locals[key].keys())) extra_collbls['qualtext2_nAnnots'] = ['excellent', 'good', 'ok', 'poor', 'junk', 'UNKNOWN'] #extra_collbls['yawtext2_nAnnots'] = ['backleft', 'left', 'frontleft', 'front', 'frontright', 'right', 'backright', 'back', None] extra_collbls['yawtext2_nAnnots'] = ['BL', 'L', 'FL', 'F', 'FR', 'R', 'BR', 'B', None] for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list): for key in extra_keys: extra_rowvalues[key].append(ut.dict_take(dbinfo_locals[key], extra_collbls[key], 0)) qualalias = {'UNKNOWN': None} extra_collbls['yawtext2_nAnnots'] = [ibs.const.YAWALIAS.get(val, val) for val in extra_collbls['yawtext2_nAnnots']] extra_collbls['qualtext2_nAnnots'] = [qualalias.get(val, val) for val in extra_collbls['qualtext2_nAnnots']] for key in extra_keys: extra_tables[key] = ut.util_latex.make_score_tabular( row_lbls, extra_collbls[key], extra_rowvalues[key], title=extra_titles[key], col_align='r', table_position='[h!]', **tablekw) #tabular_str = util_latex.tabular_join(tabular_body_list) if SINGLE_TABLE: col_lbls += stat_col_lbls multicol_lbls += [(stat_title, len(stat_col_lbls))] count_tabular_str = ut.util_latex.make_score_tabular( row_lbls, col_lbls, row_values, title=title, multicol_lbls=multicol_lbls, table_position='[ht!]', **tablekw) #print(row_lbls) if SINGLE_TABLE: tabular_str = count_tabular_str else: stat_tabular_str = ut.util_latex.make_score_tabular( stat_row_lbls, stat_col_lbls, stat_row_values, title=stat_title, col_align='r', table_position='[h!]', **tablekw) # Make a table of statistics if tablekw['astable']: tablesep = '\n%--\n' else: tablesep = '\\\\\n%--\n' if EXTRA: tabular_str = tablesep.join([count_tabular_str, stat_tabular_str] + ut.dict_take(extra_tables, extra_keys)) else: tabular_str = tablesep.join([count_tabular_str, stat_tabular_str]) return tabular_str
def bigcache_vsone(qreq_, hyper_params): """ Cached output of one-vs-one matches >>> from wbia.scripts.script_vsone import * # NOQA >>> self = OneVsOneProblem() >>> qreq_ = self.qreq_ >>> hyper_params = self.hyper_params """ import vtool as vt import wbia # Get a set of training pairs ibs = qreq_.ibs cm_list = qreq_.execute() infr = wbia.AnnotInference.from_qreq_(qreq_, cm_list, autoinit=True) # Per query choose a set of correct, incorrect, and random training pairs aid_pairs_ = infr._cm_training_pairs( rng=np.random.RandomState(42), **hyper_params.pair_sample ) aid_pairs_ = vt.unique_rows(np.array(aid_pairs_), directed=False).tolist() pb_aid_pairs_ = photobomb_samples(ibs) # TODO: try to add in more non-comparable samples aid_pairs_ = pb_aid_pairs_ + aid_pairs_ aid_pairs_ = vt.unique_rows(np.array(aid_pairs_)) # ====================================== # Compute one-vs-one scores and local_measures # ====================================== # Prepare lazy attributes for annotations qreq_ = infr.qreq_ ibs = qreq_.ibs qconfig2_ = qreq_.extern_query_config2 dconfig2_ = qreq_.extern_data_config2 qannot_cfg = ibs.depc.stacked_config(None, 'featweight', qconfig2_) dannot_cfg = ibs.depc.stacked_config(None, 'featweight', dconfig2_) # Remove any pairs missing features if dannot_cfg == qannot_cfg: unique_annots = ibs.annots(np.unique(np.array(aid_pairs_)), config=dannot_cfg) bad_aids = unique_annots.compress(~np.array(unique_annots.num_feats) > 0).aids bad_aids = set(bad_aids) else: annots1_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 0)), config=qannot_cfg) annots2_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 1)), config=dannot_cfg) bad_aids1 = annots1_.compress(~np.array(annots1_.num_feats) > 0).aids bad_aids2 = annots2_.compress(~np.array(annots2_.num_feats) > 0).aids bad_aids = set(bad_aids1 + bad_aids2) subset_idxs = np.where( [not (a1 in bad_aids or a2 in bad_aids) for a1, a2 in aid_pairs_] )[0] # Keep only a random subset if hyper_params.subsample: rng = np.random.RandomState(3104855634) num_max = hyper_params.subsample if num_max < len(subset_idxs): subset_idxs = rng.choice(subset_idxs, size=num_max, replace=False) subset_idxs = sorted(subset_idxs) # Take the current selection aid_pairs = ut.take(aid_pairs_, subset_idxs) if True: # NEW WAY config = hyper_params.vsone_assign # TODO: ensure annot probs like chips and features can be appropriately # set via qreq_ config or whatever matches = infr.exec_vsone_subset(aid_pairs, config=config) else: query_aids = ut.take_column(aid_pairs, 0) data_aids = ut.take_column(aid_pairs, 1) # OLD WAY # Determine a unique set of annots per config configured_aids = ut.ddict(set) configured_aids[qannot_cfg].update(query_aids) configured_aids[dannot_cfg].update(data_aids) # Make efficient annot-object representation configured_obj_annots = {} for config, aids in configured_aids.items(): annots = ibs.annots(sorted(list(aids)), config=config) configured_obj_annots[config] = annots annots1 = configured_obj_annots[qannot_cfg].loc(query_aids) annots2 = configured_obj_annots[dannot_cfg].loc(data_aids) # Get hash based on visual annotation appearence of each pair # as well as algorithm configurations used to compute those properties qvuuids = annots1.visual_uuids dvuuids = annots2.visual_uuids qcfgstr = annots1._config.get_cfgstr() dcfgstr = annots2._config.get_cfgstr() annots_cfgstr = ut.hashstr27(qcfgstr) + ut.hashstr27(dcfgstr) vsone_uuids = [ ut.combine_uuids(uuids, salt=annots_cfgstr) for uuids in ut.ProgIter( zip(qvuuids, dvuuids), length=len(qvuuids), 
label='hashing ids' ) ] # Combine into a big cache for the entire 1-v-1 matching run big_uuid = ut.hashstr_arr27(vsone_uuids, '', pathsafe=True) cacher = ut.Cacher('vsone_v7', cfgstr=str(big_uuid), appname='vsone_rf_train') cached_data = cacher.tryload() if cached_data is not None: # Caching doesn't work 100% for PairwiseMatch object, so we need to do # some postprocessing configured_lazy_annots = ut.ddict(dict) for config, annots in configured_obj_annots.items(): annot_dict = configured_lazy_annots[config] for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'): annot_dict[_annot.aid] = _annot._make_lazy_dict() # Extract pairs of annot objects (with shared caches) lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids) lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids) # Create a set of PairwiseMatches with the correct annot properties matches = [ vt.PairwiseMatch(annot1, annot2) for annot1, annot2 in zip(lazy_annots1, lazy_annots2) ] # Updating a new matches dictionary ensure the annot1/annot2 properties # are set correctly for key, cached_matches in list(cached_data.items()): fixed_matches = [match.copy() for match in matches] for fixed, internal in zip(fixed_matches, cached_matches): dict_ = internal.__dict__ ut.delete_dict_keys(dict_, ['annot1', 'annot2']) fixed.__dict__.update(dict_) cached_data[key] = fixed_matches else: cached_data = vsone_( qreq_, query_aids, data_aids, qannot_cfg, dannot_cfg, configured_obj_annots, hyper_params, ) cacher.save(cached_data) # key_ = 'SV_LNBNN' key_ = 'RAT_SV' # for key in list(cached_data.keys()): # if key != 'SV_LNBNN': # del cached_data[key] matches = cached_data[key_] return matches, infr
def get_dbinfo(ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: CommandLine: python -m ibeis.other.dbinfo --exec-get_dbinfo:0 python -m ibeis.other.dbinfo --test-get_dbinfo:1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> defaultdb = 'testdb1' >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = ibeis.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from ibeis.expt import cfghelpers >>> #from ibeis.expt import annotation_configs >>> #from ibeis.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> verbose = True >>> short = True >>> #ibs = ibeis.opendb(db='GZ_ALL') >>> #ibs = ibeis.opendb(db='PZ_Master0') >>> ibs = ibeis.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = ibeis.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... 
# Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] print('Specified custom aids via acfgname %s' % (acfg_name_list,)) from ibeis.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) #aid_list = if verbose: print('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) #associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation FILTER_HACK = True if FILTER_HACK: # HUGE HACK - get only images and names with filtered aids valid_aids_ = ibs.filter_aids_custom(valid_aids) valid_nids_ = ibs.filter_nids_custom(valid_nids) valid_gids_ = ibs.filter_gids_custom(valid_gids) if verbose: print('Filtered %d names' % (len(valid_nids) - len(valid_nids_))) print('Filtered %d images' % (len(valid_gids) - len(valid_gids_))) print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_))) valid_gids = valid_gids_ valid_nids = valid_nids_ valid_aids = valid_aids_ #associated_nids = ut.compress(associated_nids, map(any, #ibs.unflat_map(ibs.get_annot_custom_filterflags, # ibs.get_name_aids(associated_nids)))) # Image info if verbose: print('Checking Image Info') gx2_aids = ibs.get_image_aids(valid_gids) if FILTER_HACK: gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: print('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if FILTER_HACK: nx2_aids = [ibs.filter_aids_custom(aids) for aids in nx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from ibeis.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()} return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in 
enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1} nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1} singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True) resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True) try: aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0) undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1])) pair_tag_info['num_reviewed'] = num_reviewed_pairs except Exception: pair_tag_info = {} #print(ut.dict_str(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: print('Checking Annot Species') unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids)) species_list = ibs.get_annot_species_texts(valid_aids) species2_aids = ut.group_items(valid_aids, species_list) species2_nAids = {key: len(val) for key, val in species2_aids.items()} if verbose: print('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Seperate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: print('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=np.int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: print('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = OrderedDict( [( 'max', wh_list.max(0)), ( 'min', wh_list.min(0)), ('mean', wh_list.mean(0)), ( 'std', wh_list.std(0))]) def arr2str(var): return ('[' + ( ', '.join(list(map(lambda x: '%.1f' % x, var))) ) + ']') ret = (',\n '.join([ '%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items() ])) return '{\n ' + ret + '\n}' print('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list = ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list 
= [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: print('Building Stats String') multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True) # Time stats unixtime_list = ibs.get_image_unixtime(valid_gids) unixtime_list = ut.list_replace(unixtime_list, -1, float('nan')) #valid_unixtime_list = [time for time in unixtime_list if time != -1] #unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda : 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (36 <= max_age or max_age is None): age_dict['Adult'] += 1 else: print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, )) age_dict['UNKNOWN'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys)) sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys]) # Filter 0's sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0} return sextext2_nAnnots if verbose: print('Checking Other Annot Stats') qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids) yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: print('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags) contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags) contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)} contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)} if verbose: print('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - 
len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_aids) num_annots = len(valid_aids) if with_bytes: if verbose: print('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: print('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_aids) _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' #if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex(ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ]) raise # Get contributor statistics contrib_rowids = ibs.get_valid_contrib_rowids() num_contributors = len(contrib_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.dict_str(dict_) return align2(str_) header_block_lines = ( [('+============================'), ] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) = %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] occurrence_block_lines = [ ('--' * num_tabs), ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), 
] annot_per_agesex_block_lines = [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] contrib_block_lines = [ '# Images per contributor = ' + align_dict2(contrib_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contrib_tag_to_nAnnots), '# Quality per contributor = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True), ] if with_contrib else [] img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), #('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contrib_block_lines + imgsize_stat_lines + [('L============================'), ] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: print(info_str2) locals_ = locals() return locals_
def draw_bayesian_model(model, evidence={}, soft_evidence={}, fnum=None, pnum=None, **kwargs): from pgmpy.models import BayesianModel if not isinstance(model, BayesianModel): model = model.to_bayesian_model() import plottool as pt import networkx as nx kwargs = kwargs.copy() factor_list = kwargs.pop('factor_list', []) ttype_colors, ttype_scalars = make_colorcodes(model) textprops = { 'horizontalalignment': 'left', 'family': 'monospace', 'size': 8, } # build graph attrs tup = get_node_viz_attrs( model, evidence, soft_evidence, factor_list, ttype_colors, **kwargs) node_color, pos_list, pos_dict, takws = tup # draw graph has_infered = evidence or 'factor_list' in kwargs if False: fig = pt.figure(fnum=fnum, pnum=pnum, doclf=True) # NOQA ax = pt.gca() drawkw = dict(pos=pos_dict, ax=ax, with_labels=True, node_size=1100, node_color=node_color) nx.draw(model, **drawkw) else: # BE VERY CAREFUL if 1: graph = model.copy() graph.__class__ = nx.DiGraph graph.graph['groupattrs'] = ut.ddict(dict) #graph = model. if getattr(graph, 'ttype2_cpds', None) is not None: # Add invis edges and ttype groups for ttype in model.ttype2_cpds.keys(): ttype_cpds = model.ttype2_cpds[ttype] # use defined ordering ttype_nodes = ut.list_getattr(ttype_cpds, 'variable') # ttype_nodes = sorted(ttype_nodes) invis_edges = list(ut.itertwo(ttype_nodes)) graph.add_edges_from(invis_edges) nx.set_edge_attributes(graph, 'style', {edge: 'invis' for edge in invis_edges}) nx.set_node_attributes(graph, 'groupid', {node: ttype for node in ttype_nodes}) graph.graph['groupattrs'][ttype]['rank'] = 'same' graph.graph['groupattrs'][ttype]['cluster'] = False else: graph = model pt.show_nx(graph, layout_kw={'prog': 'dot'}, fnum=fnum, pnum=pnum, verbose=0) pt.zoom_factory() fig = pt.gcf() ax = pt.gca() pass hacks = [pt.draw_text_annotations(textprops=textprops, **takw) for takw in takws if takw] xmin, ymin = np.array(pos_list).min(axis=0) xmax, ymax = np.array(pos_list).max(axis=0) if 'name' in model.ttype2_template: num_names = len(model.ttype2_template['name'].basis) num_annots = len(model.ttype2_cpds['name']) if num_annots > 4: ax.set_xlim((xmin - 40, xmax + 40)) ax.set_ylim((ymin - 50, ymax + 50)) fig.set_size_inches(30, 7) else: ax.set_xlim((xmin - 42, xmax + 42)) ax.set_ylim((ymin - 50, ymax + 50)) fig.set_size_inches(23, 7) title = 'num_names=%r, num_annots=%r' % (num_names, num_annots,) else: title = '' map_assign = kwargs.get('map_assign', None) def word_insert(text): return '' if len(text) == 0 else text + ' ' top_assignments = kwargs.get('top_assignments', None) if top_assignments is not None: map_assign, map_prob = top_assignments[0] if map_assign is not None: title += '\n%sMAP: ' % (word_insert(kwargs.get('method', ''))) title += map_assign + ' @' + '%.2f%%' % (100 * map_prob,) if kwargs.get('show_title', True): pt.set_figtitle(title, size=14) for hack in hacks: hack() if has_infered: # Hack in colorbars # if ut.list_type(basis) is int: # pt.colorbar(scalars, colors, lbl='score', ticklabels=np.array(basis) + 1) # else: # pt.colorbar(scalars, colors, lbl='score', ticklabels=basis) keys = ['name', 'score'] locs = ['left', 'right'] for key, loc in zip(keys, locs): if key in ttype_colors: basis = model.ttype2_template[key].basis # scalars = colors = ttype_colors[key] scalars = ttype_scalars[key] pt.colorbar(scalars, colors, lbl=key, ticklabels=basis, ticklocation=loc)
def compute_negentropy_names(aids_list, daid2_label): r""" One of our idf extensions Word weighting based on the negative entropy over all names of p(n_i | word) Args: aids_list (list of aids): daid2_label (dict from daid to label): Returns: negentropy_list (ndarray[float32]): idf-like weighting for each word based on the negative entropy Example: >>> # SLOW_DOCTEST >>> from ibeis.algo.hots.smk.smk_index import * # NOQA >>> from ibeis.algo.hots.smk import smk_debug >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1() >>> wx_series = np.arange(len(invindex.words)) >>> idx2_aid = invindex.idx2_daid >>> daid2_label = invindex.daid2_label >>> _ = helper_idf_wordgroup(wx2_idxs, idx2_aid, wx_series) >>> idxs_list, aids_list = _ Math:: p(n_i | \word) = \sum_{\lbl \in L_i} p(\lbl | \word) p(\lbl | \word) = \frac{p(\word | \lbl) p(\lbl)}{p(\word)} p(\word) = \sum_{\lbl' \in L} p(\word | \lbl') p(\lbl') p(\word | \lbl) = NumAnnotOfLabelWithWord / NumAnnotWithLabel = \frac{\sum_{\X \in \DB_\lbl} b(\word, \X)}{\card{\DB_\lbl}} h(n_i | word) = -\sum_{i=1}^N p(n_i | \word) \log p(n_i | \word) word_weight = log(N) - h(n | word) CommandLine: python dev.py -t smk2 --allgt --db GZ_ALL python dev.py -t smk5 --allgt --db GZ_ALL Auto: python -c "import utool as ut; ut.print_auto_docstr('ibeis.algo.hots.smk.smk_index', 'compute_negentropy_names')" """ nWords = len(aids_list) # --- LABEL MEMBERS w.r.t daids --- # compute mapping from label to daids # Translate tuples into scalars for efficiency label_list = list(daid2_label.values()) lblindex_list = np.array(ut.tuples_to_unique_scalars(label_list)) #daid2_lblindex = dict(zip(daid_list, lblindex_list)) unique_lblindexes, groupxs = clustertool.group_indices(lblindex_list) daid_list = np.array(daid2_label.keys()) daids_list = [daid_list.take(xs) for xs in groupxs] # --- DAID MEMBERS w.r.t. 
words --- # compute mapping from daid to word indexes # finds all the words that belong to an annotation daid2_wxs = ut.ddict(list) for wx, _daids in enumerate(aids_list): for daid in _daids: daid2_wxs[daid].append(wx) # --- \Pr(\word \given \lbl) for each label --- # Compute the number of annotations in a label with the word vs # the number of annotations in the label lblindex2_daids = list(zip(unique_lblindexes, daids_list)) # Get num times word appears for each label probWordGivenLabel_list = [] for lblindex, _daids in lblindex2_daids: nAnnotOfLabelWithWord = np.zeros(nWords, dtype=np.int32) for daid in _daids: wxs = np.unique(daid2_wxs[daid]) nAnnotOfLabelWithWord[wxs] += 1 probWordGivenLabel = nAnnotOfLabelWithWord.astype(np.float64) / len(_daids) probWordGivenLabel_list.append(probWordGivenLabel) # (nLabels, nWords) probWordGivenLabel_arr = np.array(probWordGivenLabel_list) # --- \Pr(\lbl \given \word) --- # compute partition function that approximates probability of a word # (1, nWords) probWord = probWordGivenLabel_arr.sum(axis=0) probWord.shape = (1, probWord.size) # (nLabels, nWords) probLabelGivenWord_arr = (probWordGivenLabel_arr / probWord) # --- \Pr(\name \given \lbl) --- # get names for each unique label nid_list = np.array([label_list[xs[0]][0] for xs in groupxs]) unique_nids, groupxs_ = clustertool.group_indices(nid_list) # (nNames, nWords) # add a little wiggle room eps = 1E-9 # http://stackoverflow.com/questions/872544/precision-of-floating-point #epsilon = 2^(E-52) % For a 64-bit float (double precision) #epsilon = 2^(E-23) % For a 32-bit float (single precision) #epsilon = 2^(E-10) % For a 16-bit float (half precision) probNameGivenWord = eps + (1.0 - eps) * np.array([probLabelGivenWord_arr.take(xs, axis=0).sum(axis=0) for xs in groupxs_]) logProbNameGivenWord = np.log(probNameGivenWord) wordNameEntropy = -(probNameGivenWord * logProbNameGivenWord).sum(0) # Compute negative entropy for weights nNames = len(nid_list) negentropy_list = np.log(nNames) - wordNameEntropy return negentropy_list
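# --- Illustrative sketch (not from the source) ---
# The docstring math above can be walked through on toy numbers. This assumes the
# simplest case where every label corresponds to exactly one name, so that
# p(n_i | word) equals p(label | word); all values below are made up.
import numpy as np

# rows = labels, cols = words; entries are p(word | label), i.e. the fraction of a
# label's annotations that contain the word
probWordGivenLabel = np.array([
    [1.0, 0.5, 0.0, 0.5],
    [0.0, 0.5, 1.0, 0.5],
    [0.5, 0.5, 0.5, 0.5],
])
probWord = probWordGivenLabel.sum(axis=0, keepdims=True)        # partition function p(word)
probNameGivenWord = probWordGivenLabel / probWord               # p(n_i | word); one name per label here
eps = 1E-9
probNameGivenWord = eps + (1.0 - eps) * probNameGivenWord       # same wiggle room as above
wordNameEntropy = -(probNameGivenWord * np.log(probNameGivenWord)).sum(axis=0)
negentropy = np.log(len(probWordGivenLabel)) - wordNameEntropy  # log(N) - h(n | word)
print(negentropy)
# word 2 occurs under a single name and gets the largest weight (log(3));
# words 1 and 3 are spread evenly over all names and get a weight near 0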
def make_run_tests_script_text(test_headers, test_argvs, quick_tests=None, repodir=None, exclude_list=[]): """ Autogeneration function TODO move to util_autogen or just depricate Examples: >>> from utool.util_tests import * # NOQA >>> import utool # NOQA >>> testdirs = ['~/code/ibeis/test_ibs*.py'] """ import utool as ut from os.path import relpath, join, dirname # NOQA exclude_list += ['__init__.py'] # General format of the testing script script_fmtstr = ut.codeblock( r''' #!/bin/bash # Runs all tests # Win32 path hacks export CWD=$(pwd) export PYMAJOR="$(python -c "import sys; print(sys.version_info[0])")" # <CORRECT_PYTHON> # GET CORRECT PYTHON ON ALL PLATFORMS export SYSNAME="$(expr substr $(uname -s) 1 10)" if [ "$SYSNAME" = "MINGW32_NT" ]; then export PYEXE=python else if [ "$PYMAJOR" = "3" ]; then # virtual env? export PYEXE=python else export PYEXE=python2.7 fi fi # </CORRECT_PYTHON> PRINT_DELIMETER() {{ printf "\n#\n#\n#>>>>>>>>>>> next_test\n\n" }} export TEST_ARGV="{test_argvs} $@" {dirdef_block} # Default tests to run set_test_flags() {{ export DEFAULT=$1 {testdefault_block} }} set_test_flags OFF {testdefaulton_block} # Parse for bash commandline args for i in "$@" do case $i in --testall) set_test_flags ON ;; esac {testcmdline_block} done BEGIN_TESTS() {{ cat <<EOF {runtests_bubbletext} EOF echo "BEGIN: TEST_ARGV=$TEST_ARGV" PRINT_DELIMETER num_passed=0 num_ran=0 export FAILED_TESTS='' }} RUN_TEST() {{ echo "RUN_TEST: $@" export TEST="$PYEXE $@ $TEST_ARGV" $TEST export RETURN_CODE=$? echo "RETURN_CODE=$RETURN_CODE" PRINT_DELIMETER num_ran=$(($num_ran + 1)) if [ "$RETURN_CODE" == "0" ] ; then num_passed=$(($num_passed + 1)) fi if [ "$RETURN_CODE" != "0" ] ; then export FAILED_TESTS="$FAILED_TESTS\n$TEST" fi }} END_TESTS() {{ echo "RUN_TESTS: DONE" if [ "$FAILED_TESTS" != "" ] ; then echo "-----" printf "Failed Tests:" printf "$FAILED_TESTS\n" printf "$FAILED_TESTS\n" >> failed_shelltests.txt echo "-----" fi echo "$num_passed / $num_ran tests passed" }} #--------------------------------------------- # START TESTS BEGIN_TESTS {quicktest_block} {test_block} #--------------------------------------------- # END TESTING END_TESTS ''') testcmdline_fmtstr = ut.codeblock( r''' case $i in --notest{header_lower}) export {testflag}=OFF ;; esac case $i in --test{header_lower}) export {testflag}=ON ;; esac ''') header_test_block_fmstr = ut.codeblock( r''' #--------------------------------------------- #{header_text} if [ "${testflag}" = "ON" ] ; then cat <<EOF {header_bubble_text} EOF {testlines_block} fi ''') #specialargv = '--noshow' specialargv = '' testline_fmtstr = 'RUN_TEST ${dirvar}/{fpath} {specialargv}' testline_fmtstr2 = 'RUN_TEST {fpath} {specialargv}' def format_testline(fpath, dirvar): if dirvar is None: return testline_fmtstr2.format(fpath=fpath, specialargv=specialargv) else: return testline_fmtstr.format(dirvar=dirvar, fpath=fpath, specialargv=specialargv) default_flag_line_list = [] defaulton_flag_line_list = [] testcmdline_list = [] dirdef_list = [] header_test_block_list = [] known_tests = ut.ddict(list) # Tests to always run if quick_tests is not None: quicktest_block = '\n'.join( ['# Quick Tests (always run)'] + ['RUN_TEST ' + testline for testline in quick_tests]) else: quicktest_block = '# No quick tests' # Loop over different test types for testdef_tup in test_headers: header, default, modname, dpath, pats, testcmds = testdef_tup # Build individual test type information header_upper = header.upper() header_lower = header.lower() testflag = header_upper + '_TEST' if 
modname is not None: dirvar = header_upper + '_DIR' dirdef = ''.join([ 'export {dirvar}=$($PYEXE -c "', 'import os, {modname};', 'print(str(os.path.dirname(os.path.dirname({modname}.__file__))))', '")']).format(dirvar=dirvar, modname=modname) dirdef_list.append(dirdef) else: dirvar = None # Build test dir #dirvar = header_upper + '_DIR' #dirdef = 'export {dirvar}={dirname}'.format(dirvar=dirvar, dirname=dirname) #dirdef_list.append(dirdef) # Build command line flags default_flag_line = 'export {testflag}=$DEFAULT'.format(testflag=testflag) if default: defaulton_flag_line = 'export {testflag}=ON'.format(testflag=testflag) defaulton_flag_line_list.append(defaulton_flag_line) testcmdline_fmtdict = dict(header_lower=header_lower, testflag=testflag,) testcmdline = testcmdline_fmtstr.format(**testcmdline_fmtdict) #ut.ls(dpath) # VERY HACK BIT OF CODE # Get list of tests from patterns if testcmds is None: if modname is not None: module = __import__(modname) repo_path = dirname(dirname(module.__file__)) else: repo_path = repodir dpath_ = ut.unixpath(util_path.unixjoin(repo_path, dpath)) if header_upper == 'OTHER': # Hacky way to grab any other tests not explicitly seen in this directory _testfpath_list = list(set(ut.glob(dpath_, '*.py')) - set(known_tests[dpath_])) #_testfpath_list = ut.glob(dpath_, '*.py') #set(known_tests[dpath_]) else: _testfpath_list = ut.flatten([ut.glob(dpath_, pat) for pat in pats]) def not_excluded(x): return not any([x.find(exclude) > -1 for exclude in exclude_list]) _testfpath_list = list(filter(not_excluded, _testfpath_list)) known_tests[dpath_].extend(_testfpath_list) #print(_testfpath_list) testfpath_list = [util_path.unixjoin(dpath, relpath(fpath, dpath_)) for fpath in _testfpath_list] testline_list = [format_testline(fpath, dirvar) for fpath in testfpath_list] else: testline_list = testcmds testlines_block = ut.indentjoin(testline_list).strip('\n') # Construct test block for this type header_text = header_upper + ' TESTS' headerfont = 'cybermedium' header_bubble_text = ut.indent(ut.bubbletext(header_text, headerfont).strip()) header_test_block_dict = dict( testflag=testflag, header_text=header_text, testlines_block=testlines_block, header_bubble_text=header_bubble_text,) header_test_block = header_test_block_fmstr.format(**header_test_block_dict) # Append to script lists header_test_block_list.append(header_test_block) default_flag_line_list.append(default_flag_line) testcmdline_list.append(testcmdline) runtests_bubbletext = ut.bubbletext('RUN TESTS', 'cyberlarge') test_block = '\n'.join(header_test_block_list) dirdef_block = '\n'.join(dirdef_list) testdefault_block = ut.indent('\n'.join(default_flag_line_list)) testdefaulton_block = '\n'.join(defaulton_flag_line_list) testcmdline_block = '\n'.join(testcmdline_list) script_fmtdict = dict( quicktest_block=quicktest_block, runtests_bubbletext=runtests_bubbletext, test_argvs=test_argvs, dirdef_block=dirdef_block, testdefault_block=testdefault_block, testdefaulton_block=testdefaulton_block, testcmdline_block=testcmdline_block, test_block=test_block,) script_text = script_fmtstr.format(**script_fmtdict) return script_text
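# --- Hedged usage sketch (header names, paths, and patterns are illustrative, not from the source) ---
# Each test_headers entry unpacks as (header, default, modname, dpath, pats, testcmds),
# mirroring the testdef_tup unpacking inside make_run_tests_script_text.
import utool as ut

test_headers = [
    # header, on-by-default, module name, test subdir, glob patterns, explicit commands
    ('DOC',   True,  'ibeis', 'ibeis', ['test_*.py'], None),
    ('OTHER', False, None,    'tests', ['*.py'],      None),
]
script_text = make_run_tests_script_text(
    test_headers, test_argvs='--quiet',
    repodir=ut.truepath('~/code/ibeis'),
    exclude_list=['_broken'])
with open('run_tests.sh', 'w') as file_:
    file_.write(script_text)
# then: chmod +x run_tests.sh && ./run_tests.sh --testall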
def print_acfg_list(acfg_list, expanded_aids_list=None, ibs=None, combined=False, **kwargs): r""" Args: acfg_list (list): expanded_aids_list (list): (default = None) ibs (IBEISController): ibeis controller object(default = None) combined (bool): (default = False) CommandLine: python -m ibeis.expt.annotation_configs --exec-print_acfg_list --show Example: >>> # DISABLE_DOCTEST >>> from ibeis.expt.annotation_configs import * # NOQA >>> import ibeis >>> acfg_list = '?' >>> expanded_aids_list = None >>> ibs = None >>> combined = False >>> result = print_acfg_list(acfg_list, expanded_aids_list, ibs, combined) >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> ut.show_if_requested() """ _tup = compress_acfg_list_for_printing(acfg_list) nonvaried_compressed_dict, varied_compressed_dict_list = _tup ut.colorprint('+=== <Info acfg_list> ===', 'white') #print('Printing acfg_list info. len(acfg_list) = %r' % (len(acfg_list),)) print('non-varied aidcfg = ' + ut.dict_str(nonvaried_compressed_dict)) seen_ = ut.ddict(list) # get default kwkeys for annot info if ibs is not None: annotstats_kw = kwargs.copy() kwkeys = ut.parse_func_kwarg_keys(ibs.get_annot_stats_dict) annotstats_kw.update(ut.argparse_dict( dict(zip(kwkeys, [None] * len(kwkeys))), only_specified=True)) for acfgx in range(len(acfg_list)): acfg = acfg_list[acfgx] title = ('q_cfgname=' + acfg['qcfg']['_cfgname'] + ' d_cfgname=' + acfg['dcfg']['_cfgname']) ut.colorprint('+--- acfg %d / %d -- %s ---- ' % (acfgx + 1, len(acfg_list), title), 'lightgray') print('acfg = ' + ut.dict_str(varied_compressed_dict_list[acfgx], strvals=True)) if expanded_aids_list is not None: qaids, daids = expanded_aids_list[acfgx] key = (ut.hashstr_arr27(qaids, 'qaids'), ut.hashstr_arr27(daids, 'daids')) if key not in seen_: if ibs is not None: seen_[key].append(acfgx) annotconfig_stats_strs, _ = ibs.get_annotconfig_stats( qaids, daids, verbose=True, combined=combined, **annotstats_kw) else: dupindex = seen_[key] print('DUPLICATE of index %r' % (dupindex,)) dupdict = varied_compressed_dict_list[dupindex[0]] print('DUP OF acfg = ' + ut.dict_str(dupdict, strvals=True)) ut.colorprint('L___ </Info acfg_list> ___', 'white')
from six.moves import range, zip, map # NOQA from ibeis.algo.hots import _pipeline_helpers as plh # NOQA from ibeis.algo.hots.neighbor_index import NeighborIndex, get_support_data (print, rrr, profile) = ut.inject2(__name__, '[neighbor_index]', DEBUG=False) USE_HOTSPOTTER_CACHE = not ut.get_argflag('--nocache-hs') NOCACHE_UUIDS = ut.get_argflag('--nocache-uuids') and USE_HOTSPOTTER_CACHE # LRU cache for nn_indexers. Ensures that only a few are ever in memory #MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=2) MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=1) # Background process for building indexes CURRENT_THREAD = None # Global map to keep track of UUID lists with prebuild indexers. UUID_MAP = ut.ddict(dict) NEIGHBOR_CACHE = ut.get_lru_cache(MAX_NEIGHBOR_CACHE_SIZE) class UUIDMapHyrbridCache(object): """ Class that lets multiple ways of writing to the uuid_map be swapped in and out interchangably TODO: the global read / write should periodically sync itself to disk and it should be loaded from disk initially """ def __init__(self): self.uuid_maps = ut.ddict(dict) #self.uuid_map_fpath = uuid_map_fpath #self.init(uuid_map_fpath, min_reindex_thresh)
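# --- Standard-library illustration (not the utool API) ---
# The globals above keep at most MAX_NEIGHBOR_CACHE_SIZE nn_indexers alive via
# ut.get_lru_cache. The toy class below only illustrates the eviction policy such a
# cache provides: inserting past capacity drops the least-recently-used entry.
from collections import OrderedDict

class TinyLRU(object):
    def __init__(self, max_size=1):
        self.max_size = max_size
        self._data = OrderedDict()

    def __setitem__(self, key, value):
        self._data.pop(key, None)
        self._data[key] = value
        while len(self._data) > self.max_size:
            self._data.popitem(last=False)   # evict the oldest entry

    def __getitem__(self, key):
        value = self._data.pop(key)          # re-insert so the key counts as recently used
        self._data[key] = value
        return value

cache = TinyLRU(max_size=1)
cache['uuid_hash_a'] = 'nnindexer_a'
cache['uuid_hash_b'] = 'nnindexer_b'         # 'uuid_hash_a' is evicted here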
def match_kernel(wx2_qrvecs, wx2_qfxs, invindex, qaid): """ >>> from ibeis.model.hots.smk.smk import * # NOQA >>> ibs, annots_df, taids, daids, qaids, nWords = testdata() >>> words = learn_visual_words(annots_df, taids, nWords) >>> invindex = index_data_annots(annots_df, daids, words) >>> qaid = qaids[0] >>> wx2_qfxs, wx2_qrvecs = compute_query_repr(annots_df, qaid, invindex) >>> daid2_totalscore = match_kernel(wx2_qrvecs, wx2_qfxs, invindex, qaid) """ _daids = invindex.daids idx2_daid = invindex.idx2_daid wx2_drvecs = invindex.wx2_drvecs wx2_weight = invindex.wx2_weight daid2_gamma = invindex.daid2_gamma wx2_rvecs = wx2_qrvecs query_gamma = gamma_summation(wx2_rvecs, wx2_weight) # Accumulate scores over the entire database daid2_aggscore = pd.Series(np.zeros(len(_daids)), index=_daids, name='total_score') common_wxs = set(wx2_qrvecs.keys()).intersection(set(wx2_drvecs.keys())) daid2_wx2_scoremat = utool.ddict(lambda: utool.ddict(list)) # for each word compute the pairwise scores between matches mark, end = utool.log_progress('query word: ', len(common_wxs), flushfreq=100) for count, wx in enumerate(common_wxs): if count % 100 == 0: mark(count) # Query and database vectors for wx-th word qrvecs = wx2_qrvecs[wx] drvecs = wx2_drvecs[wx] # Word Weight weight = wx2_weight[wx] # Compute score matrix qfx2_wscore = Match_N(qrvecs, drvecs) qfx2_wscore.groupby(idx2_daid) # Group scores by database annotation ids group = qfx2_wscore.groupby(idx2_daid, axis=1) for daid, scoremat in group: daid2_wx2_scoremat[daid][wx] = scoremat #qfx2_wscore = pd.DataFrame(qfx2_wscore_, index=qfxs, columns=_idxs) daid2_wscore = weight * qfx2_wscore.sum( axis=0).groupby(idx2_daid).sum() daid2_aggscore = daid2_aggscore.add(daid2_wscore, fill_value=0) daid2_totalscore = daid2_aggscore * daid2_gamma * query_gamma end() daid_fm = {} daid_fs = {} daid_fk = {} mark, end = utool.log_progress('accumulating match info: ', len(daid2_wx2_scoremat), flushfreq=100) for count, item in enumerate(daid2_wx2_scoremat.items()): daid, wx2_scoremat = item if count % 25 == 0: mark(count) fm_accum = [] fs_accum = [] fk_accum = [] for wx, scoremat in wx2_scoremat.iteritems(): qfxs = scoremat.index dfxs = invindex.idx2_dfx[scoremat.columns] fm_ = np.vstack(np.dstack(np.meshgrid(qfxs, dfxs, indexing='ij'))) fs_ = scoremat.values.flatten() lower_thresh = 0.01 valid = [fs_ > lower_thresh] fm = fm_[valid] fs = fs_[valid] fk = np.ones(len(fm), dtype=np.int32) fm_accum.append(fm) fs_accum.append(fs) fk_accum.append(fk) daid_fm[daid] = np.vstack(fm_accum) daid_fs[daid] = np.hstack(fs_accum).T daid_fk[daid] = np.hstack(fk_accum).T chipmatch = ( daid_fm, daid_fs, daid_fk, ) daid2_totalscore.sort(axis=1, ascending=False) return daid2_totalscore, chipmatch
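# --- Toy walkthrough of the feature-match construction above (made-up indexes and scores) ---
import numpy as np

qfxs = np.array([3, 7])          # query feature indexes assigned to one word
dfxs = np.array([10, 11, 12])    # database feature indexes assigned to the same word
fm_ = np.vstack(np.dstack(np.meshgrid(qfxs, dfxs, indexing='ij')))
# fm_ enumerates every (qfx, dfx) pair:
# [[ 3 10] [ 3 11] [ 3 12] [ 7 10] [ 7 11] [ 7 12]]
fs_ = np.array([0.20, 0.00, 0.30, 0.005, 0.40, 0.00])   # flattened score matrix values
valid = fs_ > 0.01                                       # the lower_thresh filter
fm, fs = fm_[valid], fs_[valid]                          # keeps rows 0, 2, 4
fk = np.ones(len(fm), dtype=np.int32)                    # every surviving match gets feature-rank 1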
def init_tablecache():
    #return utool.ddict(ColumnsCache)
    return utool.ddict(lambda: utool.ddict(dict))
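# --- Standard-library equivalent (assumes utool.ddict aliases collections.defaultdict,
#     as its usage elsewhere in this code suggests) ---
from collections import defaultdict

def init_tablecache_sketch():
    # table name -> column name -> row id -> cached value
    return defaultdict(lambda: defaultdict(dict))

tablecache = init_tablecache_sketch()
tablecache['annotations']['name_rowid'][42] = 7   # intermediate levels are created on demand
print(tablecache['annotations']['name_rowid'])    # {42: 7}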
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, alpha=3, thresh=0): """ Internals step4 Computes gamma normalization scalar for the database annotations >>> from ibeis.model.hots.smk.smk_index import * # NOQA >>> from ibeis.model.hots.smk import smk_debug >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2() >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh >>> idx2_daid = invindex.idx2_daid >>> wx2_weight = wx2_idf >>> daids = invindex.daids >>> use_cache = USE_CACHE_GAMMA and False >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, daids, use_cache=use_cache) """ # Gropuing by aid and words wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs)) if utool.VERBOSE: print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh)) mark1, end1_ = utool.log_progress( '[smk_index] Gamma Group: ', len(wx_sublist), flushfreq=100, writefreq=50) rvecs_list1 = pdh.ensure_values_subset(wx2_rvecs, wx_sublist) aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist) daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list)) # Group by daids first and then by word index for wx, aids, rvecs in zip(wx_sublist, aids_list, rvecs_list1): group_aids, groupxs = smk_speed.group_indicies(aids) rvecs_group = smk_speed.apply_grouping(rvecs, groupxs) # 2.9 ms for aid, rvecs_ in zip(group_aids, rvecs_group): daid2_wx2_drvecs[aid][wx] = rvecs_ if utool.VERBOSE: end1_() # For every daid, compute its gamma using pregrouped rvecs # Summation over words for each aid if utool.VERBOSE: mark2, end2_ = utool.log_progress( '[smk_index] Gamma Sum: ', len(daid2_wx2_drvecs), flushfreq=100, writefreq=25) aid_list = list(daid2_wx2_drvecs.keys()) wx2_aidrvecs_list = list(daid2_wx2_drvecs.values()) aidwxs_list = [list(wx2_aidrvecs.keys()) for wx2_aidrvecs in wx2_aidrvecs_list] aidrvecs_list = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in wx2_aidrvecs_list] aidweight_list = [[wx2_weight[wx] for wx in aidwxs] for aidwxs in aidwxs_list] #gamma_list = [] #for weight_list, rvecs_list in zip(aidweight_list, aidrvecs_list): # assert len(weight_list) == len(rvecs_list), 'one list for each word' # gamma = smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh) # 66.8 % # #weight_list = np.ones(weight_list.size) # gamma_list.append(gamma) gamma_list = [smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh) for weight_list, rvecs_list in zip(aidweight_list, aidrvecs_list)] daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma') if utool.VERBOSE: end2_() return daid2_gamma
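# --- Hedged sketch of the gamma normalization (assumed form; smk_core.gamma_summation2
#     itself is not shown in this document) ---
# The usual SMK normalizer is gamma(X) = (sum_w idf_w * sum_{i,j} sigma_alpha(r_i . r_j)) ** -0.5
# with sigma_alpha(u) = sign(u) * |u|**alpha zeroed at or below thresh; the call above presumably
# computes something along these lines, but treat the exact form here as an assumption.
import numpy as np

def selectivity(u, alpha=3.0, thresh=0.0):
    out = np.sign(u) * np.power(np.abs(u), alpha)
    out[u <= thresh] = 0
    return out

def toy_gamma(rvecs_list, weight_list, alpha=3.0, thresh=0.0):
    total = 0.0
    for rvecs, weight in zip(rvecs_list, weight_list):
        simmat = rvecs.dot(rvecs.T)     # residual-vector similarities within one word
        total += weight * selectivity(simmat, alpha, thresh).sum()
    return 1.0 / np.sqrt(total)

rvecs_list = [np.array([[1.0, 0.0], [0.6, 0.8]]), np.array([[0.0, 1.0]])]
weight_list = [1.5, 0.7]   # per-word idf weights
print(toy_gamma(rvecs_list, weight_list))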