def build_matches_sep(X_fxs, Y_fxs, scores_list):
    r"""
    Just builds the feature matches. The scores have already been broken up
    per feature, so no splitting is needed here.

    Returns:
        tuple: (fm, fs)

    CommandLine:
        python -m wbia.algo.smk.smk_funcs build_matches_sep --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> map_int = ut.partial(ut.lmap, ut.partial(np.array, dtype=np.int32))
        >>> map_float = ut.partial(ut.lmap, ut.partial(np.array, dtype=np.float32))
        >>> X_fxs = map_int([[0, 1], [2, 3, 4], [5]])
        >>> Y_fxs = map_int([[8], [0, 4], [99]])
        >>> scores_list = map_float([
        >>>     [[.1], [.2]],
        >>>     [[.3, .4], [.4, .6], [.5, .9]],
        >>>     [[.4]],
        >>> ])
        >>> (fm, fs) = build_matches_sep(X_fxs, Y_fxs, scores_list)
        >>> print('fm = ' + ut.repr2(fm))
        >>> print('fs = ' + ut.repr2(fs))
        >>> assert len(fm) == len(fs)
        >>> assert np.isclose(np.sum(ut.total_flatten(scores_list)), fs.sum())
    """
    fs = np.array(ut.total_flatten(scores_list), dtype=np.float32)
    # Pair every query feature index with every database feature index
    unflat_fm = (ut.product(fxs1, fxs2) for fxs1, fxs2 in zip(X_fxs, Y_fxs))
    fm = np.array(ut.flatten(unflat_fm), dtype=np.int32)
    # Drop matches with non-positive scores
    isvalid = np.greater(fs, 0)
    fm = fm.compress(isvalid, axis=0)
    fs = fs.compress(isvalid, axis=0)
    return fm, fs
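
# A hedged illustration (not part of the original module): the same pairing
# logic as build_matches_sep, written with itertools/numpy only so the
# cartesian-product construction of `fm` is explicit. The `_demo_` name and
# the standalone list comprehensions are invented for this sketch.
def _demo_build_matches_sep_logic():
    import itertools
    import numpy as np
    X_fxs = [[0, 1], [2, 3, 4], [5]]
    Y_fxs = [[8], [0, 4], [99]]
    scores_list = [[[.1], [.2]], [[.3, .4], [.4, .6], [.5, .9]], [[.4]]]
    # Flatten the per-word scores into one flat score vector
    fs = np.array([s for word in scores_list for row in word for s in row],
                  dtype=np.float32)
    # Pair every X feature index with every Y feature index per word
    fm = np.array([pair for fxs1, fxs2 in zip(X_fxs, Y_fxs)
                   for pair in itertools.product(fxs1, fxs2)], dtype=np.int32)
    keep = fs > 0  # drop non-positive scores, as build_matches_sep does
    fm, fs = fm[keep], fs[keep]
    assert len(fm) == len(fs) == 9
    return fm, fs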
def expand(sample, denc_per_name=[1], extra_dbsize_fracs=[0]):
    """Expand a sample into query/database configurations of varying size."""
    # Vary the number of database encounters in each sample
    target_daids_list = []
    target_info_list_ = []
    for num in denc_per_name:
        dname_encs_ = ut.take_column(sample.dname_encs, slice(0, num))
        dnames_ = ut.lmap(ut.flatten, dname_encs_)
        daids_ = ut.total_flatten(dname_encs_)
        target_daids_list.append(daids_)
        name_lens = ut.lmap(len, dnames_)
        dpername = name_lens[0] if ut.allsame(name_lens) else np.mean(name_lens)
        target_info_list_.append(
            ut.odict([
                ('qsize', len(sample.qaids)),
                ('t_n_names', len(dname_encs_)),
                ('t_dpername', dpername),
                ('t_denc_pername', num),
                ('t_dsize', len(daids_)),
            ])
        )

    # Append confusors to maintain a constant dbsize in each base sample
    dbsize_list = ut.lmap(len, target_daids_list)
    max_dsize = max(dbsize_list)
    n_need = max_dsize - min(dbsize_list)
    n_extra_avail = len(sample.confusor_pool) - n_need
    assert len(sample.confusor_pool) > n_need, 'not enough confusors'
    padded_daids_list = []
    padded_info_list_ = []
    for daids_, info_ in zip(target_daids_list, target_info_list_):
        num_take = max_dsize - len(daids_)
        pad_aids = sample.confusor_pool[:num_take]
        new_aids = daids_ + pad_aids
        info_ = info_.copy()
        info_['n_pad'] = len(pad_aids)
        info_['pad_dsize'] = len(new_aids)
        padded_info_list_.append(info_)
        padded_daids_list.append(new_aids)

    # Vary the dbsize by appending extra confusors
    if extra_dbsize_fracs is None:
        extra_dbsize_fracs = [1.0]
    extra_fracs = np.array(extra_dbsize_fracs)
    n_extra_list = np.unique(extra_fracs * n_extra_avail).astype(int)
    daids_list = []
    info_list = []
    for n in n_extra_list:
        for daids_, info_ in zip(padded_daids_list, padded_info_list_):
            extra_aids = sample.confusor_pool[len(sample.confusor_pool) - n:]
            daids = sorted(daids_ + extra_aids)
            daids_list.append(daids)
            info = info_.copy()
            info['n_extra'] = len(extra_aids)
            info['dsize'] = len(daids)
            info_list.append(info)

    verbose = 0
    if verbose:
        import pandas as pd
        logger.info(pd.DataFrame.from_records(info_list))
        logger.info('#qaids = %r' % (len(sample.qaids),))
        logger.info('num_need = %r' % (n_need,))
        logger.info('max_dsize = %r' % (max_dsize,))
    return sample.qaids, daids_list, info_list
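
# A hedged usage sketch for expand() (not from the original source). It
# assumes only the three attributes the function reads: qaids, dname_encs
# (per-name lists of encounters of annotation ids), and confusor_pool.
# All ids and the `_demo_` name below are invented.
def _demo_expand_usage():
    from types import SimpleNamespace
    sample = SimpleNamespace(
        qaids=[1, 2, 3],
        dname_encs=[
            [[10, 11], [12]],  # two encounters for name A
            [[20], [21, 22]],  # two encounters for name B
            [[30], [31]],      # two encounters for name C
        ],
        confusor_pool=list(range(100, 120)),
    )
    qaids, daids_list, info_list = expand(
        sample, denc_per_name=[1, 2], extra_dbsize_fracs=[0, 0.5]
    )
    # One padded database per (encounters-per-name, extra-fraction) setting;
    # each info dict records qsize, t_dsize, n_pad, n_extra, and dsize.
    return qaids, daids_list, info_list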
def merge_level_order(level_orders, topsort):
    """
    Merge the level orders of individual subtrees into a total ordering for
    computation.

    >>> level_orders = {
    >>>     'multi_chip_multitest': [['dummy_annot'], ['chip'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'multi_fgweight_multitest': [['dummy_annot'], ['chip', 'probchip'],
    >>>         ['keypoint'], ['fgweight'], ['multitest'], ['multitest_score'], ],
    >>>     'multi_keypoint_nnindexer': [['dummy_annot'], ['chip'], ['keypoint'],
    >>>         ['nnindexer'], ['multitest'], ['multitest_score'], ],
    >>>     'normal': [['dummy_annot'], ['chip', 'probchip'], ['keypoint'],
    >>>         ['fgweight'], ['spam'], ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_multitest_1': [['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_multitest_2': [['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_1': [['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_2': [['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>> }
    >>> topsort = [u'dummy_annot', u'notch', u'probchip', u'chip', u'keypoint',
    >>>     u'fgweight', u'nnindexer', u'spam', u'notchpair', u'multitest',
    >>>     u'multitest_score']
    >>> print(ut.repr3(ut.merge_level_order(level_orders, topsort)))

    EG2:
        level_orders = {u'normal': [[u'dummy_annot'], [u'chip', u'probchip'],
            [u'keypoint'], [u'fgweight'], [u'spam']]}
        topsort = [u'dummy_annot', u'probchip', u'chip', u'keypoint',
            u'fgweight', u'spam']
    """
    import utool as ut
    if False:
        compute_order = []
        level_orders = ut.map_dict_vals(ut.total_flatten, level_orders)
        level_sets = ut.map_dict_vals(set, level_orders)
        for tablekey in topsort:
            compute_order.append((tablekey, [
                groupkey for groupkey, set_ in level_sets.items()
                if tablekey in set_
            ]))
        return compute_order
    else:
        # Work on the common subgraph
        import itertools
        # Pointer to the current level: start at the end and work your way up.
        main_ptr = len(topsort) - 1
        stack = []
        keys = list(level_orders.keys())
        type_to_ptr = {key: -1 for key in keys}
        print('level_orders = %s' % (ut.repr3(level_orders),))
        for count in itertools.count(0):
            print('----')
            print('count = %r' % (count,))
            # Read the level each subtree pointer currently points at
            ptred_levels = []
            for key in keys:
                levels = level_orders[key]
                ptr = type_to_ptr[key]
                try:
                    level = tuple(levels[ptr])
                except IndexError:
                    level = None
                ptred_levels.append(level)
            print('ptred_levels = %r' % (ptred_levels,))
            print('main_ptr = %r' % (main_ptr,))
            # Group keys are tablenames; they point to the (type) of the input
            groupkeys, groupxs = ut.group_indices(ptred_levels)
            # Walk the topsort down until a target identifies exactly one group
            main_idx = None
            while main_idx is None and main_ptr >= 0:
                target = topsort[main_ptr]
                print('main_ptr = %r' % (main_ptr,))
                print('target = %r' % (target,))
                possible_idxs = [
                    idx for idx, keytup in enumerate(groupkeys)
                    if keytup is not None and target in keytup
                ]
                if len(possible_idxs) == 1:
                    main_idx = possible_idxs[0]
                else:
                    main_idx = None
                if main_idx is None:
                    main_ptr -= 1
            if main_idx is None:
                print('break I')
                break
            found_groups = ut.apply_grouping(keys, groupxs)[main_idx]
            print('found_groups = %r' % (found_groups,))
            stack.append((target, found_groups))
            for k in found_groups:
                type_to_ptr[k] -= 1
            if len(found_groups) == len(keys):
                main_ptr -= 1
                if main_ptr < 0:
                    print('break E')
                    break
        print('stack = %s' % (ut.repr3(stack),))
        print('have = %r' % (sorted(ut.take_column(stack, 0)),))
        print('need = %s' % (sorted(ut.total_flatten(level_orders.values())),))
        compute_order = stack[::-1]
        return compute_order
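
# A hedged call sketch (not part of the original source) using the EG2 data
# from the docstring above. It assumes this module-level merge_level_order
# and that utool is importable; note the function prints a verbose trace
# while it runs. The `_demo_` name is invented.
def _demo_merge_level_order():
    level_orders = {
        'normal': [['dummy_annot'], ['chip', 'probchip'], ['keypoint'],
                   ['fgweight'], ['spam']],
    }
    topsort = ['dummy_annot', 'probchip', 'chip', 'keypoint', 'fgweight',
               'spam']
    compute_order = merge_level_order(level_orders, topsort)
    # Expected shape: bottom-up (tablename, subtree keys) pairs, roughly
    # [('dummy_annot', ['normal']), ('chip', ['normal']),
    #  ('keypoint', ['normal']), ('fgweight', ['normal']),
    #  ('spam', ['normal'])]
    return compute_order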