def set_edge(self, edge, info_text=None):
    """
    Build a pairwise match for ``edge`` and display it via ``self.set_match``.

    NOTE(review): ``qreq_``, ``ibs``, ``cfgdict`` and ``vt`` are not defined
    in this function; they are presumably supplied by an enclosing scope.

    Args:
        edge (tuple): pair ``(aid1, aid2)`` of annotation ids.
        info_text (str): passed through unchanged to ``self.set_match``.
    """
    first_aid, second_aid = edge

    # Reuse the parameters of the existing request when there is one,
    # otherwise fall back to the plain config dictionary.
    request_cfg = cfgdict if qreq_ is None else qreq_.qparams
    pair_request = ibs.new_query_request(
        [first_aid], [second_aid], cfgdict=request_cfg, verbose=False
    )
    query_config = pair_request.extern_query_config2
    data_config = pair_request.extern_data_config2

    def _lazy_annot(aid, config):
        # Single annotation -> lazy dictionary representation
        return ibs.annots([aid], config=config)[0]._make_lazy_dict()

    pair_match = vt.PairwiseMatch(
        _lazy_annot(first_aid, query_config),
        _lazy_annot(second_aid, data_config),
    )

    def on_context():
        # Imported lazily so the gui module is only loaded on demand
        from wbia.gui import inspect_gui

        return inspect_gui.make_annotpair_context_options(
            ibs, first_aid, second_aid, None
        )

    self.set_match(pair_match, on_context, info_text)
def bigcache_vsone(qreq_, hyper_params):
    """
    Cached output of one-vs-one matches

    Samples training pairs from the ranked results of ``qreq_``, adds the
    pairs returned by ``photobomb_samples``, drops every pair that involves
    an annotation without features, optionally subsamples, and computes the
    one-vs-one matches for the remaining pairs.

    Args:
        qreq_: query request; ``qreq_.ibs`` and ``qreq_.execute()`` are used.
        hyper_params: object providing ``pair_sample`` (keyword arguments for
            pair sampling), ``subsample`` (falsy, or the maximum number of
            pairs to keep) and ``vsone_assign`` (matching config).

    Returns:
        tuple: ``(matches, infr)`` - the computed matches and the
            ``AnnotInference`` object that produced them.

    >>> from wbia.scripts.script_vsone import *  # NOQA
    >>> self = OneVsOneProblem()
    >>> qreq_ = self.qreq_
    >>> hyper_params = self.hyper_params
    """
    import vtool as vt
    import wbia

    def _aids_without_feats(annots):
        # Parenthesize before negating: ``~counts > 0`` would be parsed as
        # ``(~counts) > 0`` which is never true for non-negative counts, so
        # nothing would ever be flagged as bad.
        has_feats = np.array(annots.num_feats) > 0
        return annots.compress(~has_feats).aids

    # Get a set of training pairs
    ibs = qreq_.ibs
    cm_list = qreq_.execute()
    infr = wbia.AnnotInference.from_qreq_(qreq_, cm_list, autoinit=True)

    # Per query choose a set of correct, incorrect, and random training pairs
    aid_pairs_ = infr._cm_training_pairs(
        rng=np.random.RandomState(42), **hyper_params.pair_sample
    )
    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_), directed=False).tolist()
    pb_aid_pairs_ = photobomb_samples(ibs)
    # TODO: try to add in more non-comparable samples
    aid_pairs_ = pb_aid_pairs_ + aid_pairs_
    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_))

    # ======================================
    # Compute one-vs-one scores and local_measures
    # ======================================

    # Prepare lazy attributes for annotations
    qreq_ = infr.qreq_
    ibs = qreq_.ibs
    qconfig2_ = qreq_.extern_query_config2
    dconfig2_ = qreq_.extern_data_config2
    qannot_cfg = ibs.depc.stacked_config(None, 'featweight', qconfig2_)
    dannot_cfg = ibs.depc.stacked_config(None, 'featweight', dconfig2_)

    # Remove any pairs missing features
    if dannot_cfg == qannot_cfg:
        # Same config on both sides: check every involved annotation once
        unique_annots = ibs.annots(np.unique(np.array(aid_pairs_)), config=dannot_cfg)
        bad_aids = set(_aids_without_feats(unique_annots))
    else:
        annots1_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 0)), config=qannot_cfg)
        annots2_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 1)), config=dannot_cfg)
        bad_aids1 = _aids_without_feats(annots1_)
        bad_aids2 = _aids_without_feats(annots2_)
        bad_aids = set(bad_aids1 + bad_aids2)
    subset_idxs = np.where(
        [not (a1 in bad_aids or a2 in bad_aids) for a1, a2 in aid_pairs_]
    )[0]

    # Keep only a random subset
    if hyper_params.subsample:
        rng = np.random.RandomState(3104855634)
        num_max = hyper_params.subsample
        if num_max < len(subset_idxs):
            subset_idxs = rng.choice(subset_idxs, size=num_max, replace=False)
            subset_idxs = sorted(subset_idxs)

    # Take the current selection
    aid_pairs = ut.take(aid_pairs_, subset_idxs)

    if True:
        # NEW WAY
        config = hyper_params.vsone_assign
        # TODO: ensure annot probs like chips and features can be appropriately
        # set via qreq_ config or whatever
        matches = infr.exec_vsone_subset(aid_pairs, config=config)
    else:
        # OLD WAY (disabled by the constant condition above; kept for reference)
        query_aids = ut.take_column(aid_pairs, 0)
        data_aids = ut.take_column(aid_pairs, 1)
        # Determine a unique set of annots per config
        configured_aids = ut.ddict(set)
        configured_aids[qannot_cfg].update(query_aids)
        configured_aids[dannot_cfg].update(data_aids)

        # Make efficient annot-object representation
        configured_obj_annots = {}
        for config, aids in configured_aids.items():
            annots = ibs.annots(sorted(aids), config=config)
            configured_obj_annots[config] = annots

        annots1 = configured_obj_annots[qannot_cfg].loc(query_aids)
        annots2 = configured_obj_annots[dannot_cfg].loc(data_aids)

        # Get hash based on visual annotation appearance of each pair
        # as well as algorithm configurations used to compute those properties
        qvuuids = annots1.visual_uuids
        dvuuids = annots2.visual_uuids
        qcfgstr = annots1._config.get_cfgstr()
        dcfgstr = annots2._config.get_cfgstr()
        annots_cfgstr = ut.hashstr27(qcfgstr) + ut.hashstr27(dcfgstr)
        vsone_uuids = [
            ut.combine_uuids(uuids, salt=annots_cfgstr)
            for uuids in ut.ProgIter(
                zip(qvuuids, dvuuids), length=len(qvuuids), label='hashing ids'
            )
        ]

        # Combine into a big cache for the entire 1-v-1 matching run
        big_uuid = ut.hashstr_arr27(vsone_uuids, '', pathsafe=True)
        cacher = ut.Cacher('vsone_v7', cfgstr=str(big_uuid), appname='vsone_rf_train')

        cached_data = cacher.tryload()
        if cached_data is not None:
            # Caching doesn't work 100% for PairwiseMatch object, so we need to do
            # some postprocessing
            configured_lazy_annots = ut.ddict(dict)
            for config, annots in configured_obj_annots.items():
                annot_dict = configured_lazy_annots[config]
                for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'):
                    annot_dict[_annot.aid] = _annot._make_lazy_dict()

            # Extract pairs of annot objects (with shared caches)
            lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids)
            lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids)

            # Create a set of PairwiseMatches with the correct annot properties
            matches = [
                vt.PairwiseMatch(annot1, annot2)
                for annot1, annot2 in zip(lazy_annots1, lazy_annots2)
            ]

            # Updating a new matches dictionary ensure the annot1/annot2 properties
            # are set correctly
            for key, cached_matches in list(cached_data.items()):
                fixed_matches = [match.copy() for match in matches]
                for fixed, internal in zip(fixed_matches, cached_matches):
                    dict_ = internal.__dict__
                    ut.delete_dict_keys(dict_, ['annot1', 'annot2'])
                    fixed.__dict__.update(dict_)
                cached_data[key] = fixed_matches
        else:
            cached_data = vsone_(
                qreq_,
                query_aids,
                data_aids,
                qannot_cfg,
                dannot_cfg,
                configured_obj_annots,
                hyper_params,
            )
            cacher.save(cached_data)
        # key_ = 'SV_LNBNN'
        key_ = 'RAT_SV'
        # for key in list(cached_data.keys()):
        #     if key != 'SV_LNBNN':
        #         del cached_data[key]
        matches = cached_data[key_]
    return matches, infr
def vsone_(
    qreq_,
    query_aids,
    data_aids,
    qannot_cfg,
    dannot_cfg,
    configured_obj_annots,
    hyper_params,
):
    """
    Compute one-vs-one matches for aligned lists of query / data aids.

    Args:
        qreq_: query request; only ``qreq_.qparams.featweight_enabled`` is read.
        query_aids (list): annotation ids used as the first item of each pair.
        data_aids (list): annotation ids used as the second item of each pair.
        qannot_cfg: key into ``configured_obj_annots`` for the query side.
        dannot_cfg: key into ``configured_obj_annots`` for the data side.
        configured_obj_annots (dict): maps a config to an annots object.
        hyper_params: object providing ``vsone_assign`` (assignment config).

    Returns:
        dict: ``{'RAT_SV': matches}`` where the matches had the ratio test
            and spatial verification applied.
    """
    # Do vectorized preload before constructing lazy dicts
    # Then make sure the lazy dicts point to this subset
    unique_obj_annots = list(configured_obj_annots.values())
    for annots in ut.ProgIter(unique_obj_annots, 'vectorized preload'):
        annots.set_caching(True)
        # Attribute accesses below are done only for their caching side effect
        annots.chip_size
        annots.vecs
        annots.kpts
        annots.yaw
        annots.qual
        annots.gps
        annots.time
        if qreq_.qparams.featweight_enabled:
            annots.fgweights
    # annots._internal_attrs.clear()

    # Make convenient lazy dict representations (after loading pre info)
    configured_lazy_annots = ut.ddict(dict)
    for config, annots in configured_obj_annots.items():
        annot_dict = configured_lazy_annots[config]
        for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'):
            annot = _annot._make_lazy_dict()
            annot_dict[_annot.aid] = annot

    unique_lazy_annots = ut.flatten([x.values() for x in configured_lazy_annots.values()])

    flann_params = {'algorithm': 'kdtree', 'trees': 4}
    for annot in ut.ProgIter(unique_lazy_annots, label='lazy flann'):
        vt.matching.ensure_metadata_flann(annot, flann_params)
        vt.matching.ensure_metadata_normxy(annot)

    # Indexing a lazy dict forces the corresponding value to be computed
    for annot in ut.ProgIter(unique_lazy_annots, 'preload kpts'):
        annot['kpts']
    for annot in ut.ProgIter(unique_lazy_annots, 'preload normxy'):
        annot['norm_xys']
    for annot in ut.ProgIter(unique_lazy_annots, 'preload vecs'):
        annot['vecs']

    # Extract pairs of annot objects (with shared caches)
    lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids)
    lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids)

    # TODO: param search over grid
    #     'use_sv': [0, 1],
    #     'use_fg': [0, 1],
    #     'use_ratio_test': [0, 1],
    matches_RAT = [
        vt.PairwiseMatch(annot1, annot2)
        for annot1, annot2 in zip(lazy_annots1, lazy_annots2)
    ]

    # Construct global measurements
    global_keys = ['yaw', 'qual', 'gps', 'time']
    for match in ut.ProgIter(matches_RAT, label='setup globals'):
        match.add_global_measures(global_keys)

    # Preload flann for only specific annots
    for match in ut.ProgIter(matches_RAT, label='preload FLANN'):
        match.annot1['flann']

    cfgdict = hyper_params.vsone_assign
    # Find one-vs-one matches
    # cfgdict = {'checks': 20, 'symmetric': False}
    for match in ut.ProgIter(matches_RAT, label='assign vsone'):
        match.assign(cfgdict=cfgdict)

    # gridsearch_ratio_thresh()
    # vt.matching.gridsearch_match_operation(matches_RAT, 'apply_ratio_test', {
    #     'ratio_thresh': np.linspace(.6, .7, 50)
    # })
    for match in ut.ProgIter(matches_RAT, label='apply ratio thresh'):
        match.apply_ratio_test({'ratio_thresh': 0.638}, inplace=True)

    # TODO gridsearch over sv params
    # vt.matching.gridsearch_match_operation(matches_RAT, 'apply_sver', {
    #     'xy_thresh': np.linspace(0, 1, 3)
    # })
    matches_RAT_SV = [
        match.apply_sver(inplace=True)
        for match in ut.ProgIter(matches_RAT, label='sver')
    ]

    # Add keypoint spatial information to local features
    for match in matches_RAT_SV:
        match.add_local_measures()
        # key_ = 'norm_xys'
        # norm_xy1 = match.annot1[key_].take(match.fm.T[0], axis=1)
        # norm_xy2 = match.annot2[key_].take(match.fm.T[1], axis=1)
        # match.local_measures['norm_x1'] = norm_xy1[0]
        # match.local_measures['norm_y1'] = norm_xy1[1]
        # match.local_measures['norm_x2'] = norm_xy2[0]
        # match.local_measures['norm_y2'] = norm_xy2[1]

        # match.local_measures['scale1'] = vt.get_scales(
        #     match.annot1['kpts'].take(match.fm.T[0], axis=0))
        # match.local_measures['scale2'] = vt.get_scales(
        #     match.annot2['kpts'].take(match.fm.T[1], axis=0))

    # Create another version where we find global normalizers for the data
    # qreq_.load_indexer()
    # matches_SV_LNBNN = batch_apply_lnbnn(matches_RAT_SV, qreq_, inplace=True)

    # if 'weight' in cfgdict:
    #     for match in matches_SV_LNBNN[::-1]:
    #         lnbnn_dist = match.local_measures['lnbnn']
    #         ndist = match.local_measures['lnbnn_norm_dist']
    #         weights = match.local_measures[cfgdict['weight']]
    #         match.local_measures['weighted_lnbnn'] = weights * lnbnn_dist
    #         match.local_measures['weighted_lnbnn_norm_dist'] = weights * ndist
    #         match.fs = match.local_measures['weighted_lnbnn']

    cached_data = {
        # 'RAT': matches_RAT,
        'RAT_SV': matches_RAT_SV,
        # 'SV_LNBNN': matches_SV_LNBNN,
    }
    return cached_data


def quick_cm(y_true, y_pred, labels, sample_weight):
    """
    Build a dense confusion matrix from index-encoded labels.

    Args:
        y_true: true class indices (used as row indices).
        y_pred: predicted class indices (used as column indices).
        labels: sequence of classes; only its length is used.
        sample_weight: weight accumulated into the cell of each sample.

    Returns:
        ndarray: ``(len(labels), len(labels))`` array where entry ``[i, j]``
            is the summed weight of samples with true ``i`` and predicted ``j``.
    """
    # The private ``sklearn.metrics.classification`` module no longer exists
    # in current scikit-learn; take coo_matrix from scipy directly.
    from scipy.sparse import coo_matrix

    n_labels = len(labels)
    # Duplicate (row, col) entries are summed when converting to dense
    C = coo_matrix(
        (sample_weight, (y_true, y_pred)), shape=(n_labels, n_labels)
    ).toarray()
    return C


def quick_mcc(C):
    """
    Matthews correlation coefficient of a confusion matrix.

    assumes y_true and y_pred are in index/encoded format

    Args:
        C (ndarray): square confusion matrix (rows true, columns predicted).

    Returns:
        float: the coefficient, or 0.0 when it is undefined because all
            samples share a single true class or a single predicted class.
    """
    t_sum = C.sum(axis=1)
    p_sum = C.sum(axis=0)
    n_correct = np.diag(C).sum()
    n_samples = p_sum.sum()
    cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
    cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum)
    cov_ytyt = n_samples ** 2 - np.dot(t_sum, t_sum)
    # Multiply as floats so the product of two large integers cannot overflow
    denom = np.sqrt(float(cov_ytyt) * float(cov_ypyp))
    if denom == 0:
        # Avoid 0 / 0 -> nan for degenerate matrices
        return 0.0
    mcc = cov_ytyp / denom
    return mcc


def mcc_hack():
    """
    Best achievable MCC per task / score type over all score thresholds.

    NOTE(review): reads ``self``, ``score_dict`` and ``task_list`` which are
    not defined in this function; presumably they come from a scope that is
    not visible here - confirm before calling.

    Returns:
        dict: ``task_mccs[task_name][scoretype] -> mcc``
    """
    sample_weight = np.ones(len(self.samples), dtype=int)
    task_mccs = ut.ddict(dict)
    # Determine threshold levels per score type
    score_to_order = {}
    for scoretype in score_dict.keys():
        y_score = score_dict[scoretype].values
        # Stable sort, then reverse for descending order
        sortx = np.argsort(y_score, kind='mergesort')[::-1]
        y_score = y_score[sortx]
        # One threshold at the last position of every run of equal scores
        distinct_value_indices = np.where(np.diff(y_score))[0]
        threshold_idxs = np.r_[distinct_value_indices, y_score.size - 1]
        thresh = y_score[threshold_idxs]
        score_to_order[scoretype] = (sortx, y_score, thresh)

    classes_ = np.array([0, 1], dtype=int)
    for task in task_list:
        labels = self.samples.subtasks[task]
        for sublabels in labels.gen_one_vs_rest_labels():
            for scoretype in score_dict.keys():
                sortx, y_score, thresh = score_to_order[scoretype]
                y_true = sublabels.y_enc[sortx]
                mcc = -np.inf
                for t in thresh:
                    y_pred = (y_score > t).astype(int)
                    C1 = quick_cm(y_true, y_pred, classes_, sample_weight)
                    mcc1 = quick_mcc(C1)
                    if mcc1 < 0:
                        # Score is anti-correlated: evaluate flipped predictions
                        C2 = quick_cm(y_true, 1 - y_pred, classes_, sample_weight)
                        mcc1 = quick_mcc(C2)
                    mcc = max(mcc1, mcc)
                # logger.info('mcc = %r' % (mcc,))
                task_mccs[sublabels.task_name][scoretype] = mcc
    return task_mccs


if 0:
    # Disabled reporting snippet
    with ut.Timer('mcc'):
        task_mccs = mcc_hack()
    logger.info('\nMCC of simple scoring measures:')
    df = pd.DataFrame.from_dict(task_mccs, orient='index')
    from utool.experimental.pandas_highlight import to_string_monkey

    logger.info(to_string_monkey(df, highlight_cols=np.arange(len(df.columns))))
def demo_classes(pblm):
    r"""
    Show one example match for every class of the ``match_state`` task.

    For each class an edge is picked that is labeled with that class, is
    flagged ``notpb`` in the ``photobomb_state`` task and passes a
    class-specific filter on the ``learn(sum,glob)`` features. The chosen
    pairs are then matched and drawn side by side in a 1x3 figure.

    Args:
        pblm: problem object providing ``samples``, ``hyper_params`` and
            ``infr.ibs``.

    Raises:
        IndexError: if no candidate edge is found for some class.

    CommandLine:
        python -m ibeis.algo.verif.vsone demo_classes --saveparts --save=classes.png --clipwhite
        python -m ibeis.algo.verif.vsone demo_classes --saveparts --save=figures/classes.png --clipwhite --dpath=~/latex/crall-iccv-2017

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.verif.vsone import *  # NOQA
        >>> pblm = OneVsOneProblem.from_empty(defaultdb='PZ_PB_RF_TRAIN')
        >>> pblm.load_features()
        >>> pblm.load_samples()
        >>> pblm.build_feature_subsets()
        >>> pblm.demo_classes()
        >>> ut.show_if_requested()
    """
    task_key = 'match_state'
    labels = pblm.samples.subtasks[task_key]
    pb_labels = pblm.samples.subtasks['photobomb_state']
    # Which of the candidates (by position) to display for each class
    classname_offset = {
        POSTV: 0,
        NEGTV: 0,
        INCMP: 0,
    }
    feats = pblm.samples.X_dict['learn(sum,glob)']

    class_to_edge = {}
    for class_name in labels.class_names:
        print('Find example of %r' % (class_name, ))
        # Find an example of each class (that is not a photobomb)
        pbflags = pb_labels.indicator_df['notpb']
        flags = labels.indicator_df[class_name]
        assert np.all(pbflags.index == flags.index)
        flags = flags & pbflags
        ratio = feats['sum(ratio)']
        if class_name == INCMP:
            # flags &= feats['global(delta_yaw)'] > 3
            flags &= feats['global(delta_view)'] > 2
            # flags &= feats['sum(ratio)'] > 0
        if class_name == NEGTV:
            # Keep only the candidates with the largest ratio sum
            low = ratio[flags].max()
            flags &= feats['sum(ratio)'] >= low
        if class_name == POSTV:
            # Keep candidates strictly between half the median and the median
            low = ratio[flags].median() / 2
            high = ratio[flags].median()
            flags &= feats['sum(ratio)'] < high
            flags &= feats['sum(ratio)'] > low
        # flags &= pblm.samples.simple_scores[flags]['score_lnbnn_1vM'] > 0
        idxs = np.where(flags)[0]
        print('Found %d candidates' % (len(idxs)))
        offset = classname_offset[class_name]
        if offset >= len(idxs):
            # Same exception type as plain indexing, but with a usable message
            raise IndexError(
                'No example for class %r: %d candidates, offset=%d'
                % (class_name, len(idxs), offset)
            )
        idx = idxs[offset]
        series = labels.indicator_df.iloc[idx]
        assert series[class_name]
        edge = series.name
        class_to_edge[class_name] = edge

    import plottool as pt
    import guitool as gt
    gt.ensure_qapp()
    pt.qtensure()

    fnum = 1
    pt.figure(fnum=fnum, pnum=(1, 3, 1))
    pnum_ = pt.make_pnum_nextgen(1, 3)

    # classname_alias = {
    #     POSTV: 'positive',
    #     NEGTV: 'negative',
    #     INCMP: 'incomparable',
    # }

    ibs = pblm.infr.ibs
    for class_name, edge in class_to_edge.items():
        aid1, aid2 = edge
        # alias = classname_alias[class_name]
        print('class_name = %r' % (class_name, ))
        annot1 = ibs.annots([aid1])[0]._make_lazy_dict()
        annot2 = ibs.annots([aid2])[0]._make_lazy_dict()
        vt.matching.ensure_metadata_normxy(annot1)
        vt.matching.ensure_metadata_normxy(annot2)
        match = vt.PairwiseMatch(annot1, annot2)
        cfgdict = pblm.hyper_params.vsone_match.asdict()
        match.apply_all(cfgdict)
        pt.figure(fnum=fnum, pnum=pnum_())
        match.show(show_ell=False, show_ori=False)
def demo_single_pairwise_feature_vector():
    r"""
    Build a pairwise match between annotations 1 and 2 of ``testdb1`` and
    compute its feature vector.

    Returns:
        vt.PairwiseMatch: the match after assignment, ratio test, spatial
            verification and global / local measure computation.

    CommandLine:
        python -m ibeis.algo.verif.vsone demo_single_pairwise_feature_vector

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.verif.vsone import *  # NOQA
        >>> match = demo_single_pairwise_feature_vector()
        >>> print(match)
    """
    import vtool as vt
    import ibeis
    ibs = ibeis.opendb('testdb1')
    qaid, daid = 1, 2
    annot1 = ibs.annots([qaid])[0]._make_lazy_dict()
    annot2 = ibs.annots([daid])[0]._make_lazy_dict()

    vt.matching.ensure_metadata_normxy(annot1)
    vt.matching.ensure_metadata_normxy(annot2)

    match = vt.PairwiseMatch(annot1, annot2)
    cfgdict = {'checks': 200, 'symmetric': False}
    match.assign(cfgdict=cfgdict)
    match.apply_ratio_test({'ratio_thresh': .638}, inplace=True)
    match.apply_sver(inplace=True)

    # match.add_global_measures(['yaw', 'qual', 'gps', 'time'])
    match.add_global_measures(['view', 'qual', 'gps', 'time'])
    match.add_local_measures()

    # sorters = ['ratio', 'norm_dist', 'match_dist']
    # Called for its effect on ``match``; the return value is not used here
    match.make_feature_vector()
    return match


def demo_classes(pblm):
    r"""
    Show one example match for every class of the ``match_state`` task.

    For each class an edge is picked that is labeled with that class, is
    flagged ``notpb`` in the ``photobomb_state`` task and passes a
    class-specific filter on the ``learn(sum,glob)`` features. The chosen
    pairs are then matched and drawn side by side in a 1x3 figure.

    Args:
        pblm: problem object providing ``samples``, ``hyper_params`` and
            ``infr.ibs``.

    Raises:
        IndexError: if no candidate edge is found for some class.

    CommandLine:
        python -m ibeis.algo.verif.vsone demo_classes --saveparts --save=classes.png --clipwhite
        python -m ibeis.algo.verif.vsone demo_classes --saveparts --save=figures/classes.png --clipwhite --dpath=~/latex/crall-iccv-2017

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.verif.vsone import *  # NOQA
        >>> pblm = OneVsOneProblem.from_empty(defaultdb='PZ_PB_RF_TRAIN')
        >>> pblm.load_features()
        >>> pblm.load_samples()
        >>> pblm.build_feature_subsets()
        >>> pblm.demo_classes()
        >>> ut.show_if_requested()
    """
    task_key = 'match_state'
    labels = pblm.samples.subtasks[task_key]
    pb_labels = pblm.samples.subtasks['photobomb_state']
    # Which of the candidates (by position) to display for each class
    classname_offset = {
        POSTV: 0,
        NEGTV: 0,
        INCMP: 0,
    }
    feats = pblm.samples.X_dict['learn(sum,glob)']

    class_to_edge = {}
    for class_name in labels.class_names:
        print('Find example of %r' % (class_name, ))
        # Find an example of each class (that is not a photobomb)
        pbflags = pb_labels.indicator_df['notpb']
        flags = labels.indicator_df[class_name]
        assert np.all(pbflags.index == flags.index)
        flags = flags & pbflags
        ratio = feats['sum(ratio)']
        if class_name == INCMP:
            # flags &= feats['global(delta_yaw)'] > 3
            flags &= feats['global(delta_view)'] > 2
            # flags &= feats['sum(ratio)'] > 0
        if class_name == NEGTV:
            # Keep only the candidates with the largest ratio sum
            low = ratio[flags].max()
            flags &= feats['sum(ratio)'] >= low
        if class_name == POSTV:
            # Keep candidates strictly between half the median and the median
            low = ratio[flags].median() / 2
            high = ratio[flags].median()
            flags &= feats['sum(ratio)'] < high
            flags &= feats['sum(ratio)'] > low
        # flags &= pblm.samples.simple_scores[flags]['score_lnbnn_1vM'] > 0
        idxs = np.where(flags)[0]
        print('Found %d candidates' % (len(idxs)))
        offset = classname_offset[class_name]
        if offset >= len(idxs):
            # Same exception type as plain indexing, but with a usable message
            raise IndexError(
                'No example for class %r: %d candidates, offset=%d'
                % (class_name, len(idxs), offset)
            )
        idx = idxs[offset]
        series = labels.indicator_df.iloc[idx]
        assert series[class_name]
        edge = series.name
        class_to_edge[class_name] = edge

    import plottool as pt
    import guitool as gt
    gt.ensure_qapp()
    pt.qtensure()

    fnum = 1
    pt.figure(fnum=fnum, pnum=(1, 3, 1))
    pnum_ = pt.make_pnum_nextgen(1, 3)

    # classname_alias = {
    #     POSTV: 'positive',
    #     NEGTV: 'negative',
    #     INCMP: 'incomparable',
    # }

    ibs = pblm.infr.ibs
    for class_name, edge in class_to_edge.items():
        aid1, aid2 = edge
        # alias = classname_alias[class_name]
        print('class_name = %r' % (class_name, ))
        annot1 = ibs.annots([aid1])[0]._make_lazy_dict()
        annot2 = ibs.annots([aid2])[0]._make_lazy_dict()
        vt.matching.ensure_metadata_normxy(annot1)
        vt.matching.ensure_metadata_normxy(annot2)
        match = vt.PairwiseMatch(annot1, annot2)
        cfgdict = pblm.hyper_params.vsone_match.asdict()
        match.apply_all(cfgdict)
        pt.figure(fnum=fnum, pnum=pnum_())
        match.show(show_ell=False, show_ori=False)
        # pt.set_title(alias)


def find_opt_ratio(pblm):
    """
    script to help find the correct value for the ratio threshold

    Matches every sample pair with the ratio threshold set to 1.0 and
    spatial verification turned off, then sweeps 100 thresholds in [0, 1].
    For each threshold the per-pair score is the sum of the ratio values
    below it, evaluated by ROC AUC against the positive class. The AUC
    curve and its maximum are plotted.

    Args:
        pblm: problem object providing ``load_samples``, ``infr`` and
            ``samples``.

    >>> from ibeis.algo.verif.vsone import *  # NOQA
    >>> pblm = OneVsOneProblem.from_empty('PZ_PB_RF_TRAIN')
    >>> pblm = OneVsOneProblem.from_empty('GZ_Master1')
    """
    # Find best ratio threshold
    pblm.load_samples()
    infr = pblm.infr
    edges = ut.emap(tuple, pblm.samples.aid_pairs.tolist())
    task = pblm.samples['match_state']
    pos_idx = task.class_names.tolist().index(POSTV)

    config = {'ratio_thresh': 1.0, 'sv_on': False}
    matches = infr._exec_pairwise_match(edges, config)

    import plottool as pt
    pt.qtensure()
    thresholds = np.linspace(0, 1.0, 100)
    pos_truth = task.y_bin.T[pos_idx]
    ratio_fs = [m.local_measures['ratio'] for m in matches]

    aucs = []
    # Given the current correspondences: Find the optimal
    # correspondence threshold.
    for thresh in ut.ProgIter(thresholds, 'computing thresh'):
        scores = np.array([fs[fs < thresh].sum() for fs in ratio_fs])
        roc = sklearn.metrics.roc_auc_score(pos_truth, scores)
        aucs.append(roc)
    aucs = np.array(aucs)
    opt_auc = aucs.max()
    opt_thresh = thresholds[aucs.argmax()]

    pt.plt.plot(thresholds, aucs, 'r-', label='')
    pt.plt.plot(opt_thresh, opt_auc, 'ro', label='L opt=%r' % (opt_thresh, ))
    pt.set_ylabel('auc')
    pt.set_xlabel('ratio threshold')
    pt.legend()