def negative_sample(pccs, per_pair=None):
    """Yield annotation pairs drawn from two different PCCs (negative pairs)."""
    import utool as ut
    rng = ut.ensure_rng(2039141610, 'python')
    neg_pcc_pairs = ut.random_combinations(pccs, 2, rng=rng)
    yield from util.roundrobin(
        ut.random_product((cc1, cc2), num=per_pair, rng=rng)
        for cc1, cc2 in neg_pcc_pairs)
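# Hedged usage sketch (the toy PCCs below are illustrative assumptions, not
# part of this module). Each yielded pair spans two different PCCs, so it can
# serve as a negative (non-matching) training example:
#
#   >>> pccs = [{1, 2, 3}, {4, 5, 6}, {7, 8, 9}]
#   >>> neg_pairs = list(negative_sample(pccs, per_pair=2))
#   >>> # no sampled pair lies entirely inside a single PCC
#   >>> assert all(not any(set(pair) <= cc for cc in pccs) for pair in neg_pairs)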
def testdata_smk(*args, **kwargs):
    """
    >>> from wbia.algo.smk.smk_pipeline import *  # NOQA
    >>> kwargs = {}
    """
    import wbia
    import sklearn
    import sklearn.model_selection
    ibs, aid_list = wbia.testdata_aids(defaultdb='PZ_MTEST')
    nid_list = np.array(ibs.annots(aid_list).nids)
    rng = ut.ensure_rng(0)
    xvalkw = dict(n_splits=4, shuffle=False, random_state=rng)
    skf = sklearn.model_selection.StratifiedKFold(**xvalkw)
    train_idx, test_idx = six.next(skf.split(aid_list, nid_list))
    daids = ut.take(aid_list, train_idx)
    qaids = ut.take(aid_list, test_idx)
    config = {
        'num_words': 1000,
    }
    config.update(**kwargs)
    qreq_ = SMKRequest(ibs, qaids, daids, config)
    smk = qreq_.smk
    # qreq_ = ibs.new_query_request(qaids, daids,
    #                               cfgdict={'pipeline_root': 'smk', 'proot': 'smk'})
    # qreq_ = ibs.new_query_request(qaids, daids, cfgdict={})
    return ibs, smk, qreq_
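# Hedged usage sketch (requires the PZ_MTEST test database to be available;
# the num_words override below is an illustrative assumption):
#
#   >>> ibs, smk, qreq_ = testdata_smk(num_words=256)
#   >>> # qreq_ is the SMKRequest built from the stratified train/test split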
def __init__(self, pblm, pccs, dim=224):
    super(RandomBalancedIBEISSample, self).__init__(dim=dim)
    import utool as ut
    chip_config = {'resize_dim': 'wh', 'dim_size': (self.dim, self.dim)}
    self.pccs = pccs
    all_aids = ut.flatten(pccs)
    all_fpaths = pblm.infr.ibs.depc_annot.get(
        'chips', all_aids, read_extern=False, colnames='img',
        config=chip_config)
    self.aid_to_fpath = dict(zip(all_aids, all_fpaths))
    # self.multitons_pccs = [pcc for pcc in pccs if len(pcc) > 1]
    self.pos_pairs = []
    # Sample all possible positive combinations and ignore incomparable ones
    self.infr = pblm.infr
    # TODO: each sample should really get a weight depending on the number of
    # aids in its pcc
    for pcc in pccs:
        if len(pcc) >= 2:
            edges = np.array(
                list(it.starmap(self.infr.e_, it.combinations(pcc, 2))))
            is_comparable = self.is_comparable(edges)
            pos_edges = edges[is_comparable]
            self.pos_pairs.extend(list(pos_edges))
    rng = ut.ensure_rng(563401, 'numpy')
    self.pyrng = ut.ensure_rng(564043, 'python')
    self.rng = rng
    if True:
        depends = [
            sorted(map(sorted, self.pccs)),
        ]
        hashid = hashutil.hash_data(depends)[:8]
        self.input_id = '{}-{}'.format(len(self), hashid)
def __init__(oracle, accuracy, rng):
    if isinstance(rng, six.string_types):
        rng = sum(map(ord, rng))
    rng = ut.ensure_rng(rng, impl='python')
    if isinstance(accuracy, tuple):
        oracle.normal_accuracy = accuracy[0]
        oracle.recover_accuracy = accuracy[1]
    else:
        oracle.normal_accuracy = accuracy
        oracle.recover_accuracy = accuracy  # .5
    oracle.rng = rng
    oracle.states = {POSTV, NEGTV, INCMP}
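# Hedged usage sketch (the enclosing class name below is an assumption, not
# confirmed by this snippet). The accuracy argument may be a single float or a
# (normal, recover) tuple, and a string rng seed is hashed to an int before
# building the python rng:
#
#   >>> oracle = UserOracle((0.98, 0.93), rng='oracle.1')
#   >>> assert oracle.normal_accuracy == 0.98
#   >>> assert oracle.recover_accuracy == 0.93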
def __init__(self, img1_fpaths, img2_fpaths, labels, dim=224):
    super(LabeledPairDataset, self).__init__(dim=dim)
    assert len(img1_fpaths) == len(img2_fpaths)
    assert len(labels) == len(img2_fpaths)
    self.img1_fpaths = list(img1_fpaths)
    self.img2_fpaths = list(img2_fpaths)
    self.labels = list(labels)
    # Hack for input id
    if True:
        depends = [self.img1_fpaths, self.img2_fpaths, self.labels]
        hashid = hashutil.hash_data(depends)[:8]
        self.input_id = '{}-{}'.format(len(self), hashid)
    import utool as ut
    rng = ut.ensure_rng(3432, 'numpy')
    self.rng = rng
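# Hedged construction sketch (the file paths and labels below are placeholder
# assumptions; the constructor only stores them, no images are read here):
#
#   >>> img1_fpaths = ['pair0_a.png', 'pair1_a.png']
#   >>> img2_fpaths = ['pair0_b.png', 'pair1_b.png']
#   >>> labels = [1, 0]
#   >>> dset = LabeledPairDataset(img1_fpaths, img2_fpaths, labels, dim=224)
#   >>> assert len(dset.labels) == 2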
def stratified_label_shuffle_split(y, labels, fractions, y_idx=None, rng=None):
    """
    Modified from sklearn to make n splits instead of 2. Also enforces that
    labels are not broken into separate groups.

    Args:
        y (ndarray): labels
        labels (ndarray): group labels that must stay within a single split
        fractions (list): relative size of each split
        y_idx (ndarray): indexes associated with y if it was already presampled
        rng (RandomState): random number generator (default = None)

    Returns:
        list: index_sets

    CommandLine:
        python -m ibeis_cnn.dataset stratified_label_shuffle_split --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.dataset import *  # NOQA
        >>> y      = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
        >>> fractions = [.7, .3]
        >>> rng = np.random.RandomState(0)
        >>> index_sets = stratified_label_shuffle_split(y, labels, fractions, rng=rng)
    """
    rng = ut.ensure_rng(rng)
    # orig_y = y
    unique_labels, groupxs = ut.group_indices(labels)
    grouped_ys = ut.apply_grouping(y, groupxs)
    # Assign each group a probabilistic class
    unique_ys = [ys[rng.randint(0, len(ys))] for ys in grouped_ys]
    # TODO: should weight the following selection based on size of group
    # class_weights = [ut.dict_hist(ys) for ys in grouped_ys]
    unique_idxs = stratified_shuffle_split(unique_ys, fractions, rng)
    index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs)))
                  for idxs in unique_idxs]
    if y_idx is not None:
        # These indices subindex into the parent set of indices
        index_sets = [np.take(y_idx, idxs, axis=0) for idxs in index_sets]
    return index_sets
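# Hedged illustration of the grouping guarantee (the toy data is an
# assumption): every unique label lands entirely inside one split, so the
# label sets of the splits are disjoint.
#
#   >>> y = [0, 0, 0, 0, 1, 1, 1, 1]
#   >>> labels = [0, 0, 1, 1, 2, 2, 3, 3]
#   >>> index_sets = stratified_label_shuffle_split(y, labels, [.5, .5], rng=0)
#   >>> split_labels = [set(ut.take(labels, idxs)) for idxs in index_sets]
#   >>> assert not set.intersection(*split_labels)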
def stratified_kfold_label_split(y, labels, n_folds=2, y_idx=None, rng=None):
    """
    Stratified k-fold splitting that also enforces that labels are not broken
    into separate groups.

    Args:
        y (ndarray): labels
        labels (ndarray): group labels that must stay within a single fold
        n_folds (int): number of folds (default = 2)
        y_idx (ndarray): indexes associated with y if it was already presampled
        rng (RandomState): random number generator (default = None)

    Returns:
        list: folded_index_sets

    CommandLine:
        python -m ibeis_cnn.dataset stratified_kfold_label_split --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.dataset import *  # NOQA
        >>> y      = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
        >>> rng = np.random.RandomState(0)
        >>> folded_index_sets = stratified_kfold_label_split(y, labels, rng=rng)
    """
    rng = ut.ensure_rng(rng)
    # orig_y = y
    unique_labels, groupxs = ut.group_indices(labels)
    grouped_ys = ut.apply_grouping(y, groupxs)
    # Assign each group a probabilistic class
    unique_ys = [ys[rng.randint(0, len(ys))] for ys in grouped_ys]
    # TODO: should weight the following selection based on size of group
    # class_weights = [ut.dict_hist(ys) for ys in grouped_ys]
    # NOTE: relies on the legacy sklearn.cross_validation API
    import sklearn.cross_validation
    xvalkw = dict(n_folds=n_folds, shuffle=True, random_state=rng)
    skf = sklearn.cross_validation.StratifiedKFold(unique_ys, **xvalkw)
    _iter = skf
    folded_index_sets = []
    for label_idx_set in _iter:
        index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs)))
                      for idxs in label_idx_set]
        folded_index_sets.append(index_sets)
    # Sanity check: no label may appear in both the train and test indices
    for train_idx, test_idx in folded_index_sets:
        train_labels = set(ut.take(labels, train_idx))
        test_labels = set(ut.take(labels, test_idx))
        assert len(test_labels.intersection(train_labels)) == 0, (
            'same labels appeared in both train and test')
    if y_idx is not None:
        # These indices subindex into the parent set of indices
        folded_index_sets2 = []
        for index_sets in folded_index_sets:
            index_sets = [np.take(y_idx, idxs, axis=0) for idxs in index_sets]
            folded_index_sets2.append(index_sets)
        folded_index_sets = folded_index_sets2
    # import sklearn.model_selection
    # skf = sklearn.model_selection.StratifiedKFold(**xvalkw)
    # _iter = skf.split(X=np.empty(len(target)), y=target)
    # unique_idxs = stratified_shuffle_split(unique_ys, fractions, rng)
    # index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs))) for idxs in unique_idxs]
    # if idx is not None:
    #     # These indices subindex into parent set of indices
    #     index_sets = [np.take(idx, idxs, axis=0) for idxs in index_sets]
    return folded_index_sets
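# Hedged usage sketch (toy data is an assumption; running it requires the
# legacy sklearn.cross_validation module). Each fold is a (train_idx,
# test_idx) pair and the function itself asserts that no label is shared
# between the two:
#
#   >>> y = [0, 0, 0, 0, 1, 1, 1, 1]
#   >>> labels = [0, 0, 1, 1, 2, 2, 3, 3]
#   >>> folds = stratified_kfold_label_split(y, labels, n_folds=2, rng=0)
#   >>> for train_idx, test_idx in folds:
#   ...     assert not set(ut.take(labels, train_idx)) & set(ut.take(labels, test_idx))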
def positive_sample(pccs, per_cc=None):
    """Yield annotation pairs drawn from within a single PCC (positive pairs)."""
    import utool as ut
    rng = ut.ensure_rng(2039141610, 'python')
    yield from util.roundrobin(
        ut.random_combinations(cc, size=2, num=per_cc, rng=rng)
        for cc in pccs)
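# Hedged sketch of combining positive_sample and negative_sample into a single
# labeled stream (the toy PCCs and the itertools usage are illustrative
# assumptions, not part of this module):
#
#   >>> import itertools as it
#   >>> pccs = [{1, 2, 3}, {4, 5, 6}, {7, 8, 9}]
#   >>> pos = ((pair, 1) for pair in positive_sample(pccs, per_cc=2))
#   >>> neg = ((pair, 0) for pair in negative_sample(pccs, per_pair=2))
#   >>> labeled_pairs = list(it.islice(it.chain(pos, neg), 10))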
def _cm_training_pairs(
    infr,
    qreq_=None,
    cm_list=None,
    top_gt=2,
    mid_gt=2,
    bot_gt=2,
    top_gf=2,
    mid_gf=2,
    bot_gf=2,
    rand_gt=2,
    rand_gf=2,
    rng=None,
):
    """
    Constructs training data for a pairwise classifier

    CommandLine:
        python -m wbia.algo.graph.core _cm_training_pairs

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.graph.core import *  # NOQA
        >>> infr = testdata_infr('PZ_MTEST')
        >>> infr.exec_matching(cfgdict={
        >>>     'can_match_samename': True,
        >>>     'K': 4,
        >>>     'Knorm': 1,
        >>>     'prescore_method': 'csum',
        >>>     'score_method': 'csum'
        >>> })
        >>> exec(ut.execstr_funckw(infr._cm_training_pairs))
        >>> rng = np.random.RandomState(42)
        >>> aid_pairs = np.array(infr._cm_training_pairs(rng=rng))
        >>> print(len(aid_pairs))
        >>> assert np.sum(aid_pairs.T[0] == aid_pairs.T[1]) == 0
    """
    if qreq_ is None:
        cm_list = infr.cm_list
        qreq_ = infr.qreq_
    ibs = infr.ibs
    aid_pairs = []
    dnids = qreq_.get_qreq_annot_nids(qreq_.daids)
    rng = ut.ensure_rng(rng)
    for cm in ut.ProgIter(cm_list, lbl='building pairs'):
        all_gt_aids = cm.get_top_gt_aids(ibs)
        all_gf_aids = cm.get_top_gf_aids(ibs)
        gt_aids = ut.take_percentile_parts(all_gt_aids, top_gt, mid_gt, bot_gt)
        gf_aids = ut.take_percentile_parts(all_gf_aids, top_gf, mid_gf, bot_gf)
        # get unscored groundtruth examples
        unscored_gt_aids = [
            aid for aid in qreq_.daids[cm.qnid == dnids]
            if aid not in cm.daid2_idx
        ]
        rand_gt_aids = ut.random_sample(unscored_gt_aids, rand_gt, rng=rng)
        # randomly sample groundfalse examples
        # gf_aids = cm.get_groundfalse_daids()
        _gf_aids = qreq_.daids.compress(cm.qnid != dnids)
        # gf_aids = ibs.get_annot_groundfalse(cm.qaid, daid_list=qreq_.daids)
        rand_gf_aids = ut.random_sample(_gf_aids, rand_gf, rng=rng).tolist()
        chosen_daids = ut.unique(gt_aids + gf_aids + rand_gf_aids + rand_gt_aids)
        aid_pairs.extend(
            [(cm.qaid, aid) for aid in chosen_daids if cm.qaid != aid])
    return aid_pairs