def init_test_mode(infr): from ibeis.algo.graph import nx_dynamic_graph infr.print('init_test_mode') infr.test_mode = True # infr.edge_truth = {} infr.metrics_list = [] infr.test_state = { 'n_decision': 0, 'n_algo': 0, 'n_manual': 0, 'n_true_merges': 0, 'n_error_edges': 0, 'confusion': None, } infr.test_gt_pos_graph = nx_dynamic_graph.DynConnGraph() infr.test_gt_pos_graph.add_nodes_from(infr.aids) infr.nid_to_gt_cc = ut.group_items(infr.aids, infr.orig_name_labels) infr.node_truth = ut.dzip(infr.aids, infr.orig_name_labels) # infr.real_n_pcc_mst_edges = sum( # len(cc) - 1 for cc in infr.nid_to_gt_cc.values()) # ut.cprint('real_n_pcc_mst_edges = %r' % ( # infr.real_n_pcc_mst_edges,), 'red') infr.metrics_list = [] infr.nid_to_gt_cc = ut.group_items(infr.aids, infr.orig_name_labels) infr.real_n_pcc_mst_edges = sum( len(cc) - 1 for cc in infr.nid_to_gt_cc.values()) infr.print('real_n_pcc_mst_edges = %r' % ( infr.real_n_pcc_mst_edges,), color='red')
def compute_annot_occurrence_ids(ibs, aid_list): from ibeis.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()} return occurid2_aids
def find_duplicates(index): # fpaths = list(index.files.keys()) files = list(index.files.values()) print('Grouping {} files'.format(len(files))) grouped = ut.group_items(files, [f.nbytes for f in files]) print('Found {} groups'.format(len(grouped))) potential_dups = {k: v for k, v in grouped.items() if len(v) > 1} print('Found {} potential dups by nbytes'.format(len(potential_dups))) GB = 2**30 # NOQA MB = 2**20 # NOQA max_bytes = 10 * MB min_bytes = 64 * MB duplicates = [] for k, fs in ut.ProgIter(potential_dups.items(), freq=1): names = [f.n for f in fs] if ut.allsame(names): # Don't do big files yet if k < max_bytes and k > min_bytes: if ut.allsame([f.hashid for f in fs]): duplicates.extend(fs) for f1, f2 in ut.combinations(fs, 2): f1.duplicates.add(f2) f2.duplicates.add(f1) def dpath_similarity(index, dpath1, dpath2): d1 = index[dpath1] d2 = index[dpath2] set1 = {f.hashid for f in ut.ProgIter(d1.files)} set2 = {f.hashid for f in ut.ProgIter(d2.files)} # n_isect = len(set1.intersection(set2)) size1, size2 = map(len, (set1, set2)) # minsize = min(size1, size2) # sim_measures = (n_isect, n_isect / minsize) return ut.set_overlaps(set1, set2) # return sim_measures similarities = {} r_to_dup = ut.group_items(duplicates, [p.r for p in duplicates]) for dpath, dups in r_to_dup.items(): # Check to see if the duplicates all point to the same dir f = dups[0] # NOQA common_dpath = set.intersection(*[{_.r for _ in f.duplicates} for f in dups]) for other in common_dpath: sim_measures = dpath_similarity(index, dpath, other) similarities[(dpath, other)] = sim_measures print(ut.repr4(similarities, si=True, nl=2))
def find_duplicates(index): # fpaths = list(index.files.keys()) files = list(index.files.values()) print('Grouping {} files'.format(len(files))) grouped = ut.group_items(files, [f.nbytes for f in files]) print('Found {} groups'.format(len(grouped))) potential_dups = {k: v for k, v in grouped.items() if len(v) > 1} print('Found {} potential dups by nbytes'.format(len(potential_dups))) GB = 2 ** 30 # NOQA MB = 2 ** 20 # NOQA max_bytes = 10 * MB min_bytes = 64 * MB duplicates = [] for k, fs in ut.ProgIter(potential_dups.items(), freq=1): names = [f.n for f in fs] if ut.allsame(names): # Don't do big files yet if k < max_bytes and k > min_bytes: if ut.allsame([f.hashid for f in fs]): duplicates.extend(fs) for f1, f2 in ut.combinations(fs, 2): f1.duplicates.add(f2) f2.duplicates.add(f1) def dpath_similarity(index, dpath1, dpath2): d1 = index[dpath1] d2 = index[dpath2] set1 = {f.hashid for f in ut.ProgIter(d1.files)} set2 = {f.hashid for f in ut.ProgIter(d2.files)} # n_isect = len(set1.intersection(set2)) size1, size2 = map(len, (set1, set2)) # minsize = min(size1, size2) # sim_measures = (n_isect, n_isect / minsize) return ut.set_overlaps(set1, set2) # return sim_measures similarities = {} r_to_dup = ut.group_items(duplicates, [p.r for p in duplicates]) for dpath, dups in r_to_dup.items(): # Check to see if the duplicates all point to the same dir f = dups[0] # NOQA common_dpath = set.intersection(*[ {_.r for _ in f.duplicates} for f in dups]) for other in common_dpath: sim_measures = dpath_similarity(index, dpath, other) similarities[(dpath, other)] = sim_measures print(ut.repr4(similarities, si=True, nl=2))
def testdb2_stuff(): """ tar -zcvf testdb2.tar.gz testdb2/ """ import ibeis ibs = ibeis.opendb('testdb2') #ibs.ensure_contributor_rowids() gid_list = ibs.get_valid_gids() # Group gids by species image_species_list = ut.get_list_column( ibs.unflat_map(ibs.get_annot_species_rowids, ibs.get_image_aids(gid_list)), 0) new_contrib_rowid1 = ibs.add_new_temp_contributor(offset=len(ibs.get_valid_contrib_rowids())) new_contrib_rowid2 = ibs.add_new_temp_contributor(offset=len(ibs.get_valid_contrib_rowids())) gids1, gids2 = list(ut.group_items(gid_list, image_species_list).values()) party_rowids = ibs.add_party(['TestCar1', 'TestCar2']) partyid1, partyid2 = party_rowids ibs.set_image_contributor_rowid(gids1, [new_contrib_rowid1] * len(gids1)) ibs.set_image_contributor_rowid(gids2, [new_contrib_rowid2] * len(gids2)) ibs.set_image_party_rowids(gids1, [partyid1] * len(gids1)) ibs.set_image_party_rowids(gids2, [partyid2] * len(gids2))
def group_review(): prefill = request.args.get('prefill', '') if len(prefill) > 0: ibs = current_app.ibs aid_list = ibs.get_valid_aids() bad_species_list, bad_viewpoint_list = ibs.validate_annot_species_viewpoint_cnn(aid_list) GROUP_BY_PREDICTION = True if GROUP_BY_PREDICTION: grouped_dict = ut.group_items(bad_viewpoint_list, ut.get_list_column(bad_viewpoint_list, 3)) grouped_list = grouped_dict.values() regrouped_items = ut.flatten(ut.sortedby(grouped_list, map(len, grouped_list))) candidate_aid_list = ut.get_list_column(regrouped_items, 0) else: candidate_aid_list = [ bad_viewpoint[0] for bad_viewpoint in bad_viewpoint_list] elif request.args.get('aid_list', None) is not None: aid_list = request.args.get('aid_list', '') if len(aid_list) > 0: aid_list = aid_list.replace('[', '') aid_list = aid_list.replace(']', '') aid_list = aid_list.strip().split(',') candidate_aid_list = [ int(aid_.strip()) for aid_ in aid_list ] else: candidate_aid_list = '' else: candidate_aid_list = '' return appf.template(None, 'group_review', candidate_aid_list=candidate_aid_list, mode_list=appf.VALID_TURK_MODES)
def get_encounter_num_names_with_exemplar(ibs, eid_list): r""" RESTful: Method: GET URL: /api/encounter/num_names_with_exemplar/ Example: >>> # ENABLE_DOCTEST >>> from ibeis.control.manual_encounter_funcs import * # NOQA >>> import ibeis # NOQA >>> ibs = ibeis.opendb('testdb1') >>> eid_list = ibs._get_all_encounter_rowids() >>> num_annots_reviewed_list = ibs.get_encounter_num_annotmatch_reviewed(eid_list) """ aids_list = ibs.get_encounter_custom_filtered_aids(eid_list) exflags_list = ibs.unflat_map(ibs.get_annot_exemplar_flags, aids_list) nids_list = ibs.unflat_map(ibs.get_annot_name_rowids, aids_list) groups_list = [ut.group_items(exflags, nids) for exflags, nids in zip(exflags_list, nids_list)] #num_names_list = [len(groups) for groups in groups_list] num_exemplared_names_list = [ sum([any(exflags) for exflags in six.itervalues(groups)]) for groups in groups_list ] return num_exemplared_names_list
def get_imageset_num_names_with_exemplar(ibs, imgsetid_list): r""" RESTful: Method: GET URL: /api/imageset/num_names_with_exemplar/ Example: >>> # ENABLE_DOCTEST >>> from ibeis.control.manual_imageset_funcs import * # NOQA >>> import ibeis # NOQA >>> ibs = ibeis.opendb('testdb1') >>> imgsetid_list = ibs._get_all_imageset_rowids() >>> num_annots_reviewed_list = ibs.get_imageset_num_annotmatch_reviewed(imgsetid_list) """ aids_list = ibs.get_imageset_custom_filtered_aids(imgsetid_list) exflags_list = ibs.unflat_map(ibs.get_annot_exemplar_flags, aids_list) nids_list = ibs.unflat_map(ibs.get_annot_name_rowids, aids_list) groups_list = [ ut.group_items(exflags, nids) for exflags, nids in zip(exflags_list, nids_list) ] #num_names_list = [len(groups) for groups in groups_list] num_exemplared_names_list = [ sum([any(exflags) for exflags in six.itervalues(groups)]) for groups in groups_list ] return num_exemplared_names_list
def find_needsmove_to_other(self, other): hash1 = self.get_prop('md5_stride') hash2 = other.get_prop('md5_stride') idxs1 = list(range(len(hash1))) hash_to_idxs = ut.group_items(idxs1, hash1) # Find what we have that other doesnt have and move it there other_missing = set(hash1).difference(hash2) missing_idxs1 = ut.flatten(ut.take(hash_to_idxs, other_missing)) data = ut.ColumnLists({ 'idx': missing_idxs1, 'fname': self.get_prop('fname', missing_idxs1), 'dname': self.get_prop('dname', missing_idxs1), 'full_path': self.get_prop('full_path', missing_idxs1), 'nbytes': self.get_prop('nbytes', missing_idxs1), }) data = data.compress([f != 'Thumbs.db' for f in data['fname']]) data['ext'] = self.get_prop('ext', data['idx']) ut.dict_hist(data['ext']) data.print(ignore=['full_path', 'dname'])
def report_partitioning_statistics(new_reduced_joint): # compute partitioning statistics import vtool as vt vals, idxs = vt.group_indices(new_reduced_joint.values.ravel()) #groupsize = list(map(len, idxs)) #groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs) all_states = new_reduced_joint._row_labels(asindex=True) clusterstats = [tuple(sorted(list(ut.dict_hist(a).values()))) for a in all_states] grouped_vals = ut.group_items(new_reduced_joint.values.ravel(), clusterstats) #probs_assigned_to_clustertype = [( # sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a) # for a, b in grouped_vals.items()] probs_assigned_to_clustertype = [( ut.dict_hist(np.array(b).round(decimals=5)), a) for a, b in grouped_vals.items()] sortx = ut.argsort([max(c[0].keys()) for c in probs_assigned_to_clustertype]) probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype, sortx) # This list of 2-tuples with the first item being the unique # probabilies that are assigned to a cluster type along with the number # of times they were assigned. A cluster type is the second item. Every # number represents how many annotations were assigned to a specific # label. The length of that list is the number of total labels. For # all low scores you will see [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]] # indicating that that the assignment of everyone to a different label happend once # where the probability was somenum and a 800 times where the probability was 0. #print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items()) #z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()]) print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
def _print_previous_loop_statistics(infr, count): # Print stats about what happend in the this loop history = infr.metrics_list[-count:] recover_blocks = ut.group_items([ (k, sum(1 for i in g)) for k, g in it.groupby(ut.take_column(history, 'recovering')) ]).get(True, []) infr.print(( 'Recovery mode entered {} times, ' 'made {} recovery decisions.').format( len(recover_blocks), sum(recover_blocks)), color='green') testaction_hist = ut.dict_hist(ut.take_column(history, 'test_action')) infr.print( 'Test Action Histogram: {}'.format( ut.repr4(testaction_hist, si=True)), color='yellow') if infr.params['inference.enabled']: action_hist = ut.dict_hist( ut.emap(frozenset, ut.take_column(history, 'action'))) infr.print( 'Inference Action Histogram: {}'.format( ub.repr2(action_hist, si=True)), color='yellow') infr.print( 'Decision Histogram: {}'.format(ut.repr2(ut.dict_hist( ut.take_column(history, 'pred_decision') ), si=True)), color='yellow') infr.print( 'User Histogram: {}'.format(ut.repr2(ut.dict_hist( ut.take_column(history, 'user_id') ), si=True)), color='yellow')
def find_connecting_edges(infr): """ Searches for a small set of edges, which if reviewed as positive would ensure that each PCC is k-connected. Note that in somes cases this is not possible """ label = 'name_label' node_to_label = infr.get_node_attrs(label) label_to_nodes = ut.group_items(node_to_label.keys(), node_to_label.values()) # k = infr.params['redun.pos'] k = 1 new_edges = [] prog = ut.ProgIter( list(label_to_nodes.keys()), label='finding connecting edges', enabled=infr.verbose > 0, ) for nid in prog: nodes = set(label_to_nodes[nid]) G = infr.pos_graph.subgraph(nodes, dynamic=False) impossible = nxu.edges_inside(infr.neg_graph, nodes) impossible |= nxu.edges_inside(infr.incomp_graph, nodes) candidates = set(nx.complement(G).edges()) candidates.difference_update(impossible) aug_edges = nxu.k_edge_augmentation(G, k=k, avail=candidates) new_edges += aug_edges prog.ensure_newline() return new_edges
def groundtruth_merge_loop(infr): """ Finds edges to make sure the ground truth is merged """ from wbia.algo.graph import nx_utils as nxu infr.print('==============================', color='white') infr.print('--- GROUNDTRUTH MERGE LOOP ---', color='white') assert infr.test_mode, 'only run this in test mode' group = ut.group_items(infr.aids, infr.orig_name_labels) fix_edges = [] # Tell the oracle its time to get serious # infr.oracle.normal_accuracy = 1.0 # infr.oracle.recover_accuracy = 1.0 for gt_nid, aids in group.items(): pos_sub = infr.pos_graph.subgraph(aids) aug_edges = nxu.edge_augmentation(pos_sub, k=1, partial=True) fix_edges.extend(aug_edges) if infr.test_mode: infr.ensure_edges_from(fix_edges) infr.apply_edge_truth(fix_edges) for edge in fix_edges: try: feedback = infr.request_user_review(edge) except ReviewCanceled: raise infr.add_feedback(edge=edge, **feedback) infr.recovery_review_loop(verbose=0)
def testdb2_stuff(): """ tar -zcvf testdb2.tar.gz testdb2/ """ import ibeis ibs = ibeis.opendb('testdb2') #ibs.ensure_contributor_rowids() gid_list = ibs.get_valid_gids() # Group gids by species image_species_list = ut.get_list_column( ibs.unflat_map(ibs.get_annot_species_rowids, ibs.get_image_aids(gid_list)), 0) new_contributor_rowid1 = ibs.add_new_temp_contributor( offset=len(ibs.get_valid_contributor_rowids())) new_contributor_rowid2 = ibs.add_new_temp_contributor( offset=len(ibs.get_valid_contributor_rowids())) gids1, gids2 = list(ut.group_items(gid_list, image_species_list).values()) party_rowids = ibs.add_party(['TestCar1', 'TestCar2']) partyid1, partyid2 = party_rowids ibs.set_image_contributor_rowid(gids1, [new_contributor_rowid1] * len(gids1)) ibs.set_image_contributor_rowid(gids2, [new_contributor_rowid2] * len(gids2)) ibs.set_image_party_rowids(gids1, [partyid1] * len(gids1)) ibs.set_image_party_rowids(gids2, [partyid2] * len(gids2))
def oracle_review(sim): queue_params = { 'pos_diameter': None, 'neg_diameter': None, } infr = sim.infr prev = infr.verbose infr.verbose = 0 # rng = np.random.RandomState(0) infr = sim.infr primary_truth = sim.primary_truth review_edges = infr.generate_reviews(**queue_params) max_reviews = 1000 for count, (aid1, aid2) in enumerate(ut.ProgIter(review_edges)): state = primary_truth.loc[(aid1, aid2)].idxmax() tags = [] infr.add_feedback(aid1, aid2, state, tags, apply=True, rectify=False, user_id='oracle', confidence='absolutely_sure') if count > max_reviews: break infr.verbose = prev sim.results['max_reviews'] = max_reviews n_clusters, n_inconsistent = infr.relabel_using_reviews(rectify=False) assert n_inconsistent == 0, 'should not create any inconsistencies' sim.results['n_user_clusters'] = n_clusters # infr.apply_review_inference() curr_decisions = infr.edge_attr_df('decision') curr_truth = primary_truth.loc[curr_decisions.index].idxmax(axis=1) n_user_mistakes = curr_decisions != curr_truth sim.results['n_user_mistakes'] = sum(n_user_mistakes) gt_clusters = ut.group_pairs(infr.gen_node_attrs('orig_name_label')) curr_clusters = ut.group_pairs(infr.gen_node_attrs('name_label')) compare_results = compare_groups(list(gt_clusters.values()), list(curr_clusters.values())) sim.results.update(ut.map_vals(len, compare_results)) common_per_num = ut.group_items(compare_results['common'], map(len, compare_results['common'])) sumafter = 3 greater = [i for i in common_per_num.keys() if i > sumafter] common_per_num['>%s' % sumafter] = ut.flatten( ut.take(common_per_num, greater)) ut.delete_keys(common_per_num, greater) for k, v in common_per_num.items(): sim.results['common@' + str(k)] = len(v) sim.results['n_names_common'] = len(compare_results['common'])
def view_file_in_directory(fpaths): import utool as ut fpaths = ut.ensure_iterable(fpaths) fnames = [basename(f) for f in fpaths] dpaths = [dirname(f) for f in fpaths] dpath_to_fnames = ut.group_items(fnames, dpaths) for dpath, fnames in dpath_to_fnames.items(): ut.view_directory(dpath, fnames[0], verbose=False)
def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys)) sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys]) # Filter 0's sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0} return sextext2_nAnnots
def predict_proba_df(verif, edges): """ CommandLine: python -m wbia.algo.graph.demo DummyVerif.predict_edges Example: >>> # ENABLE_DOCTEST >>> from wbia.algo.graph.demo import * # NOQA >>> from wbia.algo.graph import demo >>> import networkx as nx >>> kwargs = dict(num_pccs=40, size=2) >>> infr = demo.demodata_infr(**kwargs) >>> verif = infr.dummy_verif >>> edges = list(infr.graph.edges()) >>> probs = verif.predict_proba_df(edges) >>> #print('scores = %r' % (scores,)) >>> #hashid = ut.hash_data(scores) >>> #print('hashid = %r' % (hashid,)) >>> #assert hashid == 'cdlkytilfeqgmtsihvhqwffmhczqmpil' """ infr = verif.infr edges = list(it.starmap(verif.infr.e_, edges)) prob_cache = infr.task_probs['match_state'] is_miss = np.array([e not in prob_cache for e in edges]) # is_hit = ~is_miss if np.any(is_miss): miss_edges = ut.compress(edges, is_miss) miss_truths = [verif._get_truth(edge) for edge in miss_edges] grouped_edges = ut.group_items(miss_edges, miss_truths, sorted_=False) # Need to make this determenistic too states = [POSTV, NEGTV, INCMP] for key in sorted(grouped_edges.keys()): group = grouped_edges[key] probs0 = randn( shape=[len(group)], rng=verif.rng, a_max=1, a_min=0, **verif.dummy_params[key], ) # Just randomly assign other probs probs1 = verif.rng.rand(len(group)) * (1 - probs0) probs2 = 1 - (probs0 + probs1) for edge, probs in zip(group, zip(probs0, probs1, probs2)): prob_cache[edge] = ut.dzip(states, probs) from wbia.algo.graph import nx_utils as nxu import pandas as pd probs = pd.DataFrame( ut.take(prob_cache, edges), index=nxu.ensure_multi_index(edges, ('aid1', 'aid2')), ) return probs
def level_order(graph): import utool as ut node_to_level = ut.nx_dag_node_rank(graph) #source = ut.nx_source_nodes(graph)[0] #longest_paths = dict([(target, dag_longest_path(graph, source, target)) # for target in graph.nodes()]) #node_to_level = ut.map_dict_vals(len, longest_paths) grouped = ut.group_items(node_to_level.keys(), node_to_level.values()) levels = ut.take(grouped, range(1, len(grouped) + 1)) return levels
def assign_to_words(invindex, idx2_vec): idx2_wx, _idx2_wdist = invindex.wordflann.nn_index(idx2_vec, 1) if True: assign_df = pd.DataFrame(idx2_wx, columns=['wordindex']) grouping = assign_df.groupby('wordindex') wx2_idxs = grouping.wordindex.indices else: # TODO: replace with pandas groupby idx_list = list(range(len(idx2_wx))) wx2_idxs = utool.group_items(idx_list, idx2_wx.tolist()) return wx2_idxs, idx2_wx
def print_factors(model, factor_list): if hasattr(model, 'var2_cpd'): semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list] else: semtypes = [0] * len(factor_list) for type_, factors in ut.group_items(factor_list, semtypes).items(): logger.info('Result Factors (%r)' % (type_, )) factors = ut.sortedby(factors, [f.variables[0] for f in factors]) for fs_ in ut.ichunks(factors, 4): ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]), 'yellow')
def print_dataset_info(data, labels, key): labelhist = {key: len(val) for key, val in ut.group_items(labels, labels).items()} stats_dict = ut.get_stats(data.ravel()) ut.delete_keys(stats_dict, ['shape', 'nMax', 'nMin']) print('[dataset] Dataset Info: ') print('[dataset] * Data:') print('[dataset] %s_data(shape=%r, dtype=%r)' % (key, data.shape, data.dtype)) print('[dataset] %s_memory(data) = %r' % (key, ut.get_object_size_str(data),)) print('[dataset] %s_stats(data) = %s' % (key, ut.repr2(stats_dict, precision=2),)) print('[dataset] * Labels:') print('[dataset] %s_labels(shape=%r, dtype=%r)' % (key, labels.shape, labels.dtype)) print('[dataset] %s_label histogram = %s' % (key, ut.repr2(labelhist)))
def print_factors(model, factor_list): if hasattr(model, 'var2_cpd'): semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list] else: semtypes = [0] * len(factor_list) for type_, factors in ut.group_items(factor_list, semtypes).items(): print('Result Factors (%r)' % (type_,)) factors = ut.sortedby(factors, [f.variables[0] for f in factors]) for fs_ in ut.ichunks(factors, 4): ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]), 'yellow')
def find_nonunique_names(self): fnames = map(basename, self.rel_fpath_list) duplicate_map = ut.find_duplicate_items(fnames) groups = [] for dupname, idxs in duplicate_map.items(): uuids = self.get_prop('uuids', idxs) fpaths = self.get_prop('abs', idxs) groups = ut.group_items(fpaths, uuids) if len(groups) > 1: if all(x == 1 for x in map(len, groups.values())): # All groups are different, this is an simpler case print(ut.repr2(groups, nl=3)) else: # Need to handle the multi-item groups first pass
def get_dependencies(depc, tablename): """ gets level dependences from root to tablename CommandLine: python -m ibeis.depends_cache --exec-get_dependencies --show Example: >>> # ENABLE_DOCTEST >>> from ibeis.depends_cache import * # NOQA >>> depc = testdata_depc() >>> tablename = 'fgweight' >>> result = ut.repr3(depc.get_dependencies(tablename), nl=1) >>> print(result) [ ['dummy_annot'], ['chip', 'probchip'], ['keypoint'], ['fgweight'], ] Example: >>> # ENABLE_DOCTEST >>> from ibeis.depends_cache import * # NOQA >>> depc = testdata_depc() >>> tablename = 'spam' >>> result = ut.repr3(depc.get_dependencies(tablename), nl=1) >>> print(result) [ ['dummy_annot'], ['chip', 'probchip'], ['keypoint'], ['fgweight'], ['spam'], ] """ root = depc.root_tablename children_, parents_ = list(zip(*depc.get_edges())) child_to_parents = ut.group_items(children_, parents_) to_root = {tablename: ut.paths_to_root(tablename, root, child_to_parents)} from_root = ut.reverse_path(to_root, root, child_to_parents) dependency_levels_ = ut.get_levels(from_root) dependency_levels = ut.longest_levels(dependency_levels_) #print('child_to_parents = %s' % (ut.repr3(child_to_parents),)) #print('to_root = %r' % (to_root,)) #print('from_root = %r' % (from_root,)) return dependency_levels
def check_baseline_results(sim): import networkx as nx infr = sim.infr n_names_possible = 0 real_groups = ut.group_pairs(infr.gen_node_attrs('orig_name_label')) possible_clusters = [] for nid, nodes in real_groups.items(): if len(nodes) == 1: possible_clusters.append(nodes) n_names_possible += 1 continue cc_cand_edges = list(ut.nx_edges_between(infr.graph, nodes)) cc = ut.nx_from_node_edge(nodes, cc_cand_edges) mst = nx.minimum_spanning_tree(cc) ccs = list(nx.connected_components(mst)) possible_clusters.extend(ccs) n_names_possible += (len(ccs)) sumafter = 3 best_possible_compare_results = compare_groups( list(real_groups.values()), list(possible_clusters)) possible_per_num = ut.map_vals( len, ut.group_items(best_possible_compare_results['common'], map(len, best_possible_compare_results['common']))) greater = [i for i in possible_per_num.keys() if i > sumafter] possible_per_num['>%s' % sumafter] = sum( ut.take(possible_per_num, greater)) ut.delete_keys(possible_per_num, greater) for k, v in possible_per_num.items(): sim.results['possible@' + str(k)] = v sim.results['possible'] = len(best_possible_compare_results['common']) # Measure the number of real names in the test (per number of annots) real_per_num = ut.dict_hist(map(len, real_groups.values())) greater = [i for i in real_per_num.keys() if i > sumafter] real_per_num['>%s' % sumafter] = sum(ut.take(real_per_num, greater)) ut.delete_keys(real_per_num, greater) for k, v in real_per_num.items(): sim.results['real@' + str(k)] = v sim.results['n_names_possible'] = n_names_possible sim.results['n_names_real'] = len(real_groups) sim.results['real'] = len(real_groups)
def print_graph_connections(infr, label='orig_name_label'): """ label = 'orig_name_label' """ node_to_label = infr.get_node_attrs(label) label_to_nodes = ut.group_items(node_to_label.keys(), node_to_label.values()) logger.info('CC info') for name, cc in label_to_nodes.items(): logger.info('\nname = %r' % (name,)) edges = list(nxu.edges_between(infr.graph, cc)) logger.info(infr.get_edge_df_text(edges)) logger.info('CC pair info') for (n1, cc1), (n2, cc2) in it.combinations(label_to_nodes.items(), 2): if n1 == n2: continue logger.info('\nname_pair = {}-vs-{}'.format(n1, n2)) edges = list(nxu.edges_between(infr.graph, cc1, cc2)) logger.info(infr.get_edge_df_text(edges))
def find_clique_edges(infr, label='name_label'): """ Augmenting edges that would complete each the specified cliques. (based on the group inferred from `label`) Args: label (str): node attribute to use as the group id to form the cliques. """ node_to_label = infr.get_node_attrs(label) label_to_nodes = ut.group_items(node_to_label.keys(), node_to_label.values()) new_edges = [] for label, nodes in label_to_nodes.items(): for edge in it.combinations(nodes, 2): if infr.edge_decision(edge) == UNREV: new_edges.append(edge) # if infr.has_edge(edge): # else: # new_edges.append(edge) return new_edges
def __debug_win_msvcr(): import utool as ut fname = 'msvcr*.dll' key_list = ['PATH'] found = ut.search_env_paths(fname, key_list) fpaths = ut.unique(ut.flatten(found.values())) fpaths = ut.lmap(ut.ensure_unixslash, fpaths) from os.path import basename dllnames = [basename(x) for x in fpaths] grouped = dict(ut.group_items(fpaths, dllnames)) print(ut.dict_str(grouped, nl=4)) keytoid = {} for key, vals in grouped.items(): infos = ut.lmap(ut.get_file_nBytes, vals) #infos = ut.lmap(ut.get_file_uuid, vals) #uuids = [ut.get_file_uuid(val) for val in vals] keytoid[key] = list(zip(infos, vals)) ut.print_dict(keytoid, nl=2)
def __debug_win_msvcr(): import utool as ut fname = 'msvcr*.dll' key_list = ['PATH'] found = ut.search_env_paths(fname, key_list) fpaths = ut.unique(ut.flatten(found.values())) fpaths = ut.lmap(ut.ensure_unixslash, fpaths) from os.path import basename dllnames = [basename(x) for x in fpaths] grouped = dict(ut.group_items(fpaths, dllnames)) print(ut.repr4(grouped, nl=4)) keytoid = { } for key, vals in grouped.items(): infos = ut.lmap(ut.get_file_nBytes, vals) #infos = ut.lmap(ut.get_file_uuid, vals) #uuids = [ut.get_file_uuid(val) for val in vals] keytoid[key] = list(zip(infos, vals)) ut.print_dict(keytoid, nl=2)
def __init__(split_index, ibs, daid_list, num_forests=8): print('[nnsindex] make NNSplitIndex over %d annots' % (len(daid_list),)) aid_list = daid_list nid_list = ibs.get_annot_nids(aid_list) #flag_list = ibs.get_annot_exemplar_flag(aid_list) nid2_aids = utool.group_items(aid_list, nid_list) key_list = nid2_aids.keys() aids_list = nid2_aids.values() isunknown_list = ibs.is_nid_unknown(key_list) known_aids = utool.filterfalse_items(aids_list, isunknown_list) uknown_aids = utool.flatten(utool.filter_items(aids_list, isunknown_list)) num_forests_ = min(max(map(len, aids_list)), num_forests) # Put one name per forest forest_aids, overflow_aids = utool.sample_zip(known_aids, num_forests_, allow_overflow=True, per_bin=1) forest_indexes = [] extra_indexes = [] for tx, aids in enumerate(forest_aids): print('[nnsindex] building forest %d/%d with %d aids' % (tx + 1, num_forests_, len(aids))) if len(aids) > 0: nn_index = NNIndex(ibs, aids) forest_indexes.append(nn_index) if len(overflow_aids) > 0: print('[nnsindex] building overflow forest') overflow_index = NNIndex(ibs, overflow_aids) extra_indexes.append(overflow_index) if len(uknown_aids) > 0: print('[nnsindex] building unknown forest') unknown_index = NNIndex(ibs, uknown_aids) extra_indexes.append(unknown_index) #print('[nnsindex] building normalizer forest') # TODO split_index.forest_indexes = forest_indexes split_index.extra_indexes = extra_indexes
def get_dependants(depc, tablename): """ gets level dependences table to the leaves Example: >>> # ENABLE_DOCTEST >>> from ibeis.depends_cache import * # NOQA >>> depc = testdata_depc() >>> tablename = 'chip' >>> result = ut.repr3(depc.get_dependants(tablename), nl=1) >>> print(result) [ ['chip'], ['keypoint'], ['fgweight', 'descriptor'], ['spam'], ] Example: >>> # ENABLE_DOCTEST >>> from ibeis.depends_cache import * # NOQA >>> depc = testdata_depc() >>> tablename = 'spam' >>> result = ut.repr3(depc.get_dependants(tablename), nl=1) >>> print(result) [ ['spam'], ] """ children_, parents_ = list(zip(*depc.get_edges())) parent_to_children = ut.group_items(parents_, children_) to_leafs = {tablename: ut.path_to_leafs(tablename, parent_to_children)} dependency_levels_ = ut.get_levels(to_leafs) dependency_levels = ut.longest_levels(dependency_levels_) return dependency_levels
def find_mst_edges(infr, label='name_label'): """ Returns edges to augment existing PCCs (by label) in order to ensure they are connected with positive edges. CommandLine: python -m wbia.algo.graph.mixin_helpers find_mst_edges --profile Example: >>> # ENABLE_DOCTEST >>> from wbia.algo.graph.mixin_helpers import * # NOQA >>> import wbia >>> ibs = wbia.opendb(defaultdb='PZ_MTEST') >>> infr = wbia.AnnotInference(ibs, 'all', autoinit=True) >>> label = 'orig_name_label' >>> label = 'name_label' >>> infr.find_mst_edges() >>> infr.ensure_mst() Ignore: old_mst_edges = [ e for e, d in infr.edges(data=True) if d.get('user_id', None) == 'algo:mst' ] infr.graph.remove_edges_from(old_mst_edges) infr.pos_graph.remove_edges_from(old_mst_edges) infr.neg_graph.remove_edges_from(old_mst_edges) infr.incomp_graph.remove_edges_from(old_mst_edges) """ # Find clusters by labels node_to_label = infr.get_node_attrs(label) label_to_nodes = ut.group_items(node_to_label.keys(), node_to_label.values()) weight_heuristic = infr.ibs is not None if weight_heuristic: annots = infr.ibs.annots(infr.aids) node_to_time = ut.dzip(annots, annots.time) node_to_view = ut.dzip(annots, annots.viewpoint_code) enabled_heuristics = { 'view_weight', 'time_weight', } def _heuristic_weighting(nodes, avail_uv): avail_uv = np.array(avail_uv) weights = np.ones(len(avail_uv)) if 'view_weight' in enabled_heuristics: from vtool import _rhomb_dist view_edge = [(node_to_view[u], node_to_view[v]) for (u, v) in avail_uv] view_weight = np.array( [_rhomb_dist.VIEW_CODE_DIST[(v1, v2)] for (v1, v2) in view_edge] ) # Assume comparable by default and prefer undefined # more than probably not, but less than definately so. view_weight[np.isnan(view_weight)] = 1.5 # Prefer viewpoint 10x more than time weights += 10 * view_weight if 'time_weight' in enabled_heuristics: # Prefer linking annotations closer in time times = ut.take(node_to_time, nodes) maxtime = vt.safe_max(times, fill=1, nans=False) mintime = vt.safe_min(times, fill=0, nans=False) time_denom = maxtime - mintime # Try linking by time for lynx data time_delta = np.array( [abs(node_to_time[u] - node_to_time[v]) for u, v in avail_uv] ) time_weight = time_delta / time_denom weights += time_weight weights = np.array(weights) weights[np.isnan(weights)] = 1.0 avail = [(u, v, {'weight': w}) for (u, v), w in zip(avail_uv, weights)] return avail new_edges = [] prog = ut.ProgIter( list(label_to_nodes.keys()), label='finding mst edges', enabled=infr.verbose > 0, ) for nid in prog: nodes = set(label_to_nodes[nid]) if len(nodes) == 1: continue # We want to make this CC connected pos_sub = infr.pos_graph.subgraph(nodes, dynamic=False) impossible = set( it.starmap( e_, it.chain( nxu.edges_inside(infr.neg_graph, nodes), nxu.edges_inside(infr.incomp_graph, nodes), # nxu.edges_inside(infr.unknown_graph, nodes), ), ) ) if len(impossible) == 0 and not weight_heuristic: # Simple mst augmentation aug_edges = list(nxu.k_edge_augmentation(pos_sub, k=1)) else: complement = it.starmap(e_, nxu.complement_edges(pos_sub)) avail_uv = [(u, v) for u, v in complement if (u, v) not in impossible] if weight_heuristic: # Can do heuristic weighting to improve the MST avail = _heuristic_weighting(nodes, avail_uv) else: avail = avail_uv # logger.info(len(pos_sub)) try: aug_edges = list(nxu.k_edge_augmentation(pos_sub, k=1, avail=avail)) except nx.NetworkXUnfeasible: logger.info('Warning: MST augmentation is not feasible') logger.info('explicit negative edges might disconnect a PCC') aug_edges = list( nxu.k_edge_augmentation(pos_sub, 
k=1, avail=avail, partial=True) ) new_edges.extend(aug_edges) prog.ensure_newline() for edge in new_edges: assert not infr.graph.has_edge(*edge), 'alrady have edge={}'.format(edge) return new_edges
def __init__(self, tokens): if isinstance(tokens, six.string_types): tokens = tokenize_manacost(tokens) vals = ut.get_list_column(tokens, 0) types = ut.get_list_column(tokens, 1) self.type2_manas = dict(ut.group_items(vals, types))
def ingest_serengeti_mamal_cameratrap(species): """ Downloads data from Serengeti dryad server References: http://datadryad.org/resource/doi:10.5061/dryad.5pt92 Swanson AB, Kosmala M, Lintott CJ, Simpson RJ, Smith A, Packer C (2015) Snapshot Serengeti, high-frequency annotated camera trap images of 40 mammalian species in an African savanna. Scientific Data 2: 150026. http://dx.doi.org/10.1038/sdata.2015.26 Swanson AB, Kosmala M, Lintott CJ, Simpson RJ, Smith A, Packer C (2015) Data from: Snapshot Serengeti, high-frequency annotated camera trap images of 40 mammalian species in an African savanna. Dryad Digital Repository. http://dx.doi.org/10.5061/dryad.5pt92 Args: species (?): CommandLine: python -m ibeis.dbio.ingest_database --test-ingest_serengeti_mamal_cameratrap --species zebra_plains python -m ibeis.dbio.ingest_database --test-ingest_serengeti_mamal_cameratrap --species cheetah Example: >>> # SCRIPT >>> from ibeis.dbio.ingest_database import * # NOQA >>> import ibeis >>> species = ut.get_argval('--species', type_=str, default=ibeis.const.TEST_SPECIES.ZEB_PLAIN) >>> # species = ut.get_argval('--species', type_=str, default='cheetah') >>> result = ingest_serengeti_mamal_cameratrap(species) >>> print(result) """ 'https://snapshotserengeti.s3.msi.umn.edu/' import ibeis if species is None: code = 'ALL' elif species == 'zebra_plains': code = 'PZ' elif species == 'cheetah': code = 'CHTH' else: raise NotImplementedError() if species == 'zebra_plains': serengeti_sepcies = 'zebra' else: serengeti_sepcies = species print('species = %r' % (species,)) print('serengeti_sepcies = %r' % (serengeti_sepcies,)) dbname = code + '_Serengeti' print('dbname = %r' % (dbname,)) dbdir = ut.ensuredir(join(ibeis.sysres.get_workdir(), dbname)) print('dbdir = %r' % (dbdir,)) image_dir = ut.ensuredir(join(dbdir, 'images')) base_url = 'http://datadryad.org/bitstream/handle/10255' all_images_url = base_url + '/dryad.86392/all_images.csv' consensus_metadata_url = base_url + '/dryad.86348/consensus_data.csv' search_effort_url = base_url + '/dryad.86347/search_effort.csv' gold_standard_url = base_url + '/dryad.76010/gold_standard_data.csv' all_images_fpath = ut.grab_file_url(all_images_url, download_dir=dbdir) consensus_metadata_fpath = ut.grab_file_url(consensus_metadata_url, download_dir=dbdir) search_effort_fpath = ut.grab_file_url(search_effort_url, download_dir=dbdir) gold_standard_fpath = ut.grab_file_url(gold_standard_url, download_dir=dbdir) print('all_images_fpath = %r' % (all_images_fpath,)) print('consensus_metadata_fpath = %r' % (consensus_metadata_fpath,)) print('search_effort_fpath = %r' % (search_effort_fpath,)) print('gold_standard_fpath = %r' % (gold_standard_fpath,)) def read_csv(csv_fpath): import utool as ut csv_text = ut.read_from(csv_fpath) csv_lines = csv_text.split('\n') print(ut.list_str(csv_lines[0:2])) csv_data = [[field.strip('"').strip('\r') for field in line.split(',')] for line in csv_lines if len(line) > 0] csv_header = csv_data[0] csv_data = csv_data[1:] return csv_data, csv_header def download_image_urls(image_url_info_list): # Find ones that we already have print('Requested %d downloaded images' % (len(image_url_info_list))) full_gpath_list = [join(image_dir, basename(gpath)) for gpath in image_url_info_list] exists_list = [ut.checkpath(gpath) for gpath in full_gpath_list] image_url_info_list_ = ut.compress(image_url_info_list, ut.not_list(exists_list)) print('Already have %d/%d downloaded images' % ( len(image_url_info_list) - len(image_url_info_list_), len(image_url_info_list))) 
print('Need to download %d images' % (len(image_url_info_list_))) #import sys #sys.exit(0) # Download the rest imgurl_prefix = 'https://snapshotserengeti.s3.msi.umn.edu/' image_url_list = [imgurl_prefix + suffix for suffix in image_url_info_list_] for img_url in ut.ProgressIter(image_url_list, lbl='Downloading image'): ut.grab_file_url(img_url, download_dir=image_dir) return full_gpath_list # Data contains information about which events have which animals if False: species_class_csv_data, species_class_header = read_csv(gold_standard_fpath) species_class_eventid_list = ut.get_list_column(species_class_csv_data, 0) #gold_num_species_annots_list = ut.get_list_column(gold_standard_csv_data, 2) species_class_species_list = ut.get_list_column(species_class_csv_data, 2) #gold_count_list = ut.get_list_column(gold_standard_csv_data, 3) else: species_class_csv_data, species_class_header = read_csv(consensus_metadata_fpath) species_class_eventid_list = ut.get_list_column(species_class_csv_data, 0) species_class_species_list = ut.get_list_column(species_class_csv_data, 7) # Find the zebra events serengeti_sepcies_set = sorted(list(set(species_class_species_list))) print('serengeti_sepcies_hist = %s' % ut.dict_str(ut.dict_hist(species_class_species_list), key_order_metric='val')) #print('serengeti_sepcies_set = %s' % (ut.list_str(serengeti_sepcies_set),)) assert serengeti_sepcies in serengeti_sepcies_set, 'not a known seregeti species' species_class_chosen_idx_list = ut.list_where( [serengeti_sepcies == species_ for species_ in species_class_species_list]) chosen_eventid_list = ut.take(species_class_eventid_list, species_class_chosen_idx_list) print('Number of chosen species:') print(' * len(species_class_chosen_idx_list) = %r' % (len(species_class_chosen_idx_list),)) print(' * len(chosen_eventid_list) = %r' % (len(chosen_eventid_list),)) # Read info about which events have which images images_csv_data, image_csv_header = read_csv(all_images_fpath) capture_event_id_list = ut.get_list_column(images_csv_data, 0) image_url_info_list = ut.get_list_column(images_csv_data, 1) # Group photos by eventid eventid_to_photos = ut.group_items(image_url_info_list, capture_event_id_list) # Filter to only chosens unflat_chosen_url_infos = ut.dict_take(eventid_to_photos, chosen_eventid_list) chosen_url_infos = ut.flatten(unflat_chosen_url_infos) image_url_info_list = chosen_url_infos chosen_path_list = download_image_urls(chosen_url_infos) ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=True) gid_list_ = ibs.add_images(chosen_path_list, auto_localize=False) # NOQA # Attempt to automatically detect the annotations #aids_list = ibs.detect_random_forest(gid_list_, species) #aids_list #if False: # # remove non-zebra photos # from os.path import basename # base_gname_list = list(map(basename, zebra_url_infos)) # all_gname_list = ut.list_images(image_dir) # nonzebra_gname_list = ut.setdiff_ordered(all_gname_list, base_gname_list) # nonzebra_gpath_list = ut.fnames_to_fpaths(nonzebra_gname_list, image_dir) # ut.remove_fpaths(nonzebra_gpath_list) return ibs
def reasign_names1(ibs, aid_list=None, old_img2_names=None, common_prefix=''): r""" Changes the names in the IA-database to correspond to an older naming convention. If splits and merges were preformed tries to find the maximally consistent renaming scheme. Notes: For each annotation: * get the image * get the image full path * strip the full path down to the file name prefix: [ example /foo/bar/pic.jpg -> pic ] * make the name of the individual associated with that annotation be the file name prefix * save the new names to the image analysis database * wildbook will make a request to get all of the annotations, image file names, image names and animal ids CommandLine: python -m ibeis.scripts.name_recitifer rectify_names --show Example: >>> # DISABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='testdb1') >>> aid_list = None >>> common_prefix = '' >>> old_img2_names = None #['img_fred.png', '] >>> result = reasign_names1(ibs, aid_list, img_list, name_list) """ if aid_list is None: aid_list = ibs.get_valid_aids() # Group annotations by their current IA-name nid_list = ibs.get_annot_name_rowids(aid_list) nid2_aids = ut.group_items(aid_list, nid_list) unique_nids = list(nid2_aids.keys()) grouped_aids = list(nid2_aids.values()) # Get grouped images grouped_imgnames = ibs.unflat_map(ibs.get_annot_image_names, grouped_aids) # Assume a mapping from old image names to old names is given. # Or just hack it in the Lewa case. if old_img2_names is None: def get_name_from_gname(gname): from os.path import splitext gname_, ext = splitext(gname) assert gname_.startswith(common_prefix), 'prefix assumption is invalidated' gname_ = gname_[len(common_prefix):] return gname_ # Create mapping from image name to the desired "name" for the image. old_img2_names = {gname: get_name_from_gname(gname) for gname in ut.flatten(grouped_imgnames)} # Make the name of the individual associated with that annotation be the file name prefix grouped_oldnames = [ut.take(old_img2_names, gnames) for gnames in grouped_imgnames] # The task is now to map each name in unique_nids to one of these names # subject to the contraint that each name can only be used once. This is # solved using a maximum bipartite matching. The new names are the left # nodes, the old name are the right nodes, and grouped_oldnames definse the # adjacency matrix. # NOTE: In rare cases it may be impossible to find a correct labeling using # only old names. In this case new names will be created. new_name_text = find_consistent_labeling(grouped_oldnames) dry = False if not dry: # Save the new names to the image analysis database ibs.set_name_texts(unique_nids, new_name_text)
def download_sharks(XMLdata, number): """ cd ~/work/WS_ALL python -m ibeis.scripts.getshark >>> from ibeis.scripts.getshark import * # NOQA >>> url = 'www.whaleshark.org/listImages.jsp' >>> XMLdata = ut.url_read(url) >>> number = None """ # Prepare the output directory for writing, if it doesn't exist output_dir = 'sharkimages' ut.ensuredir(output_dir) dom = parseString(XMLdata) # Download files if number: maxCount = min(number, len(dom.getElementsByTagName('img'))) else: maxCount = len(dom.getElementsByTagName('img')) parsed_info = dict( img_url_list=[], localid_list=[], nameid_list=[], orig_fname_list=[], new_fname_list=[], ) print('Preparing to fetch %i files...' % maxCount) for shark in dom.getElementsByTagName('shark'): localCount = 0 for imageset in shark.getElementsByTagName('imageset'): for img in imageset.getElementsByTagName('img'): localCount += 1 img_url = img.getAttribute('href') orig_fname = split(img_url)[1] ext = splitext(orig_fname)[1].lower() nameid = shark.getAttribute('number') new_fname = '%s-%i%s' % ( nameid, localCount, ext) parsed_info['img_url_list'].append(img_url) parsed_info['nameid_list'].append(nameid) parsed_info['localid_list'].append(localCount) parsed_info['orig_fname_list'].append(orig_fname) parsed_info['new_fname_list'].append(new_fname) print('Parsed %i / %i files.' % (len(parsed_info['orig_fname_list']), maxCount)) if number is not None and len(parsed_info['orig_fname_list']) == number: break parsed_info['new_fpath_list'] = [join(output_dir, _fname) for _fname in parsed_info['new_fname_list']] print('Filtering parsed images') # Filter based on image type (keep only jpgs) ext_flags = [_fname.endswith('.jpg') or _fname.endswith('.jpg') for _fname in parsed_info['new_fname_list']] parsed_info = {key: ut.compress(list_, ext_flags) for key, list_ in parsed_info.items()} # Filter to only images matching the appropriate tags from ibeis import tag_funcs parsed_info['tags_list'] = parse_shark_tags(parsed_info['orig_fname_list']) tag_flags = tag_funcs.filterflags_general_tags( parsed_info['tags_list'], has_any=['view-left'], none_match=['qual.*', 'view-top', 'part-.*', 'cropped'], ) parsed_info = {key: ut.compress(list_, tag_flags) for key, list_ in parsed_info.items()} print('Tags in chosen images:') print(ut.dict_hist(ut.flatten(parsed_info['tags_list'] ))) # Download selected subset print('Downloading selected subset') _iter = list(zip(parsed_info['img_url_list'], parsed_info['new_fpath_list'])) _iter = ut.ProgressIter(_iter, lbl='downloading sharks') for img_url, new_fpath in _iter: if not exists(new_fpath): ut.download_url(img_url, new_fpath) # Remove corrupted or ill-formatted images print('Checking for corrupted images') import vtool as vt noncorrupt_flags = vt.filterflags_valid_images(parsed_info['new_fpath_list']) parsed_info = { key: ut.compress(list_, noncorrupt_flags) for key, list_ in parsed_info.items() } print('Removing small images') import numpy as np imgsize_list = np.array([vt.open_image_size(gpath) for gpath in parsed_info['new_fpath_list']]) sqrt_area_list = np.sqrt(np.prod(imgsize_list, axis=1)) areq_flags_list = sqrt_area_list >= 750 parsed_info = {key: ut.compress(list_, areq_flags_list) for key, list_ in parsed_info.items()} grouped_idxs = ut.group_items(list(range(len(parsed_info['nameid_list']))), parsed_info['nameid_list']) keep_idxs = sorted(ut.flatten([idxs for key, idxs in grouped_idxs.items() if len(idxs) >= 2])) parsed_info = {key: ut.take(list_, keep_idxs) for key, list_ in parsed_info.items()} print('Moving imagse to secondary 
directory') named_outputdir = 'named-left-sharkimages' # Build names parsed_info['namedir_fpath_list'] = [ join(named_outputdir, _nameid, _fname) for _fname, _nameid in zip(parsed_info['new_fname_list'], parsed_info['nameid_list'])] # Create directories ut.ensuredir(named_outputdir) named_dirs = ut.unique_ordered(list(map(dirname, parsed_info['namedir_fpath_list']))) for dir_ in named_dirs: ut.ensuredir(dir_) # Copy ut.copy_files_to(src_fpath_list=parsed_info['new_fpath_list'], dst_fpath_list=parsed_info['namedir_fpath_list'])
def nx_agraph_layout(graph, orig_graph=None, inplace=False, verbose=None, **kwargs): r""" orig_graph = graph graph = layout_graph References: http://www.graphviz.org/content/attrs http://www.graphviz.org/doc/info/attrs.html """ import networkx as nx import pygraphviz kwargs = kwargs.copy() prog = kwargs.pop('prog', 'dot') if prog != 'dot': kwargs['overlap'] = kwargs.get('overlap', 'false') kwargs['splines'] = kwargs.get('splines', 'spline') kwargs['notranslate'] = 'true' # for neato postprocessing argparts = ['-G%s=%s' % (key, str(val)) for key, val in kwargs.items()] args = ' '.join(argparts) splines = kwargs['splines'] if verbose is None: verbose = ut.VERBOSE if verbose: print('args = %r' % (args,)) # Convert to agraph format graph_ = graph.copy() ut.nx_ensure_agraph_color(graph_) # Reduce size to be in inches not pixels # FIXME: make robust to param settings # Hack to make the w/h of the node take thae max instead of # dot which takes the minimum shaped_nodes = [n for n, d in graph_.nodes(data=True) if 'width' in d] node_attrs = ut.dict_take(graph_.node, shaped_nodes) width_px = np.array(ut.take_column(node_attrs, 'width')) height_px = np.array(ut.take_column(node_attrs, 'height')) scale = np.array(ut.dict_take_column(node_attrs, 'scale', default=1.0)) width_in = width_px / 72.0 * scale height_in = height_px / 72.0 * scale width_in_dict = dict(zip(shaped_nodes, width_in)) height_in_dict = dict(zip(shaped_nodes, height_in)) nx.set_node_attributes(graph_, 'width', width_in_dict) nx.set_node_attributes(graph_, 'height', height_in_dict) ut.nx_delete_node_attr(graph_, 'scale') # Check for any nodes with groupids node_to_groupid = nx.get_node_attributes(graph_, 'groupid') if node_to_groupid: groupid_to_nodes = ut.group_items(*zip(*node_to_groupid.items())) else: groupid_to_nodes = {} # Initialize agraph format #import utool #utool.embed() ut.nx_delete_None_edge_attr(graph_) agraph = nx.nx_agraph.to_agraph(graph_) # Add subgraphs labels # TODO: subgraph attrs group_attrs = graph.graph.get('groupattrs', {}) for groupid, nodes in groupid_to_nodes.items(): # subgraph_attrs = {} subgraph_attrs = group_attrs.get(groupid, {}).copy() cluster_flag = True # FIXME: make this more natural to specify if 'cluster' in subgraph_attrs: cluster_flag = subgraph_attrs['cluster'] del subgraph_attrs['cluster'] # subgraph_attrs = dict(rankdir='LR') # subgraph_attrs = dict(rankdir='LR') # subgraph_attrs['rank'] = 'min' # subgraph_attrs['rank'] = 'source' name = groupid if cluster_flag: # graphviz treast subgraphs labeld with cluster differently name = 'cluster_' + groupid else: name = groupid agraph.add_subgraph(nodes, name, **subgraph_attrs) for node in graph_.nodes(): # force pinning of node points anode = pygraphviz.Node(agraph, node) if anode.attr['pin'] == 'true': if anode.attr['pos'] is not None and len(anode.attr['pos']) > 0 and not anode.attr['pos'].endswith('!'): import re #utool.embed() ptstr_ = anode.attr['pos'] #print('ptstr_ = %r' % (ptstr_,)) ptstr = ptstr_.strip('[]').strip(' ').strip('()') #print('ptstr = %r' % (ptstr,)) ptstr_list = [x.rstrip(',') for x in re.split(r'\s+', ptstr)] #print('ptstr_list = %r' % (ptstr_list,)) pt_list = list(map(float, ptstr_list)) #print('pt_list = %r' % (pt_list,)) pt_arr = np.array(pt_list) / 72.0 #print('pt_arr = %r' % (pt_arr,)) new_ptstr_list = list(map(str, pt_arr)) new_ptstr = ','.join(new_ptstr_list) + '!' 
#print('new_ptstr = %r' % (new_ptstr,)) anode.attr['pos'] = new_ptstr # Run layout #print('prog = %r' % (prog,)) if ut.VERBOSE or verbose > 0: print('BEFORE LAYOUT\n' + str(agraph)) agraph.layout(prog=prog, args=args) agraph.draw(ut.truepath('~/test_graphviz_draw.png')) if ut.VERBOSE or verbose > 1: print('AFTER LAYOUT\n' + str(agraph)) # TODO: just replace with a single dict of attributes node_layout_attrs = ut.ddict(dict) edge_layout_attrs = ut.ddict(dict) #for node in agraph.nodes(): for node in graph_.nodes(): anode = pygraphviz.Node(agraph, node) node_attrs = parse_anode_layout_attrs(anode) for key, val in node_attrs.items(): node_layout_attrs[key][node] = val edges = list(ut.nx_edges(graph_, keys=True)) for edge in edges: aedge = pygraphviz.Edge(agraph, *edge) edge_attrs = parse_aedge_layout_attrs(aedge) for key, val in edge_attrs.items(): edge_layout_attrs[key][edge] = val if orig_graph is not None and kwargs.get('draw_implicit', True): # ADD IN IMPLICIT EDGES layout_edges = set(ut.nx_edges(graph_, keys=True)) orig_edges = set(ut.nx_edges(orig_graph, keys=True)) implicit_edges = list(orig_edges - layout_edges) #all_edges = list(set.union(orig_edges, layout_edges)) needs_implicit = len(implicit_edges) > 0 if needs_implicit: # Pin down positions for node in agraph.nodes(): anode = pygraphviz.Node(agraph, node) anode.attr['pin'] = 'true' anode.attr['pos'] += '!' # Add new edges to route for iedge in implicit_edges: data = orig_graph.get_edge_data(*iedge) agraph.add_edge(*iedge, **data) if ut.VERBOSE or verbose: print('BEFORE IMPLICIT LAYOUT\n' + str(agraph)) # Route the implicit edges (must use neato) control_node = pygraphviz.Node(agraph, node) #print('control_node = %r' % (control_node,)) node1_attr1 = parse_anode_layout_attrs(control_node) #print('node1_attr1 = %r' % (node1_attr1,)) implicit_kw = kwargs.copy() implicit_kw['overlap'] = 'true' #del implicit_kw['overlap'] # can cause node positions to change argparts = ['-G%s=%s' % (key, str(val)) for key, val in implicit_kw.items()] args = ' '.join(argparts) #print('args = %r' % (args,)) #import utool #utool.embed() agraph.layout(prog='neato', args='-n ' + args) agraph.draw(ut.truepath('~/implicit_test_graphviz_draw.png')) if ut.VERBOSE or verbose: print('AFTER IMPLICIT LAYOUT\n' + str(agraph)) control_node = pygraphviz.Node(agraph, node) print('control_node = %r' % (control_node,)) node1_attr2 = parse_anode_layout_attrs(control_node) print('node1_attr2 = %r' % (node1_attr2,)) # graph positions shifted # This is not the right place to divide by 72 translation = (node1_attr1['pos'] - node1_attr2['pos'] ) #print('translation = %r' % (translation,)) #translation = np.array([0, 0]) print('translation = %r' % (translation,)) #for iedge in all_edges: for iedge in implicit_edges: aedge = pygraphviz.Edge(agraph, *iedge) iedge_attrs = parse_aedge_layout_attrs(aedge, translation) for key, val in iedge_attrs.items(): edge_layout_attrs[key][iedge] = val graph_layout_attrs = dict( splines=splines ) layout_info = { 'graph': graph_layout_attrs, 'edge': dict(edge_layout_attrs), 'node': dict(node_layout_attrs), } if inplace: if orig_graph is not None: graph = orig_graph apply_graph_layout_attrs(graph, layout_info) return graph, layout_info
def reasign_names2(ibs, gname_name_pairs, aid_list=None): """ Notes: * Given a list of pairs: image file names (full path), animal name. * Go through all the images in the database and create a dictionary that associates the file name (full path) of the image in the database with the annotation or annotations associated with that image. * Go through the list of pairs: For each image file name, look up in the dictionary the image file name and assign the annotation associated with the image file name the animal name * Throughout this, keep a list of annotations that have been changed * Wildbook will issue a pull request to get these annotation. Example: >>> # DISABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='testdb1') >>> aid_list = None >>> common_prefix = '' >>> gname_name_pairs = [ >>> ('easy1.JPG', 'easy'), >>> ('easy2.JPG', 'easy'), >>> ('easy3.JPG', 'easy'), >>> ('hard1.JPG', 'hard') >>> ] >>> changed_pairs = reasign_names2(gname_name_pairs) """ from os.path import basename if aid_list is None: aid_list = ibs.get_valid_aids() annot_gnames = ibs.get_annot_image_names(aid_list) # Other image name getters that may be useful # ibs.get_annot_image_paths(aid_list) # ibs.get_image_uris_original(ibs.get_annot_gids(aid_list)) gname2_aids = ut.group_items(aid_list, annot_gnames) changed_aids = [] changed_names = [] for gname, name in gname_name_pairs: # make sure its just the last part of the name. # Ignore preceding path gname = basename(gname) aids = gname2_aids[gname] texts = ibs.get_annot_name_texts(aids) flags = [text != name for text in texts] aids_ = ut.compress(aids, flags) if len(aids_): changed_aids.extend(aids_) changed_names.extend([name] * len(aids_)) dry = False if not dry: # Save the new names to the image analysis database ibs.set_annot_name_texts(changed_aids, changed_names) # Returned list tells you who was changed. changed_pairs = list(zip(changed_names, changed_aids)) return changed_pairs
def reasign_names1(ibs, aid_list=None, old_img2_names=None, common_prefix=''): r""" Changes the names in the IA-database to correspond to an older naming convention. If splits and merges were preformed tries to find the maximally consistent renaming scheme. Notes: For each annotation: * get the image * get the image full path * strip the full path down to the file name prefix: [ example /foo/bar/pic.jpg -> pic ] * make the name of the individual associated with that annotation be the file name prefix * save the new names to the image analysis database * wildbook will make a request to get all of the annotations, image file names, image names and animal ids CommandLine: python -m ibeis.scripts.name_recitifer rectify_names --show Example: >>> # DISABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='testdb1') >>> aid_list = None >>> common_prefix = '' >>> old_img2_names = None #['img_fred.png', '] >>> result = reasign_names1(ibs, aid_list, img_list, name_list) """ if aid_list is None: aid_list = ibs.get_valid_aids() # Group annotations by their current IA-name nid_list = ibs.get_annot_name_rowids(aid_list) nid2_aids = ut.group_items(aid_list, nid_list) unique_nids = list(nid2_aids.keys()) grouped_aids = list(nid2_aids.values()) # Get grouped images grouped_imgnames = ibs.unflat_map(ibs.get_annot_image_names, grouped_aids) # Assume a mapping from old image names to old names is given. # Or just hack it in the Lewa case. if old_img2_names is None: def get_name_from_gname(gname): from os.path import splitext gname_, ext = splitext(gname) assert gname_.startswith(common_prefix), ( 'prefix assumption is invalidated') gname_ = gname_[len(common_prefix):] return gname_ # Create mapping from image name to the desired "name" for the image. old_img2_names = { gname: get_name_from_gname(gname) for gname in ut.flatten(grouped_imgnames) } # Make the name of the individual associated with that annotation be the # file name prefix grouped_oldnames = [ ut.take(old_img2_names, gnames) for gnames in grouped_imgnames ] # The task is now to map each name in unique_nids to one of these names # subject to the contraint that each name can only be used once. This is # solved using a maximum bipartite matching. The new names are the left # nodes, the old name are the right nodes, and grouped_oldnames definse the # adjacency matrix. # NOTE: In rare cases it may be impossible to find a correct labeling using # only old names. In this case new names will be created. new_name_text = find_consistent_labeling(grouped_oldnames) dry = False if not dry: # Save the new names to the image analysis database ibs.set_name_texts(unique_nids, new_name_text)
def estimate_twoday_count(ibs, day1, day2, filter_kw): #gid_list = ibs.get_valid_gids() all_images = ibs.images() dates = [dt.date() for dt in all_images.datetime] date_to_images = all_images.group_items(dates) date_to_images = ut.sort_dict(date_to_images) #date_hist = ut.map_dict_vals(len, date2_gids) #print('date_hist = %s' % (ut.repr2(date_hist, nl=2),)) verbose = 0 visit_dates = [day1, day2] visit_info_list_ = [] for day in visit_dates: images = date_to_images[day] aids = ut.flatten(images.aids) aids = ibs.filter_annots_general(aids, filter_kw=filter_kw, verbose=verbose) nids = ibs.get_annot_name_rowids(aids) grouped_aids = ut.group_items(aids, nids) unique_nids = ut.unique(list(grouped_aids.keys())) if False: aids_list = ut.take(grouped_aids, unique_nids) for aids in aids_list: if len(aids) > 30: break timedeltas_list = ibs.get_unflat_annots_timedelta_list(aids_list) # Do the five second rule marked_thresh = 5 flags = [] for nid, timedeltas in zip(unique_nids, timedeltas_list): flags.append(timedeltas.max() > marked_thresh) print('Unmarking %d names' % (len(flags) - sum(flags))) unique_nids = ut.compress(unique_nids, flags) grouped_aids = ut.dict_subset(grouped_aids, unique_nids) unique_aids = ut.flatten(list(grouped_aids.values())) info = { 'unique_nids': unique_nids, 'grouped_aids': grouped_aids, 'unique_aids': unique_aids, } visit_info_list_.append(info) # Estimate statistics from ibeis.other import dbinfo aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'unique_aids') nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids') resight_nids = ut.isect(nids_day1, nids_day2) nsight1 = len(nids_day1) nsight2 = len(nids_day2) resight = len(resight_nids) lp_index, lp_error = dbinfo.sight_resight_count(nsight1, nsight2, resight) if False: from ibeis.other import dbinfo print('DAY 1 STATS:') _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1) # NOQA print('DAY 2 STATS:') _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day2) # NOQA print('COMBINED STATS:') _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1 + aids_day2) # NOQA print('%d annots on day 1' % (len(aids_day1)) ) print('%d annots on day 2' % (len(aids_day2)) ) print('%d names on day 1' % (nsight1,)) print('%d names on day 2' % (nsight2,)) print('resight = %r' % (resight,)) print('lp_index = %r ± %r' % (lp_index, lp_error)) return nsight1, nsight2, resight, lp_index, lp_error
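# Hedged reference sketch: sight_resight_count is implemented in ibeis.other.dbinfo;
# the helper below is only the textbook Lincoln-Petersen estimator (with a common
# large-sample standard-error approximation) so that the nsight1/nsight2/resight
# quantities returned above have a concrete interpretation.  It is not guaranteed to
# match dbinfo's exact formula.
import math

def _lincoln_petersen(nsight1, nsight2, resight):
    assert resight > 0, 'need at least one resighted name'
    lp_index = nsight1 * nsight2 / resight
    lp_error = math.sqrt(
        (nsight1 ** 2 * nsight2 * (nsight2 - resight)) / float(resight ** 3))
    return lp_index, lp_error

# >>> _lincoln_petersen(50, 60, 20)
# (150.0, 27.386...)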
def mark_unreviewed_above_score_as_correct(qres_wgt): selected_qtindex_list = qres_wgt.selectedRows() if len(selected_qtindex_list) == 1: qtindex = selected_qtindex_list[0] # aid1, aid2 = qres_wgt.get_aidpair_from_qtindex(qtindex) thresh = qtindex.model().get_header_data('score', qtindex) logger.info('thresh = %r' % (thresh, )) rows = qres_wgt.review_api.ider() scores_ = qres_wgt.review_api.get( qres_wgt.review_api.col_name_list.index('score'), rows) valid_rows = ut.compress(rows, scores_ >= thresh) aids1 = qres_wgt.review_api.get( qres_wgt.review_api.col_name_list.index('qaid'), valid_rows) aids2 = qres_wgt.review_api.get( qres_wgt.review_api.col_name_list.index('aid'), valid_rows) ibs = qres_wgt.ibs am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey( aids1, aids2) reviewed = ibs.get_annotmatch_reviewed(am_rowids) unreviewed = ut.not_list(reviewed) valid_rows = ut.compress(valid_rows, unreviewed) aids1 = ut.compress(aids1, unreviewed) aids2 = ut.compress(aids2, unreviewed) import networkx as nx graph = nx.Graph() graph.add_edges_from(list(zip(aids1, aids2)), user_thresh_match=True) review_groups = list(nx.connected_component_subgraphs(graph)) changing_aids = list(graph.nodes()) nids = ibs.get_annot_nids(changing_aids) nid2_aids = ut.group_items(changing_aids, nids) for nid, aids in nid2_aids.items(): # Connect all original names in the database to denote merges for u, v in ut.itertwo(aids): graph.add_edge(u, v) dbside_groups = list(nx.connected_component_subgraphs(graph)) options = [ 'Accept', # 'Review More' ] msg = (ut.codeblock(""" There are %d names and %d annotations in this mass review set. Mass review has discovered %d internal groups. Accepting will induce a database grouping of %d names. """) % ( len(nid2_aids), len(changing_aids), len(review_groups), len(dbside_groups), )) reply = gt.user_option(msg=msg, options=options) if reply == options[0]: # This is not the smartest way to group names. # Ideally what will happen here is that reviewed edges will go into # the new graph name inference algorithm. Then the chosen point will # be used as the threshold, and the graph cut algorithm will be applied.
logger_ = qres_wgt.logger logger_.debug(msg) logger_.info('START MASS_THRESHOLD_MERGE') logger_.info('num_groups=%d thresh=%r' % ( len(dbside_groups), thresh, )) for count, subgraph in enumerate(dbside_groups): thresh_aid_pairs = [ edge for edge, flag in nx.get_edge_attributes( graph, 'user_thresh_match').items() if flag ] thresh_uuid_pairs = ibs.unflat_map(ibs.get_annot_uuids, thresh_aid_pairs) aids = list(subgraph.nodes()) nids = ibs.get_annot_name_rowids(aids) flags = ut.not_list(ibs.is_aid_unknown(aids)) previous_names = ibs.get_name_texts(nids) valid_nids = ut.compress(nids, flags) if len(valid_nids) == 0: merge_nid = ibs.make_next_nids(num=1)[0] type_ = 'new' else: merge_nid = min(valid_nids) type_ = 'existing' # Need to find other non-exemplar / query names that may # need merging other_aids = ibs.get_name_aids(valid_nids) other_aids = set(ut.flatten(other_aids)) - set(aids) other_auuids = ibs.get_annot_uuids(other_aids) other_previous_names = ibs.get_annot_names(other_aids) merge_name = ibs.get_name_texts(merge_nid) annot_uuids = ibs.get_annot_uuids(aids) ### # Set as reviewed (so we dont see them again), but mark it # with a different code to denote that it was a MASS review aid1_list = ut.take_column(thresh_aid_pairs, 0) aid2_list = ut.take_column(thresh_aid_pairs, 1) am_rowids = ibs.add_annotmatch_undirected( aid1_list, aid2_list) ibs.set_annotmatch_reviewer( am_rowids, ['algo:lnbnn_thresh'] * len(am_rowids)) logger_.info('START GROUP %d' % (count, )) logger_.info( 'GROUP BASED ON %d ANNOT_PAIRS WITH SCORE ABOVE (thresh=%r)' % ( len(thresh_uuid_pairs), thresh, )) logger_.debug('(uuid_pairs=%r)' % (thresh_uuid_pairs)) logger_.debug('(merge_name=%r)' % (merge_name)) logger_.debug( 'CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)' % ( len(annot_uuids), annot_uuids, previous_names, type_, merge_name, )) logger_.debug( 'ADDITIONAL CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)' % ( len(other_auuids), other_auuids, other_previous_names, type_, merge_name, )) logger_.info('END GROUP %d' % (count, )) new_nids = [merge_nid] * len(aids) ibs.set_annot_name_rowids(aids, new_nids) logger_.info('END MASS_THRESHOLD_MERGE') else: logger.info('[context] Multiple %d selection' % (len(selected_qtindex_list), ))
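# Hedged sketch of the grouping step used in the mass-review merge above: edges whose
# score clears the user threshold are dropped into a graph and each connected
# component becomes one merge group.  This toy version uses only networkx with
# made-up aids; connected_components stands in for the older
# connected_component_subgraphs call the function relies on.
import networkx as nx

def _toy_merge_groups(scored_pairs, thresh):
    graph = nx.Graph()
    graph.add_edges_from(
        [(a1, a2) for a1, a2, score in scored_pairs if score >= thresh])
    return [sorted(cc) for cc in nx.connected_components(graph)]

# >>> _toy_merge_groups([(1, 2, .9), (2, 3, .8), (4, 5, .2)], thresh=.5)
# [[1, 2, 3]]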
def wildbook_signal_annot_name_changes(ibs, aid_list=None, wb_target=None, dryrun=False): r""" Args: aid_list (int): list of annotation ids(default = None) tomcat_dpath (None): (default = None) wb_target (None): (default = None) dryrun (bool): (default = False) CommandLine: python -m ibeis wildbook_signal_annot_name_changes:0 --dryrun python -m ibeis wildbook_signal_annot_name_changes:1 --dryrun python -m ibeis wildbook_signal_annot_name_changes:1 python -m ibeis wildbook_signal_annot_name_changes:2 Setup: >>> wb_target = None >>> dryrun = ut.get_argflag('--dryrun') Example: >>> # DISABLE_DOCTEST >>> from ibeis.control.manual_wildbook_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST') >>> #gid_list = ibs.get_valid_gids()[0:10] >>> gid_list = ibs.get_valid_gids()[3:5] >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list)) >>> # Test case where some names change, some do not. There are no new names. >>> old_nid_list = ibs.get_annot_name_rowids(aid_list) >>> new_nid_list = ut.list_roll(old_nid_list, 1) >>> ibs.set_annot_name_rowids(aid_list, new_nid_list) >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun) >>> ibs.set_annot_name_rowids(aid_list, old_nid_list) Example: >>> # DISABLE_DOCTEST >>> from ibeis.control.manual_wildbook_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST') >>> #gid_list = ibs.get_valid_gids()[0:10] >>> gid_list = ibs.get_valid_gids()[3:5] >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list)) >>> # Test case where all names change to one known name >>> #old_nid_list = ibs.get_annot_name_rowids(aid_list) >>> #new_nid_list = [old_nid_list[0]] * len(old_nid_list) >>> old_nid_list = [1, 2] >>> new_nid_list = [1, 1] >>> print('old_nid_list = %r' % (old_nid_list,)) >>> print('new_nid_list = %r' % (new_nid_list,)) >>> ibs.set_annot_name_rowids(aid_list, new_nid_list) >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun) >>> # Undo changes here (not undone in wildbook) >>> #ibs.set_annot_name_rowids(aid_list, old_nid_list) Example: >>> # DISABLE_DOCTEST >>> from ibeis.control.manual_wildbook_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST') >>> gid_list = ibs.get_valid_gids()[3:5] >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list)) >>> old_nid_list = [1, 2] >>> ibs.set_annot_name_rowids(aid_list, old_nid_list) >>> # Signal what currently exists (should put them back to normal) >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun) """ print('[ibs.wildbook_signal_imgsetid_list] signaling annot name changes to wildbook') wb_url = ibs.get_wildbook_base_url(wb_target) try: ibs.assert_ia_available_for_wb(wb_target) except Exception: pass if aid_list is None: aid_list = ibs.get_valid_aids(is_known=True) annot_uuid_list = ibs.get_annot_uuids(aid_list) annot_name_text_list = ibs.get_annot_name_texts(aid_list) grouped_uuids = ut.group_items(annot_uuid_list, annot_name_text_list) url = wb_url + '/ia' payloads = [ {'resolver': {'assignNameToAnnotations': { 'name': new_name, 'annotationIds' : ut.lmap(str, annot_uuids), }}} for new_name, annot_uuids in grouped_uuids.items() ] status_list = [] for json_payload in ut.ProgressIter(payloads, lbl='submitting URL', freq=1): print('[_send] URL=%r with json_payload=%r' % (url, json_payload)) if dryrun: status = False else: response = requests.post(url, json=json_payload) status = response.status_code == 200 if not status: print('Failed to push new names') 
if not dryrun: print(response.text) status_list.append(status) return status_list
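# Hedged illustration of one JSON payload produced by the grouping above (the UUIDs
# and the name are made up; only the nested keys mirror the dict comprehension in the
# function):
example_payload = {'resolver': {'assignNameToAnnotations': {
    'name': 'IBEIS_PZ_0042',
    'annotationIds': ['c8a42be2-made-up-uuid-1', 'c8a42be2-made-up-uuid-2'],
}}}
# With dryrun=True nothing is POSTed, so the call can be smoke-tested without a
# Wildbook server, e.g.:
# >>> ibs.wildbook_signal_annot_name_changes(aid_list, None, dryrun=True)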
def split_analysis(ibs): """ CommandLine: python -m ibeis.other.dbinfo split_analysis --show python -m ibeis split_analysis --show python -m ibeis split_analysis --show --good Ignore: # mount sshfs -o idmap=user lev:/ ~/lev # unmount fusermount -u ~/lev Example: >>> # DISABLE_DOCTEST GGR >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> dbdir = '/media/danger/GGR/GGR-IBEIS' >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS') >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False) >>> import guitool_ibeis as gt >>> gt.ensure_qtapp() >>> win = split_analysis(ibs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> gt.qtapp_loop(qwin=win) >>> #ut.show_if_requested() """ #nid_list = ibs.get_valid_nids(filter_empty=True) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) filter_kw = { 'multiple': None, #'view': ['right'], #'minqual': 'good', 'is_known': True, 'min_pername': 1, } aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)), }) ) aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }) ) all_aids = aids1 + aids2 all_annots = ibs.annots(all_aids) print('%d annots on day 1' % (len(aids1)) ) print('%d annots on day 2' % (len(aids2)) ) print('%d annots overall' % (len(all_annots)) ) print('%d names overall' % (len(ut.unique(all_annots.nids))) ) nid_list, annots_list = all_annots.group(all_annots.nids) REVIEWED_EDGES = True if REVIEWED_EDGES: aids_list = [annots.aids for annots in annots_list] #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list] # Slower aid_pairs = ibs.get_unflat_am_aidpairs(aids_list) # Faster else: # ALL EDGES aid_pairs = [annots.get_aidpairs() for annots in annots_list] speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs) import vtool_ibeis as vt max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list]) nan_idx = np.where(np.isnan(max_speeds))[0] inf_idx = np.where(np.isinf(max_speeds))[0] bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx]))) ok_idx = ut.index_complement(bad_idx, len(max_speeds)) print('#nan_idx = %r' % (len(nan_idx),)) print('#inf_idx = %r' % (len(inf_idx),)) print('#ok_idx = %r' % (len(ok_idx),)) ok_speeds = max_speeds[ok_idx] ok_nids = ut.take(nid_list, ok_idx) ok_annots = ut.take(annots_list, ok_idx) sortx = np.argsort(ok_speeds)[::-1] sorted_speeds = np.array(ut.take(ok_speeds, sortx)) sorted_annots = np.array(ut.take(ok_annots, sortx)) sorted_nids = np.array(ut.take(ok_nids, sortx)) # NOQA sorted_speeds = np.clip(sorted_speeds, 0, 100) #idx = vt.find_elbow_point(sorted_speeds) #EXCESSIVE_SPEED = sorted_speeds[idx] # http://www.infoplease.com/ipa/A0004737.html # http://www.speedofanimals.com/animals/zebra #ZEBRA_SPEED_MAX = 64 # km/h #ZEBRA_SPEED_RUN = 50 # km/h ZEBRA_SPEED_SLOW_RUN = 20 # km/h #ZEBRA_SPEED_FAST_WALK = 10 # km/h #ZEBRA_SPEED_WALK = 7 # km/h MAX_SPEED = ZEBRA_SPEED_SLOW_RUN #MAX_SPEED = ZEBRA_SPEED_WALK #MAX_SPEED = EXCESSIVE_SPEED flags = sorted_speeds > MAX_SPEED flagged_ok_annots = ut.compress(sorted_annots, flags) inf_annots = ut.take(annots_list, inf_idx) flagged_annots = inf_annots + flagged_ok_annots print('MAX_SPEED = %r km/h' % (MAX_SPEED,)) print('%d annots 
with infinite speed' % (len(inf_annots),)) print('%d annots with large speed' % (len(flagged_ok_annots),)) print('Marking all pairs of annots above the threshold as non-matching') from ibeis.algo.graph import graph_iden import networkx as nx progkw = dict(freq=1, bs=True, est_window=len(flagged_annots)) bad_edges_list = [] good_edges_list = [] for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw): edge_to_speeds = annots.get_speeds() bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED] bad_edges_list.append(bad_edges) good_edges_list.append(good_edges) all_bad_edges = ut.flatten(bad_edges_list) all_good_edges = ut.flatten(good_edges_list) print('num_bad_edges = %r' % (len(all_bad_edges),)) print('num_good_edges = %r' % (len(all_good_edges),)) if 1: from ibeis.viz import viz_graph2 import guitool_ibeis as gt gt.ensure_qtapp() if ut.get_argflag('--good'): print('Looking at GOOD (no speed problems) edges') aid_pairs = all_good_edges else: print('Looking at BAD (speed problems) edges') aid_pairs = all_bad_edges aids = sorted(list(set(ut.flatten(aid_pairs)))) infr = graph_iden.AnnotInference(ibs, aids, verbose=False) infr.initialize_graph() # Use random scores to randomize sort order rng = np.random.RandomState(0) scores = (-rng.rand(len(aid_pairs)) * 10).tolist() infr.graph.add_edges_from(aid_pairs) if True: edge_sample_size = 250 pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs)))) sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size] sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0)) sample_size = len(ut.unique(sorted_nids)) am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs)) flags = ut.not_list(ut.flag_None_items(am_rowids)) #am_rowids = ut.compress(am_rowids, flags) positive_tags = ['SplitCase', 'Photobomb'] flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0) for tag in positive_tags] print('edge_case_hist: ' + ut.repr3( ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)])) is_positive = ut.or_lists(*flags_list) num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values())) pop = len(pop_nids) print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),)) print('--- Sampling wrt edges ---') print('edge_sample_size = %r' % (edge_sample_size,)) print('edge_population_size = %r' % (len(aid_pairs),)) print('num_positive_edges = %r' % (sum(is_positive))) print('--- Sampling wrt names ---') print('name_population_size = %r' % (pop,)) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95) nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores))) win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False, init_mode=None) win.populate_edge_model() win.show() return win # Make review interface for only bad edges infr_list = [] iter_ = list(zip(flagged_annots, bad_edges_list)) for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw): aids = annots.aids nids = [1] * len(aids) infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False) infr.initialize_graph() infr.reset_feedback() infr_list.append(infr) # Check which ones are user defined as incorrect #num_positive = 0 #for infr in infr_list: # flag = np.any(infr.get_feedback_probs()[0] == 0) # num_positive += flag
#print('num_positive = %r' % (num_positive,)) #pop = len(infr_list) #print('pop = %r' % (pop,)) iter_ = list(zip(infr_list, bad_edges_list)) for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw): flipped_edges = [] for aid1, aid2 in bad_edges: if infr.graph.has_edge(aid1, aid2): flipped_edges.append((aid1, aid2)) infr.add_feedback((aid1, aid2), NEGTV) nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig') nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges}) nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges}) #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw): # annots = ibs.annots(infr.aids) # edge_to_speeds = annots.get_speeds() # bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] def inference_stats(infr_list_): relabel_stats = [] for infr in infr_list_: num_ccs, num_inconsistent = infr.relabel_using_reviews() state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values()) if POSTV not in state_hist: state_hist[POSTV] = 0 hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values()) subgraphs = infr.positive_connected_compoments() subgraph_sizes = [len(g) for g in subgraphs] info = ut.odict([ ('num_nonmatch_edges', state_hist[NEGTV]), ('num_match_edges', state_hist[POSTV]), ('frac_nonmatch_edges', state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])), ('num_inconsistent', num_inconsistent), ('num_ccs', num_ccs), ('edges_flipped', hist.get('flip', 0)), ('edges_unchanged', hist.get('orig', 0)), ('bad_unreviewed_edges', hist.get('new', 0)), ('orig_size', len(infr.graph)), ('new_sizes', subgraph_sizes), ]) relabel_stats.append(info) return relabel_stats relabel_stats = inference_stats(infr_list) print('\nAll Split Info:') lines = [] for key in relabel_stats[0].keys(): data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent')) can_split_flags = num_incon_list == 0 print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags))) splittable_infrs = ut.compress(infr_list, can_split_flags) relabel_stats = inference_stats(splittable_infrs) print('\nTrival Split Info:') lines = [] for key in relabel_stats[0].keys(): if key in ['num_inconsistent']: continue data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % ( key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges')) num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges')) flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3) reasonable_infr = ut.compress(splittable_infrs, flags1) new_sizes_list = ut.take_column(relabel_stats, 'new_sizes') flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3 for sizes in new_sizes_list] reasonable_infr = ut.compress(splittable_infrs, flags2) print('#reasonable_infr = %r' % (len(reasonable_infr),)) for infr in ut.InteractiveIter(reasonable_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' 
% (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) rest = ~np.logical_or(flags1, flags2) nonreasonable_infr = ut.compress(splittable_infrs, rest) rng = np.random.RandomState(0) random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng) random_infr = ut.take(nonreasonable_infr, random_idx) for infr in ut.InteractiveIter(random_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' % (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) #import scipy.stats as st #conf_interval = .95 #st.norm.cdf(conf_interval) # view-source:http://www.surveysystem.com/sscalc.htm #zval = 1.96 # 95 percent confidence #zValC = 3.8416 # #zValC = 6.6564 #import statsmodels.stats.api as sms #es = sms.proportion_effectsize(0.5, 0.75) #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1) pop = 279 num_positive = 3 sample_size = 15 conf_level = .95 #conf_level = .99 vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95) pop = 279 #err_frac = .05 # 5% err_frac = .10 # 10% conf_level = .95 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) pop = 675 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1) vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2) vt.calc_sample_from_error_bars(.10, pop, conf_level=.68) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
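# Hedged sketch of the speed-based flagging at the heart of split_analysis: the
# annots.get_speeds() call above is assumed to return a dict mapping annotation pairs
# to the travel speed (km/h) implied by their timestamps and GPS; anything above
# MAX_SPEED becomes a candidate split edge.  Toy data only.
def _flag_speeding_edges(edge_to_speeds, max_speed=20):
    bad = [e for e, s in edge_to_speeds.items() if s > max_speed]
    good = [e for e, s in edge_to_speeds.items() if s <= max_speed]
    return bad, good

# >>> _flag_speeding_edges({(1, 2): 3.0, (1, 3): 55.0})
# ([(1, 3)], [(1, 2)])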
def check_results(ibs_gt, ibs2, aid1_to_aid2, aids_list1_, incinfo): """ reports how well the incremental query ran when the oracle was calling the shots. """ print('--------- CHECKING RESULTS ------------') testcases = incinfo.get('testcases') if testcases is not None: count_dict = ut.count_dict_vals(testcases) print('+--') #print(ut.dict_str(testcases)) print('---') print(ut.dict_str(count_dict)) print('L__') # TODO: dont include initially added aids in the result reporting aid_list1 = aids_list1_ # ibs_gt.get_valid_aids() #aid_list1 = ibs_gt.get_aids_with_groundtruth() aid_list2 = ibs2.get_valid_aids() nid_list1 = ibs_gt.get_annot_nids(aid_list1) nid_list2 = ibs2.get_annot_nids(aid_list2) # Group annotations from test and gt database by their respective names grouped_dict1 = ut.group_items(aid_list1, nid_list1) grouped_dict2 = ut.group_items(aid_list2, nid_list2) grouped_aids1 = list(six.itervalues(grouped_dict1)) grouped_aids2 = list(map(tuple, six.itervalues(grouped_dict2))) #group_nids1 = list(six.iterkeys(grouped_dict1)) #group_nids2 = list(six.iterkeys(grouped_dict2)) # Transform annotation ids from database1 space to database2 space grouped_aids1_t = [tuple(ut.dict_take_list(aid1_to_aid2, aids1)) for aids1 in grouped_aids1] set_grouped_aids1_t = set(grouped_aids1_t) set_grouped_aids2 = set(grouped_aids2) # Find names we got right. (correct groupings of annotations) # these are the annotation groups that are intersecting between # the test database and groundtruth database perfect_groups = set_grouped_aids2.intersection(set_grouped_aids1_t) # Find names we got wrong. (incorrect groupings of annotations) # The test database sets that were not perfect nonperfect_groups = set_grouped_aids2.difference(perfect_groups) # What we should have got # The ground truth database sets that were not fully identified missed_groups = set_grouped_aids1_t.difference(perfect_groups) # Mark non perfect groups by their error type false_negative_groups = [] # failed to link enough false_positive_groups = [] # linked too much for nonperfect_group in nonperfect_groups: if ut.is_subset_of_any(nonperfect_group, missed_groups): false_negative_groups.append(nonperfect_group) else: false_positive_groups.append(nonperfect_group) # Get some more info on the nonperfect groups # find which groups should have been linked aid2_to_aid1 = ut.invert_dict(aid1_to_aid2) false_negative_groups_t = [tuple(ut.dict_take_list(aid2_to_aid1, aids2)) for aids2 in false_negative_groups] false_negative_group_nids_t = ibs_gt.unflat_map(ibs_gt.get_annot_nids, false_negative_groups_t) assert all(map(ut.allsame, false_negative_group_nids_t)), 'inconsistent nids' false_negative_group_nid_t = ut.get_list_column(false_negative_group_nids_t, 0) # These are the links that should have been made missed_links = ut.group_items(false_negative_groups, false_negative_group_nid_t) print(ut.dict_str(missed_links)) print('# Name with failed links (FN) = %r' % len(false_negative_groups)) print('... should have reduced to %d names.' % (len(missed_links))) print('# Name with wrong links (FP) = %r' % len(false_positive_groups)) print('# Name correct names (TP) = %r' % len(perfect_groups))
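# Hedged sketch of the set logic used in check_results: groups from the test database
# are compared against ground-truth groups (after translating ids), and each imperfect
# group is classified as a false negative (a subset of a missed gt group) or a false
# positive (links annots across gt groups).  Toy ids only; _classify_groups is a
# hypothetical helper, not part of the IBEIS API.
def _classify_groups(gt_groups, test_groups):
    gt_groups, test_groups = set(map(tuple, gt_groups)), set(map(tuple, test_groups))
    perfect = test_groups & gt_groups
    missed = gt_groups - perfect
    false_negative, false_positive = [], []
    for group in test_groups - perfect:
        if any(set(group) <= set(m) for m in missed):
            false_negative.append(group)
        else:
            false_positive.append(group)
    return perfect, false_negative, false_positive

# >>> _classify_groups([(1, 2, 3), (4,)], [(1, 2), (3, 4)])
# (set(), [(1, 2)], [(3, 4)])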
def temp_model(num_annots, num_names, score_evidence=[], name_evidence=[], other_evidence={}, noquery=False, verbose=None, **kwargs): if verbose is None: verbose = ut.VERBOSE method = kwargs.pop('method', None) model = make_name_model(num_annots, num_names, verbose=verbose, **kwargs) if verbose: model.print_priors(ignore_ttypes=[MATCH_TTYPE, SCORE_TTYPE]) model, evidence, soft_evidence = update_model_evidence( model, name_evidence, score_evidence, other_evidence) if verbose and len(soft_evidence) != 0: model.print_priors(ignore_ttypes=[MATCH_TTYPE, SCORE_TTYPE], title='Soft Evidence', color='green') # if verbose: # ut.colorprint('\n --- Soft Evidence ---', 'white') # for ttype, cpds in model.ttype2_cpds.items(): # if ttype != MATCH_TTYPE: # for fs_ in ut.ichunks(cpds, 4): # ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]), # 'green') if verbose: ut.colorprint('\n --- Inference ---', 'red') if (len(evidence) > 0 or len(soft_evidence) > 0) and not noquery: evidence = model._ensure_internal_evidence(evidence) query_vars = [] query_vars += ut.list_getattr(model.ttype2_cpds[NAME_TTYPE], 'variable') # query_vars += ut.list_getattr(model.ttype2_cpds[MATCH_TTYPE], 'variable') query_vars = ut.setdiff(query_vars, evidence.keys()) # query_vars = ut.setdiff(query_vars, soft_evidence.keys()) query_results = cluster_query(model, query_vars, evidence, soft_evidence, method) else: query_results = {} factor_list = query_results['factor_list'] if verbose: if verbose: logger.info('+--------') semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list] for type_, factors in ut.group_items(factor_list, semtypes).items(): logger.info('Result Factors (%r)' % (type_, )) factors = ut.sortedby(factors, [f.variables[0] for f in factors]) for fs_ in ut.ichunks(factors, 4): ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]), 'yellow') logger.info('MAP assignments') top_assignments = query_results.get('top_assignments', []) tmp = [] for lbl, val in top_assignments: tmp.append('%s : %.4f' % (ut.repr2(lbl), val)) logger.info(ut.align('\n'.join(tmp), ' :')) logger.info('L_____\n') showkw = dict(evidence=evidence, soft_evidence=soft_evidence, **query_results) from wbia.algo.hots import pgm_viz pgm_viz.show_model(model, **showkw) return (model, evidence, query_results)
def get_dbinfo(ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: CommandLine: python -m ibeis.other.dbinfo --exec-get_dbinfo:0 python -m ibeis.other.dbinfo --test-get_dbinfo:1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> defaultdb = 'testdb1' >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = ibeis.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from ibeis.expt import cfghelpers >>> #from ibeis.expt import annotation_configs >>> #from ibeis.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> verbose = True >>> short = True >>> #ibs = ibeis.opendb(db='GZ_ALL') >>> #ibs = ibeis.opendb(db='PZ_Master0') >>> ibs = ibeis.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = ibeis.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... 
# Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] print('Specified custom aids via acfgname %s' % (acfg_name_list,)) from ibeis.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) #aid_list = if verbose: print('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) #associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation FILTER_HACK = True if FILTER_HACK: # HUGE HACK - get only images and names with filtered aids valid_aids_ = ibs.filter_aids_custom(valid_aids) valid_nids_ = ibs.filter_nids_custom(valid_nids) valid_gids_ = ibs.filter_gids_custom(valid_gids) if verbose: print('Filtered %d names' % (len(valid_nids) - len(valid_nids_))) print('Filtered %d images' % (len(valid_gids) - len(valid_gids_))) print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_))) valid_gids = valid_gids_ valid_nids = valid_nids_ valid_aids = valid_aids_ #associated_nids = ut.compress(associated_nids, map(any, #ibs.unflat_map(ibs.get_annot_custom_filterflags, # ibs.get_name_aids(associated_nids)))) # Image info if verbose: print('Checking Image Info') gx2_aids = ibs.get_image_aids(valid_gids) if FILTER_HACK: gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: print('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if FILTER_HACK: nx2_aids = [ibs.filter_aids_custom(aids) for aids in nx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from ibeis.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()} return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in 
enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1} nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1} singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True) resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True) try: aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0) undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1])) pair_tag_info['num_reviewed'] = num_reviewed_pairs except Exception: pair_tag_info = {} #print(ut.dict_str(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: print('Checking Annot Species') unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids)) species_list = ibs.get_annot_species_texts(valid_aids) species2_aids = ut.group_items(valid_aids, species_list) species2_nAids = {key: len(val) for key, val in species2_aids.items()} if verbose: print('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Seperate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: print('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=np.int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: print('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = OrderedDict( [( 'max', wh_list.max(0)), ( 'min', wh_list.min(0)), ('mean', wh_list.mean(0)), ( 'std', wh_list.std(0))]) def arr2str(var): return ('[' + ( ', '.join(list(map(lambda x: '%.1f' % x, var))) ) + ']') ret = (',\n '.join([ '%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items() ])) return '{\n ' + ret + '\n}' print('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list = ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list 
= [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: print('Building Stats String') multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True) # Time stats unixtime_list = ibs.get_image_unixtime(valid_gids) unixtime_list = ut.list_replace(unixtime_list, -1, float('nan')) #valid_unixtime_list = [time for time in unixtime_list if time != -1] #unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda : 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (36 <= max_age or max_age is None): age_dict['Adult'] += 1 else: print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, )) age_dict['UNKNOWN'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys)) sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys]) # Filter 0's sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0} return sextext2_nAnnots if verbose: print('Checking Other Annot Stats') qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids) yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: print('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags) contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags) contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)} contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)} if verbose: print('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - 
len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_aids) num_annots = len(valid_aids) if with_bytes: if verbose: print('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: print('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_aids) _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' #if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex(ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ]) raise # Get contributor statistics contrib_rowids = ibs.get_valid_contrib_rowids() num_contributors = len(contrib_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.dict_str(dict_) return align2(str_) header_block_lines = ( [('+============================'), ] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) = %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] occurrence_block_lines = [ ('--' * num_tabs), ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), 
] annot_per_agesex_block_lines = [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] contrib_block_lines = [ '# Images per contributor = ' + align_dict2(contrib_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contrib_tag_to_nAnnots), '# Quality per contributor = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True), ] if with_contrib else [] img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), #('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contrib_block_lines + imgsize_stat_lines + [('L============================'), ] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: print(info_str2) locals_ = locals() return locals_
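# Hedged sketch of the singleton / multiton bookkeeping inside get_dbinfo, using plain
# numpy on a toy nx2_aids-style list so the counts in the report block are easy to
# reproduce (nx2_aids_toy is made up; the real list comes from ibs.get_name_aids).
import numpy as np
nx2_aids_toy = [[1], [2, 3], [], [4, 5, 6]]
nx2_nAnnots = np.array(list(map(len, nx2_aids_toy)))
multiton_nxs = np.where(nx2_nAnnots > 1)[0]       # names with >1 annot -> array([1, 3])
singleton_nxs = np.where(nx2_nAnnots == 1)[0]     # -> array([0])
unassociated_nxs = np.where(nx2_nAnnots == 0)[0]  # -> array([2])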
def test_model(num_annots, num_names, score_evidence=[], name_evidence=[], other_evidence={}, noquery=False, verbose=None, **kwargs): if verbose is None: verbose = ut.VERBOSE method = kwargs.pop('method', None) model = make_name_model(num_annots, num_names, verbose=verbose, **kwargs) if verbose: model.print_priors(ignore_ttypes=['match', 'score']) model, evidence, soft_evidence = update_model_evidence( model, name_evidence, score_evidence, other_evidence) if verbose and len(soft_evidence) != 0: model.print_priors(ignore_ttypes=['match', 'score'], title='Soft Evidence', color='green') #if verbose: # ut.colorprint('\n --- Soft Evidence ---', 'white') # for ttype, cpds in model.ttype2_cpds.items(): # if ttype != 'match': # for fs_ in ut.ichunks(cpds, 4): # ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]), # 'green') if verbose: ut.colorprint('\n --- Inference ---', 'red') if (len(evidence) > 0 or len(soft_evidence) > 0) and not noquery: evidence = model._ensure_internal_evidence(evidence) query_vars = [] query_vars += ut.list_getattr(model.ttype2_cpds['name'], 'variable') #query_vars += ut.list_getattr(model.ttype2_cpds['match'], 'variable') query_vars = ut.setdiff(query_vars, evidence.keys()) #query_vars = ut.setdiff(query_vars, soft_evidence.keys()) query_results = cluster_query(model, query_vars, evidence, soft_evidence, method) else: query_results = {} factor_list = query_results['factor_list'] if verbose: if verbose: print('+--------') semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list] for type_, factors in ut.group_items(factor_list, semtypes).items(): print('Result Factors (%r)' % (type_,)) factors = ut.sortedby(factors, [f.variables[0] for f in factors]) for fs_ in ut.ichunks(factors, 4): ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]), 'yellow') print('MAP assignments') top_assignments = query_results.get('top_assignments', []) tmp = [] for lbl, val in top_assignments: tmp.append('%s : %.4f' % (ut.repr2(lbl), val)) print(ut.align('\n'.join(tmp), ' :')) print('L_____\n') showkw = dict(evidence=evidence, soft_evidence=soft_evidence, **query_results) pgm_viz.show_model(model, **showkw) return (model, evidence, query_results)
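# Hedged sketch of the result grouping at the end of test_model / temp_model: the
# factors returned by the query are bucketed by the ttype of their first variable
# before printing.  _FakeFactor and var2_ttype are toy stand-ins; the real factors are
# assumed to expose .variables the way the code above uses them.
class _FakeFactor(object):
    def __init__(self, variables):
        self.variables = variables

factor_list = [_FakeFactor(['N0']), _FakeFactor(['S01']), _FakeFactor(['N1'])]
var2_ttype = {'N0': 'name', 'N1': 'name', 'S01': 'score'}
semtypes = [var2_ttype[f.variables[0]] for f in factor_list]
grouped = {}
for ttype, f in zip(semtypes, factor_list):
    grouped.setdefault(ttype, []).append(f)
# grouped keys -> ['name', 'score'], mirroring ut.group_items(factor_list, semtypes)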
QUALITY_INT_TO_TEXT = OrderedDict([ (5, QUAL_EXCELLENT,), (4, QUAL_GOOD,), (3, QUAL_OK,), (2, QUAL_POOR,), # oops forgot 1. will be mapped to poor (0, QUAL_JUNK,), (-1, QUAL_UNKNOWN,), ]) QUALITY_TEXT_TO_INT = ut.invert_dict(QUALITY_INT_TO_TEXT) QUALITY_INT_TO_TEXT[1] = QUAL_JUNK #QUALITY_TEXT_TO_INTS = ut.invert_dict(QUALITY_INT_TO_TEXT) QUALITY_TEXT_TO_INTS = ut.group_items( list(QUALITY_INT_TO_TEXT.keys()), list(QUALITY_INT_TO_TEXT.values())) QUALITY_TEXT_TO_INTS[QUAL_UNKNOWN] = -1 QUALITY_INT_TO_TEXT[None] = QUALITY_INT_TO_TEXT[-1] SEX_INT_TO_TEXT = { None: 'UNKNOWN NAME', -1: 'UNKNOWN SEX', 0: 'Female', 1: 'Male', 2: 'INDETERMINATE SEX', } SEX_TEXT_TO_INT = ut.invert_dict(SEX_INT_TO_TEXT) class PATH_NAMES(object): # NOQA """ Path names for internal IBEIS database """
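# Hedged usage notes for the quality maps above (ints per the OrderedDict): the
# inverse dict QUALITY_TEXT_TO_INT is built *before* the 1 -> QUAL_JUNK alias is
# added, so 1 only shows up in the grouped QUALITY_TEXT_TO_INTS variant.
# >>> QUALITY_TEXT_TO_INT[QUAL_GOOD]
# 4
# >>> sorted(QUALITY_TEXT_TO_INTS[QUAL_JUNK])
# [0, 1]
# >>> QUALITY_INT_TO_TEXT[None] == QUAL_UNKNOWN
# True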
def edges_to_adjacency_list(edges): import utool as ut children_, parents_ = list(zip(*edges)) parent_to_children = ut.group_items(parents_, children_) #to_leafs = {tablename: path_to_leafs(tablename, parent_to_children)} return parent_to_children
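# Hedged usage sketch for edges_to_adjacency_list (toy edges only): given the tuple
# unpacking above, each first element of an edge becomes a key mapping to the grouped
# list of second elements.  Whether that corresponds to child -> parents or
# parent -> children depends on the edge convention of the caller.
# >>> adj = edges_to_adjacency_list([('leaf1', 'mid'), ('leaf2', 'mid'), ('mid', 'root')])
# >>> sorted(adj.items())
# [('leaf1', ['mid']), ('leaf2', ['mid']), ('mid', ['root'])]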
def make_agraph(graph): # FIXME: use this in nx_agraph_layout instead to compartmentalize more import networkx as nx import pygraphviz # Convert to agraph format graph_ = graph.copy() ut.nx_ensure_agraph_color(graph_) # Reduce size to be in inches not pixels # FIXME: make robust to param settings # Hack to make the w/h of the node take the max instead of # dot which takes the minimum shaped_nodes = [n for n, d in graph_.nodes(data=True) if 'width' in d] node_attrs = ut.dict_take(graph_.node, shaped_nodes) width_px = np.array(ut.take_column(node_attrs, 'width')) height_px = np.array(ut.take_column(node_attrs, 'height')) scale = np.array(ut.dict_take_column(node_attrs, 'scale', default=1.0)) width_in = width_px / 72.0 * scale height_in = height_px / 72.0 * scale width_in_dict = dict(zip(shaped_nodes, width_in)) height_in_dict = dict(zip(shaped_nodes, height_in)) nx.set_node_attributes(graph_, 'width', width_in_dict) nx.set_node_attributes(graph_, 'height', height_in_dict) ut.nx_delete_node_attr(graph_, 'scale') # Check for any nodes with groupids node_to_groupid = nx.get_node_attributes(graph_, 'groupid') if node_to_groupid: groupid_to_nodes = ut.group_items(*zip(*node_to_groupid.items())) else: groupid_to_nodes = {} # Initialize agraph format #import utool #utool.embed() ut.nx_delete_None_edge_attr(graph_) agraph = nx.nx_agraph.to_agraph(graph_) # Add subgraph labels # TODO: subgraph attrs for groupid, nodes in groupid_to_nodes.items(): subgraph_attrs = {} #subgraph_attrs = dict(rankdir='LR') #subgraph_attrs['rank'] = 'min' subgraph_attrs['rank'] = 'same' name = 'cluster_' + groupid agraph.add_subgraph(nodes, name, **subgraph_attrs) for node in graph_.nodes(): # force pinning of node points anode = pygraphviz.Node(agraph, node) if anode.attr['pin'] == 'true': if anode.attr['pos'] is not None and not anode.attr['pos'].endswith('!'): import re #utool.embed() ptstr = anode.attr['pos'].strip('[]').strip(' ') ptstr_list = re.split(r'\s+', ptstr) pt_arr = np.array(list(map(float, ptstr_list))) / 72.0 #print('pt_arr = %r' % (pt_arr,)) new_ptstr_list = list(map(str, pt_arr)) new_ptstr = ','.join(new_ptstr_list) + '!' #print('new_ptstr = %r' % (new_ptstr,)) anode.attr['pos'] = new_ptstr return agraph
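# Hedged numeric sketch of the two unit fixes applied in make_agraph: node sizes are
# converted from pixels to the inches graphviz expects (72 px/in, times an optional
# 'scale' attribute), and a whitespace-separated pinned position like '10 20' becomes
# the inch-scaled, '!'-suffixed form dot uses for fixed positions.  Toy values only.
width_px, scale = 144.0, 0.5
width_in = width_px / 72.0 * scale                    # -> 1.0 inch
pt_arr = [float(x) / 72.0 for x in '10 20'.split()]   # -> approx [0.1389, 0.2778]
new_ptstr = ','.join(map(str, pt_arr)) + '!'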