def report_partitioning_statistics(new_reduced_joint):
    """Print statistics about how probability mass is partitioned across the
    label-assignment "cluster types" of a reduced joint distribution.

    Args:
        new_reduced_joint: factor-like object exposing ``.values`` (array of
            probabilities) and ``._row_labels(asindex=True)`` — presumably a
            pgmpy-style factor; TODO confirm against caller.

    Side effects:
        Prints a sorted summary via ``ut.repr2``; returns nothing.
    """
    # compute partitioning statistics
    import vtool as vt
    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    #groupsize = list(map(len, idxs))
    #groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    # A "cluster type" is the sorted histogram of label multiplicities in a
    # state, e.g. (1, 2) means one label used once and another used twice.
    clusterstats = [tuple(sorted(list(ut.dict_hist(a).values())))
                    for a in all_states]
    # Group each state's probability value by its cluster type
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(),
                                  clusterstats)
    #probs_assigned_to_clustertype = [(
    #    sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #    for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [(
        ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()]
    # Sort cluster types by their maximum (rounded) assigned probability
    sortx = ut.argsort([max(c[0].keys())
                        for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype,
                                            sortx)
    # This list of 2-tuples with the first item being the unique
    # probabilies that are assigned to a cluster type along with the number
    # of times they were assigned. A cluster type is the second item. Every
    # number represents how many annotations were assigned to a specific
    # label. The length of that list is the number of total labels. For
    # all low scores you will see [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]]
    # indicating that that the assignment of everyone to a different label happend once
    # where the probability was somenum and a 800 times where the probability was 0.
    #print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    #z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
def annot_to_class_feats2(aid, aid2_nid, top=None):
    """Build candidate (aid, nid) pairs and a feature row for each name.

    For every name with at least one exemplar, draws ``num_exemplars``
    scores from the toy distribution matching whether the name is the
    annotation's true name, and keeps the maximum (clipped at zero).

    Args:
        aid: query annotation id.
        aid2_nid: mapping from annotation id to its true name id.
        top (int, optional): keep only the ``top`` best-ranked rows.

    Returns:
        tuple: (pair_list, feat_list) — pairs and a (score, rank,
        num_exemplars) feature matrix, both ordered by rank.

    Note: reads ``unique_nids``, ``nid2_nexemp``, ``toy_params`` and ``rng``
    from the enclosing scope.
    """
    pairs = []
    scores = []
    nexemplars = []
    for nid in unique_nids:
        num_exemplars = nid2_nexemp.get(nid, 0)
        # Names with no exemplars cannot be matched against
        if num_exemplars == 0:
            continue
        is_true_name = (aid2_nid[aid] == nid)
        mu, sigma = ut.dict_take(toy_params[is_true_name], ['mu', 'sigma'])
        best_draw = rng.normal(mu, sigma, size=num_exemplars).max()
        pairs.append((aid, nid))
        scores.append(np.clip(best_draw, 0, np.inf))
        nexemplars.append(num_exemplars)
    # Rank candidates by descending score, then order rows by rank
    ranks = ut.argsort(scores, reverse=True)
    feat_list = np.array([scores, ranks, nexemplars]).T
    order = np.argsort(ranks)
    feat_list = feat_list.take(order, axis=0)
    pair_list = np.array(pairs).take(order, axis=0)
    if top is not None:
        feat_list = feat_list[:top]
        pair_list = pair_list[:top]
    return pair_list, feat_list
def fix_splits_interaction(ibs):
    """
    python -m wbia fix_splits_interaction --show

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import wbia.guitool as gt
        >>> gt.ensure_qtapp()
        >>> win = fix_splits_interaction(ibs)
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> gt.qtapp_loop(qwin=win)
    """
    # Tags that mark a name group as needing to be split apart
    split_props = {'splitcase', 'photobomb'}
    # Group all valid annotations by name
    all_annot_groups = ibs._annot_groups(
        ibs.group_annots_by_name(ibs.get_valid_aids())[0])
    # A group needs review if any of its match tags intersects split_props
    all_has_split = [
        len(split_props.intersection(ut.flatten(tags))) > 0
        for tags in all_annot_groups.match_tags
    ]
    tosplit_annots = ut.compress(all_annot_groups.annots_list, all_has_split)
    # Largest groups first (ascending size sort, then reversed)
    tosplit_annots = ut.take(tosplit_annots,
                             ut.argsort(ut.lmap(len, tosplit_annots)))[::-1]
    if ut.get_argflag('--reverse'):
        tosplit_annots = tosplit_annots[::-1]
    logger.info('len(tosplit_annots) = %r' % (len(tosplit_annots), ))
    aids_list = [a.aids for a in tosplit_annots]
    from wbia.algo.graph import graph_iden
    from wbia.viz import viz_graph2
    import wbia.guitool as gt
    import wbia.plottool as pt
    pt.qt4ensure()
    gt.ensure_qtapp()
    # Interactively step through each group, opening a review widget per group
    for aids in ut.InteractiveIter(aids_list):
        infr = graph_iden.AnnotInference(ibs, aids)
        infr.initialize_graph()
        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode='rereview')
        win.populate_edge_model()
        win.show()
    # NOTE(review): returns the last widget shown; raises NameError if
    # aids_list is empty because `win` is never bound in that case.
    return win
def compute_order(rmi):
    """Return the evaluation order of nodes from this input node to the sink.

    Candidate nodes are all nodes between ``rmi.node`` and ``rmi.sink`` in
    the expanded-input graph. They are ranked on the reversed DAG and
    returned in reverse rank order, with string node names used as a
    secondary key so tie-breaking is deterministic across runs.
    """
    graph = rmi.exi_graph
    candidates = list(ut.nx_all_nodes_between(graph, rmi.node, rmi.sink))
    ranks = ut.nx_dag_node_rank(graph.reverse(), candidates)
    names = [str(node) for node in candidates]
    # lexsort on names breaks rank ties for a consistent ordering
    order = ut.argsort(ranks, names)[::-1]
    return ut.take(candidates, order)
def test_sharpness():
    """Interactively display query chips from sharpest to blurriest.

    Opens the 'seaturtles' database (by default), scores every query chip
    by average contrast as a sharpness proxy, and steps through the chips
    in decreasing sharpness, showing each one.

    Side effects:
        Opens a database and GUI windows; returns nothing.
    """
    import ibeis
    # Fixed local-name typo: was 'defaltdb'
    defaultdb = 'seaturtles'
    a = ['default']
    ibs = ibeis.opendb(defaultdb=defaultdb)
    ibs, qaids, daids = ibeis.testdata_expanded_aids(ibs=ibs, a=a)

    from vtool import quality_classifier
    # Higher average contrast is used as a proxy for sharpness
    contrast_list = [quality_classifier.compute_average_contrast(chip)
                     for chip in ibs.get_annot_chips(qaids)]
    sortx = ut.argsort(contrast_list)[::-1]
    sharpest_qaids = ut.take(qaids, sortx)
    # (removed dead `aid = sharpest_qaids[0]`; the loop rebinds `aid`)

    ut.ensure_pylab_qt4()
    from ibeis import viz
    import plottool as pt
    # Step through the chips interactively, sharpest first
    for aid in ut.InteractiveIter(sharpest_qaids):
        viz.show_chip(ibs, aid, annot=False, nokpts=True)
        pt.update()
def flat_compute_order(inputs): """ This is basically the scheduler TODO: We need to verify the correctness of this logic. It seems to not be deterministic between versions of python. CommandLine: python -m dtool.input_helpers flat_compute_order Example: >>> # xdoctest: +REQUIRES(--fixme) >>> from wbia.dtool.input_helpers import * # NOQA >>> from wbia.dtool.example_depcache2 import * # NOQA >>> depc = testdata_depc4() >>> inputs = depc['feat'].rootmost_inputs.total_expand() >>> flat_compute_order = inputs.flat_compute_order() >>> result = ut.repr2(flat_compute_order) ... >>> print(result) [chip[t, t:1, 1:1], probchip[t, t:1, 1:1], feat[t, t:1]] """ # Compute the order in which all noes must be evaluated import networkx as nx # NOQA ordered_compute_nodes = [ rmi.compute_order() for rmi in inputs.rmi_list ] flat_node_order_ = ut.unique(ut.flatten(ordered_compute_nodes)) rgraph = inputs.exi_graph.reverse() toprank = ut.nx_topsort_rank(rgraph, flat_node_order_) sortx = ut.argsort(toprank)[::-1] flat_compute_order = ut.take(flat_node_order_, sortx) # Inputs are pre-computed. for rmi in inputs.rmi_list: try: flat_compute_order.remove(rmi.node) except ValueError as ex: ut.printex(ex, 'something is wrong', keys=['rmi.node']) raise return flat_compute_order
def report_partitioning_statistics(new_reduced_joint):
    """Log statistics about how probability mass is partitioned across the
    label-assignment "cluster types" of a reduced joint distribution.

    Args:
        new_reduced_joint: factor-like object exposing ``.values`` (array of
            probabilities) and ``._row_labels(asindex=True)`` — presumably a
            pgmpy-style factor; TODO confirm against caller.

    Side effects:
        Logs a sorted summary via ``logger.info``; returns nothing.
    """
    # compute partitioning statistics
    import vtool as vt

    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    # groupsize = list(map(len, idxs))
    # groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    # A "cluster type" is the sorted histogram of label multiplicities in a
    # state, e.g. (1, 2) means one label used once and another used twice.
    clusterstats = [
        tuple(sorted(list(ut.dict_hist(a).values()))) for a in all_states
    ]
    # Group each state's probability value by its cluster type
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(),
                                  clusterstats)
    # probs_assigned_to_clustertype = [(
    #    sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #    for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [
        (ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()
    ]
    # Sort cluster types by their maximum (rounded) assigned probability
    sortx = ut.argsort(
        [max(c[0].keys()) for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype,
                                            sortx)
    # This list of 2-tuples with the first item being the unique
    # probabilies that are assigned to a cluster type along with the number
    # of times they were assigned. A cluster type is the second item. Every
    # number represents how many annotations were assigned to a specific
    # label. The length of that list is the number of total labels. For
    # all low scores you will see [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]]
    # indicating that that the assignment of everyone to a different label happend once
    # where the probability was somenum and a 800 times where the probability was 0.
    # logger.info(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    # z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    logger.info(
        ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2,
                 sorted_=True))
def test_visualize_vocab_interact():
    """
    python -m ibeis.new_annots --exec-test_visualize_vocab_interact --show

    Example:
        >>> from ibeis.new_annots import *  # NOQA
        >>> test_visualize_vocab_interact()
        >>> ut.show_if_requested()
    """
    import plottool as pt
    pt.qt4ensure()
    ibs, aid_list, vocab = testdata_vocab()
    #aid_list = aid_list[0:1]
    # Stack all features and assign them to vocabulary words
    fstack = StackedFeatures(ibs, aid_list)
    nAssign = 2  # number of words each feature is assigned to
    invassign = fstack.inverted_assignment(vocab, nAssign)
    # Visit words in decreasing order of assigned-feature count
    sortx = ut.argsort(invassign.num_list)[::-1]
    wx_list = ut.take(invassign.wx_list, sortx)
    # NOTE(review): dead assignment — `wx` is rebound by the loop below
    wx = wx_list[0]
    fnum = 1
    for wx in ut.InteractiveIter(wx_list):
        visualize_vocab_word(ibs, invassign, wx, fnum)
def test_sharpness():
    """Interactively display query chips from sharpest to blurriest.

    Opens the 'seaturtles' database (by default), scores every query chip
    by average contrast as a sharpness proxy, and steps through the chips
    in decreasing sharpness, showing each one.
    """
    import ibeis
    # NOTE(review): local name is a typo for 'defaultdb'
    defaltdb = 'seaturtles'
    a = ['default']
    ibs = ibeis.opendb(defaultdb=defaltdb)
    ibs, qaids, daids = ibeis.testdata_expanded_aids(ibs=ibs, a=a)

    from vtool import quality_classifier
    # Higher average contrast is used as a proxy for sharpness
    contrast_list = [
        quality_classifier.compute_average_contrast(chip)
        for chip in ibs.get_annot_chips(qaids)
    ]
    sortx = ut.argsort(contrast_list)[::-1]
    sharpest_qaids = ut.take(qaids, sortx)

    # NOTE(review): dead assignment — `aid` is rebound by the loop below
    aid = sharpest_qaids[0]
    ut.ensure_pylab_qt4()
    from ibeis import viz
    import plottool as pt
    # Step through the chips interactively, sharpest first
    for aid in ut.InteractiveIter(sharpest_qaids):
        viz.show_chip(ibs, aid, annot=False, nokpts=True)
        pt.update()
def make_inference(infr):
    """Construct the list of candidate annotation pairs needing manual review.

    Combines posterior name probabilities (``make_prob_names``) with per-name
    match scores from each chip match to produce
    ``infr.needs_review_list`` — tuples of
    (qaid, daid, p_same, confidence, raw_score) sorted ascending by
    confidence — and stores ``infr.cluster_tuples``.
    """
    cm_list = infr.cm_list
    unique_nids, prob_names = infr.make_prob_names()
    cluster_tuples = infr.make_clusters()

    # Make pair list for output
    if infr.user_feedback is not None:
        keys = list(
            zip(infr.user_feedback['aid1'], infr.user_feedback['aid2']))
        feedback_lookup = ut.make_index_lookup(keys)
        user_feedback = infr.user_feedback
        p_bg = 0  # background probability mass for non-comparable pairs
        user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
        # p(same) = p(match) weighted by comparability, plus background mass
        part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
        part2 = p_bg * user_feedback['p_notcomp']
        p_same_list = part1 + part2
    else:
        feedback_lookup = {}
        # NOTE(review): no-op expression statement — likely leftover edit
        infr.user_feedback
    needs_review_list = []
    num_top = 4  # number of top posterior names to consider per query
    for cm, row in zip(cm_list, prob_names):
        # Find top scoring names for this chip match in the posterior distribution
        idxs = row.argsort()[::-1]
        top_idxs = idxs[:num_top]
        nids = ut.take(unique_nids, top_idxs)
        # Find the matched annotations in the pairwise prior distributions
        nidxs = ut.dict_take(cm.nid2_nidx, nids, None)
        name_groupxs = ut.take(cm.name_groupxs, ut.filter_Nones(nidxs))
        daids_list = ut.take(cm.daid_list, name_groupxs)
        for daids in daids_list:
            # NOTE(review): result discarded — this call appears to be dead code
            ut.take(cm.score_list, ut.take(cm.daid2_idx, daids))
            # Normalize annotation scores to a distribution
            scores_all = cm.annot_score_list / cm.annot_score_list.sum()
            idxs = ut.take(cm.daid2_idx, daids)
            scores = scores_all.take(idxs)
            raw_scores = cm.score_list.take(idxs)
            # Keep only the best-scoring annotation within this name group
            scorex = scores.argmax()
            raw_score = raw_scores[scorex]
            daid = daids[scorex]
            import scipy.special
            # SUPER HACK: these are not probabilities
            # TODO: set a and b based on dbsize and param configuration
            # python -m plottool.draw_func2 --exec-plot_func --show --range=0,3 --func="lambda x: scipy.special.expit(2 * x - 2)"
            #a = 2.0
            a = 1.5
            b = 2
            # Squash the raw score into (0, 1) with a logistic sigmoid
            p_same = scipy.special.expit(b * raw_score - a)
            #confidence = scores[scorex]
            #p_diff = 1 - p_same
            #decision = 'same' if confidence > thresh else 'diff'
            #confidence = p_same if confidence > thresh else p_diff
            #tup = (cm.qaid, daid, decision, confidence, raw_score)
            # Confidence grows quadratically with distance from p_same == 0.5
            confidence = (2 * np.abs(0.5 - p_same))**2
            #if infr.user_feedback is not None:
            #    import utool
            #    utool.embed()
            key = (cm.qaid, daid)
            fb_idx = feedback_lookup.get(key)
            if fb_idx is not None:
                # User feedback overrides the heuristic confidence
                confidence = p_same_list[fb_idx]
            tup = (cm.qaid, daid, p_same, confidence, raw_score)
            needs_review_list.append(tup)

    # Sort resulting list by confidence
    sortx = ut.argsort(ut.take_column(needs_review_list, 3))
    needs_review_list = ut.take(needs_review_list, sortx)

    infr.needs_review_list = needs_review_list
    infr.cluster_tuples = cluster_tuples
def initialize_graph_and_model(infr):
    """ Unused in internal split stuff

    pt.qt4ensure()
    layout_info = pt.show_nx(graph, as_directed=False, fnum=1,
                             layoutkw=dict(prog='neato'), use_image=True,
                             verbose=0)
    ax = pt.gca()
    pt.zoom_factory()
    pt.interactions.PanEvents()
    """
    #import networkx as nx
    #import itertools
    cm_list = infr.cm_list
    # NOTE(review): first assignment is immediately overwritten; `hack` is
    # a debugging toggle that truncates the problem size when True.
    hack = True
    hack = False
    if hack:
        cm_list = cm_list[:10]
    qaid_list = [cm.qaid for cm in cm_list]
    daids_list = [cm.daid_list for cm in cm_list]
    unique_aids = sorted(ut.list_union(*daids_list + [qaid_list]))
    if hack:
        unique_aids = sorted(ut.isect(unique_aids, qaid_list))
    # Map each annotation id to a dense node index
    aid2_aidx = ut.make_index_lookup(unique_aids)
    # Construct K-broken graph
    edges = []
    edge_weights = []
    #top = (infr.qreq_.qparams.K + 1) * 2
    #top = (infr.qreq_.qparams.K) * 2
    top = (infr.qreq_.qparams.K + 2)
    for count, cm in enumerate(cm_list):
        qidx = aid2_aidx[cm.qaid]
        score_list = cm.annot_score_list
        # Keep only the `top` highest-scoring database annotations
        sortx = ut.argsort(score_list)[::-1]
        score_list = ut.take(score_list, sortx)[:top]
        daid_list = ut.take(cm.daid_list, sortx)[:top]
        for score, daid in zip(score_list, daid_list):
            # Only connect to annotations that are themselves queries
            if daid not in qaid_list:
                continue
            didx = aid2_aidx[daid]
            edge_weights.append(score)
            edges.append((qidx, didx))
    # make symmetric
    directed_edges = dict(zip(edges, edge_weights))
    # Find edges that point in both directions; average their weights
    undirected_edges = {}
    for (u, v), w in directed_edges.items():
        if (v, u) in undirected_edges:
            undirected_edges[(v, u)] += w
            undirected_edges[(v, u)] /= 2
        else:
            undirected_edges[(u, v)] = w
    edges = list(undirected_edges.keys())
    edge_weights = list(undirected_edges.values())
    nodes = list(range(len(unique_aids)))
    nid_labeling = infr.qreq_.ibs.get_annot_nids(unique_aids)
    labeling = ut.rebase_labels(nid_labeling)
    import networkx as nx
    from ibeis.viz import viz_graph
    set_node_attrs = nx.set_node_attributes
    set_edge_attrs = nx.set_edge_attributes

    # Create match-based graph structure
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)

    # Important properties
    # NOTE(review): duplicates the nid lookup / rebase done above
    nid_list = infr.qreq_.ibs.get_annot_nids(unique_aids)
    labeling = ut.rebase_labels(nid_list)

    set_node_attrs(graph, 'name_label', dict(zip(nodes, labeling)))
    set_edge_attrs(graph, 'weight', dict(zip(edges, edge_weights)))

    # Visualization properties
    import plottool as pt
    ax2_aid = ut.invert_dict(aid2_aidx)
    set_node_attrs(graph, 'aid', ax2_aid)
    viz_graph.ensure_node_images(infr.qreq_.ibs, graph)
    set_node_attrs(graph, 'framewidth', dict(zip(nodes, [3.0] * len(nodes))))
    set_node_attrs(graph, 'framecolor',
                   dict(zip(nodes, [pt.DARK_BLUE] * len(nodes))))
    ut.color_nodes(graph, labelattr='name_label')
    edge_colors = pt.scores_to_color(np.array(edge_weights),
                                     cmap_='viridis')
    #import utool
    #utool.embed()
    #edge_colors = [pt.color_funcs.ensure_base255(color) for color in edge_colors]
    #print('edge_colors = %r' % (edge_colors,))
    set_edge_attrs(graph, 'color', dict(zip(edges, edge_colors)))

    # Build inference model
    from ibeis.algo.hots import graph_iden
    #graph_iden.rrr()
    model = graph_iden.InfrModel(graph)
    #model = graph_iden.InfrModel(len(nodes), edges, edge_weights, labeling=labeling)
    infr.model = model
def make_inference(infr):
    """Construct the list of candidate annotation pairs needing manual review.

    Combines posterior name probabilities (``make_prob_names``) with per-name
    match scores from each chip match to produce
    ``infr.needs_review_list`` — tuples of
    (qaid, daid, p_same, confidence, raw_score) sorted ascending by
    confidence — and stores ``infr.cluster_tuples``.
    """
    cm_list = infr.cm_list
    unique_nids, prob_names = infr.make_prob_names()
    cluster_tuples = infr.make_clusters()

    # Make pair list for output
    if infr.user_feedback is not None:
        keys = list(zip(infr.user_feedback['aid1'],
                        infr.user_feedback['aid2']))
        feedback_lookup = ut.make_index_lookup(keys)
        user_feedback = infr.user_feedback
        p_bg = 0  # background probability mass for non-comparable pairs
        user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
        # p(same) = p(match) weighted by comparability, plus background mass
        part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
        part2 = p_bg * user_feedback['p_notcomp']
        p_same_list = part1 + part2
    else:
        feedback_lookup = {}
        # NOTE(review): no-op expression statement — likely leftover edit
        infr.user_feedback
    needs_review_list = []
    num_top = 4  # number of top posterior names to consider per query
    for cm, row in zip(cm_list, prob_names):
        # Find top scoring names for this chip match in the posterior distribution
        idxs = row.argsort()[::-1]
        top_idxs = idxs[:num_top]
        nids = ut.take(unique_nids, top_idxs)
        # Find the matched annotations in the pairwise prior distributions
        nidxs = ut.dict_take(cm.nid2_nidx, nids, None)
        name_groupxs = ut.take(cm.name_groupxs, ut.filter_Nones(nidxs))
        daids_list = ut.take(cm.daid_list, name_groupxs)
        for daids in daids_list:
            # NOTE(review): result discarded — this call appears to be dead code
            ut.take(cm.score_list, ut.take(cm.daid2_idx, daids))
            # Normalize annotation scores to a distribution
            scores_all = cm.annot_score_list / cm.annot_score_list.sum()
            idxs = ut.take(cm.daid2_idx, daids)
            scores = scores_all.take(idxs)
            raw_scores = cm.score_list.take(idxs)
            # Keep only the best-scoring annotation within this name group
            scorex = scores.argmax()
            raw_score = raw_scores[scorex]
            daid = daids[scorex]
            import scipy.special
            # SUPER HACK: these are not probabilities
            # TODO: set a and b based on dbsize and param configuration
            # python -m plottool.draw_func2 --exec-plot_func --show --range=0,3 --func="lambda x: scipy.special.expit(2 * x - 2)"
            #a = 2.0
            a = 1.5
            b = 2
            # Squash the raw score into (0, 1) with a logistic sigmoid
            p_same = scipy.special.expit(b * raw_score - a)
            #confidence = scores[scorex]
            #p_diff = 1 - p_same
            #decision = 'same' if confidence > thresh else 'diff'
            #confidence = p_same if confidence > thresh else p_diff
            #tup = (cm.qaid, daid, decision, confidence, raw_score)
            # Confidence grows quadratically with distance from p_same == 0.5
            confidence = (2 * np.abs(0.5 - p_same)) ** 2
            #if infr.user_feedback is not None:
            #    import utool
            #    utool.embed()
            key = (cm.qaid, daid)
            fb_idx = feedback_lookup.get(key)
            if fb_idx is not None:
                # User feedback overrides the heuristic confidence
                confidence = p_same_list[fb_idx]
            tup = (cm.qaid, daid, p_same, confidence, raw_score)
            needs_review_list.append(tup)

    # Sort resulting list by confidence
    sortx = ut.argsort(ut.take_column(needs_review_list, 3))
    needs_review_list = ut.take(needs_review_list, sortx)

    infr.needs_review_list = needs_review_list
    infr.cluster_tuples = cluster_tuples