def _print_previous_loop_statistics(infr, count):
    """
    Print summary statistics about what happened in the last ``count``
    iterations of the loop, as recorded in ``infr.metrics_list``.

    Args:
        infr: the inference object; must provide ``metrics_list``,
            ``params`` and a ``print`` method.
        count (int): number of most-recent metric records to summarize.

    Side effects only: everything is reported via ``infr.print``.
    """
    # Most recent `count` metric records (dicts with keys like
    # 'recovering', 'test_action', 'action', 'pred_decision', 'user_id').
    history = infr.metrics_list[-count:]
    # Run-length encode the 'recovering' flag: each (flag, run_length) pair
    # counts one contiguous block, then keep only the blocks where the flag
    # was truthy.
    # NOTE(review): relies on ub.group_items accepting a single list of
    # (key, value) pairs — confirm against the ubelt version pinned here.
    recover_blocks = ub.group_items([
        (k, sum(1 for i in g))
        for k, g in it.groupby(util.take_column(history, 'recovering'))
    ]).get(True, [])
    infr.print(
        ('Recovery mode entered {} times, '
         'made {} recovery decisions.').format(
             len(recover_blocks), sum(recover_blocks)), color='green')
    # Histogram of which test actions were taken in this window.
    testaction_hist = ub.dict_hist(util.take_column(history, 'test_action'))
    infr.print('Test Action Histogram: {}'.format(
        ub.repr2(testaction_hist, si=True)), color='yellow')
    if infr.params['inference.enabled']:
        # 'action' entries appear to be collections; frozenset makes them
        # hashable (and order-insensitive) so they can be histogrammed.
        action_hist = ub.dict_hist(
            util.emap(frozenset, util.take_column(history, 'action')))
        infr.print('Inference Action Histogram: {}'.format(
            ub.repr2(action_hist, si=True)), color='yellow')
    # Histogram of predicted decisions.
    infr.print('Decision Histogram: {}'.format(
        ub.repr2(ub.dict_hist(util.take_column(history, 'pred_decision')),
                 si=True)), color='yellow')
    # Histogram of which users (or algorithms) made each review.
    infr.print('User Histogram: {}'.format(
        ub.repr2(ub.dict_hist(util.take_column(history, 'user_id')),
                 si=True)), color='yellow')
def _make_lnbnn_scores(infr, edges):
    """
    Build normalized LNBNN scores for ``edges``.

    Looks up the precomputed 'score' for each edge, maps missing values to
    NaN, and scales everything by the maximum score (inf-norm).

    Returns:
        np.ndarray: normalized scores aligned with the edges known to
            ``infr._get_cm_edge_data``.
    """
    data_lookup = infr._get_cm_edge_data(edges)
    edges = list(data_lookup.keys())
    raw_scores = list(util.take_column(data_lookup.values(), 'score'))
    # Edges without a score become NaN so array math stays well-defined.
    raw_scores = np.array(util.replace_nones(raw_scores, np.nan))
    # take the inf-norm
    normscores = raw_scores / util.safe_max(raw_scores, nans=False)
    return normscores
def apply_match_scores(infr):
    """
    Applies precomputed matching scores to edges that already exist in the
    graph. Typically you should run infr.apply_match_edges() before running
    this.

    Sets the 'score', 'rank', and 'normscore' edge attributes on
    ``infr.graph`` (any previous values for those attributes are removed
    first). Edges with no precomputed score get NaN. No-op when
    ``infr.cm_list`` is None.

    Example:
        >>> # ENABLE_DOCTEST
        >>> infr = testdata_infr('PZ_MTEST')
        >>> infr.exec_matching()
        >>> infr.apply_match_edges()
        >>> infr.apply_match_scores()
        >>> infr.get_edge_attrs('score')
    """
    if infr.cm_list is None:
        infr.print('apply_match_scores - no scores to apply!')
        return
    infr.print('apply_match_scores', 1)
    edges = list(infr.graph.edges())
    edge_to_data = infr._get_cm_edge_data(edges)

    # Remove existing attrs so stale values never survive a re-apply.
    util.nx_delete_edge_attr(infr.graph, 'score')
    util.nx_delete_edge_attr(infr.graph, 'rank')
    util.nx_delete_edge_attr(infr.graph, 'normscore')

    # Realign on the edges actually known to the chip-match data.
    edges = list(edge_to_data.keys())
    edge_scores = list(util.take_column(edge_to_data.values(), 'score'))
    # Missing scores become NaN so the vectorized normalization is safe.
    edge_scores = util.replace_nones(edge_scores, np.nan)
    edge_scores = np.array(edge_scores)
    edge_ranks = np.array(util.take_column(edge_to_data.values(), 'rank'))
    # take the inf-norm
    normscores = edge_scores / util.safe_max(edge_scores, nans=False)

    # Add new attrs
    infr.set_edge_attrs('score', ub.dzip(edges, edge_scores))
    infr.set_edge_attrs('rank', ub.dzip(edges, edge_ranks))
    # Use ub.dzip like the other attributes (was an inconsistent
    # dict(zip(...)) — same mapping, consistent style).
    infr.set_edge_attrs('normscore', ub.dzip(edges, normscores))
def _get_cm_agg_aid_ranking(infr, cc):
    """
    Rank database aids for the component ``cc`` by their best match score.

    Every query aid in ``cc`` contributes the scores of its top matches;
    each candidate aid is then ranked by the single best score it received,
    highest first.

    Returns:
        list: candidate aids ordered from best to worst.
    """
    cm_lookup = {cm.qaid: cm for cm in infr.cm_list}
    # Collect every score each candidate aid received from any query in cc.
    scores_per_daid = {}
    for qaid in cc:
        match = cm_lookup[qaid]
        # should we be doing nids?
        for daid, score in zip(match.get_top_aids(), match.get_top_scores()):
            scores_per_daid.setdefault(daid, []).append(score)
    # Sort by (best score, aid) descending. The (score, aid) keys are
    # unique because aids are dict keys, so reverse=True is exactly
    # equivalent to the ascending-sort-then-reverse of the original.
    best_first = sorted(
        ((max(scores), daid) for daid, scores in scores_per_daid.items()),
        reverse=True)
    return [daid for _, daid in best_first]
def edge_attr_df(infr, key, edges=None, default=ub.NoParam):
    """
    Constructs a pandas Series of the current ``key`` attribute values,
    indexed by a (aid1, aid2) MultiIndex.

    Args:
        infr: inference object providing ``gen_edge_attrs``.
        key (str): name of the edge attribute to extract.
        edges: optional edge selection; may already be a pd.MultiIndex,
            an iterable of edge tuples, or None for all attributed edges.
        default: fallback value passed through to ``gen_edge_attrs``.

    Returns:
        pd.Series: named ``key``, indexed by ('aid1', 'aid2').
    """
    edge_states = infr.gen_edge_attrs(key, edges=edges, default=default)
    edge_states = list(edge_states)
    if isinstance(edges, pd.MultiIndex):
        # Caller already built the index; trust its alignment.
        index = edges
    else:
        if edges is None:
            # Derive the index from the edges that actually have the attr.
            edges_ = util.take_column(edge_states, 0)
        else:
            edges_ = list(map(tuple, util.aslist(edges)))
        index = pd.MultiIndex.from_tuples(edges_, names=('aid1', 'aid2'))
    records = util.itake_column(edge_states, 1)
    # BUG FIX: pd.Series.from_array was deprecated in pandas 0.24 and
    # removed in 1.0; build the Series directly (records is an iterator,
    # so materialize it first).
    edge_df = pd.Series(list(records), index=index, name=key)
    return edge_df