def _set_pos_redun_flag(infr, nid, flag):
    """
    Flags or unflags an nid as positive redundant.

    Args:
        infr: inference object. This reads / updates ``pos_redun_nids``,
            ``pos_graph``, ``params`` and (optionally) edge attributes.
        nid: label of the positive component to update
        flag: truthy to flag the component, falsy to unflag it

    Notes:
        Flagging removes the internal priority of the component and
        unflagging reinstates it. When ``inference.update_attrs`` is enabled
        the ``inferred_state`` of every edge inside the component is set to
        ``'same'`` (flag) or ``None`` (unflag).
    """
    already_flagged = nid in infr.pos_redun_nids

    if flag:
        if already_flagged:
            msg, level = 'pos_redun flag=T nid=%r (already done)', 6
        else:
            msg, level = 'pos_redun flag=T nid=%r', 5
        infr.print(msg % (nid,), level)

        infr.pos_redun_nids.add(nid)
        component = infr.pos_graph.component(nid)
        infr.remove_internal_priority(component)
        new_state = 'same'
    else:
        if already_flagged:
            msg, level = 'pos_redun flag=F nid=%r', 5
        else:
            msg, level = 'pos_redun flag=F nid=%r (already done)', 6
        infr.print(msg % (nid,), level)

        component = infr.pos_graph.component(nid)
        infr.pos_redun_nids -= {nid}
        infr.reinstate_internal_priority(component)
        new_state = None

    if infr.params['inference.update_attrs']:
        # Broadcast the single state over every edge inside the component
        internal_edges = nxu.edges_inside(infr.graph, component)
        infr.set_edge_attrs(
            'inferred_state', ub.dzip(internal_edges, [new_state])
        )
def test_dzip_errors():
    """
    Check the error types raised by ``ub.dzip`` for bad input: a non-iterable
    argument gives ``TypeError`` and misaligned lengths give ``ValueError``.
    """
    # A bare scalar in either position is rejected
    non_iterable_cases = [
        ([1], 2),
        (1, [2]),
    ]
    for items1, items2 in non_iterable_cases:
        with pytest.raises(TypeError):
            ub.dzip(items1, items2)

    # Sequences whose lengths cannot be aligned are rejected
    misaligned_cases = [
        ([1, 2, 3], []),
        ([], [4, 5, 6]),
        ([1, 2, 3], [4, 5]),
    ]
    for items1, items2 in misaligned_cases:
        with pytest.raises(ValueError):
            ub.dzip(items1, items2)
def on_between(infr, edge, decision, prev_decision, nid1, nid2, merge_nid=None):
    """
    Callback when a review is made between two PCCs

    Args:
        edge: the edge reviewed (not read by this function)
        decision: the new decision
        prev_decision: the old decision
        nid1: label of the PCC on one side of the edge
        nid2: label of the PCC on the other side of the edge
        merge_nid: label of the merged PCC if this decision caused a merge,
            otherwise None

    Returns:
        list: ``['between', tag]`` where tag is ``'merge'`` when a merge
            occurred, otherwise the kind of evidence that was added.
    """
    infr._update_neg_metagraph(
        decision, prev_decision, nid1, nid2, merge_nid=merge_nid)

    if merge_nid is None:
        # No merge; report what kind of evidence the decision contributes
        if decision == NEGTV:
            tag = 'neg-evidence'
        elif decision == INCMP:
            tag = 'incomp-evidence'
        else:
            tag = 'other-evidence'
        return ['between', tag]

    # A merge occurred
    if infr.params['inference.update_attrs']:
        merged_cc = infr.pos_graph.component(merge_nid)
        infr.set_node_attrs('name_label', ub.dzip(merged_cc, [merge_nid]))
    # FIXME: this state is ugly
    return ['between', 'merge']
def update_neighbors(self):
    """
    Rebuild the neighbor lookup and the per-row speed / direction arrays.

    Sets:
        self.rx_to_neighb_cxs: maps a row index of ``self.pos`` to the
            indexes of the rows whose pairwise distance to it is below
            ``self.config['perception_thresh']``. Rows without any such
            neighbor get no entry.
        self.speeds: norm of each row of ``self.vel``
        self.dirs: ``self.vel`` divided row-wise by ``self.speeds``
    """
    # TODO: this should be done with a fast spatial index, but
    # unfortunately I don't see any existing implementations that make it
    # easy to support moving points.

    # NOTE(review): assumes ``pdist`` returns distances in the same pair
    # order as ``np.triu_indices(n, k=1)`` (true for scipy's condensed
    # form) - confirm which pdist is imported.
    condensed_dists = pdist(self.pos)
    is_close = condensed_dists < self.config['perception_thresh']
    pair_rows, pair_cols = np.triu_indices(len(self.pos), k=1)
    close_rows = pair_rows[is_close]
    close_cols = pair_cols[is_close]

    # Each close pair is listed in both directions so that the relation is
    # symmetric
    src_idxs = np.r_[close_rows, close_cols]
    dst_idxs = np.r_[close_cols, close_rows]

    unique_srcs, groupxs = kwarray.group_indices(src_idxs)
    grouped_dsts = kwarray.apply_grouping(dst_idxs, groupxs)
    self.rx_to_neighb_cxs = ub.dzip(unique_srcs, grouped_dsts)

    # Compute speed and direction of every boid
    self.speeds = np.linalg.norm(self.vel, axis=1)
    self.dirs = self.vel / self.speeds[:, None]
def init_test_mode(infr):
    """
    Put ``infr`` into test mode and initialize its ground-truth bookkeeping.

    Sets the following attributes on ``infr``:

        * ``test_mode``: True
        * ``metrics_list``: empty list
        * ``test_state``: zeroed counters plus a ``'confusion'`` slot
        * ``test_gt_pos_graph``: a ``DynConnGraph`` containing ``infr.aids``
          as nodes
        * ``nid_to_gt_cc``: ``infr.aids`` grouped by ``infr.orig_name_labels``
        * ``node_truth``: maps each aid to its original name label
        * ``real_n_pcc_mst_edges``: sum of ``len(cc) - 1`` over the groups in
          ``nid_to_gt_cc``
    """
    from graphid.core import nx_dynamic_graph
    infr.print('init_test_mode')
    infr.test_mode = True
    # infr.edge_truth = {}
    infr.metrics_list = []
    infr.test_state = {
        'n_decision': 0,
        'n_algo': 0,
        'n_manual': 0,
        'n_true_merges': 0,
        'n_error_edges': 0,
        'confusion': None,
    }
    infr.test_gt_pos_graph = nx_dynamic_graph.DynConnGraph()
    infr.test_gt_pos_graph.add_nodes_from(infr.aids)
    # Group and label the aids once; previously ``metrics_list`` and
    # ``nid_to_gt_cc`` were each assigned a second time with the same values.
    infr.nid_to_gt_cc = ub.group_items(infr.aids, infr.orig_name_labels)
    infr.node_truth = ub.dzip(infr.aids, infr.orig_name_labels)
    infr.real_n_pcc_mst_edges = sum(
        len(cc) - 1 for cc in infr.nid_to_gt_cc.values())
    infr.print('real_n_pcc_mst_edges = %r' % (
        infr.real_n_pcc_mst_edges,), color='red')
def _query_nvidia_smi(mode, fields):
    """
    Runs nvidia smi in query mode

    Args:
        mode (str): the query cli flag to pass to nvidia-smi
        fields (List[str]): csv header fields to query

    Returns:
        List[Dict[str, str]]: parsed csv output, one dict per non-empty
            output line, keyed by ``fields``

    Raises:
        Exception: if nvidia-smi exits with a non-zero return code
    """
    query_flag = '--{}={}'.format(mode, ','.join(fields))
    info = ub.cmd(['nvidia-smi', query_flag, '--format=csv,noheader'])

    if info['ret'] != 0:
        print(info['out'])
        print(info['err'])
        raise Exception('unable to call nvidia-smi: ret={}'.format(
            info['ret']))

    # Blank lines are dropped; every remaining line is one csv record
    stripped_lines = (line.strip() for line in info['out'].split('\n'))
    return [
        ub.dzip(fields, [part.strip() for part in line.split(',')])
        for line in stripped_lines
        if line
    ]
def _set_error_edges(infr, nid, new_error_edges):
    """
    Record ``new_error_edges`` as the error edges of the PCC ``nid``.

    The edges are stored in ``infr.nid_to_errors[nid]`` and passed to
    ``infr._increase_priority`` with a value of 10. When
    ``inference.update_attrs`` is enabled each edge also gets
    ``maybe_error=True``.
    """
    # flag error edges
    infr.nid_to_errors[nid] = new_error_edges

    if infr.params['inference.update_attrs']:
        error_flags = ub.dzip(new_error_edges, [True])
        infr.set_edge_attrs('maybe_error', error_flags)

    # give the error edges insanely high priority
    infr._increase_priority(new_error_edges, 10)
def apply_match_scores(infr):
    """
    Applies precomputed matching scores to edges that already exist in the
    graph. Typically you should run infr.apply_match_edges() before running
    this.

    Replaces the ``score``, ``rank`` and ``normscore`` edge attributes. Does
    nothing except print a message when ``infr.cm_list`` is None.

    Example:
        >>> # ENABLE_DOCTEST
        >>> infr = testdata_infr('PZ_MTEST')
        >>> infr.exec_matching()
        >>> infr.apply_match_edges()
        >>> infr.apply_match_scores()
        >>> infr.get_edge_attrs('score')
    """
    if infr.cm_list is None:
        infr.print('apply_match_scores - no scores to apply!')
        return
    infr.print('apply_match_scores', 1)

    graph_edges = list(infr.graph.edges())
    edge_to_data = infr._get_cm_edge_data(graph_edges)

    # Remove existing attrs
    for attr in ('score', 'rank', 'normscore'):
        util.nx_delete_edge_attr(infr.graph, attr)

    scored_edges = list(edge_to_data.keys())

    raw_scores = list(util.take_column(edge_to_data.values(), 'score'))
    edge_scores = np.array(util.replace_nones(raw_scores, np.nan))
    edge_ranks = np.array(util.take_column(edge_to_data.values(), 'rank'))

    # take the inf-norm
    normscores = edge_scores / util.safe_max(edge_scores, nans=False)

    # Add new attrs
    infr.set_edge_attrs('score', ub.dzip(scored_edges, edge_scores))
    infr.set_edge_attrs('rank', ub.dzip(scored_edges, edge_ranks))
    infr.set_edge_attrs('normscore', dict(zip(scored_edges, normscores)))
def hardcase_review_gen(infr):
    """
    Subiterator for hardcase review

    Re-review non-confident edges that vsone did not classify correctly

    The edges whose current decision is POSTV, NEGTV or INCMP are scored
    with ``1 - verif.easiness(edges, real)`` using the ``'match_state'``
    evaluation verifier. That hardness is used to prioritize the edges, is
    stored as the ``hardness`` edge attribute, and then control is handed
    to ``infr._inner_priority_gen``.

    Yields:
        whatever ``infr._inner_priority_gen(use_refresh=False)`` yields

    Raises:
        AssertionError: if autoreview, redun or ranking are enabled, or if
            inference is disabled
    """
    infr.print('==============================', color='white')
    infr.print('--- HARDCASE PRIORITY LOOP ---', color='white')

    verifiers = infr.learn_evaluation_verifiers()
    verif = verifiers['match_state']

    # Only consider edges that currently have a real decision
    edges_ = list(infr.edges())
    real_ = list(infr.edge_decision_from(edges_))
    flags_ = [r in {POSTV, NEGTV, INCMP} for r in real_]
    real = list(ub.compress(real_, flags_))
    edges = list(ub.compress(edges_, flags_))

    hardness = 1 - verif.easiness(edges, real)

    df = pd.DataFrame({'edges': edges, 'real': real})
    df['hardness'] = hardness
    pred = verif.predict(edges)
    df['pred'] = pred.values
    # sort_values returns a new frame (it is not in-place by default), so
    # the result has to be rebound for the printed table to be sorted.
    df = df.sort_values('hardness', ascending=False)
    infr.print('hardness analysis')
    infr.print(str(df))

    infr.print('infr status: ' + ub.repr2(infr.status()))

    # Don't re-review anything that was confidently reviewed
    # CONFIDENCE = const.CONFIDENCE
    # CODE_TO_INT = CONFIDENCE.CODE_TO_INT.copy()
    # CODE_TO_INT[CONFIDENCE.CODE.UNKNOWN] = 0
    # conf = ub.take(CODE_TO_INT, infr.gen_edge_values(
    #     'confidence', edges, on_missing='default',
    #     default=CONFIDENCE.CODE.UNKNOWN))

    # This should only be run with certain params
    assert not infr.params['autoreview.enabled']
    assert not infr.params['redun.enabled']
    assert not infr.params['ranking.enabled']
    assert infr.params['inference.enabled']
    # const.CONFIDENCE.CODE.PRETTY_SURE
    if infr.params['queue.conf.thresh'] is None:
        # != 'pretty_sure':
        infr.print('WARNING: should queue.conf.thresh = "pretty_sure"?')

    # work around add_candidate_edges
    infr.prioritize(metric='hardness', edges=edges, scores=hardness)
    infr.set_edge_attrs('hardness', ub.dzip(edges, hardness))
    yield from infr._inner_priority_gen(use_refresh=False)
def _precompute_class_weights(dset, workers=0, mode='median-idf'):
    """
    Compute one weight per class from the class frequencies of ``dset``.

    The weight of a class is ``middle_value / frequency``, where
    ``middle_value`` is the median of the frequencies that lie within the
    5th and 95th percentile (or of all frequencies if none do). Zero
    frequencies are replaced by half of the smallest non-zero frequency and
    non-finite weights become 1.0. In ``'log-median-idf'`` mode the weights
    are additionally passed through ``log(w + (e - 1))`` and clipped to
    ``[0.1, 10]``. The result is rounded to 2 decimals.

    Args:
        dset: dataset passed to ``_cached_class_frequency``; its ``classes``
            attribute is paired with the weights for the printed summary.
        workers (int): forwarded to ``_cached_class_frequency``
        mode (str): ``'median-idf'`` or ``'log-median-idf'``

    Returns:
        the array of rounded per-class weights

    Raises:
        AssertionError: if ``mode`` is not one of the supported values
        KeyError: same condition, when assertions are disabled

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> harn = setup_harn(0, workers=0, xpu='cpu').initialize()
        >>> dset = harn.datasets['train']
    """
    assert mode in ['median-idf', 'log-median-idf']

    # Work on a float copy: the array handed back by the frequency helper is
    # left unmodified, and the half-minimum substitute written below is not
    # truncated if the frequencies are integer typed.
    total_freq = _cached_class_frequency(dset, workers=workers).astype(float)

    def logb(arr, base):
        if base == 'e':
            return np.log(arr)
        elif base == 2:
            return np.log2(arr)
        elif base == 10:
            return np.log10(arr)
        else:
            out = np.log(arr)
            out /= np.log(base)
            return out

    _min, _max = np.percentile(total_freq, [5, 95])
    is_valid = (_min <= total_freq) & (total_freq <= _max)
    if np.any(is_valid):
        middle_value = np.median(total_freq[is_valid])
    else:
        middle_value = np.median(total_freq)

    # variant of median-inverse-frequency
    nonzero_freq = total_freq[total_freq != 0]
    if len(nonzero_freq):
        total_freq[total_freq == 0] = nonzero_freq.min() / 2

    if mode == 'median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
    elif mode == 'log-median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
        # natural log (a dead ``base = 2`` assignment used to precede this)
        base = np.exp(1)
        weights = logb(weights + (base - 1), base)
        weights = np.maximum(weights, .1)
        weights = np.minimum(weights, 10)
    else:
        raise KeyError('mode = {!r}'.format(mode))

    weights = np.round(weights, 2)
    cname_to_weight = ub.dzip(dset.classes, weights)
    print('weights: ' + ub.repr2(cname_to_weight))
    return weights
def color_nodes(graph, labelattr='label', brightness=.878, outof=None,
                sat_adjust=None):
    """
    Sets the ``color`` attribute of each labeled node based on its label.

    Args:
        graph: graph whose nodes carry the ``labelattr`` attribute
        labelattr (str): node attribute holding the label
        brightness (float): forwarded to ``util.distinct_colors``
        outof (int | None): if given, a fixed palette of ``outof`` colors
            (plus a leading light gray) is indexed by the offset label
            instead of building one color per unique label.
        sat_adjust: if truthy, passed as the saturation argument of
            ``adjust_hsv`` for every palette color
    """
    node_to_lbl = nx.get_node_attributes(graph, labelattr)
    unique_lbls = sorted(set(node_to_lbl.values()))
    fixed_palette = outof is not None

    if fixed_palette:
        palette = util.distinct_colors(outof, brightness=brightness)
    else:
        num_lbls = len(unique_lbls)
        if num_lbls == 1:
            palette = [util.Color('lightblue').as01()]
        elif num_lbls == 2:
            # https://matplotlib.org/examples/color/named_colors.html
            palette = [
                util.Color(name).as01('bgr')
                for name in ['royalblue', 'orange']
            ]
        else:
            palette = util.distinct_colors(num_lbls, brightness=brightness)

    if sat_adjust:
        palette = [
            util.Color(color).adjust_hsv(0.0, sat_adjust, 0.0)
            for color in palette
        ]

    if fixed_palette:
        # Slot 0 is gray; labels are shifted so the smallest maps to 1 or
        # above and anything below that clamps to the gray slot.
        gray = util.Color('lightgray').as01('bgr')
        palette = [gray] + palette
        offset = max(1, min(unique_lbls)) - 1
        node_to_lbl = ub.map_vals(lambda nid: max(0, nid - offset),
                                  node_to_lbl)
        lbl_to_color = ub.dzip(range(outof + 1), palette)
    else:
        lbl_to_color = ub.dzip(unique_lbls, palette)

    node_to_color = ub.map_vals(lbl_to_color, node_to_lbl)
    nx.set_node_attributes(graph, name='color', values=node_to_color)
    nx_ensure_agraph_color(graph)
def _purge_error_edges(infr, nid):
    """
    Removes all error edges associated with a PCC so they can be recomputed
    or resolved.

    Returns:
        bool: True if at least one error edge was recorded for ``nid``
    """
    stale_error_edges = infr.nid_to_errors.pop(nid, [])

    if infr.params['inference.update_attrs']:
        cleared_flags = ub.dzip(stale_error_edges, [None])
        infr.set_edge_attrs('maybe_error', cleared_flags)

    # Remove priority from old error edges
    infr._remove_edge_priority(stale_error_edges)
    return len(stale_error_edges) > 0
def on_within(infr, edge, decision, prev_decision, nid, split_nids=None):
    """
    Callback when a review is made inside a PCC

    Args:
        edge: the edge reviewed
        decision: the new decision
        prev_decision: the old decision
        nid: the old nid the edge is inside of
        split_nids: the tuple of new nids created if this decision splits a
            PCC

    Returns:
        list: ``['within', tag]`` where tag is ``'split'`` when a split
            occurred, otherwise the kind of evidence that was added.
    """
    infr._update_neg_metagraph(
        decision, prev_decision, nid, nid, split_nids=split_nids)

    if split_nids is None:
        # No split; report what kind of evidence the decision contributes
        if decision == POSTV:
            tag = 'pos-evidence'
        elif decision == INCMP:
            tag = 'incomp-evidence'
        elif decision == NEGTV:
            tag = 'neg-evidence'
        else:
            tag = 'other-evidence'
        return ['within', tag]

    # A split occurred
    if infr.params['inference.update_attrs']:
        first_nid, second_nid = split_nids
        first_cc = infr.pos_graph.component(first_nid)
        second_cc = infr.pos_graph.component(second_nid)
        infr.set_node_attrs('name_label', ub.dzip(first_cc, [first_nid]))
        infr.set_node_attrs('name_label', ub.dzip(second_cc, [second_nid]))
    return ['within', 'split']
def predict_proba_df(verif, edges):
    """
    Return a DataFrame of dummy match-state probabilities for ``edges``.

    Probabilities are cached in ``infr.task_probs['match_state']``. Edges
    missing from that cache get randomly generated values, drawn with
    ``verif.rng`` and the ``verif.dummy_params`` entry for the edge's truth.

    CommandLine:
        python -m graphid.demo DummyVerif.predict_edges

    Example:
        >>> from graphid import demo
        >>> kwargs = dict(num_pccs=40, size=2)
        >>> infr = demo.demodata_infr(**kwargs)
        >>> verif = infr.dummy_verif
        >>> edges = list(infr.graph.edges())
        >>> probs = verif.predict_proba_df(edges)
    """
    infr = verif.infr
    edges = [infr.e_(*edge) for edge in edges]
    prob_cache = infr.task_probs['match_state']

    uncached_edges = [edge for edge in edges if edge not in prob_cache]
    if uncached_edges:
        truths = [verif._get_truth(edge) for edge in uncached_edges]
        truth_to_edges = ub.group_items(uncached_edges, truths)
        states = [POSTV, NEGTV, INCMP]
        # Groups are visited in sorted order so the random draws are
        # deterministic
        for truth in sorted(truth_to_edges):
            group = truth_to_edges[truth]
            num = len(group)
            first_probs = util.randn(shape=[num], rng=verif.rng, a_max=1,
                                     a_min=0, **verif.dummy_params[truth])
            # Just randomly assign other probs
            second_probs = verif.rng.rand(num) * (1 - first_probs)
            third_probs = 1 - (first_probs + second_probs)
            for edge, p0, p1, p2 in zip(group, first_probs, second_probs,
                                        third_probs):
                prob_cache[edge] = ub.dzip(states, (p0, p1, p2))

    rows = list(ub.take(prob_cache, edges))
    index = util.ensure_multi_index(edges, ('aid1', 'aid2'))
    return pd.DataFrame(rows, index=index)
def hypothesis_errors(infr, pos_subgraph, neg_edges):
    """
    Generate edges that may be in error, with a hypothesized true state.

    For every negative edge a minimum cut separating its endpoints is
    computed on ``pos_subgraph``. If the weight of the negative edge is
    below the cut weight the negative edge itself is chosen (hypothesis
    POSTV), otherwise the cut edges are chosen (hypothesis NEGTV).

    Args:
        pos_subgraph: connected graph; its edges are given a ``'weight'``
            attribute as a side effect
        neg_edges: node pairs within ``pos_subgraph``

    Yields:
        tuple: (edge, hypothesis), each edge at most once

    Raises:
        AssertionError: if ``pos_subgraph`` is not connected (raised when
            the generator is first advanced)
    """
    if not nx.is_connected(pos_subgraph):
        raise AssertionError('Not connected' + repr(pos_subgraph))
    infr.print(
        'Find hypothesis errors in {} nodes with {} neg edges'.format(
            len(pos_subgraph), len(neg_edges)), 3)

    pos_edges = list(pos_subgraph.edges())

    neg_weights = infr._mincut_edge_weights(neg_edges)
    pos_weights = infr._mincut_edge_weights(pos_edges)

    capacity = 'weight'
    nx.set_edge_attributes(pos_subgraph, name=capacity,
                           values=ub.dzip(pos_edges, pos_weights))

    # Solve a multicut problem for multiple pairs of terminal nodes.
    # Running multiple min-cuts produces a k-factor approximation
    seen_edges = set()
    for (source, target), join_weight in zip(neg_edges, neg_weights):
        cut_weight, partition = nx.minimum_cut(pos_subgraph, source, target,
                                               capacity=capacity)
        cut_edges = nxu.edges_cross(pos_subgraph, *partition)

        if join_weight < cut_weight:
            candidates, hypothesis = {(source, target)}, POSTV
        else:
            candidates, hypothesis = cut_edges, NEGTV

        for edge in candidates:
            if edge in seen_edges:
                continue
            seen_edges.add(edge)
            yield (edge, hypothesis)
def _set_neg_redun_flags(infr, nid1, other_nids, flags):
    """
    Flags or unflags an nid1 as negative redundant with other nids.
    (TODO: NEG REDUN CAN BE CONSOLIDATED VIA NEG-META-GRAPH)

    Args:
        nid1: label of the PCC being updated
        other_nids: labels of the other PCCs, aligned with ``flags``
        flags: truthy to flag the pair as negative redundant, falsy to
            unflag it

    Notes:
        Updates ``infr.neg_redun_metagraph``. When a queue exists or
        ``inference.update_attrs`` is enabled, the priority of edges between
        flagged pairs is removed and that of unreviewed edges between
        unflagged pairs is reinstated. With ``inference.update_attrs`` the
        ``inferred_state`` of the crossing edges is also set to ``'diff'``
        (flagged) or ``None`` (unflagged).
    """
    cc1 = infr.pos_graph.component(nid1)
    other_nids = list(other_nids)

    # Determine what needs what
    to_flag, flagged_already = [], []
    to_unflag, unflagged_already = [], []
    for nid2, flag in zip(other_nids, flags):
        currently_redun = infr.neg_redun_metagraph.has_edge(nid1, nid2)
        if flag:
            bucket = flagged_already if currently_redun else to_flag
        else:
            bucket = to_unflag if currently_redun else unflagged_already
        bucket.append(nid2)

    # Print summary of what will be done
    def _print_helper(what, others, already=False):
        if len(others) == 0:
            return
        n_other_thresh = 4
        if len(others) > n_other_thresh:
            omsg = '#others={}'.format(len(others))
        else:
            omsg = 'others={}'.format(others)
        amsg = '(already done)' if already else ''
        msg = '{} nid={}, {} {}'.format(what, nid1, omsg, amsg)
        infr.print(msg, 5 + already)

    summary = [
        ('neg_redun flag=T', to_flag, False),
        ('neg_redun flag=T', flagged_already, True),
        ('neg_redun flag=F', to_unflag, False),
        ('neg_redun flag=F', unflagged_already, True),
    ]
    for what, others, already in summary:
        _print_helper(what, others, already=already)

    # Do the flagging/unflagging
    for nid2 in to_flag:
        infr.neg_redun_metagraph.add_edge(nid1, nid2)
    for nid2 in to_unflag:
        infr.neg_redun_metagraph.remove_edge(nid1, nid2)

    # Update priorities and attributes
    update_attrs = infr.params['inference.update_attrs']
    has_queue = infr.queue is not None
    if not (update_attrs or has_queue):
        return

    # Unprioritize all edges between flagged nids
    flagged_edges = []
    for nid2 in it.chain(to_flag, flagged_already):
        cc2 = infr.pos_graph.component(nid2)
        flagged_edges.extend(nxu.edges_cross(infr.graph, cc1, cc2))

    # Reprioritize unreviewed edges between unflagged nids
    # Marked inferred state of all edges
    unflagged_edges = []
    unrev_unflagged_edges = []
    unrev_graph = infr.unreviewed_graph
    for nid2 in it.chain(to_unflag, unflagged_already):
        cc2 = infr.pos_graph.component(nid2)
        if has_queue:
            unrev_unflagged_edges.extend(
                nxu.edges_cross(unrev_graph, cc1, cc2))
        if update_attrs:
            unflagged_edges.extend(
                nxu.edges_cross(infr.graph, cc1, cc2))

    # Batch set prioritize
    infr._remove_edge_priority(flagged_edges)
    infr._reinstate_edge_priority(unrev_unflagged_edges)

    if update_attrs:
        infr.set_edge_attrs(
            'inferred_state', ub.dzip(flagged_edges, ['diff'])
        )
        infr.set_edge_attrs(
            'inferred_state', ub.dzip(unflagged_edges, [None])
        )
def draw_perclass_prcurve(cx_to_info, classes=None, prefix='', fnum=1, **kw):
    """
    Draw one precision / recall curve per class, ordered by descending AP.

    Args:
        cx_to_info (PerClass_Measures | Dict): maps a class index or name to
            a dict containing ``'ap'`` and one of ``'pr'``, ``'ppv'`` /
            ``'tpr'`` or ``'prec'`` / ``'rec'``.
        classes: optional lookup from an integer key to a class name. When
            omitted, integer keys are used directly as the label.
        prefix (str): prepended to the plot title
        fnum (int): figure number passed to the plotting backend
        **kw: forwarded to ``kwplot.multi_plot``

    Returns:
        the axes the curves were drawn on

    Raises:
        KeyError: if an info dict has none of ``'pr'``, ``'ppv'``, ``'prec'``

    Example:
        >>> # xdoctest: +REQUIRES(module:kwplot)
        >>> from kwcoco.metrics.drawing import *  # NOQA
        >>> from kwcoco.metrics import DetectionMetrics
        >>> dmet = DetectionMetrics.demo(
        >>>     nimgs=3, nboxes=(0, 10), n_fp=(0, 3), n_fn=(0, 2), classes=3, score_noise=0.1, box_noise=0.1, with_probs=False)
        >>> cfsn_vecs = dmet.confusion_vectors()
        >>> print(cfsn_vecs.data.pandas())
        >>> classes = cfsn_vecs.classes
        >>> cx_to_info = cfsn_vecs.binarize_ovr().measures()['perclass']
        >>> print('cx_to_info = {}'.format(ub.repr2(cx_to_info, nl=1)))
        >>> import kwplot
        >>> kwplot.autompl()
        >>> draw_perclass_prcurve(cx_to_info, classes)
        >>> # xdoctest: +REQUIRES(--show)
        >>> kwplot.show_if_requested()

    Ignore:
        from kwcoco.metrics.drawing import *  # NOQA
        import xdev
        globals().update(xdev.get_func_kwargs(draw_perclass_prcurve))
    """
    import kwplot
    # Sort by descending AP
    cxs = list(cx_to_info.keys())
    priority = np.array([item['ap'] for item in cx_to_info.values()])
    # nan APs are pushed to the end of the descending order
    priority[np.isnan(priority)] = -np.inf
    cxs = list(ub.take(cxs, np.argsort(priority)))[::-1]
    aps = []
    xydata = ub.odict()
    for cx in cxs:
        info = cx_to_info[cx]
        # Only index into ``classes`` when it was given; the default of
        # None used to raise a TypeError for integer keys.
        if classes is not None and isinstance(cx, int):
            catname = classes[cx]
        else:
            catname = cx
        ap = info['ap']
        if 'pr' in info:
            pr = info['pr']
        elif 'ppv' in info:
            pr = (info['ppv'], info['tpr'])
        elif 'prec' in info:
            pr = (info['prec'], info['rec'])
        else:
            raise KeyError('pr, prec, or ppv not in info')

        if np.isfinite(ap):
            aps.append(ap)
            (precision, recall) = pr
        else:
            aps.append(np.nan)
            precision, recall = [0], [0]

        if precision is None and recall is None:
            # I thought AP=nan in this case, but I missed something
            precision, recall = [0], [0]

        label_suffix = _realpos_label_suffix(info)
        label = 'ap={:0.2f}: {} ({})'.format(ap, catname, label_suffix)

        xydata[label] = (recall, precision)

    with warnings.catch_warnings():
        # nanmean warns when every AP is nan (or there are no classes)
        warnings.filterwarnings('ignore', 'Mean of empty slice', RuntimeWarning)
        mAP = np.nanmean(aps)

    if 0:
        # Disabled experimental seaborn rendering
        import seaborn as sns
        import pandas as pd
        # sns.set()
        # TODO: deprecate multi_plot for seaborn?
        data_groups = {
            key: {'recall': r, 'precision': p}
            for key, (r, p) in xydata.items()
        }
        print('data_groups = {}'.format(ub.repr2(data_groups, nl=3)))

        longform = []
        for key, subdata in data_groups.items():
            subdata = pd.DataFrame.from_dict(subdata)
            subdata['label'] = key
            longform.append(subdata)
        data = pd.concat(longform)

        fig = kwplot.figure(fnum=fnum)
        ax = fig.gca()
        longform = []
        for key, (r, p) in xydata.items():
            subdata = pd.DataFrame.from_dict({'recall': r, 'precision': p, 'label': key})
            longform.append(subdata)
        data = pd.concat(longform)

        palette = ub.dzip(xydata.keys(), kwplot.distinct_colors(len(xydata)))
        # markers = ub.dzip(xydata.keys(), kwplot.distinct_markers(len(xydata)))

        sns.lineplot(
            data=data, x='recall', y='precision',
            hue='label', style='label', ax=ax,
            # markers=markers,
            estimator=None,
            ci=0,
            hue_order=list(xydata.keys()),
            palette=palette,
        )
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
    else:
        ax = kwplot.multi_plot(
            xydata=xydata, fnum=fnum,
            xlim=(0, 1), ylim=(0, 1), xpad=0.01, ypad=0.01,
            xlabel='recall', ylabel='precision',
            err_style='bars',
            title=prefix + 'OVR mAP={:.4f}'.format(mAP),
            legend_loc='lower right',
            color='distinct', linestyle='cycle', marker='cycle', **kw
        )
    return ax
def ensure_priority_scores(infr, priority_edges):
    """
    Ensures that priority attributes are assigned to the edges.
    This does not change the state of the queue.

    The priority source depends on what ``infr`` has available:

        * verifiers: the ``'match_state'`` task probabilities. The metric is
          ``'default_priority'``.
        * ``cm_list``: the scores from ``infr._make_lnbnn_scores``. The
          metric is ``'normscore'``.
        * neither: a constant ``1e-6`` for every edge. The metric is
          ``'random'``.

    Args:
        priority_edges: edges to assign priorities to

    Returns:
        tuple: (metric, priority) - the name of the edge attribute that was
            written and the per-edge priority values

    Doctest:
        >>> from graphid import demo
        >>> infr = demo.demodata_infr(num_pccs=6, p_incon=.5, size_std=2)
        >>> edges = list(infr.edges())
        >>> infr.ensure_priority_scores(edges)
    """
    if infr.verifiers:
        infr.print(
            'Prioritizing {} edges with one-vs-one probs'.format(
                len(priority_edges)), 1)

        infr.ensure_task_probs(priority_edges)

        primary_task = 'match_state'
        match_probs = infr.task_probs[primary_task]
        primary_thresh = infr.task_thresh[primary_task]

        # Read match_probs into a DataFrame
        primary_probs = pd.DataFrame(
            list(ub.take(match_probs, priority_edges)),
            index=util.ensure_multi_index(priority_edges,
                                          ('aid1', 'aid2')))

        # Convert match-state probabilities into priorities
        prob_match = primary_probs[POSTV]

        # Initialize priorities to probability of matching
        default_priority = prob_match.copy()

        # If the edges are currently between the same individual, then
        # prioritize by non-positive probability (because those edges might
        # expose an inconsistency)
        already_pos = [
            infr.pos_graph.node_label(u) == infr.pos_graph.node_label(v)
            for u, v in priority_edges
        ]
        default_priority[already_pos] = 1 - default_priority[already_pos]

        if infr.params['autoreview.enabled']:
            if infr.params['autoreview.prioritize_nonpos']:
                # Give positives, then negatives, then not-comps that pass
                # automatic thresholds high priority
                for state in (POSTV, NEGTV, INCMP):
                    _probs = primary_probs[state]
                    flags = _probs > primary_thresh[state]
                    default_priority[flags] = np.maximum(
                        default_priority[flags], _probs[flags]) + 1

        infr.set_edge_attrs('prob_match', prob_match.to_dict())
        infr.set_edge_attrs('default_priority', default_priority.to_dict())

        metric = 'default_priority'
        priority = default_priority
    elif infr.cm_list is not None:
        # The verbosity level belongs to infr.print; it used to be passed
        # to str.format by mistake, where it was silently ignored.
        infr.print(
            'Prioritizing {} edges with one-vs-vsmany scores'.format(
                len(priority_edges)), 1)
        # Not given any deploy classifier, this is the best we can do
        scores = infr._make_lnbnn_scores(priority_edges)
        metric = 'normscore'
        priority = scores
    else:
        infr.print('WARNING: No verifiers to prioritize {} edge(s)'.format(
            len(priority_edges)))
        metric = 'random'
        priority = np.zeros(len(priority_edges)) + 1e-6

    infr.set_edge_attrs(metric, ub.dzip(priority_edges, priority))
    return metric, priority
def _fix_keys(model_state_dict):
    """
    Hack around DataParallel wrapper.

    If there is nothing in common between the two models check to see if
    prepending 'module.' to other keys fixes it.

    Renames the keys of ``model_state_dict`` (the "other" keys) so they line
    up with the keys of ``self_state``. Nothing is renamed when the other
    keys are already a subset of the keys of ``self_state``; otherwise the
    strategy is selected by ``association``:

        * ``'strict'``: no renaming.
        * ``'module-hack'``: add or strip a ``'module.'`` prefix, whichever
          yields at least one key in common with ``self_state``.
        * ``'prefix-hack'``: apply the add / remove prefix transforms
          reported by ``_best_prefix_transform``.
        * ``'embedding'`` / ``'isomorphism'``: rename keys using the path
          correspondence returned by ``maximum_common_ordered_subpaths``.

    Note:
        ``self_state``, ``association`` and ``verbose`` are not parameters;
        they are read from a scope outside of this function.

    Args:
        model_state_dict (dict): mapping with string keys (presumably a
            torch state dict with dotted parameter paths - TODO confirm)

    Returns:
        dict: the input mapping, or a re-keyed version of it

    Raises:
        KeyError: if ``association`` is not one of the handled values (only
            checked when the other keys are not already a subset)
    """
    other_keys = set(model_state_dict)
    self_keys = set(self_state)

    if 0:
        # Disabled debugging experiment; this branch never runs.
        # Automatic way to reduce nodes in the trees?
        # If node b always follows node a, can we contract it?
        nodes1 = [n for p in other_keys for n in p.split('.')]
        nodes2 = [n for p in self_keys for n in p.split('.')]
        tups1 = list(tup for key in other_keys
                     for tup in ub.iter_window(key.split('.'), 2))
        tups2 = list(tup for key in self_keys
                     for tup in ub.iter_window(key.split('.'), 2))
        x = ub.ddict(list)
        for a, b in tups1:
            x[a].append(b)
        for a, b in tups2:
            x[a].append(b)

        nodehist = ub.dict_hist(nodes1 + nodes2)

        for k, v in x.items():
            print('----')
            print(k)
            print(nodehist[k])
            follow_hist = ub.dict_hist(v)
            print(follow_hist)
            total = sum(follow_hist.values())
            if ub.allsame(follow_hist.values()) and total == nodehist[k]:
                print('CONTRACT')

        # pair_freq = ub.dict_hist(ub.flatten([tups1, tups2]))
        # print(forest_str(paths_to_otree(other_keys, '.')))

    # common_keys = other_keys.intersection(self_keys)
    # if not common_keys:
    if not other_keys.issubset(self_keys):
        if association == 'strict':
            pass
        elif association == 'module-hack':
            # If there are no common keys try a hack
            prefix = 'module.'

            def smap(f, ss):
                return set(map(f, ss))

            def fix1(k):
                return prefix + k

            def fix2(k):
                # NOTE(review): implicitly returns None for keys that do
                # not start with the prefix - confirm that is intended.
                if k.startswith(prefix):
                    return k[len(prefix):]

            # Use whichever direction produces at least one matching key
            if smap(fix1, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix1, model_state_dict)
            elif smap(fix2, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix2, model_state_dict)
        elif association == 'prefix-hack':
            import functools

            def add_prefix(k, prefix):
                return prefix + k

            def remove_prefix(k, prefix):
                # NOTE(review): implicitly returns None for keys that do
                # not start with the prefix - confirm that is intended.
                if k.startswith(prefix):
                    return k[len(prefix):]

            # set1 = other_keys
            # target_set2 = self_keys
            found = _best_prefix_transform(other_keys, self_keys)
            if found is not None:
                # Apply each reported transform in order to every key
                for action, prefix in found['transform']:
                    if action == 'add':
                        func = functools.partial(add_prefix, prefix=prefix)
                    elif action == 'remove':
                        func = functools.partial(remove_prefix, prefix=prefix)
                    else:
                        raise AssertionError
                    model_state_dict = ub.map_keys(func, model_state_dict)
        elif association in {'embedding', 'isomorphism'}:
            if verbose > 1:
                print('Using subpath {} association, may take some time'.
                      format(association))
            # I believe this is the correct way to solve the problem
            paths1 = sorted(other_keys)
            paths2 = sorted(self_state)

            if 1:
                # hack to filter to reduce tree size in embedding problem
                def shrink_paths(paths):
                    # Swap the '.' in front of digits and common leaf names
                    # for ':' so those parts are no longer split off by the
                    # '.' separator used below.
                    new_paths = []
                    for p in paths:
                        p = p.replace('.0', ':0')
                        p = p.replace('.1', ':1')
                        p = p.replace('.2', ':2')
                        p = p.replace('.3', ':3')
                        p = p.replace('.4', ':4')
                        p = p.replace('.5', ':5')
                        p = p.replace('.6', ':6')
                        p = p.replace('.7', ':7')
                        p = p.replace('.8', ':8')
                        p = p.replace('.9', ':9')
                        p = p.replace('.weight', ':weight')
                        p = p.replace('.bias', ':bias')
                        p = p.replace('.num_batches_tracked', ':num_batches_tracked')
                        p = p.replace('.running_mean', ':running_mean')
                        p = p.replace('.running_var', ':running_var')
                        # p = p.replace('.conv1', ':conv1')
                        # p = p.replace('.conv2', ':conv2')
                        # p = p.replace('.conv3', ':conv3')
                        # p = p.replace('.bn1', ':bn1')
                        # p = p.replace('.bn2', ':bn2')
                        # p = p.replace('.bn3', ':bn3')
                        new_paths.append(p)
                    return new_paths

                # Reducing the depth saves a lot of time
                paths1_ = shrink_paths(paths1)
                paths2_ = shrink_paths(paths2)

            subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                paths1_, paths2_, sep='.', mode=association)

            # Undo the ':' substitution made by shrink_paths
            subpaths1 = [p.replace(':', '.') for p in subpaths1]
            subpaths2 = [p.replace(':', '.') for p in subpaths2]
            mapping = ub.dzip(subpaths1, subpaths2)
            if verbose > 1:
                other_unmapped = sorted(other_keys - set(mapping.keys()))
                self_unmapped = sorted(self_keys - set(mapping.values()))
                print('-- embed association (other -> self) --')
                print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                print('self_unmapped = {}'.format(
                    ub.repr2(self_unmapped, nl=1)))
                print('other_unmapped = {}'.format(
                    ub.repr2(other_unmapped, nl=1)))
                print('len(mapping) = {}'.format(
                    ub.repr2(len(mapping), nl=1)))
                print('len(self_unmapped) = {}'.format(
                    ub.repr2(len(self_unmapped), nl=1)))
                print('len(other_unmapped) = {}'.format(
                    ub.repr2(len(other_unmapped), nl=1)))
                print('-- end embed association --')

            # HACK: something might be wrong, there was an instance with
            # HRNet_w32 where multiple keys mapped to the same key
            # bad keys were incre_modules.3.0.conv1.weight and conv1.weight
            #
            # This will not error, but may produce bad output
            try:
                model_state_dict = ub.map_keys(
                    lambda k: mapping.get(k, k), model_state_dict)
            except Exception as ex:
                # Fall back to a plain dict rebuild, in which a later key
                # overwrites an earlier one that maps to the same name, and
                # report the original failure as a warning.
                HACK = 1
                if HACK:
                    new_state_dict_ = {}
                    for k, v in model_state_dict.items():
                        new_state_dict_[mapping.get(k, k)] = v
                    model_state_dict = new_state_dict_
                    warnings.warn('ex = {!r}'.format(ex))
                else:
                    raise
        else:
            raise KeyError(association)
    return model_state_dict
def _dz(a, b):
    """
    ``ub.dzip`` for inputs that may be numpy arrays: an ``np.ndarray`` is
    converted with ``tolist()`` and anything else with ``list()`` before
    the two are zipped into a dict.
    """
    def _as_list(items):
        if isinstance(items, np.ndarray):
            return items.tolist()
        return list(items)

    keys = _as_list(a)
    vals = _as_list(b)
    return ub.dzip(keys, vals)
def gpu_info(new_mode=True, respect_visible_devices=True):
    """
    Run nvidia-smi and parse output

    Three implementations are available and are selected by ``new_mode``:
    ``'xml'`` parses ``nvidia-smi --query --xml-format``, any other truthy
    value uses ``_query_nvidia_smi`` (one call for the GPUs and one for the
    compute processes), and a falsy value parses the human readable table
    printed by plain ``nvidia-smi``.

    Args:
        new_mode (bool | str): internal argument that changes the underlying
            implementation (see above).

        respect_visible_devices (bool, default=True): if True respects
            CUDA_VISIBLE_DEVICES environment variable, otherwise returns data
            corresponding to physical GPU indexes.

    Returns:
        dict: info about each GPU indexed by gpu number

    Raises:
        NvidiaSMIError: if nvidia-smi cannot be run or reports a failure
        NotImplementedError: if a process type other than 'C' or 'G' is
            encountered while counting processes

    Note:
        Not gaurenteed to work if CUDA is not installed.

    Warnings:
        if nvidia-smi is not installed

    CommandLine:
        xdoctest -m netharn.device gpu_info --cuda

    Example:
        >>> # xdoctest: +REQUIRES(--cuda)
        >>> from netharn.device import gpu_info
        >>> gpus = gpu_info()
        >>> # xdoctest: +IGNORE_WANT
        >>> print('gpus = {}'.format(ub.repr2(gpus, nl=4)))
        >>> assert len(gpus) == torch.cuda.device_count()
        gpus = {
            0: {
                'gpu_uuid': 'GPU-348ebe36-252b-46fa-8a97-477ae331f6f4',
                'index': '0',
                'mem_avail': 10013.0,
                'mem_total': 11170.0,
                'mem_used': 1157.0,
                'memory.free': '10013 MiB',
                'memory.total': '11170 MiB',
                'memory.used': '1157 MiB',
                'name': 'GeForce GTX 1080 Ti',
                'num': 0,
                'num_compute_procs': 1,
                'procs': [
                    {
                        'gpu_num': 0,
                        'gpu_uuid': 'GPU-348ebe36-252b-46fa-8a97-477ae331f6f4',
                        'name': '/usr/bin/python',
                        'pid': '19912',
                        'type': 'C',
                        'used_memory': '567 MiB',
                    },
                ],
            },
        }
    """
    """
    Ignore:
        # official nvidia-smi python bindings
        pip install nvidia-ml-py
        import pynvml

        # TODO: make more efficient calls to nvidia-smi

        utilization.gpu
        utilization.memory
        compute_mode
        memory.total
        memory.used
        memory.free
        index
        name
        count

        nvidia-smi pmon --count 1
        nvidia-smi -h
        nvidia-smi --help-query-compute-apps
        nvidia-smi --help-query-gpu
        nvidia-smi --help-query-accounted-apps
        nvidia-smi --help-query-supported-clocks
        nvidia-smi --help-query-retired-pages
        nvidia-smi --query-accounted-apps="pid" --format=csv
        nvidia-smi --query-gpu="index,memory.total,memory.used,memory.free,count,name,gpu_uuid" --format=csv
        nvidia-smi --query-compute-apps="pid,name,gpu_uuid,used_memory" --format=csv
        nvidia-smi --query-accounted-apps="gpu_name,pid" --format=csv

        import timerit
        ti = timerit.Timerit(40, bestof=5, verbose=2)
        for timer in ti.reset('new1'):
            with timer:
                gpu_info(True)
        for timer in ti.reset('old'):
            with timer:
                gpu_info(False)
        for timer in ti.reset('xml'):
            with timer:
                gpu_info('xml')
        xdev.profile_now(gpu_info)('xml')

        for timer in ti.reset('cmd'):
            with timer:
                ub.cmd(['nvidia-smi', '--query', '--xml-format'])
        for timer in ti.reset('check_output'):
            with timer:
                import subprocess
                subprocess.check_output(['nvidia-smi', '--query', '--xml-format'])
    """
    if new_mode == 'xml':
        # Parse info out of the nvidia xml query
        # note, that even though this has less calls to nvidia-smi, there
        # is a lot more output, which makes it the slowest method especially
        # for multi-gpu systems
        import xml.etree.ElementTree as ET

        info = ub.cmd(['nvidia-smi', '--query', '--xml-format'])
        if info['ret'] != 0:
            print(info['out'])
            print(info['err'])
            # FIX: the format string previously had no placeholder, so the
            # return code was silently dropped from the message.
            warnings.warn('Problem running nvidia-smi: ret={}'.format(
                info['ret']))
            raise NvidiaSMIError
        xml_string = info['out']
        root = ET.fromstring(xml_string)

        gpus = {}
        for gpu_elem in root.findall('gpu'):
            gpu = {}
            gpu['uuid'] = gpu_elem.find('uuid').text
            gpu['name'] = gpu_elem.find('product_name').text
            gpu['num'] = int(gpu_elem.find('minor_number').text)

            gpu['procs'] = [{item.tag: item.text for item in proc_elem}
                            for proc_elem in gpu_elem.find('processes')]

            for item in gpu_elem.find('fb_memory_usage'):
                gpu['memory.' + item.tag] = item.text

            gpu['mem_used'] = float(gpu['memory.used'].strip().replace(
                'MiB', ''))
            gpu['mem_total'] = float(gpu['memory.total'].strip().replace(
                'MiB', ''))
            gpu['mem_avail'] = gpu['mem_total'] - gpu['mem_used']
            gpus[gpu['num']] = gpu

            # Let each GPU know how many processes are currently using it
            num_compute_procs = 0
            num_graphics_procs = 0
            for proc in gpu['procs']:
                if proc['type'] == 'C':
                    num_compute_procs += 1
                elif proc['type'] == 'G':
                    num_graphics_procs += 1
                else:
                    raise NotImplementedError(proc['type'])
            gpu['num_compute_procs'] = num_compute_procs
            gpu['num_graphics_procs'] = num_graphics_procs

    elif new_mode:
        # This is slightly more robust than the old mode, but it also makes
        # more than one call to nvidia-smi and cannot return information about
        # graphics processes.
        fields = [
            'index', 'memory.total', 'memory.used', 'memory.free', 'name',
            'gpu_uuid'
        ]
        mode = 'query-gpu'
        try:
            gpu_rows = _query_nvidia_smi(mode, fields)
        except Exception as ex:
            warnings.warn('Problem running nvidia-smi: {!r}'.format(ex))
            raise NvidiaSMIError

        fields = ['pid', 'name', 'gpu_uuid', 'used_memory']
        mode = 'query-compute-apps'
        proc_rows = _query_nvidia_smi(mode, fields)

        # Coerce into the old-style format for backwards compatibility
        gpus = {}
        for row in gpu_rows:
            gpu = row.copy()
            num = int(gpu['index'])
            gpu['num'] = num
            gpu['mem_used'] = float(gpu['memory.used'].strip().replace(
                'MiB', ''))
            gpu['mem_total'] = float(gpu['memory.total'].strip().replace(
                'MiB', ''))
            gpu['mem_avail'] = gpu['mem_total'] - gpu['mem_used']
            gpu['procs'] = []
            gpus[num] = gpu

        gpu_uuid_to_num = {
            gpu['gpu_uuid']: gpu['num']
            for gpu in gpus.values()
        }

        for row in proc_rows:
            # Give each GPU info on which processes are using it
            proc = row.copy()
            proc['type'] = 'C'
            proc['gpu_num'] = gpu_uuid_to_num[proc['gpu_uuid']]
            num = proc['gpu_num']
            gpus[num]['procs'].append(proc)

        WITH_GPU_PROCS = False
        if WITH_GPU_PROCS:
            # Hacks in gpu-procs if enabled
            import re
            info = ub.cmd('nvidia-smi pmon -c 1')
            for line in info['out'].split('\n'):
                line = line.strip()
                if line and not line.startswith("#"):
                    parts = re.split(r'\s+', line, maxsplit=7)
                    if parts[1] != '-':
                        header = [
                            'gpu_num', 'pid', 'type', 'sm', 'mem', 'enc',
                            'dec', 'name'
                        ]
                        proc = ub.dzip(header, parts)
                        proc['gpu_num'] = int(proc['gpu_num'])
                        if proc['type'] == 'G':
                            gpu = gpus[proc['gpu_num']]
                            gpu['procs'].append(proc)
                            proc['gpu_uuid'] = gpu['gpu_uuid']

        for gpu in gpus.values():
            # Let each GPU know how many processes are currently using it
            num_compute_procs = 0
            num_graphics_procs = 0
            for proc in gpu['procs']:
                if proc['type'] == 'C':
                    num_compute_procs += 1
                elif proc['type'] == 'G':
                    num_graphics_procs += 1
                else:
                    raise NotImplementedError(proc['type'])

            # NOTE calling nvidia-smi in query mode does not seem to have
            # support for getting info about graphics procs.
            gpu['num_compute_procs'] = num_compute_procs
            if WITH_GPU_PROCS:
                gpu['num_graphics_procs'] = num_graphics_procs
    else:
        # This is the original implementation of this function. It parses the
        # direct output of nvidia smi, it is prone to failure if the format of
        # this program's output ever changes.
        try:
            result = ub.cmd('nvidia-smi')
        except Exception:
            warnings.warn('Could not run nvidia-smi.')
            raise NvidiaSMIError
        # FIX: this check used to live inside the ``try`` above, so the
        # NvidiaSMIError it raised was caught again and produced a second,
        # misleading "Could not run nvidia-smi." warning.
        if result['ret'] != 0:
            warnings.warn('Problem running nvidia-smi.')
            raise NvidiaSMIError

        lines = result['out'].splitlines()

        gpu_lines = []
        proc_lines = []
        current = None
        state = '0_gpu_read'

        # Small state machine over the printed table: first collect the rows
        # belonging to each GPU, then the rows of the process table.
        for line in lines:
            if current is None:
                # Signals the start of GPU info
                if line.startswith('|====='):
                    current = []
            else:
                if state == '0_gpu_read':
                    if len(line.strip()) == 0:
                        # End of GPU info
                        state = '1_proc_read'
                        current = None
                    elif line.startswith('+----'):
                        # Move to the next GPU
                        gpu_lines.append(current)
                        current = []
                    else:
                        current.append(line)
                elif state == '1_proc_read':
                    if line.startswith('+----'):
                        # End of proc info
                        state = 'terminate'
                        break
                    else:
                        proc_lines.append(line)
                else:
                    raise AssertionError(state)

        def parse_gpu_lines(lines):
            line1 = lines[0]
            line2 = lines[1]
            gpu = {}
            gpu['name'] = ' '.join(line1.split('|')[1].split()[1:-1])
            # FIX: the index token was previously passed through ' '.join,
            # which inserts spaces between its characters ('10' -> '1 0') and
            # made int() fail for GPU numbers with more than one digit.
            gpu['num'] = int(line1.split('|')[1].split()[0])

            mempart = line2.split('|')[2].strip()
            part1, part2 = mempart.split('/')
            gpu['mem_used'] = float(part1.strip().replace('MiB', ''))
            gpu['mem_total'] = float(part2.strip().replace('MiB', ''))
            gpu['mem_avail'] = gpu['mem_total'] - gpu['mem_used']
            return gpu

        def parse_proc_line(line):
            inner = '|'.join(line.split('|')[1:-1])
            if 'no running processes found' in inner.lower():
                # Handle "No running processes found" case in issue #2
                return None
            parts = [p.strip() for p in inner.split(' ')]
            parts = [p for p in parts if p]

            index = int(parts[0])
            pid = int(parts[1])
            proc_type = str(parts[2])
            proc_name = str(parts[3])
            used_mem = float(parts[4].replace('MiB', ''))

            proc = {
                'gpu_num': index,
                'pid': pid,
                'type': proc_type,
                'name': proc_name,
                'used_mem': used_mem,
            }
            return proc

        gpus = {}
        for num, lines in enumerate(gpu_lines):
            gpu = parse_gpu_lines(lines)
            assert num == gpu['num'], (
                'nums ({}, {}) do not agree. probably a parsing error'.format(
                    num, gpu['num']))
            assert num not in gpus, (
                'Multiple GPUs labeled as num {}. Probably a parsing error'.
                format(num))
            gpus[num] = gpu
            gpus[num]['procs'] = []

        for line in proc_lines:
            # Give each GPU info on which processes are using it
            proc = parse_proc_line(line)
            if proc is not None:
                num = proc['gpu_num']
                gpus[num]['procs'].append(proc)

        for gpu in gpus.values():
            # Let each GPU know how many processes are currently using it
            num_compute_procs = 0
            num_graphics_procs = 0
            for proc in gpu['procs']:
                if proc['type'] == 'C':
                    num_compute_procs += 1
                elif proc['type'] == 'G':
                    num_graphics_procs += 1
                else:
                    raise NotImplementedError(proc['type'])
            gpu['num_compute_procs'] = num_compute_procs
            gpu['num_graphics_procs'] = num_graphics_procs

    if respect_visible_devices:
        # Respect CUDA_VISIBLE_DEVICES, nvidia-smi does not respect this by
        # default so remap to gain the appropriate effect.
        # NOTE(review): entries are parsed with int(), so UUID-style device
        # specifiers would raise ValueError here - confirm whether callers
        # ever use them.
        val = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        parts = (p.strip() for p in val.split(','))
        visible_devices = [int(p) for p in parts if p]

        if visible_devices:
            remapped = {}
            for visible_idx, real_idx in enumerate(visible_devices):
                gpu = remapped[visible_idx] = gpus[real_idx]
                gpu['index'] = str(visible_idx)
                gpu['num'] = visible_idx
                gpu['real_num'] = real_idx
            gpus = remapped

    return gpus
def find_mst_edges(infr, label='name_label'):
    """
    Compute the edges that would need to be added so that every group of
    nodes sharing the same value of the ``label`` node attribute is connected
    in the positive graph.

    Pairs that already have an edge in the negative or incomparable graph are
    never proposed. If the augmentation is infeasible under that restriction
    a warning is printed and a partial augmentation is used instead.

    Args:
        label (str): node attribute used to group the nodes

    Returns:
        list: the proposed new edges; each is asserted to be absent from
            ``infr.graph``

    Example:
        >>> # DISABLE_DOCTEST
        >>> from graphid.core.mixin_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> infr = ibeis.AnnotInference(ibs, 'all', autoinit=True)
        >>> label = 'orig_name_label'
        >>> label = 'name_label'
        >>> infr.find_mst_edges()
        >>> infr.ensure_mst()

    Ignore:
        old_mst_edges = [
            e for e, d in infr.edges(data=True)
            if d.get('user_id', None) == 'algo:mst'
        ]
        infr.graph.remove_edges_from(old_mst_edges)
        infr.pos_graph.remove_edges_from(old_mst_edges)
        infr.neg_graph.remove_edges_from(old_mst_edges)
        infr.incomp_graph.remove_edges_from(old_mst_edges)
    """
    # Group the nodes by the value of the requested label
    label_of_node = infr.get_node_attrs(label)
    nodes_by_label = ub.group_items(label_of_node.keys(),
                                    label_of_node.values())

    use_heuristic = False  # infr.ibs is not None
    if use_heuristic:
        annots = infr.ibs.annots(infr.aids)
        time_of_node = ub.dzip(annots, annots.time)
        view_of_node = ub.dzip(annots, annots.viewpoint_code)
        active_heuristics = {'view_weight', 'time_weight'}

    def _weighted_candidates(nodes, avail_uv):
        # Attach a heuristic weight to every available (u, v) pair
        pairs = np.array(avail_uv)
        weights = np.ones(len(pairs))

        if 'view_weight' in active_heuristics:
            from graphid.core import _rhomb_dist
            view_pairs = [(view_of_node[u], view_of_node[v])
                          for (u, v) in pairs]
            view_weight = np.array([
                _rhomb_dist.VIEW_CODE_DIST[(v1, v2)]
                for (v1, v2) in view_pairs
            ])
            # Assume comparable by default and prefer undefined
            # more than probably not, but less than definately so.
            view_weight[np.isnan(view_weight)] = 1.5
            # Prefer viewpoint 10x more than time
            weights += 10 * view_weight

        if 'time_weight' in active_heuristics:
            # Prefer linking annotations closer in time
            times = list(ub.take(time_of_node, nodes))
            maxtime = util.safe_max(times, fill=1, nans=False)
            mintime = util.safe_min(times, fill=0, nans=False)
            time_denom = maxtime - mintime
            # Try linking by time for lynx data
            time_delta = np.array([
                abs(time_of_node[u] - time_of_node[v]) for u, v in pairs
            ])
            time_weight = time_delta / time_denom
            weights += time_weight

        weights = np.array(weights)
        weights[np.isnan(weights)] = 1.0

        return [(u, v, {'weight': w})
                for (u, v), w in zip(pairs, weights)]

    mst_edges = []
    prog = ub.ProgIter(list(nodes_by_label.keys()),
                       desc='finding mst edges',
                       enabled=infr.verbose > 0)
    for nid in prog:
        members = set(nodes_by_label[nid])
        if len(members) == 1:
            continue
        # We want to make this CC connected
        pos_sub = infr.pos_graph.subgraph(members, dynamic=False)
        # Pairs already decided negative / incomparable may not be proposed
        # (the unknown graph is intentionally not consulted)
        decided = it.chain(
            nxu.edges_inside(infr.neg_graph, members),
            nxu.edges_inside(infr.incomp_graph, members),
        )
        blocked = {e_(*edge) for edge in decided}

        if use_heuristic or blocked:
            candidate_uv = [
                (u, v)
                for u, v in it.starmap(e_, nxu.complement_edges(pos_sub))
                if (u, v) not in blocked
            ]
            if use_heuristic:
                # Can do heuristic weighting to improve the MST
                candidates = _weighted_candidates(members, candidate_uv)
            else:
                candidates = candidate_uv
            try:
                aug_edges = list(
                    nxu.k_edge_augmentation(pos_sub, k=1, avail=candidates))
            except nx.NetworkXUnfeasible:
                print('Warning: MST augmentation is not feasible')
                print('explicit negative edges might disconnect a PCC')
                aug_edges = list(
                    nxu.k_edge_augmentation(pos_sub,
                                            k=1,
                                            avail=candidates,
                                            partial=True))
        else:
            # Simple mst augmentation
            aug_edges = list(nxu.k_edge_augmentation(pos_sub, k=1))
        mst_edges.extend(aug_edges)
    prog.ensure_newline()

    for edge in mst_edges:
        assert not infr.graph.has_edge(*edge), (
            'alrady have edge={}'.format(edge))
    return mst_edges
def benchmark_template():
    """
    Template for a benchmark that times several methods over a grid of
    parameters, summarizes the timings in pandas tables, optionally ranks the
    methods with openskill, and plots the result with kwplot / seaborn.

    The function takes no arguments and returns nothing; results are printed
    and (when ``plot`` is true) drawn. It imports ubelt, pandas, timerit,
    openskill and kwplot locally.
    """
    import ubelt as ub
    import pandas as pd
    import timerit

    def method1(x, y, z):
        ret = []
        for i in range((x + y) * z):
            ret.append(i)
        return ret

    def method2(x, y, z):
        ret = [i for i in range((x + y) * z)]
        return ret

    # Maps a method name to its callable. ``locals()`` is captured here, so
    # the lookup contains every local name defined above this line.
    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(100, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': ['method1', 'method2'],
        'x': list(range(7)),
        'y': [0, 100],
        'z': [2, 3]
        # 'param_name': [param values],
    }
    xlabel = 'x'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = ['x', 'y', 'z']
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['y'],
        'size': ['z'],
    }
    # Every basis key that is neither the x-axis nor already assigned to
    # another group is assigned to the hue group.
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        # One string key per plot group (e.g. 'style_key'), built from the
        # subset of params belonging to that group.
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(ub.dict_isect(
                params, labels),
                                                  compact=1,
                                                  si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            chunk_iter = ub.chunks(ti.times, ti.bestof)
            times = list(map(min, chunk_iter))  # TODO: timerit method for this
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # Column that holds the timing measurement in ``data``
    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time'
                                          ]].mean().rename({'time': 'mean'},
                                                           axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    # Columns that identify a parameter setting, i.e. everything except the
    # method, the measurements and the derived group keys.
    other_keys = sorted(
        set(stats_data.columns) -
        {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        # Speedup of each variant relative to the slowest one in the group
        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(),
                                      win_prob)).sort_values(ascending=False)
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        # Only pass a grouping (hue / style / size) to seaborn when it has at
        # least one label assigned to it.
        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data,
                     x=xlabel,
                     y=time_key,
                     marker='o',
                     ax=ax,
                     **plotkw)
        ax.set_title('Benchmark Name')
        ax.set_xlabel('Size (todo: A better x-variable description)')
        ax.set_ylabel('Time (todo: A better y-variable description)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')

        # ``__IPYTHON__`` is only defined inside IPython; when it is missing
        # show the figure explicitly.
        try:
            __IPYTHON__
        except NameError:
            plt.show()
def _precompute_class_weights(dset, mode='median-idf'):
    """
    Compute a per-class weight from the class frequencies of ``dset`` using a
    variant of median inverse frequency.

    The reference ("middle") value is the median of the frequencies that lie
    within the 5th to 95th percentile range (or of all frequencies if none
    do). Classes that never occur are given half of the smallest nonzero
    frequency before the ratio ``middle / frequency`` is taken.

    Args:
        dset: dataset passed to ``_cached_class_frequency``; its ``classes``
            attribute is only used to label the printed weights.

        mode (str): either 'median-idf' (use the ratio directly) or
            'log-median-idf' (natural-log damped ratio clipped to [0.1, 10]).

    Returns:
        ndarray: weights rounded to two decimals, in the same order as the
            frequencies returned by ``_cached_class_frequency``

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> harn = setup_harn(0, workers=0, xpu='cpu').initialize()
        >>> dset = harn.datasets['train']
    """
    assert mode in ['median-idf', 'log-median-idf']

    # FIX: work on a floating point copy. The zero-frequency fill below used
    # to be written in place into the array returned by
    # ``_cached_class_frequency``; with an integer array the fill value
    # ``min / 2`` would be truncated (e.g. 1 / 2 -> 0), and the caller's
    # array was mutated either way.
    total_freq = np.array(_cached_class_frequency(dset), dtype=float)

    def logb(arr, base):
        if base == 'e':
            return np.log(arr)
        elif base == 2:
            return np.log2(arr)
        elif base == 10:
            return np.log10(arr)
        else:
            out = np.log(arr)
            out /= np.log(base)
            return out

    # Ignore extreme frequencies when choosing the reference value
    _min, _max = np.percentile(total_freq, [5, 95])
    is_valid = (_min <= total_freq) & (total_freq <= _max)
    if np.any(is_valid):
        middle_value = np.median(total_freq[is_valid])
    else:
        middle_value = np.median(total_freq)

    # variant of median-inverse-frequency
    nonzero_freq = total_freq[total_freq != 0]
    if len(nonzero_freq):
        # Unseen classes are treated as half as frequent as the rarest class
        total_freq[total_freq == 0] = nonzero_freq.min() / 2

    if mode == 'median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
    elif mode == 'log-median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
        base = np.exp(1)
        weights = logb(weights + (base - 1), base)
        weights = np.maximum(weights, .1)
        weights = np.minimum(weights, 10)
    else:
        # Only reachable when assertions are disabled
        raise KeyError('mode = {!r}'.format(mode))

    weights = np.round(weights, 2)
    cname_to_weight = ub.dzip(dset.classes, weights)
    print('weights: ' + ub.repr2(cname_to_weight))

    if False:
        # Inspect the weights
        import kwplot
        kwplot.autoplt()

        cname_to_weight = ub.dzip(dset.classes, weights)
        cname_to_weight = ub.dict_subset(cname_to_weight,
                                         ub.argsort(cname_to_weight))
        kwplot.multi_plot(
            ydata=list(cname_to_weight.values()),
            kind='bar',
            xticklabels=list(cname_to_weight.keys()),
            xtick_rotation=90,
            fnum=2,
            doclf=True)

    return weights
def update_visual_attrs(infr,
                        graph=None,
                        show_reviewed_edges=True,
                        show_unreviewed_edges=False,
                        show_inferred_diff=True,
                        show_inferred_same=True,
                        show_recent_review=False,
                        highlight_reviews=True,
                        show_inconsistency=True,
                        wavy=False,
                        simple_labels=False,
                        show_labels=True,
                        reposition=True,
                        use_image=False,
                        edge_overrides=None,
                        node_overrides=None,
                        colorby='name_label',
                        **kwargs
                        # hide_unreviewed_inferred=True
                        ):
    """
    Write the node and edge attributes used for drawing (labels, colors,
    line widths / styles, alpha, z-order, visibility and optionally layout
    positions) onto ``graph``.

    Attributes are written in sequence with ``nx.set_*_attributes``; later
    writes override earlier ones for the same edge, so the order of the
    statements below is significant.

    Args:
        graph: graph to decorate; defaults to ``infr.graph``.
        show_reviewed_edges (bool): if False reviewed edges get style 'invis'
        show_unreviewed_edges (bool): if False unreviewed edges get style
            'invis'
        show_inferred_diff (bool): if False non-reviewed edges inferred as
            'diff' get style 'invis'
        show_inferred_same (bool): if False non-reviewed edges inferred as
            'same' get style 'invis'
        show_recent_review (bool): if True (and ``selected_edges`` is not
            given) the edges with the largest ``review_id`` are selected
        highlight_reviews (bool): forwarded to
            ``infr.get_colored_edge_weights``; also gives reviewed edges a
            larger line width than unreviewed ones
        show_inconsistency (bool): if True edges with a truthy 'maybe_error'
            attribute get a stroke in ``infr._error_color``
        wavy (bool): if True inferred (non-reviewed) edges get a 'sketch'
            attribute
        simple_labels (bool): if True node labels are ``str(node)``
        show_labels (bool): if False all node labels are set to ''
        reposition (bool): if True run ``util.nx_agraph_layout`` in place
        use_image (bool): forwarded to ``infr.update_node_image_attribute``
        edge_overrides (dict): maps attribute name to an edge -> value
            mapping; applied last
        node_overrides (dict): maps attribute name to a node -> value
            mapping; applied last
        colorby (str): node attribute forwarded to ``color_nodes`` as
            ``labelattr``
        **kwargs: keys read here are 'show_candidate_edges',
            'show_candidates', 'show_cand', 'show_all', 'outof', 'show_same',
            'show_diff', 'show_positive_edges', 'show_negative_edges',
            'show_incomparable_edges', 'show_between' and 'selected_edges'
            (popped). When ``reposition`` is True the remaining kwargs are
            also merged into the layout keyword arguments.
    """
    infr.print('update_visual_attrs', 3)
    if graph is None:
        graph = infr.graph
    # if hide_cuts is not None:
    #     # show_unreviewed_cuts = not hide_cuts
    #     show_reviewed_cuts = not hide_cuts

    # Node shapes are only initialized the first time this is called
    if not getattr(infr, '_viz_init_nodes', False):
        infr._viz_init_nodes = True
        nx.set_node_attributes(graph, name='shape', values='circle')
        # infr.set_node_attrs('shape', 'circle')

    if getattr(infr, '_viz_image_config_dirty', True):
        infr.update_node_image_attribute(graph=graph, use_image=use_image)

    def get_any(dict_, keys, default=None):
        # Return the value of the first key in ``keys`` found in ``dict_``
        for key in keys:
            if key in dict_:
                return dict_[key]
        return default

    show_cand = get_any(
        kwargs, ['show_candidate_edges', 'show_candidates', 'show_cand'])

    # NOTE(review): any non-None value (including an explicit False) enables
    # all of the show flags below - confirm this is intended. ``show_cand``
    # itself is not read again in this function.
    if show_cand is not None:
        show_cand = True
        show_reviewed_edges = True
        show_unreviewed_edges = True
        show_inferred_diff = True
        show_inferred_same = True

    if kwargs.get('show_all'):
        show_cand = True

    # alpha_low = .5
    alpha_med = .9
    alpha_high = 1.0

    dark_background = graph.graph.get('dark_background', None)

    # Ensure we are starting from a clean slate
    # if reposition:
    util.nx_delete_edge_attr(graph, infr.visual_edge_attrs_appearance)

    # Set annotation node labels
    node_to_nid = None
    if not show_labels:
        nx.set_node_attributes(graph,
                               name='label',
                               values=ub.dzip(graph.nodes(), ['']))
    else:
        if simple_labels:
            nx.set_node_attributes(
                graph, name='label', values={n: str(n)
                                             for n in graph.nodes()})
        else:
            if node_to_nid is None:
                node_to_nid = nx.get_node_attributes(graph, 'name_label')
            node_to_view = nx.get_node_attributes(graph, 'viewpoint')
            # Include the viewpoint in the label when any node has one
            if node_to_view:
                annotnode_to_label = {
                    aid: 'aid=%r%s\nnid=%r' %
                    (aid, node_to_view[aid], node_to_nid[aid])
                    for aid in graph.nodes()
                }
            else:
                annotnode_to_label = {
                    aid: 'aid=%r\nnid=%r' % (aid, node_to_nid[aid])
                    for aid in graph.nodes()
                }
            nx.set_node_attributes(graph,
                                   name='label',
                                   values=annotnode_to_label)

    # NODE_COLOR: based on name_label
    color_nodes(graph,
                labelattr=colorby,
                outof=kwargs.get('outof', None),
                sat_adjust=-.4)

    # EDGES:
    # Grab different types of edges
    edges, edge_colors = infr.get_colored_edge_weights(
        graph, highlight_reviews)

    # reviewed_states = nx.get_edge_attributes(graph, 'evidence_decision')
    # Note: decisions are looked up over infr.graph, not the ``graph`` arg
    reviewed_states = {
        e: infr.edge_decision(e)
        for e in infr.graph.edges()
    }
    edge_to_inferred_state = nx.get_edge_attributes(
        graph, 'inferred_state')
    # dummy_edges = [edge for edge, flag in
    #                nx.get_edge_attributes(graph, '_dummy_edge').items()
    #                if flag]
    edge_to_reviewid = nx.get_edge_attributes(graph, 'review_id')
    recheck_edges = [
        edge for edge, split in nx.get_edge_attributes(
            graph, 'maybe_error').items() if split
    ]
    decision_to_edge = util.group_pairs(reviewed_states.items())
    neg_edges = decision_to_edge[NEGTV]
    pos_edges = decision_to_edge[POSTV]
    incomp_edges = decision_to_edge[INCMP]
    unreviewed_edges = decision_to_edge[UNREV]

    inferred_same = [
        edge for edge, state in edge_to_inferred_state.items()
        if state == 'same'
    ]
    inferred_diff = [
        edge for edge, state in edge_to_inferred_state.items()
        if state == 'diff'
    ]
    inconsistent_external = [
        edge for edge, state in edge_to_inferred_state.items()
        if state == 'inconsistent_external'
    ]
    inferred_notcomp = [
        edge for edge, state in edge_to_inferred_state.items()
        if state == 'notcomp'
    ]

    reviewed_edges = incomp_edges + pos_edges + neg_edges
    compared_edges = pos_edges + neg_edges
    uncompared_edges = util.setdiff(edges, compared_edges)
    # "nontrivial" inferred edges are those without a direct review
    nontrivial_inferred_same = util.setdiff(
        inferred_same, pos_edges + neg_edges + incomp_edges)
    nontrivial_inferred_diff = util.setdiff(
        inferred_diff, pos_edges + neg_edges + incomp_edges)
    nontrivial_inferred_edges = (nontrivial_inferred_same +
                                 nontrivial_inferred_diff)

    # EDGE_COLOR: based on edge_weight
    nx.set_edge_attributes(graph,
                           name='color',
                           values=ub.dzip(edges, edge_colors))

    # LINE_WIDTH: based on review_state
    # unreviewed_width = 2.0
    # reviewed_width = 5.0
    unreviewed_width = 1.0
    reviewed_width = 2.0
    if highlight_reviews:
        nx.set_edge_attributes(graph,
                               name='linewidth',
                               values=ub.dzip(reviewed_edges,
                                              [reviewed_width]))
        nx.set_edge_attributes(graph,
                               name='linewidth',
                               values=ub.dzip(unreviewed_edges,
                                              [unreviewed_width]))
    else:
        nx.set_edge_attributes(graph,
                               name='linewidth',
                               values=ub.dzip(edges, [unreviewed_width]))

    # EDGE_STROKE: based on decision and maybe_error
    # fg = util.WHITE if dark_background else util.BLACK
    # nx.set_edge_attributes(graph, name='stroke', values=ub.dzip(reviewed_edges, [{'linewidth': 3, 'foreground': fg}]))
    if show_inconsistency:
        nx.set_edge_attributes(
            graph,
            name='stroke',
            values=ub.dzip(recheck_edges, [{
                'linewidth': 5,
                'foreground': infr._error_color
            }]))

    # Set linestyles to emphasize PCCs
    # Dash lines between PCCs inferred to be different
    nx.set_edge_attributes(graph,
                           name='linestyle',
                           values=ub.dzip(inferred_diff, ['dashed']))

    # Treat incomparable/incon-external inference as different
    nx.set_edge_attributes(graph,
                           name='linestyle',
                           values=ub.dzip(inferred_notcomp, ['dashed']))
    nx.set_edge_attributes(graph,
                           name='linestyle',
                           values=ub.dzip(inconsistent_external, ['dashed']))

    # Dot lines that we are unsure of
    nx.set_edge_attributes(graph,
                           name='linestyle',
                           values=ub.dzip(unreviewed_edges, ['dotted']))

    # Cut edges are implicit and dashed
    # nx.set_edge_attributes(graph, name='implicit', values=ub.dzip(cut_edges, [True]))
    # nx.set_edge_attributes(graph, name='linestyle', values=ub.dzip(cut_edges, ['dashed']))
    # nx.set_edge_attributes(graph, name='alpha', values=ub.dzip(cut_edges, [alpha_med]))

    nx.set_edge_attributes(graph,
                           name='implicit',
                           values=ub.dzip(uncompared_edges, [True]))

    # Only matching edges should impose constraints on the graph layout
    nx.set_edge_attributes(graph,
                           name='implicit',
                           values=ub.dzip(neg_edges, [True]))
    nx.set_edge_attributes(graph,
                           name='alpha',
                           values=ub.dzip(neg_edges, [alpha_med]))
    nx.set_edge_attributes(graph,
                           name='implicit',
                           values=ub.dzip(incomp_edges, [True]))
    nx.set_edge_attributes(graph,
                           name='alpha',
                           values=ub.dzip(incomp_edges, [alpha_med]))

    # Ensure reviewed edges are visible
    # (this overrides the 'implicit' / 'alpha' values written just above for
    # the negative and incomparable edges, which are also reviewed edges)
    nx.set_edge_attributes(graph,
                           name='implicit',
                           values=ub.dzip(reviewed_edges, [False]))
    nx.set_edge_attributes(graph,
                           name='alpha',
                           values=ub.dzip(reviewed_edges, [alpha_high]))

    if True:
        # Infered same edges can be allowed to constrain in order
        # to make things look nice sometimes
        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(inferred_same, [False]))
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(inferred_same, [alpha_high]))

    # Optional kwargs that hide whole categories of edges by zeroing alpha
    if not kwargs.get('show_same', True):
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(inferred_same, [0]))

    if not kwargs.get('show_diff', True):
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(inferred_diff, [0]))

    if not kwargs.get('show_positive_edges', True):
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(pos_edges, [0]))

    if not kwargs.get('show_negative_edges', True):
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(neg_edges, [0]))

    if not kwargs.get('show_incomparable_edges', True):
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(incomp_edges, [0]))

    if not kwargs.get('show_between', True):
        if node_to_nid is None:
            node_to_nid = nx.get_node_attributes(graph, 'name_label')
        # Edges whose endpoints have different name labels
        between_edges = [(u, v) for u, v in edges
                         if node_to_nid[u] != node_to_nid[v]]
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(between_edges, [0]))

    # SKETCH: based on inferred_edges
    # Make inferred edges wavy
    if wavy:
        # dict(scale=3.0, length=18.0, randomness=None)]
        nx.set_edge_attributes(
            graph,
            name='sketch',
            values=ub.dzip(
                nontrivial_inferred_edges,
                [dict(scale=10.0, length=64.0, randomness=None)]))

    # Make dummy edges more transparent
    # nx.set_edge_attributes(graph, name='alpha', values=ub.dzip(dummy_edges, [alpha_low]))
    # Popped (not just read) so it is not forwarded to the layout kwargs
    selected_edges = kwargs.pop('selected_edges', None)

    # SHADOW: based on most recent
    # Increase visibility of nodes with the most recently changed timestamp
    if show_recent_review and edge_to_reviewid and selected_edges is None:
        review_ids = list(edge_to_reviewid.values())
        recent_idxs = ub.argmax(review_ids, multi=True)
        recent_edges = list(
            ub.take(list(edge_to_reviewid.keys()), recent_idxs))
        selected_edges = recent_edges

    if selected_edges is not None:
        # TODO: add photoshop-like parameters like
        # spread and size. offset is the same as angle and distance.
        nx.set_edge_attributes(
            graph,
            name='shadow',
            values=ub.dzip(
                selected_edges,
                [{
                    'rho': .3,
                    'alpha': .6,
                    'shadow_color': 'w' if dark_background else 'k',
                    'offset': (0, 0),
                    'scale': 3.0,
                }]))

    # Z_ORDER: make sure nodes are on top
    nodes = list(graph.nodes())
    nx.set_node_attributes(graph, name='zorder', values=ub.dzip(nodes, [10]))
    nx.set_edge_attributes(graph, name='zorder', values=ub.dzip(edges, [0]))
    nx.set_edge_attributes(graph, name='picker', values=ub.dzip(edges, [10]))

    # VISIBILITY: Set visibility of edges based on arguments
    if not show_reviewed_edges:
        infr.print('Making reviewed edges invisible', 10)
        nx.set_edge_attributes(graph,
                               name='style',
                               values=ub.dzip(reviewed_edges, ['invis']))

    if not show_unreviewed_edges:
        infr.print('Making un-reviewed edges invisible', 10)
        nx.set_edge_attributes(graph,
                               name='style',
                               values=ub.dzip(unreviewed_edges, ['invis']))

    if not show_inferred_same:
        infr.print('Making nontrivial_same edges invisible', 10)
        nx.set_edge_attributes(graph,
                               name='style',
                               values=ub.dzip(nontrivial_inferred_same,
                                              ['invis']))

    if not show_inferred_diff:
        infr.print('Making nontrivial_diff edges invisible', 10)
        nx.set_edge_attributes(graph,
                               name='style',
                               values=ub.dzip(nontrivial_inferred_diff,
                                              ['invis']))

    if selected_edges is not None:
        # Always show the most recent review (remove setting of invis)
        # infr.print('recent_edges = %r' % (recent_edges,))
        nx.set_edge_attributes(graph,
                               name='style',
                               values=ub.dzip(selected_edges, ['']))

    if reposition:
        # LAYOUT: update the positioning layout
        def get_layoutkw(key, default):
            # Explicit kwargs win over graph-level settings, then the default
            return kwargs.get(key, graph.graph.get(key, default))

        layoutkw = dict(prog='neato',
                        splines=get_layoutkw('splines', 'line'),
                        fontsize=get_layoutkw('fontsize', None),
                        fontname=get_layoutkw('fontname', None),
                        sep=10 / 72,
                        esep=1 / 72,
                        nodesep=.1)
        layoutkw.update(kwargs)
        # print(ub.repr2(graph.edges))
        try:
            util.nx_agraph_layout(graph, inplace=True, **layoutkw)
        except AttributeError:
            print('WARNING: errors may occur')

    # Caller supplied overrides are applied last so they take precedence
    if edge_overrides:
        for key, edge_to_attr in edge_overrides.items():
            nx.set_edge_attributes(graph, name=key, values=edge_to_attr)
    if node_overrides:
        for key, node_to_attr in node_overrides.items():
            nx.set_node_attributes(graph, name=key, values=node_to_attr)
'orig_name_label': 5977 }, 5430: { 'aid': 5430, 'name_label': 5977, 'orig_name_label': 5977 } } graph = nx.Graph(edges) graph.add_nodes_from(nodes.keys()) df = pd.DataFrame.from_dict(nodes, orient='index') nx.set_node_attributes(graph, name='orig_name_label', values=ub.dzip(df['aid'], df['orig_name_label'])) nx.set_node_attributes(graph, name='name_label', values=ub.dzip(df['aid'], df['name_label'])) aug_graph = graph node_to_label = nx.get_node_attributes(graph, 'name_label') aid1, aid2 = 2265, 2280 label_to_nodes = ub.group_items(node_to_label.keys(), node_to_label.values()) aug_graph = graph.copy() # remove cut edges from augmented graph edge_to_iscut = nx.get_edge_attributes(aug_graph, 'is_cut')
def coco_from_viame_csv(csv_fpaths, images=None):
    """
    Build a kwcoco dataset from one or more VIAME-CSV annotation files.

    Blank lines and lines starting with ``#`` are ignored. Every other line
    is split on commas and read positionally:

        0: track id (int)
        1: image file name (may be empty)
        2: frame index (int)
        3-6: box corners ``tl_x, tl_y, br_x, br_y`` (float)
        7: detection score (float)
        8: target length (float)
        9+: alternating ``category_name, category_score`` fields

    When the image name is empty the frame index is used to look the image
    up in the sorted listing of the ``images`` directory. That lookup is only
    attempted while the dataset has no images yet, or once the listing has
    already been loaded; otherwise the row is skipped.

    Args:
        csv_fpaths (Iterable[str]): paths of the csv files to read.

        images (str | None): directory containing the images. Only needed
            when rows omit the image name.

    Returns:
        kwcoco.CocoDataset: dataset holding one annotation per accepted row.
            The box is stored as ``[x, y, w, h]``; ``score`` and
            ``target_len`` are attached only when they are non-negative; the
            category is the highest scoring name, or None if the row lists
            no categories.

    Raises:
        Exception: if an image lookup is required but ``images`` is None, or
            if the image directory is not a single flat leaf directory.
        NotImplementedError: if ``images`` is given but is not a directory.
        ValueError: if a numeric field cannot be parsed.
    """
    import kwcoco

    @ub.memoize
    def lazy_image_list():
        # Only walk the image directory if some row actually needs it.
        if images is None:
            raise Exception('must specify where the image root is')
        if not isdir(images):
            raise NotImplementedError
        import os
        img_ext = {'png', 'jpg', 'jpeg', 'tif', 'tiff'}
        all_gpaths = []
        for root, ds, fs in os.walk(images):
            gpaths = [join(root, f) for f in fs
                      if f.split('.')[-1].lower() in img_ext]
            if len(gpaths) > 1 and len(ds) != 0:
                raise Exception('Images must be in a leaf directory')
            if len(all_gpaths) > 0:
                raise Exception('Images cannot be nested ATM')
            all_gpaths += gpaths
        return sorted(all_gpaths)

    indexed_images = None
    dset = kwcoco.CocoDataset()

    for csv_fpath in csv_fpaths:
        with open(csv_fpath, 'r') as file:
            text = file.read()
        lines = [line.strip() for line in text.split('\n')]
        lines = [line for line in lines if line and not line.startswith('#')]

        for line in lines:
            parts = line.split(',')
            tid = int(parts[0])
            gname = parts[1]
            frame_index = int(parts[2])

            if gname == '':
                if len(dset.imgs) == 0 or indexed_images:
                    # The row does not name its image, so fall back to the
                    # position of the frame in the sorted image listing.
                    if not indexed_images:
                        indexed_images = lazy_image_list()
                    try:
                        gname = indexed_images[frame_index]
                    except IndexError:
                        # Annotations may run longer than the image
                        # sequence; skip the ones without an image.
                        continue
                else:
                    # Images were already registered by name and there is
                    # no listing to index into: skip this annotation.
                    continue

            # Convert the corner representation to [x, y, width, height]
            tl_x, tl_y, br_x, br_y = map(float, parts[3:7])
            bbox = [tl_x, tl_y, br_x - tl_x, br_y - tl_y]
            score = float(parts[7])
            target_len = float(parts[8])

            # The remaining fields alternate between name and score.
            # (A previous scan over these fields always exhausted its
            # iterator, so its NotImplementedError branch could never fire;
            # it was dead code and has been dropped.)
            rest = parts[9:]
            catnames = rest[0::2]
            catscores = list(map(float, rest[1::2]))
            cat_to_score = ub.dzip(catnames, catscores)
            if cat_to_score:
                catname = ub.argmax(cat_to_score)
                cid = dset.ensure_category(name=catname)
            else:
                cid = None

            gid = dset.ensure_image(file_name=gname, frame_index=frame_index)

            # Negative values are treated as "not provided"
            kw = {}
            if target_len >= 0:
                kw['target_len'] = target_len
            if score >= 0:
                kw['score'] = score

            dset.add_annotation(
                image_id=gid, category_id=cid, track_id=tid,
                bbox=bbox, **kw
            )
    return dset
def __init__(self, blocks_args=None, global_params=None):
    """
    Construct the stem, the stack of MBConv blocks, and the head layers.

    Args:
        blocks_args (list): non-empty list of per-stage block
            specifications. Each item is updated via ``_replace`` using the
            fields ``input_filters``, ``output_filters``, ``num_repeat`` and
            ``stride``.

        global_params: namedtuple-like network settings. The fields read
            here are ``classes``, ``num_classes``, ``image_size``,
            ``batch_norm_momentum``, ``batch_norm_epsilon``,
            ``depth_coefficient``, ``dropout_rate`` and ``noli``. A copy
            with ``classes`` / ``num_classes`` normalized is what gets
            stored on the instance.
    """
    super(EfficientNet, self).__init__()
    assert isinstance(blocks_args, list), 'blocks_args should be a list'
    assert len(blocks_args) > 0, 'block args must be greater than 0'
    self._global_params = global_params
    self._blocks_args = blocks_args

    # Resolve the category specification; fall back on the class count
    # when no explicit classes were given.
    import ndsampler
    import ubelt as ub
    class_spec = self._global_params.classes
    if class_spec is None:
        class_spec = self._global_params.num_classes
    self.classes = ndsampler.CategoryTree.coerce(class_spec)

    # Rebuild the params so they agree with the coerced category tree
    param_dict = ub.dzip(
        self._global_params._fields, list(self._global_params),
        cls=ub.odict)
    param_dict['num_classes'] = len(self.classes)
    param_dict['classes'] = self.classes.__json__()
    self._global_params = type(global_params)(**param_dict)
    params = self._global_params

    self.image_size = params._asdict()['image_size']

    # Remember the arguments needed to reconstruct this network
    self._initkw = {
        'blocks_args': self._blocks_args,
        'global_params': params,
    }
    self.model_name = None

    # The convolution class is chosen based on the image size
    Conv2d = Conv2dDynamicSamePadding.forsize(
        image_size=global_params.image_size)

    # Shared batch norm settings
    bn_mom = 1 - params.batch_norm_momentum
    bn_eps = params.batch_norm_epsilon

    # Stem: 3 input channels (rgb)
    stem_out = self.round_filters(32)
    self._conv_stem = Conv2d(3, stem_out, kernel_size=3, stride=2,
                             bias=False)
    self._bn0 = nn.BatchNorm2d(num_features=stem_out, momentum=bn_mom,
                               eps=bn_eps)

    depth_mult = global_params.depth_coefficient

    def scaled_repeats(num_repeat):
        # A falsy multiplier leaves the repeat count untouched
        if depth_mult:
            return int(math.ceil(depth_mult * num_repeat))
        return num_repeat

    # Block stack
    self._blocks = nn.ModuleList([])
    for stage in self._blocks_args:
        # Scale the filters and the depth of this stage
        stage = stage._replace(
            input_filters=self.round_filters(stage.input_filters),
            output_filters=self.round_filters(stage.output_filters),
            num_repeat=scaled_repeats(stage.num_repeat))

        # Only the first block of a stage changes stride / filter size
        self._blocks.append(MBConvBlock(stage, params))

        num_extra = stage.num_repeat - 1
        if num_extra > 0:
            # Repeated blocks keep the channel count and use unit stride
            stage = stage._replace(
                input_filters=stage.output_filters, stride=1)
            for _ in range(num_extra):
                self._blocks.append(MBConvBlock(stage, params))

    # Head: consumes the output of the final block
    head_in = stage.output_filters
    head_out = self.round_filters(1280)
    self._conv_head = Conv2d(head_in, head_out, kernel_size=1, bias=False)
    self._bn1 = nn.BatchNorm2d(num_features=head_out, momentum=bn_mom,
                               eps=bn_eps)

    # Pooling, dropout, and the final linear classifier
    self._avg_pooling = nn.AdaptiveAvgPool2d(1)
    self._dropout = nn.Dropout(params.dropout_rate)
    self._fc = nn.Linear(head_out, params.num_classes)

    self._noli = layers.rectify_nonlinearity(global_params.noli, dim=2)
def benchmark_nested_break():
    """
    There are several ways to do a nested break, but which one is best?

    Times five strategies for leaving a doubly nested loop (itertools
    product, a locally defined exception, a predefined exception, a generator
    function, and a generator expression) over a grid of input sizes and
    input styles. The timings are gathered into a long-form pandas
    DataFrame, summary statistics are printed, the methods are ranked with
    openskill, and the results are drawn with kwplot / seaborn.

    Takes no arguments and returns nothing; all output is printed or
    plotted.

    References:
        https://twitter.com/nedbat/status/1515345787563220996
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import itertools as it

    # Flatten the two loops into one so a plain break exits both
    def method1_itertools(iter1, iter2):
        for i, j in it.product(iter1, iter2):
            if i == 20 and j == 20:
                break

    # Escape via an exception class that is created on every call
    def method2_except(iter1, iter2):
        class Found(Exception):
            pass
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise Found
        except Found:
            pass

    class FoundPredef(Exception):
        pass

    # Same as above, but the exception class is created only once
    def method2_5_except_predef(iter1, iter2):
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise FoundPredef
        except FoundPredef:
            pass

    # Hide the nesting inside a generator function
    def method3_gendef(iter1, iter2):
        def genfunc():
            for i in iter1:
                for j in iter2:
                    yield i, j
        for i, j in genfunc():
            if i == 20 and j == 20:
                break

    # Hide the nesting inside a generator expression
    def method4_genexp(iter1, iter2):
        genexpr = ((i, j) for i in iter1 for j in iter2)
        for i, j in genexpr:
            if i == 20 and j == 20:
                break

    # Name -> object lookup for everything defined so far in this function.
    # The strings in basis['method'] must match the local function names.
    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(1000, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    import numpy as np
    basis = {
        'method': ['method1_itertools', 'method2_except', 'method2_5_except_predef', 'method3_gendef', 'method4_genexp'],
        # 'n1': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        # 'n2': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        'size': np.logspace(1, np.log2(10000), 30, base=2).astype(int),
        'input_style': ['range', 'list', 'customized_iter'],
        # 'param_name': [param values],
    }
    xlabel = 'size'
    xinput_labels = ['n1', 'n2', 'size']

    # Set these to param labels that directly transfer to method kwargs
    kw_labels = []
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['input_style'],
        'size': [],
    }
    # Whatever basis key is neither an x-axis input nor already assigned to
    # another group becomes the hue (here that leaves 'method').
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel} - xinput_labels) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    def make_input(params):
        # Given the parameterization make the benchmark function input
        # n1 = params['n1']
        # n2 = params['n2']
        size = params['size']
        # Both loops get sqrt(size) items, so the nested loop visits roughly
        # `size` pairs in total.
        n1 = int(np.sqrt(size))
        n2 = int(np.sqrt(size))
        if params['input_style'] == 'list':
            iter1 = list(range(n1))
            iter2 = list(range(n1))
        elif params['input_style'] == 'range':
            iter1 = range(n1)
            iter2 = range(n2)
        elif params['input_style'] == 'customized_iter':
            import random
            # Seeded generators of random ints in [0, n2].
            # NOTE(review): these are single-use generators, so in the
            # methods that loop over iter2 directly the inner loop only
            # yields items during the first outer iteration -- confirm this
            # is the intended workload.
            def rando1():
                rng1 = random.Random(0)
                for _ in range(n1):
                    yield rng1.randint(0, n2)

            def rando2():
                rng2 = random.Random(1)
                for _ in range(n1):
                    yield rng2.randint(0, n2)

            iter1 = rando1()
            iter2 = rando2()
        else:
            raise KeyError
        return {'iter1': iter1, 'iter2': iter2}

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        # size = params['n1'] * params['n2']
        # params['size'] = size
        # One string key per group ('style_key', 'size_key', 'hue_key'),
        # built from the subset of params that belongs to that group.
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # The inputs are rebuilt on every loop so that single-use
            # iterators are fresh for each timed call.
            kwargs.update(make_input(params))
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # chunk_iter = ub.chunks(ti.times, ti.bestof)
            # times = list(map(min, chunk_iter))  # TODO: timerit method for this
            times = ti.robust_times()
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Let's try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    # Group by every remaining parameter column so that each group holds the
    # competing methods for one parameter setting.
    other_keys = sorted(set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        # Speedup of each variant relative to the slowest one in the group
        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('method_ratings = {}'.format(ub.repr2(method_ratings, nl=1)))
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        # Map each non-empty group onto the seaborn keyword of the same name
        # (e.g. hue='hue_key', style='style_key').
        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title(f'Benchmark Nested Breaks: #Trials {ti.num}, bestof {ti.bestof}')
        ax.set_xlabel(f'{xlabel}')
        ax.set_ylabel('Time')
        ax.set_xscale('log')
        ax.set_yscale('log')

        # Only block on the figure window when not running inside IPython
        try:
            __IPYTHON__
        except NameError:
            plt.show()
def 数组_合并为字典(items1, items2, cls=dict):
    """
    Zip two collections into a dictionary.

    The name reads "array: merge into dictionary". This is a thin wrapper
    that forwards its arguments unchanged to ``ub.dzip``.

    Args:
        items1: the items used as keys.
        items2: the items used as values.
        cls (type): dictionary class to construct. Defaults to ``dict``.

    Returns:
        An instance of ``cls`` as produced by ``ub.dzip``.
    """
    merged = ub.dzip(items1, items2, cls=cls)
    return merged