def _build_index(self):
    """ build reverse indexes """
    # create index
    anns, cats, imgs = {}, {}, {}
    gid_to_aids = ub.ddict(list)
    cid_to_gids = ub.ddict(list)
    cid_to_aids = ub.ddict(list)

    for ann in self.dataset.get('annotations', []):
        gid_to_aids[ann['image_id']].append(ann['id'])
        anns[ann['id']] = ann

    for img in self.dataset.get('images', []):
        imgs[img['id']] = img

    for cat in self.dataset.get('categories', []):
        cats[cat['id']] = cat

    if anns and cats:
        for ann in self.dataset['annotations']:
            cid_to_gids[ann['category_id']].append(ann['image_id'])

        for cat, gids in cid_to_gids.items():
            aids = [aid for gid in gids for aid in gid_to_aids[gid]]
            cid_to_aids[cat] = aids

    # create class members
    self.gid_to_aids = gid_to_aids
    self.cid_to_gids = cid_to_gids
    self.cid_to_aids = cid_to_aids
    self.anns = anns
    self.imgs = imgs
    self.cats = cats
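
# Illustrative usage sketch (not part of the original source). It assumes
# `import ubelt as ub` is in scope for `_build_index` above, as it is in the
# original module. The `_DemoDset` holder class is hypothetical and exists
# only so the function can be called as a method on a toy COCO-style dataset.
def _demo_build_index():
    class _DemoDset:
        pass
    _DemoDset._build_index = _build_index
    self = _DemoDset()
    self.dataset = {
        'images': [{'id': 1, 'file_name': 'a.png'}],
        'categories': [{'id': 9, 'name': 'cat'}],
        'annotations': [{'id': 100, 'image_id': 1, 'category_id': 9}],
    }
    self._build_index()
    # One image with one annotation of one category
    assert dict(self.gid_to_aids) == {1: [100]}
    assert dict(self.cid_to_gids) == {9: [1]}
    assert dict(self.cid_to_aids) == {9: [100]}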
def count_ubelt_usage():
    import ubelt as ub
    import glob
    from os.path import join
    names = [
        'xdoctest', 'netharn', 'xdev', 'xinspect', 'ndsampler', 'kwil',
        'kwarray', 'kwimage', 'kwplot', 'scriptconfig',
    ]

    all_fpaths = []
    for name in names:
        repo_fpath = ub.expandpath(join('~/code', name))
        fpaths = glob.glob(join(repo_fpath, '**', '*.py'), recursive=True)
        for fpath in fpaths:
            all_fpaths.append((name, fpath))

    import re
    pat = re.compile(r'\bub\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')

    pkg_to_hist = ub.ddict(lambda: ub.ddict(int))
    for name, fpath in ub.ProgIter(all_fpaths):
        text = open(fpath, 'r').read()
        for match in pat.finditer(text):
            attr = match.groupdict()['attr']
            if attr in ub.__all__:
                pkg_to_hist[name][attr] += 1

    hist_iter = iter(pkg_to_hist.values())
    usage = next(hist_iter).copy()
    for other in hist_iter:
        for k, v in other.items():
            usage[k] += v
    for attr in ub.__all__:
        usage[attr] += 0

    for name in pkg_to_hist.keys():
        pkg_to_hist[name] = ub.odict(sorted(
            pkg_to_hist[name].items(), key=lambda t: t[1])[::-1])
    usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1])

    print(ub.repr2(pkg_to_hist, nl=2))
    print(ub.repr2(usage, nl=1))
def score_voc(dmet, ovthresh=0.5, bias=1, method='voc2012', gids=None):
    recs = {}
    cx_to_lines = ub.ddict(list)
    # confusions = []
    if gids is None:
        gids = dmet.pred.imgs.keys()
    for gid in gids:
        pred_annots = dmet.pred.annots(gid=gid)
        true_annots = dmet.true.annots(gid=gid)

        true_boxes = true_annots.boxes
        true_cxs = true_annots.cids
        true_weights = true_annots._lookup('weight')

        pred_boxes = pred_annots.boxes
        pred_cxs = pred_annots.cids
        pred_scores = pred_annots._lookup('score')

        recs[gid] = []
        for bbox, cx, weight in zip(true_boxes.to_tlbr().data, true_cxs,
                                    true_weights):
            recs[gid].append({
                'bbox': bbox,
                'difficult': weight < .5,
                'name': cx
            })

        for bbox, cx, score in zip(pred_boxes.to_tlbr().data, pred_cxs,
                                   pred_scores):
            cx_to_lines[cx].append([gid, score] + list(bbox))

    perclass = ub.ddict(dict)
    for cx in cx_to_lines.keys():
        lines = cx_to_lines[cx]
        classname = cx
        rec, prec, ap = voc_eval(lines, recs, classname, ovthresh=ovthresh,
                                 bias=bias, method=method)
        perclass[cx]['pr'] = (rec, prec)
        perclass[cx]['ap'] = ap

    mAP = np.nanmean([d['ap'] for d in perclass.values()])
    voc_scores = {
        'mAP': mAP,
        'perclass': perclass,
    }
    return voc_scores
def _get_cm_edge_data(infr, edges, cm_list=None):
    symmetric = True

    if cm_list is None:
        cm_list = infr.cm_list
    # Find scores for the edges that exist in the graph
    edge_to_data = ub.ddict(dict)
    aid_to_cm = {cm.qaid: cm for cm in cm_list}
    for u, v in edges:
        if symmetric:
            u, v = e_(u, v)
        cm1 = aid_to_cm.get(u, None)
        cm2 = aid_to_cm.get(v, None)
        scores = []
        ranks = []
        for cm in util.filter_Nones([cm1, cm2]):
            for aid in [u, v]:
                idx = cm.daid2_idx.get(aid, None)
                if idx is None:
                    continue
                score = cm.annot_score_list[idx]
                rank = cm.get_annot_ranks([aid])[0]
                scores.append(score)
                ranks.append(rank)
        if len(scores) == 0:
            score = None
            rank = None
        else:
            # Choose whichever one gave the best score
            idx = util.safe_argmax(scores, nans=False)
            score = scores[idx]
            rank = ranks[idx]
        edge_to_data[(u, v)]['score'] = score
        edge_to_data[(u, v)]['rank'] = rank
    return edge_to_data
def _BROKEN_rank_epochs(monitor):
    """
    FIXME: broken - implement better rank aggregation with custom weights

    Example:
        >>> monitor = demodata_monitor()
        >>> monitor._BROKEN_rank_epochs()
    """
    rankings = {}
    for key, value in monitor.best_epochs(smooth=False).items():
        rankings[key + '_raw'] = value

    for key, value in monitor.best_epochs(smooth=True).items():
        rankings[key + '_smooth'] = value

    # borda-like weighted rank aggregation.
    # probably could do something better.
    epoch_to_weight = ub.ddict(lambda: 0)
    for key, ranking in rankings.items():
        # weights = np.linspace(0, 1, num=len(ranking))[::-1]
        weights = np.logspace(0, 2, num=len(ranking))[::-1] / 100
        for epoch, w in zip(ranking, weights):
            epoch_to_weight[epoch] += w

    agg_ranking = ub.argsort(epoch_to_weight)[::-1]
    return agg_ranking
def _choose_indices(harn):
    """
    Hack to pick several images from the validation set to monitor each epoch.
    """
    tag = harn.current_tag
    dset = harn.loaders[tag].dataset

    cid_to_gids = ub.ddict(set)
    empty_gids = []
    for gid in range(len(dset)):
        annots = dset._load_annotation(gid)
        if len(annots['gt_classes']) == 0:
            empty_gids.append(gid)
        for cid, ishard in zip(annots['gt_classes'], annots['gt_ishard']):
            if not ishard:
                cid_to_gids[cid].add(gid)

    # Choose an image with each category
    chosen_gids = set()
    for cid, gids in cid_to_gids.items():
        for gid in gids:
            if gid not in chosen_gids:
                chosen_gids.add(gid)
                break

    # Choose an image with nothing in it (if it exists)
    if empty_gids:
        chosen_gids.add(empty_gids[0])

    chosen_indices = chosen_gids
    harn.chosen_indices[tag] = sorted(chosen_indices)
def bench_sort_dictionary():
    """
    CommandLine:
        xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_sort_dictionary

    Results:
        Timed best=25.484 µs, mean=25.701 ± 0.1 µs for itemgetter
        Timed best=28.810 µs, mean=29.138 ± 0.3 µs for lambda
    """
    import operator as op
    import ubelt as ub
    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1

    ti = ub.Timerit(1000, bestof=10, verbose=1)

    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]

    for timer in ti.reset('lambda'):
        with timer:
            key_order = [key for (key, value) in sorted(hist_.items(), key=lambda x: x[1])]

    del key_order
def group_pfiles(cls, pfiles, step_idx=None):
    """
    Creates groups of pfiles that *might* be the same.

    Example:
        >>> fpaths = _demodata_files()
        >>> pfiles = [ProgressiveFile(f) for f in fpaths]
        >>> groups1 = ProgressiveFile.group_pfiles(pfiles)
        >>> for pfile in pfiles:
        >>>     pfile.refine()
        >>> groups2 = ProgressiveFile.group_pfiles(pfiles)
        >>> for pfile in pfiles[0::2]:
        >>>     pfile.refine()
        >>> groups3 = ProgressiveFile.group_pfiles(pfiles)
        >>> for pfile in pfiles[1::2]:
        >>>     pfile.refine()
        >>> groups4 = ProgressiveFile.group_pfiles(pfiles)
    """
    if step_idx is not None:
        # We are given the step idx to use, so do that
        final_groups = ub.group_items(pfiles, key=lambda x: x.step_id(step_idx))
    else:
        # Otherwise do something reasonable
        size_groups = ub.group_items(pfiles, key=lambda x: x.size)
        final_groups = ub.ddict(list)
        for group in size_groups.values():
            # we have to use the minimum refine step available
            # for any unfinished pfile to ensure consistency
            step_idx = ProgressiveFile.compatible_step_idx(group)
            step_groups = ub.group_items(group, key=lambda x: x.step_id(step_idx))
            for key, val in step_groups.items():
                final_groups[key].extend(val)
    return final_groups
def score_netharn(dmet, ovthresh=0.5, bias=0, method='voc2012', gids=None):
    y_accum = ub.ddict(list)
    # confusions = []
    if gids is None:
        gids = dmet.pred.imgs.keys()
    for gid in gids:
        pred_annots = dmet.pred.annots(gid=gid)
        true_annots = dmet.true.annots(gid=gid)

        true_boxes = true_annots.boxes
        true_cxs = true_annots.cids
        true_weights = true_annots._lookup('weight')

        pred_boxes = pred_annots.boxes
        pred_cxs = pred_annots.cids
        pred_scores = pred_annots._lookup('score')

        y = detection_confusions(true_boxes, true_cxs, true_weights,
                                 pred_boxes, pred_scores, pred_cxs,
                                 bg_weight=1.0, ovthresh=ovthresh, bg_cls=-1,
                                 bias=bias)
        y['gid'] = [gid] * len(y['pred'])
        for k, v in y.items():
            y_accum[k].extend(v)

    y_df = pd.DataFrame(y_accum)

    # class agnostic score
    ap, prec, rec = pr_curves(y_df)
    peritem = {
        'ap': ap,
        'pr': (prec, rec),
    }

    # perclass scores
    perclass = {}
    cx_to_group = dict(iter(y_df.groupby('cx')))
    for cx in cx_to_group:
        # for cx, group in cx_to_group.items():
        group = cx_to_group.get(cx, None)
        ap, prec, rec = pr_curves(group, method=method)
        perclass[cx] = {
            'ap': ap,
            'pr': (prec, rec),
        }

    mAP = np.nanmean([d['ap'] for d in perclass.values()])
    nh_scores = {'mAP': mAP, 'perclass': perclass, 'peritem': peritem}
    return nh_scores
def bench_closures():
    """
    Is it faster to use a closure or pass in the variables explicitly?
    """
    import ubelt as ub
    import timerit
    import numpy as np

    # Test a nested func with vs without a closure
    def rand_complex(*shape):
        real = np.random.rand(*shape).astype(np.complex)
        imag = np.random.rand(*shape).astype(np.complex) * 1j
        mat = real + imag
        return mat

    s = int(ub.argval('--s', default='1'))
    mat1 = rand_complex(s, s)
    mat2 = rand_complex(s, s)
    N = 1000
    offset = 100

    def nested_closure():
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    def nested_explicit(mat1, mat2, N, offset):
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    ti = timerit.Timerit(int(2**11), bestof=int(2**8),
                         verbose=int(ub.argval('--verbose', default='1')))

    for timer in ti.reset('nested_explicit'):
        with timer:
            nested_explicit(mat1, mat2, N, offset)

    for timer in ti.reset('nested_closure'):
        with timer:
            nested_closure()

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
def _get_cm_agg_aid_ranking(infr, cc):
    aid_to_cm = {cm.qaid: cm for cm in infr.cm_list}
    all_scores = ub.ddict(list)
    for qaid in cc:
        cm = aid_to_cm[qaid]
        # should we be doing nids?
        for daid, score in zip(cm.get_top_aids(), cm.get_top_scores()):
            all_scores[daid].append(score)

    max_scores = sorted((max(scores), aid)
                        for aid, scores in all_scores.items())[::-1]
    ranked_aids = util.take_column(max_scores, 1)
    return ranked_aids
def image_from_each_dataset():
    groups = ub.ddict(list)
    for gid, img in merged.imgs.items():
        groups[os.path.dirname(img['file_name'])].append(gid)

    gid_groups = []
    for gids in groups.values():
        gids = sort_gids_by_nannots(gids)
        gid_groups.append(gids)

    # round robin sample
    datas = [gid for x in zip(*gid_groups) for gid in x]
    return datas
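
# The "round robin sample" above interleaves one gid from each group at a time
# via zip(*gid_groups). A standalone sketch of just that interleaving step
# (illustrative only, not from the original source); note that zip truncates
# to the shortest group, so trailing gids of larger groups are dropped:
def _demo_round_robin_interleave():
    gid_groups = [[1, 2, 3], [10, 20, 30, 40], [100, 200, 300]]
    datas = [gid for x in zip(*gid_groups) for gid in x]
    assert datas == [1, 10, 100, 2, 20, 200, 3, 30, 300]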
def check_relationships(branches):
    ancestors = {b: set() for b in branches}
    length = len(branches) * (len(branches) - 1)
    for b1, b2 in ub.ProgIter(it.combinations(branches, 2), length=length):
        ret = ub.cmd('git merge-base --is-ancestor {} {}'.format(b1, b2))['ret']
        if ret == 0:
            ancestors[b1].add(b2)
        ret = ub.cmd('git merge-base --is-ancestor {} {}'.format(b2, b1))['ret']
        if ret == 0:
            ancestors[b2].add(b1)

    print('<key> is an ancestor of <value>')
    print(ub.repr2(ancestors))

    descendants = {b: set() for b in branches}
    for key, others in ancestors.items():
        for o in others:
            descendants[o].add(key)
    print('<key> descends from <value>')
    print(ub.repr2(descendants))

    import plottool as pt
    import networkx as nx
    G = nx.DiGraph()
    G.add_nodes_from(branches)
    for key, others in ancestors.items():
        for o in others:
            # G.add_edge(key, o)
            G.add_edge(o, key)

    from networkx.algorithms.connectivity.edge_augmentation import collapse
    flag = True
    G2 = G
    while flag:
        flag = False
        for u, v in list(G2.edges()):
            if G2.has_edge(v, u):
                G2 = collapse(G2, [[u, v]])

                node_relabel = ub.ddict(list)
                for old, new in G2.graph['mapping'].items():
                    node_relabel[new].append(old)
                G2 = nx.relabel_nodes(G2, {k: '\n'.join(v) for k, v in node_relabel.items()})
                flag = True
                break

    G3 = nx.transitive_reduction(G2)
    pt.show_nx(G3, arrow_width=1.5, prog='dot', layoutkw=dict(prog='dot'))
    pt.zoom_factory()
    pt.pan_factory()
    pt.plt.show()
def varied_values(dict_list, min_variations=1):
    """
    Given a list of dictionaries, find the values that differ between them

    Args:
        dict_list (List[Dict]):
            The values of the dictionary must be hashable. Lists will be
            converted into tuples.

        min_variations (int, default=1):
            Only keys whose values take on more than `min_variations`
            distinct values are returned.

    TODO:
        - [ ] Is this a ubelt function?

    Example:
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/misc/notes'))
        >>> from hardwareinfo.backend_linux import *  # NOQA
        >>> num_keys = 10
        >>> num_dicts = 10
        >>> all_keys = {ub.hash_data(i)[0:16] for i in range(num_keys)}
        >>> dict_list = [
        >>>     {key: ub.hash_data(key)[0:16] for key in all_keys}
        >>>     for _ in range(num_dicts)
        >>> ]
        >>> import random
        >>> rng = random.Random(0)
        >>> for data in dict_list:
        >>>     if rng.random() > 0.5:
        >>>         for key in list(data):
        >>>             if rng.random() > 0.9:
        >>>                 data[key] = rng.randint(1, 32)
        >>> varied = varied_values(dict_list)
        >>> print('varied = {}'.format(ub.repr2(varied, nl=1)))
    """
    all_keys = set()
    for data in dict_list:
        all_keys.update(data.keys())

    varied = ub.ddict(set)
    for data in dict_list:
        for key in all_keys:
            value = data.get(key, ub.NoParam)
            if isinstance(value, list):
                value = tuple(value)
            varied[key].add(value)

    for key, values in list(varied.items()):
        if len(values) <= min_variations:
            del varied[key]
    return varied
def rank_inventory(inventory):
    candidates = list(ub.flatten(list(pkmn.family(ancestors=False, node=True))
                                 for pkmn in inventory))

    groups = ub.group_items(candidates, key=lambda p: p.name)

    leages = {
        'master': {'max_cp': float('inf')},
        'ultra': {'max_cp': 2500},
        'great': {'max_cp': 1500},
        'little': {'max_cp': 500},
    }

    max_level = 45  # for XL candy
    # max_level = 40  # normal

    all_dfs = []

    for name, group in groups.items():
        print('\n\n------------\n\n')
        print('name = {!r}'.format(name))
        for leage_name, leage_filters in leages.items():
            max_cp = leage_filters['max_cp']
            print('')
            print(' ========== ')
            print(' --- {} in {} --- '.format(name, leage_name))
            not_eligible = [p for p in group if p.cp is not None and p.cp > max_cp]
            eligible = [p for p in group if p.cp is None or p.cp <= max_cp]
            print('not_eligible = {!r}'.format(not_eligible))
            if len(eligible) > 0:
                first = ub.peek(eligible)
                have_ivs = eligible
                df = first.leage_rankings_for(have_ivs, max_cp=max_cp,
                                              max_level=max_level)
                all_dfs.append(df)
            else:
                print('none eligible')

    # Print out the best ranks for each set of IVs over all possible forms
    # (lets you know which ones can be transferred safely)
    iv_to_rank = ub.ddict(list)
    for df in all_dfs:
        if df is not None:
            df = df.set_index(['iva', 'ivd', 'ivs'])
            for iv, rank in zip(df.index, df['rank']):
                iv_to_rank[iv].append(rank)
    iv_to_best_rank = ub.map_vals(sorted, iv_to_rank)
    iv_to_best_rank = ub.sorted_vals(iv_to_best_rank)
    print('iv_to_best_rank = {}'.format(ub.repr2(iv_to_best_rank, nl=1, align=':')))
def __init__(evaluator, classes):
    evaluator.classes = classes
    # Remember metrics for each image individually
    evaluator.gid_to_metrics = {}
    # accum is a dictionary that will hold different metrics we accumulate
    evaluator.accum = ub.ddict(lambda: 0)
    # Estimate contains our current averaged metrics
    evaluator.estimate = {}
    # We don't care how we predict for the void class
    evaluator.void_idx = classes.index('background')
def bench_dict_hist():
    import operator as op
    import ubelt as ub
    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1
    OrderedDict = ub.odict

    ti = ub.Timerit(1000, bestof=10, verbose=2)

    for timer in ti.reset('time'):
        with timer:
            getval = op.itemgetter(1)
            key_order = (key for (key, value) in sorted(hist_.items(), key=getval))
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('time'):
        with timer:
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            hist = OrderedDict([
                (key, value)
                for (key, value) in sorted(hist_.items(), key=getval)
            ])

    # -----------------

    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]

    for timer in ti.reset('lambda'):
        with timer:
            key_order = [key for (key, value) in sorted(hist_.items(), key=lambda x: x[1])]
def bench_dict_hist():
    """
    CommandLine:
        xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_dict_hist

    Results:
        Timed best=48.330 µs, mean=49.437 ± 1.0 µs for dict_subset_iter
        Timed best=59.392 µs, mean=63.395 ± 11.9 µs for dict_subset_list
        Timed best=47.203 µs, mean=47.632 ± 0.2 µs for direct_itemgetter
    """
    import operator as op
    import ubelt as ub
    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1
    OrderedDict = ub.odict

    ti = ub.Timerit(1000, bestof=10, verbose=1)

    for timer in ti.reset('dict_subset_iter'):
        with timer:
            getval = op.itemgetter(1)
            key_order = (key for (key, value) in sorted(hist_.items(), key=getval))
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('dict_subset_list'):
        with timer:
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('direct_itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            hist = OrderedDict([
                (key, value)
                for (key, value) in sorted(hist_.items(), key=getval)
            ])

    del hist
def find_neg_nid_freq_to(infr, cc):
    """
    Find the number of edges leaving `cc` and directed towards specific names.
    """
    pos_graph = infr.pos_graph
    neg_graph = infr.neg_graph
    neg_nid_freq = ub.ddict(lambda: 0)
    for u in cc:
        nid1 = pos_graph.node_label(u)
        for v in neg_graph.neighbors(u):
            nid2 = pos_graph.node_label(v)
            if nid1 == nid2 and v not in cc:
                continue
            neg_nid_freq[nid2] += 1
    return neg_nid_freq
def parse_timemap_from_blocks(self, profile_block_list):
    """
    Build a map from times to line_profile blocks
    """
    prefix_list = []
    timemap = ub.ddict(list)
    for ix in range(len(profile_block_list)):
        block = profile_block_list[ix]
        total_time = self.get_block_totaltime(block)
        # Blocks without time go at the front of sorted output
        if total_time is None:
            prefix_list.append(block)
        # Blocks that are not run are not appended to output
        elif total_time != 0:
            timemap[total_time].append(block)
    return prefix_list, timemap
def group_pairs(pair_list):
    """
    Groups a list of items using the first element in each pair as the item
    and the second element as the groupid.

    Args:
        pair_list (list): list of 2-tuples (item, groupid)

    Returns:
        dict: groupid_to_items: maps a groupid to a list of items
    """
    # Initialize dict of lists
    groupid_to_items = ub.ddict(list)
    # Insert each item into the correct group
    for item, groupid in pair_list:
        groupid_to_items[groupid].append(item)
    return groupid_to_items
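
# Minimal usage sketch for group_pairs (illustrative, not from the original
# module). It assumes `import ubelt as ub` is in scope, as in the original file.
def _demo_group_pairs():
    pair_list = [('eggs', 'breakfast'), ('toast', 'breakfast'), ('soup', 'lunch')]
    groupid_to_items = group_pairs(pair_list)
    assert dict(groupid_to_items) == {
        'breakfast': ['eggs', 'toast'],
        'lunch': ['soup'],
    }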
def _check_datas(task):
    scene_im_paths, scene_gt_paths = task._load_all_scene_paths()
    keys = task._preprocessing_keys()
    key_to_num = ub.ddict(list)
    for scene, key in it.product(task.scene_ids, keys):
        im_paths = scene_im_paths[scene][key]
        gt_paths = scene_gt_paths[scene][key]
        assert len(im_paths) == len(gt_paths)
        assert len(im_paths) > 0
        key_to_num[key] += [len(im_paths)]

    for key, ns in key_to_num.items():
        ns_set = set(ns)
        if len(ns_set) != 1:
            print('key = {!r}'.format(key))
            print('ns_set = {!r}'.format(ns_set))
            print('--')
def main():
    import ubelt as ub
    from ubelt import util_list
    from ubelt.util_list import take
    import random
    from math import e

    #
    # Data
    N = 100
    array = [random.random() for _ in range(N)]
    indices = [random.randint(0, N - 1) for _ in range(int(N // e))]

    ti = ub.Timerit(2 ** 11, bestof=2 ** 8, verbose=1)

    for timer in ti.reset('take'):
        with timer:
            list(take(array, indices))

    for timer in ti.reset('util_list.take'):
        with timer:
            list(util_list.take(array, indices))

    for timer in ti.reset('ub.take'):
        with timer:
            list(ub.take(array, indices))

    print('---')

    # import pandas as pd
    # df = pd.DataFrame(rankings)
    # print('df =\n{}'.format(df))

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
def best_epochs(monitor):
    rankings = {}

    def _rank(key, metrics, type='min'):
        values = [m[key] for m in metrics]
        sortx = np.argsort(values)
        if type == 'max':
            sortx = np.argsort(values)[::-1]
        elif type == 'min':
            sortx = np.argsort(values)
        else:
            raise KeyError(type)
        ranked_epochs = np.array(monitor.epochs)[sortx]
        return ranked_epochs

    for key in monitor.min_keys:
        rankings[key + '_raw'] = _rank(key, monitor.raw_metrics, 'min')
        rankings[key + '_smooth'] = _rank(key, monitor.smooth_metrics, 'min')

    for key in monitor.max_keys:
        rankings[key + '_raw'] = _rank(key, monitor.raw_metrics, 'max')
        rankings[key + '_smooth'] = _rank(key, monitor.smooth_metrics, 'max')

    for key in monitor.max_keys:
        values = [m[key] for m in monitor.raw_metrics]
        sortx = np.argsort(values)[::-1]
        ranked_epochs = np.array(monitor.epochs)[sortx]
        rankings[key] = ranked_epochs

    # borda-like weighted rank aggregation.
    # probably could do something better.
    epoch_to_weight = ub.ddict(lambda: 0)
    for key, ranking in rankings.items():
        # weights = np.linspace(0, 1, num=len(ranking))[::-1]
        weights = np.logspace(0, 2, num=len(ranking))[::-1] / 100
        for epoch, w in zip(ranking, weights):
            epoch_to_weight[epoch] += w

    agg_ranking = ub.argsort(epoch_to_weight)[::-1]
    return agg_ranking
def benchmark_ondisk_crop():
    import kwplot
    plt = kwplot.autoplt()

    region = 'small_random'

    dim = 3
    # xdata = [64, 128, 256, 512]
    # xdata = [64, 128, 256, 320, 512, 640, 768, 896, 1024]
    # xdata = np.linspace(64, 4096, num=8).astype(np.int)
    # xdata = np.linspace(64, 2048, num=8).astype(np.int)
    # xdata = np.linspace(64, 1024, num=8).astype(np.int)
    xdata = [256, 1024, 4096, 8192, 16384]
    # xdata = [256, 1024, 4096, 8192]
    xdata = [256, 1024, 2048]
    # xdata = [256]

    ydata = ub.ddict(list)
    # for size in [64, 128, 256, 512, 1024, 2048, 4096]:
    for size in xdata:
        result = time_ondisk_crop(size, dim=dim, region=region, num=5)
        for key, val in result.items():
            min, mean, std = val
            ydata[key].append(mean * 1e6)

    # Sort legend by descending time taken on the largest image
    ydata = ub.odict(sorted(ydata.items(), key=lambda i: -i[1][-1]))

    kwplot.multi_plot(
        xdata, ydata, ylabel='micro-seconds (us)', xlabel='image size',
        title='Chip region={} benchmark for {}D image data'.format(region, dim),
        # yscale='log',
        ymin=1,
    )
    plt.show()
def after_initialize(harn):
    harn.xdata = []
    harn.ydata = ub.ddict(list)
def _dump_monitor_tensorboard(harn, mode='epoch', special_groupers=['loss'],
                              serial=False):
    """
    Dumps PNGs to disk visualizing tensorboard scalars.
    Also dumps pickles to disk containing the same information.

    Args:
        mode (str | Tuple[str], default='epoch'):
            Can be either `epoch` or `iter`, or a tuple containing both.

        special_groupers (List[str], default=['loss']):
            list of strings indicating groups. For each item, a logged value
            is contained in that group if it contains that item as a
            substring.

        serial (bool, default=False):
            If True executes the drawing process in the main process,
            otherwise it forks a new process and runs in the background.

    CommandLine:
        xdoctest -m netharn.mixins _dump_monitor_tensorboard --profile

    Example:
        >>> import netharn as nh
        >>> from netharn.mixins import _dump_monitor_tensorboard
        >>> harn = nh.FitHarn.demo()
        >>> harn.run()
        >>> try:
        >>>     _dump_monitor_tensorboard(harn)
        >>> except ImportError:
        >>>     pass
    """
    import ubelt as ub
    import netharn as nh
    from os.path import join
    import json
    import six
    from six.moves import cPickle as pickle

    # harn.debug('Plotting tensorboard data. serial={}, mode={}'.format(serial, mode))

    train_dpath = harn.train_dpath

    tb_data = nh.util.read_tensorboard_scalars(train_dpath, cache=0, verbose=0)

    tb_data['meta'] = {
        'nice': harn.hyper.nice,
        'special_groupers': special_groupers,
    }

    out_dpath = ub.ensuredir((train_dpath, 'monitor', 'tensorboard'))

    # Write a script that the user can run to re-visualize the results
    if not ub.WIN32:
        reviz_fpath = join(out_dpath, 'revisualize.sh')
        reviz_text = ub.codeblock(
            '''
            #!/bin/bash
            __heredoc__ = """
            Helper script to visualize all of the results in the pkl / json
            files in this directory.
            """
            REVIZ_DPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
            xdoctest -m netharn.mixins _dump_measures --out_dpath=$REVIZ_DPATH
            ''')
        with open(reviz_fpath, 'w') as file:
            file.write(reviz_text)
        try:
            import os
            import stat
            orig_mode = os.stat(reviz_fpath).st_mode
            new_flags = stat.S_IXGRP | stat.S_IEXEC
            if (new_flags & orig_mode) != new_flags:
                new_mode = orig_mode | new_flags
                os.chmod(reviz_fpath, new_mode)
        except Exception as ex:
            print('ex = {!r}'.format(ex))

    tb_data_pickle_fpath = join(out_dpath, 'tb_data.pkl')
    with open(tb_data_pickle_fpath, 'wb') as file:
        pickle.dump(tb_data, file)

    tb_data_json_fpath = join(out_dpath, 'tb_data.json')
    with open(tb_data_json_fpath, 'w') as file:
        if six.PY2:
            jsonkw = dict(indent=1)
        else:
            jsonkw = dict(indent=' ')
        try:
            json.dump(tb_data, file, **jsonkw)
        except Exception as ex:
            print('ex = {!r}'.format(ex))
            json.dump(
                {
                    'error': 'Unable to write to json.',
                    'info': 'See pickle file: {}'.format(tb_data_json_fpath)
                }, file, **jsonkw)

    # The following function draws the tensorboard result
    # This might take some non-trivial amount of time so we attempt to run in
    # a separate process.
    func = _dump_measures
    args = (tb_data, out_dpath, mode)

    if not serial:

        if False:
            # Maybe thread-safer way of doing this? Maybe not, there is a
            # management thread used by futures.
            from concurrent import futures
            if not hasattr(harn, '_internal_executor'):
                harn._internal_executor = futures.ProcessPoolExecutor(
                    max_workers=1)
                harn._prev_job = None
            if harn._prev_job is None or harn._prev_job.done():
                # Wait to before submitting another job
                # Unsure if its ok that this job might not be a daemon
                harn.info('DO MPL DRAW')
                job = harn._internal_executor.submit(func, *args)
                harn._prev_job = job
            else:
                if harn._prev_job is not None:
                    harn.info('NOT DOING MPL DRAW')
                    harn.warn('NOT DOING MPL DRAW')
        else:
            # This causes thread-unsafe warning messages in the inner loop
            # Likely because we are forking while a thread is alive
            if not hasattr(harn, '_internal_procs'):
                harn._internal_procs = ub.ddict(dict)

            # Clear finished processes from the pool
            for pid in list(harn._internal_procs[mode].keys()):
                proc = harn._internal_procs[mode][pid]
                if not proc.is_alive():
                    harn._internal_procs[mode].pop(pid)

            # only start a new process if there is room in the pool
            if len(harn._internal_procs[mode]) < 1:
                import multiprocessing
                proc = multiprocessing.Process(target=func, args=args)
                proc.daemon = True
                proc.start()
                harn._internal_procs[mode][proc.pid] = proc
            else:
                if 0:
                    harn.warn('NOT DOING MPL DRAW')
    else:
        func(*args)
def __init__(self, classes=None):
    self.recs = {}
    self.cx_to_lines = ub.ddict(list)
    self.classes = classes
def _best_prefix_transform(set1, target_set2):
    """
    Find a way to transform prefixes of items in set1 to match target_set2

    Example:
        >>> set1 = {'mod.f.0.w',
        >>>         'mod.f.1.b',
        >>>         'mod.f.1.n',
        >>>         'mod.f.1.rm',
        >>>         'mod.f.1.rv',}
        >>> #
        >>> target_set2 = {
        >>>     'bar.foo.extra.f.1.b',
        >>>     'bar.foo.extra.f.1.n',
        >>>     'bar.foo.extra.f.1.w',
        >>>     'bar.foo.extra.f.3.w',
        >>> }
        >>> _best_prefix_transform(set1, target_set2)
        >>> target_set2.add('JUNK')
        >>> _best_prefix_transform(set1, target_set2)
    """

    # probably an efficient way to do this with a trie

    # NOTE: In general this is a graph-isomorphism problem or a maximum common
    # subgraph problem. However, we can look only at the special case of
    # "maximum common subtrees". Given two directory structures (as trees)
    # we find the common bits.
    # https://perso.ensta-paris.fr/~diam/ro/online/viggo_wwwcompendium/node168.html
    # We can approximate to O(log log n / log^2 n)
    # Can get algorithm from maximum independent set
    # https://arxiv.org/abs/1602.07210

    # The most efficient algorithm here would be for solving
    # "Maximum common labeled subtrees"
    # APX-hard for unordered trees, but polytime solvable for ordered trees
    # For directory structures we can induce an order, and hence obtain a
    # polytime solution
    #
    # On the Maximum Common Embedded Subtree Problem for Ordered Trees
    # https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf

    from os.path import commonprefix
    prefixes1 = commonprefix(list(set1)).split('.')
    prefixes2 = commonprefix(list(target_set2)).split('.')

    # Remove the trailing prefixes that are the same
    num_same = 0
    for i in range(1, min(len(prefixes1), len(prefixes2))):
        if prefixes1[-i] == prefixes2[-i]:
            num_same = i
        else:
            break
    # Guard against num_same == 0, where a [:-0] slice would empty the lists
    if num_same:
        prefixes1 = prefixes1[:-num_same]
        prefixes2 = prefixes2[:-num_same]

    ALLOW_FUZZY = 1
    if ALLOW_FUZZY and len(prefixes2) == 0:
        # SUPER HACK FOR CASE WHERE THERE IS JUST ONE SPOILER ELEMENT IN THE
        # TARGET SET. THE ALGORITHM NEEDS TO BE RETHOUGHT FOR THAT CASE
        possible_prefixes = [k.split('.') for k in target_set2]
        prefix_hist = ub.ddict(lambda: 0)
        for item in possible_prefixes:
            for i in range(1, len(item)):
                prefix_hist[tuple(item[0:i])] += 1
        prefixes2 = ['.'.join(ub.argmax(prefix_hist))]

    def add_prefix(items, prefix):
        return {prefix + k for k in items}

    def remove_prefix(items, prefix):
        return {k[len(prefix):] if k.startswith(prefix) else k for k in items}

    import itertools as it
    found_cand = []
    for i1, i2 in it.product(range(len(prefixes1) + 1),
                             range(len(prefixes2) + 1)):
        if i1 == 0 and i2 == 0:
            continue
        # Very inefficient, we should be able to do better
        prefix1 = '.'.join(prefixes1[:i1])
        prefix2 = '.'.join(prefixes2[:i2])
        if prefix1:
            prefix1 = prefix1 + '.'
        if prefix2:
            prefix2 = prefix2 + '.'

        # We are allowed to remove a prefix from a set, add the other
        # prefix to the set, or remove and then add.
        set1_cand1 = remove_prefix(set1, prefix1)
        set1_cand2 = add_prefix(set1, prefix2)
        set1_cand3 = add_prefix(set1_cand1, prefix2)

        common1 = set1_cand1 & target_set2
        common2 = set1_cand2 & target_set2
        common3 = set1_cand3 & target_set2
        if common1:
            found_cand.append({
                'transform': [('remove', prefix1)],
                'value': len(common1),
            })
        if common2:
            found_cand.append({
                'transform': [('add', prefix2)],
                'value': len(common2),
            })
        if common3:
            found_cand.append({
                'transform': [('remove', prefix1), ('add', prefix2)],
                'value': len(common3),
            })
    if len(found_cand):
        found = max(found_cand, key=lambda x: x['value'])
    else:
        found = None
    return found
def _fix_keys(model_state_dict):
    """
    Hack around DataParallel wrapper. If there is nothing in common between
    the two models, check to see if prepending 'module.' to other keys fixes
    it.
    """
    other_keys = set(model_state_dict)
    self_keys = set(self_state)

    if 0:
        # Automatic way to reduce nodes in the trees?
        # If node b always follows node a, can we contract it?
        nodes1 = [n for p in other_keys for n in p.split('.')]
        nodes2 = [n for p in self_keys for n in p.split('.')]
        tups1 = list(tup for key in other_keys
                     for tup in ub.iter_window(key.split('.'), 2))
        tups2 = list(tup for key in self_keys
                     for tup in ub.iter_window(key.split('.'), 2))
        x = ub.ddict(list)
        for a, b in tups1:
            x[a].append(b)
        for a, b in tups2:
            x[a].append(b)
        nodehist = ub.dict_hist(nodes1 + nodes2)

        for k, v in x.items():
            print('----')
            print(k)
            print(nodehist[k])
            follow_hist = ub.dict_hist(v)
            print(follow_hist)
            total = sum(follow_hist.values())
            if ub.allsame(follow_hist.values()) and total == nodehist[k]:
                print('CONTRACT')

        # pair_freq = ub.dict_hist(ub.flatten([tups1, tups2]))
        # print(forest_str(paths_to_otree(other_keys, '.')))

    # common_keys = other_keys.intersection(self_keys)
    # if not common_keys:

    if not other_keys.issubset(self_keys):
        if association == 'strict':
            pass
        elif association == 'module-hack':
            # If there are no common keys try a hack
            prefix = 'module.'

            def smap(f, ss):
                return set(map(f, ss))

            def fix1(k):
                return prefix + k

            def fix2(k):
                if k.startswith(prefix):
                    return k[len(prefix):]

            if smap(fix1, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix1, model_state_dict)
            elif smap(fix2, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix2, model_state_dict)
        elif association == 'prefix-hack':
            import functools

            def add_prefix(k, prefix):
                return prefix + k

            def remove_prefix(k, prefix):
                if k.startswith(prefix):
                    return k[len(prefix):]

            # set1 = other_keys
            # target_set2 = self_keys
            found = _best_prefix_transform(other_keys, self_keys)
            if found is not None:
                for action, prefix in found['transform']:
                    if action == 'add':
                        func = functools.partial(add_prefix, prefix=prefix)
                    elif action == 'remove':
                        func = functools.partial(remove_prefix, prefix=prefix)
                    else:
                        raise AssertionError
                    model_state_dict = ub.map_keys(func, model_state_dict)
        elif association in {'embedding', 'isomorphism'}:
            if verbose > 1:
                print('Using subpath {} association, '
                      'may take some time'.format(association))
            # I believe this is the correct way to solve the problem
            paths1 = sorted(other_keys)
            paths2 = sorted(self_state)

            if 1:
                # hack to filter to reduce tree size in embedding problem
                def shrink_paths(paths):
                    new_paths = []
                    for p in paths:
                        p = p.replace('.0', ':0')
                        p = p.replace('.1', ':1')
                        p = p.replace('.2', ':2')
                        p = p.replace('.3', ':3')
                        p = p.replace('.4', ':4')
                        p = p.replace('.5', ':5')
                        p = p.replace('.6', ':6')
                        p = p.replace('.7', ':7')
                        p = p.replace('.8', ':8')
                        p = p.replace('.9', ':9')
                        p = p.replace('.weight', ':weight')
                        p = p.replace('.bias', ':bias')
                        p = p.replace('.num_batches_tracked', ':num_batches_tracked')
                        p = p.replace('.running_mean', ':running_mean')
                        p = p.replace('.running_var', ':running_var')
                        # p = p.replace('.conv1', ':conv1')
                        # p = p.replace('.conv2', ':conv2')
                        # p = p.replace('.conv3', ':conv3')
                        # p = p.replace('.bn1', ':bn1')
                        # p = p.replace('.bn2', ':bn2')
                        # p = p.replace('.bn3', ':bn3')
                        new_paths.append(p)
                    return new_paths

                # Reducing the depth saves a lot of time
                paths1_ = shrink_paths(paths1)
                paths2_ = shrink_paths(paths2)

            subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                paths1_, paths2_, sep='.', mode=association)
            subpaths1 = [p.replace(':', '.') for p in subpaths1]
            subpaths2 = [p.replace(':', '.') for p in subpaths2]
            mapping = ub.dzip(subpaths1, subpaths2)
            if verbose > 1:
                other_unmapped = sorted(other_keys - set(mapping.keys()))
                self_unmapped = sorted(self_keys - set(mapping.values()))
                print('-- embed association (other -> self) --')
                print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                print('self_unmapped = {}'.format(
                    ub.repr2(self_unmapped, nl=1)))
                print('other_unmapped = {}'.format(
                    ub.repr2(other_unmapped, nl=1)))
                print('len(mapping) = {}'.format(
                    ub.repr2(len(mapping), nl=1)))
                print('len(self_unmapped) = {}'.format(
                    ub.repr2(len(self_unmapped), nl=1)))
                print('len(other_unmapped) = {}'.format(
                    ub.repr2(len(other_unmapped), nl=1)))
                print('-- end embed association --')

            # HACK: something might be wrong, there was an instance with
            # HRNet_w32 where multiple keys mapped to the same key
            # bad keys were incre_modules.3.0.conv1.weight and conv1.weight
            #
            # This will not error, but may produce bad output
            try:
                model_state_dict = ub.map_keys(
                    lambda k: mapping.get(k, k), model_state_dict)
            except Exception as ex:
                HACK = 1
                if HACK:
                    new_state_dict_ = {}
                    for k, v in model_state_dict.items():
                        new_state_dict_[mapping.get(k, k)] = v
                    model_state_dict = new_state_dict_
                    warnings.warn('ex = {!r}'.format(ex))
                else:
                    raise
        else:
            raise KeyError(association)
    return model_state_dict