def main():
    """
    Visualize how ``searchsorted`` associates values with array positions.

    Two stacked subplots are drawn on figure 1 from the same ``array`` /
    ``values`` inputs: the top one uses ``side='left'`` and the bottom one
    uses ``side='right'``. A figure-level title explains when they differ.
    """
    import ubelt as ub
    sns = kwplot.autosns()  # NOQA
    plt = kwplot.autoplt()  # NOQA

    # Select which demo data to visualize. Only 'handpicked' is active; the
    # other modes are kept as alternatives to switch to by hand.
    data_mode = 'handpicked'
    if data_mode == 'handpicked':
        array = list(range(16))
        values = [-1, 0, 1, 4, 6, 6.1, 6.5, 7, 10.3, 10.5, 10.7, 15, 16]
    elif data_mode == 'linspace':
        array = np.linspace(0, 30)
        values = np.linspace(0, 30)
    elif data_mode == 'random':
        xscale = 20
        num = 20
        array = np.array(sorted(np.random.rand(num) * xscale)).round()
        values = np.hstack([
            np.unique(np.random.choice(array, 3)),
            np.random.rand(num // 2) * xscale
        ])

    for row_index, side in enumerate(['left', 'right'], start=1):
        # Only the first subplot clears the figure
        clear_figure = int(row_index == 1)
        fig = kwplot.figure(fnum=1, doclf=clear_figure, pnum=(2, 1, row_index))
        ax = fig.gca()
        plot_searchsorted_visualization(array, values, side=side, ax=ax)
        ax.set_title(
            'association = searchsorted(array, values, side={})'.format(side))

    fig.suptitle(
        ub.codeblock(
            '''
            Notice:
                side=left and side=right have the same result except when the
                value is already in the array.
            '''))
def benchmark_ubelt_import_time_robust():
    """
    Benchmark ``import ubelt`` across the release history of a local checkout.

    A helper script is written into the ubelt application cache directory.
    For the ``dev/1.0.1`` and ``main`` branches plus every git tag of the
    repository at ``$HOME/code/ubelt``, the ref is checked out, the helper
    script is run, and the import-time statistics it prints are collected.
    The cumulative and self import times are then plotted against the
    release index in figure 2.

    Note:
        This checks out refs in the working repository. ``dev/1.0.1`` is
        checked out again at the end, even when a measurement fails.
    """
    import ast
    import re
    import pandas as pd
    import ubelt as ub
    import kwplot
    sns = kwplot.autosns(force='Qt5Agg')

    # The helper script runs ``python -X importtime -c "import ubelt"`` 200
    # times, parses the final line of stderr, and prints a dict of summary
    # statistics together with the ubelt version that was imported.
    prog = ub.codeblock(
        r'''
        def _main():
            import subprocess
            import ubelt as ub
            measurements = []
            for i in range(200):
                row = {}
                # info = ub.cmd('python -X importtime -c "import ubelt"')
                # text = info['err']
                prog = subprocess.Popen('python -X importtime -c "import ubelt"', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                _, text = prog.communicate()
                text = text.decode()
                final_line = text.rstrip().split('\n')[-1]
                partial = final_line.split(':')[1].split('|')
                row['self_us'] = float(partial[0].strip())
                row['cummulative'] = float(partial[1].strip())
                measurements.append(row)
            import pandas as pd
            df = pd.DataFrame(measurements)
            stats = pd.DataFrame({
                'mean': df.mean(),
                'std': df.std(),
                'min': df.min(),
                'max': df.max(),
                'total': df.sum(),
            })
            info = stats.to_dict()
            info['version'] = ub.__version__
            print(info)
            # print(stats)
        _main()
        ''')

    dpath = ub.Path(ub.ensure_app_cache_dir('ubelt/tests/test_version_import'))
    fpath = dpath / 'do_test.py'
    fpath.write_text(prog)

    repo_root = ub.Path('$HOME/code/ubelt').expand()

    info = ub.cmd('git tag', cwd=repo_root)

    versions = [p for p in info['out'].split('\n') if p]
    branches = ['dev/1.0.1', 'main'] + versions

    fig = kwplot.figure(doclf=True)
    ax = fig.gca()

    bname_to_info = {}
    rows = []
    try:
        for bname in branches:
            print('bname = {!r}'.format(bname))
            ub.cmd('git checkout {}'.format(bname), cwd=repo_root, verbose=3,
                   check=True)
            info = ub.cmd('python {}'.format(fpath), verbose=2)
            # The helper prints a plain dict literal, so it can be parsed
            # without executing arbitrary code.
            dict_info = ast.literal_eval(info['out'])
            bname_to_info[bname] = dict_info
            for stat in ['mean', 'min', 'max']:
                for time_type in ['self_us', 'cummulative']:
                    rows.append({
                        'version': dict_info['version'],
                        'stat': stat,
                        'type': time_type,
                        'time': dict_info[stat][time_type],
                    })
            # Show the most recently added row as a progress indicator
            df = pd.DataFrame(rows[-1:])
            print(df)
            # ax.cla()
            # sns.lineplot(data=df, x='version', y='time', hue='stat', style='type', ax=ax)
    finally:
        # Always put the repository back on the development branch
        ub.cmd('git checkout {}'.format('dev/1.0.1'), cwd=repo_root)

    df = pd.DataFrame(rows)

    def _loose_version_key(vstring):
        # Same ordering rule as the removed distutils LooseVersion: split
        # into runs of digits / lowercase letters, drop the dots, and
        # compare the numeric runs as integers.
        parts = [p for p in re.split(r'(\d+|[a-z]+|\.)', vstring)
                 if p and p != '.']
        return [int(p) if p.isdigit() else p for p in parts]

    unique_versions = sorted(map(str, df['version'].unique()),
                             key=_loose_version_key)
    df['release_index'] = df['version'].apply(
        lambda x: unique_versions.index(x))
    ax.cla()
    kwplot.figure(fnum=2, pnum=(2, 1, 1), doclf=True)
    ax = sns.lineplot(data=df[df['type'] == 'cummulative'],
                      x='release_index', y='time', hue='stat', style='type',
                      marker='o')
    ax.set_title('Ubelt import time over release history')
    kwplot.figure(fnum=2, pnum=(2, 1, 2))
    sns.lineplot(data=df[df['type'] == 'self_us'], x='release_index',
                 y='time', hue='stat', style='type', marker='o')
def _devcheck_load_sub_image():
    """
    Developer check: load a positive sample from the camvid sampler, wrap its
    annotations in ``kwimage.Detections``, rasterize them to a heatmap, and
    optionally (``--show``) draw the results with both ``draw_on`` and
    ``draw``.
    """
    import kwimage
    import numpy as np

    sampler = grab_camvid_sampler()
    classes = sampler.catgraph
    cid_to_cidx = classes.id_to_idx

    # Try loading a subregion of an image
    sample = sampler.load_positive(2)
    imdata = sample['im']
    annots = sample['annots']

    raw_dets = kwimage.Detections(
        aids=annots['aids'],
        boxes=annots['rel_boxes'],
        class_idxs=np.array([cid_to_cidx[cid] for cid in annots['cids']]),
        segmentations=annots['rel_ssegs'],
        classes=classes,
        datakeys=['aids'],
    )

    # Clip boxes to the image boundary
    input_dims = imdata.shape[0:2]
    height, width = input_dims
    raw_dets.data['boxes'] = raw_dets.boxes.clip(0, 0, width, height)

    # Keep only detections whose segmentation mask has a nonzero area
    # TODO: clip polygons
    keep = [
        index
        for index, sseg in enumerate(raw_dets.data['segmentations'])
        if sseg.to_mask(input_dims).area > 0
    ]
    dets = raw_dets.take(keep)

    heatmap = dets.rasterize(bg_size=(1, 1), input_dims=input_dims)

    if 1:
        for label, value in [('dets', dets),
                             ('dets.data', dets.data),
                             ('dets.meta', dets.meta)]:
            print('{} = {!r}'.format(label, value))

    if not ub.argflag('--show'):
        return

    import kwplot
    kwplot.autompl()
    heatmap.draw()

    draw_boxes = 1

    kwplot.figure(doclf=True)
    with ub.Timer('dets.draw_on'):
        # TODO: add logic to color by class
        canvas = dets.draw_on(imdata.copy(), boxes=draw_boxes, color='random')
        kwplot.imshow(canvas, pnum=(1, 2, 1), title='dets.draw_on')

    with ub.Timer('dets.draw'):
        kwplot.imshow(imdata, pnum=(1, 2, 2), docla=True, title='dets.draw')
        dets.draw(boxes=draw_boxes, color='random')
def benchmark_repeat_vs_reduce_mul():
    """
    Benchmark different ways of multiplying a list of equal-length arrays.

    The compared strategies are: a hard-coded chain of ``*`` operators
    (``method_repeat``), ``np.multiply.reduce`` (``method_reduce``), stacking
    then ``prod`` (``method_stack``), and a recursive divide-and-conquer
    reduction (``method_daq_rec``). Each is timed over a grid of array sizes
    and array counts; the results are printed as a table and drawn as a
    seaborn line plot.
    """
    import ubelt as ub
    import pandas as pd
    import numpy as np
    import timerit

    def reduce_daq_rec(func, arrs):
        """
        Recursive divide-and-conquer reduction of ``arrs`` with the binary
        function ``func``. Items at even and odd positions are reduced
        separately and then combined.

        Raises:
            ValueError: if ``arrs`` is empty
        """
        num = len(arrs)
        if num == 0:
            # Without this guard an empty input would recurse forever
            raise ValueError('cannot reduce an empty sequence')
        if num == 1:
            return arrs[0]
        if num == 2:
            return func(arrs[0], arrs[1])
        if num == 3:
            return func(func(arrs[0], arrs[1]), arrs[2])
        res1 = reduce_daq_rec(func, arrs[0::2])
        res2 = reduce_daq_rec(func, arrs[1::2])
        return func(res1, res2)

    def reduce_daq_iter(func, arrs):
        """
        Iterative equivalent of ``reduce_daq_rec`` that uses an explicit work
        stack instead of recursion.

        References:
            https://www.baeldung.com/cs/convert-recursion-to-iteration
            https://stackoverflow.com/questions/159590/way-to-go-from-recursion-to-iteration

        Example:
            arrs = [2, 3, 5, 7, 11, 13, 17, 21]

        Raises:
            ValueError: if ``arrs`` is empty
        """
        if len(arrs) == 0:
            raise ValueError('cannot reduce an empty sequence')
        # Work items are either ('reduce', items) or ('combine', None).
        # Finished partial results accumulate on ``results``.
        pending = [('reduce', arrs)]
        results = []
        while pending:
            op, items = pending.pop()
            if op == 'combine':
                rhs = results.pop()
                lhs = results.pop()
                results.append(func(lhs, rhs))
            elif len(items) == 1:
                results.append(items[0])
            elif len(items) == 2:
                results.append(func(items[0], items[1]))
            elif len(items) == 3:
                results.append(func(func(items[0], items[1]), items[2]))
            else:
                # Pushed in reverse so the even-position half is reduced
                # first, then the odd-position half, then both are combined.
                pending.append(('combine', None))
                pending.append(('reduce', items[1::2]))
                pending.append(('reduce', items[0::2]))
        return results[0]

    def method_daq_rec(arrs):
        return reduce_daq_rec(np.multiply, arrs)

    def method_repeat(arrs):
        """
        Multiply with a hard-coded chain of ``*`` operators. Only lengths 4,
        8 and 32 are supported.

        helper code:
            arr_names = ['a{:02d}'.format(idx) for idx in range(1, 32 + 1)]
            lhs = ', '.join(arr_names)
            rhs = ' * '.join(arr_names)
            print(f'{lhs} = arrs')
            print(f'ret = {rhs}')

        Raises:
            ValueError: if ``len(arrs)`` is not one of the supported lengths
        """
        # Hard coded pure python syntax for multiplying
        if len(arrs) == 4:
            a01, a02, a03, a04 = arrs
            ret = a01 * a02 * a03 * a04
        elif len(arrs) == 8:
            a01, a02, a03, a04, a05, a06, a07, a08 = arrs
            ret = a01 * a02 * a03 * a04 * a05 * a06 * a07 * a08
        elif len(arrs) == 32:
            a01, a02, a03, a04, a05, a06, a07, a08, a09, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 = arrs
            ret = a01 * a02 * a03 * a04 * a05 * a06 * a07 * a08 * a09 * a10 * a11 * a12 * a13 * a14 * a15 * a16 * a17 * a18 * a19 * a20 * a21 * a22 * a23 * a24 * a25 * a26 * a27 * a28 * a29 * a30 * a31 * a32
        else:
            raise ValueError(
                'method_repeat only supports 4, 8, or 32 arrays, '
                'got {}'.format(len(arrs)))
        return ret

    def method_reduce(arrs):
        ret = np.multiply.reduce(arrs)
        return ret

    def method_stack(arrs):
        stacked = np.stack(arrs)
        ret = stacked.prod(axis=0)
        return ret

    # Lookup from the method names used in ``basis`` to the callables
    method_lut = {
        func.__name__: func
        for func in [method_repeat, method_reduce, method_stack,
                     method_daq_rec]
    }

    ti = timerit.Timerit(10000, bestof=10, verbose=2)

    basis = {
        'method': ['method_repeat', 'method_reduce', 'method_stack', 'method_daq_rec'],
        'arr_size': [10, 100, 1000, 10000],
        'num_arrs': [4, 8, 32],
    }
    xlabel = 'arr_size'
    kw_labels = []
    group_labels = {
        'style': ['num_arrs'],
        'size': [],
    }
    # Every parameter that is not the x-axis or another group becomes the hue
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        # Build fresh random inputs for this parameter setting
        arr_size = params['arr_size']
        num_arrs = params['num_arrs']
        kwargs['arrs'] = [np.random.rand(arr_size) for _ in range(num_arrs)]

        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('Array Size')
        ax.set_ylabel('Time')
def run_benchmark_renormalization():
    """
    See if we can renormalize probabilities after update with a faster method
    that maintains memory a bit better

    The cython variants are compiled from
    ``~/misc/tests/python/renormalize_cython.pyx``. The pure python variants
    are profiled once, every selected method is timed once, and then each
    selected method is timed over a grid of ``T`` / ``D`` values. The minimum
    times are plotted and the mean time per method is printed.

    Example:
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/misc/tests/python'))
        >>> from bench_renormalization import *  # NOQA
        >>> run_benchmark_renormalization()
    """
    import ubelt as ub
    import xdev
    import pathlib
    import timerit

    fpath = pathlib.Path('~/misc/tests/python/renormalize_cython.pyx').expanduser()
    renormalize_cython = xdev.import_module_from_pyx(fpath, annotate=True,
                                                     verbose=3, recompile=True)

    xdev.profile_now(renormalize_demo_v1)(1000, 100)
    xdev.profile_now(renormalize_demo_v2)(1000, 100)
    xdev.profile_now(renormalize_demo_v3)(1000, 100)
    xdev.profile_now(renormalize_demo_v4)(1000, 100)

    func_list = [
        # renormalize_demo_v1,
        renormalize_demo_v2,
        # renormalize_demo_v3,
        # renormalize_demo_v4,
        renormalize_cython.renormalize_demo_cython_v1,
        renormalize_cython.renormalize_demo_cython_v2,
        renormalize_cython.renormalize_demo_cython_v3,
    ]
    methods = {f.__name__: f for f in func_list}
    # One quick timing of each method before the full grid
    for key, method in methods.items():
        with timerit.Timer(label=key, verbose=0) as t:
            method(1000, 100)
        print(f'{key:<30} {t.toc():0.6f}')

    arg_basis = {
        'T': [10, 20, 30, 50],
        'D': [10, 50, 100, 300],
    }
    args_grid = []
    for argkw in list(ub.named_product(arg_basis)):
        if argkw['T'] <= argkw['D']:
            # Record the problem size on the grid item itself, so it ends up
            # in the result rows rather than in the basis dict.
            argkw['size'] = argkw['T'] * argkw['D']
            args_grid.append(argkw)

    ti = timerit.Timerit(100, bestof=10, verbose=2)

    measures = []

    for method_name, method in methods.items():
        for argkw in args_grid:
            row = ub.dict_union({'method': method_name}, argkw)
            key = ub.repr2(row, compact=1)
            argkey = ub.repr2(argkw, compact=1)

            # Only T and D are passed to the method; ``size`` is metadata
            kwargs = ub.dict_subset(argkw, ['T', 'D'])
            # Label each timing run with its key so verbose output can be
            # told apart
            for timer in ti.reset(key):
                with timer:
                    method(**kwargs)

            row['mean'] = ti.mean()
            row['min'] = ti.min()
            row['key'] = key
            row['argkey'] = argkey
            measures.append(row)

    import pandas as pd
    df = pd.DataFrame(measures)
    import kwplot
    sns = kwplot.autosns()

    kwplot.figure(fnum=1, pnum=(1, 2, 1), docla=True)
    sns.lineplot(data=df, x='D', y='min', hue='method', style='method')
    kwplot.figure(fnum=1, pnum=(1, 2, 2), docla=True)
    sns.lineplot(data=df, x='T', y='min', hue='method', style='method')

    # pivot arguments are keyword-only in pandas >= 2.0
    p = df.pivot(index=['method'], columns=['argkey'], values=['mean'])
    print(p.mean(axis=1).sort_values())
def benchmark_dict_diff_impl():
    """
    Benchmark candidate implementations of a "dict difference" operation.

    Each candidate takes ``(first_dict, *other_dicts)`` and returns a new
    dict holding the items of ``first_dict`` whose keys do not appear in any
    of the others. The candidates are timed over a grid of input sizes,
    number of other dicts, removal fractions and key types. The timings are
    printed, aggregated into rankings (weighted rank table and openskill
    ratings), and drawn as a seaborn line plot.
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import random

    # NOTE: the bodies below are the subjects being measured; they are
    # intentionally written in different styles and should not be unified.

    def method_diffkeys(*args):
        first_dict = args[0]
        keys = set(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_diffkeys_list(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        keep_keys = [k for k in first_dict.keys() if k not in remove_keys]
        new = dict((k, first_dict[k]) for k in keep_keys)
        return new

    def method_diffkeys_oset(*args):
        first_dict = args[0]
        keys = ub.oset(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_ifkeys_setcomp(*args):
        first_dict = args[0]
        remove_keys = {k for ks in args[1:] for k in ks}
        new1 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new1

    def method_ifkeys_setunion(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new2 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new2

    def method_ifkeys_getitem(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new3 = dict((k, first_dict[k]) for k in first_dict.keys() if k not in remove_keys)
        return new3

    def method_ifkeys_dictcomp(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: v for k, v in first_dict.items() if k not in remove_keys}
        return new4

    def method_ifkeys_dictcomp_getitem(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: first_dict[k] for k in first_dict.keys() if k not in remove_keys}
        return new4

    method_lut = locals()  # can populate this some other way

    def make_data(num_items, num_other, remove_fraction, keytype):
        """
        Build ``[first_dict, other_1, ..., other_num_other]`` benchmark
        inputs. Each "other" dict is made from ``remove_fraction * num_items``
        keys sampled from ``first_dict`` plus ``num_items`` more random keys.
        """
        if keytype == 'str':
            keytype = str
        if keytype == 'int':
            keytype = int
        first_keys = [random.randint(0, 1000) for _ in range(num_items)]
        k = int(remove_fraction * len(first_keys))
        remove_sets = [
            list(ub.unique(
                random.choices(first_keys, k=k) +
                [random.randint(0, 1000) for _ in range(num_items)]))
            for _ in range(num_other)
        ]
        first_dict = {keytype(k): k for k in first_keys}
        args = [first_dict] + [{keytype(k): k for k in ks} for ks in remove_sets]
        return args

    ti = timerit.Timerit(200, bestof=1, verbose=2)

    basis = {
        'method': [
            # Cant use because unordered
            # 'method_diffkeys',

            # Cant use because python 3.6
            'method_ifkeys_dictcomp',
            'method_ifkeys_dictcomp_getitem',

            'method_ifkeys_setunion',
            'method_ifkeys_getitem',
            'method_diffkeys_list',

            # Probably not good
            # 'method_ifkeys_setcomp',
            # 'method_diffkeys_oset',
        ],
        'num_items': [10, 100, 1000],
        'num_other': [1, 3, 5],
        # 'num_other': [1],
        'remove_fraction': [0, 0.2, 0.5, 0.7, 1.0],
        # 'remove_fraction': [0.2, 0.8],
        'keytype': ['str', 'int'],
        # 'keytype': ['str'],
        # 'param_name': [param values],
    }
    xlabel = 'num_items'
    kw_labels = ['num_items', 'num_other', 'remove_fraction', 'keytype']
    group_labels = {
        'style': ['num_other', 'keytype'],
        'size': ['remove_fraction'],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        args = make_data(**kwargs)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(*args)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    # for each parameter setting, group all methods with that used those exact
    # comparable params. Then rank how good each method did.  That will be a
    # preference profile. We will give that preference profile a weight (e.g.
    # based on the fastest method in the bunch) and then aggregate them with
    # some voting method.

    USE_OPENSKILL = 1

    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    weighted_rankings = ub.ddict(lambda: ub.ddict(float))
    for params, variants in data.groupby(['num_other', 'keytype', 'remove_fraction', 'num_items']):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

        # Choose a ranking weight scheme
        weight = variants['mean'].min()
        # weight = 1
        for rank, method in enumerate(ranking):
            weighted_rankings[method][rank] += weight
            weighted_rankings[method]['total'] += weight

    # Probably a more robust voting method to do this
    weight_rank_rows = []
    for method_name, ranks in weighted_rankings.items():
        weights = ub.dict_diff(ranks, ['total'])
        p_rank = ub.map_vals(lambda w: w / ranks['total'], weights)

        for rank, w in p_rank.items():
            weight_rank_rows.append({'rank': rank, 'weight': w, 'name': method_name})
    weight_rank_df = pd.DataFrame(weight_rank_rows)
    # pivot arguments are keyword-only in pandas >= 2.0. The list-valued
    # ``values`` keeps the two-level columns indexed below.
    piv = weight_rank_df.pivot(index=['name'], columns=['rank'], values=['weight'])
    print(piv)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('skill_agg =\n{}'.format(skill_agg))

    # Weight each column by its rank number and sum across ranks
    aggregated = (piv * piv.columns.levels[1].values).sum(axis=1).sort_values()
    print('weight aggregated =\n{}'.format(aggregated))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('A better x-variable description')
        ax.set_ylabel('A better y-variable description')
def benchmark_reversed_range():
    """
    Compare ways of building a reversed range as a list.

    The registered candidates, ``reversed(range(...))`` and a negative-step
    ``range``, are first checked for identical output and then timed over
    sizes ``2 ** 0`` through ``2 ** 13``. Results are printed as a table and
    drawn as a seaborn line plot.
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import itertools as it

    # Generator based alternatives (currently not registered for timing)

    def custom_reversed_range_v1(start, stop):
        final = stop - 1
        for idx in range(stop - start):
            yield final - idx

    def custom_reversed_range_v2(start, stop):
        yield from it.islice(it.count(stop - 1, step=-1), stop - start)

    # The bodies below are what is being measured; keep them as written.

    def reversed_builtin(x):
        start = 10
        stop = x + start
        ret = list(reversed(range(start, stop)))
        return ret

    def negative_range(x):
        start = 10
        stop = x + start
        ret = list(range(stop - 1, start - 1, -1))
        return ret

    # def custom_v1(x):
    #     start = 10
    #     stop = x + start
    #     ret = list(custom_reversed_range_v1(start, stop))
    #     return ret

    # def custom_v2(x):
    #     start = 10
    #     stop = x + start
    #     ret = list(custom_reversed_range_v2(start, stop))
    #     return ret

    # Registration order determines the order of the benchmark grid
    method_lut = {
        'reversed_builtin': reversed_builtin,
        'negative_range': negative_range,
    }

    # Sanity check: every candidate must produce the same output
    results = {}
    for name, func in method_lut.items():
        results[name] = func(10)
    print('results = {}'.format(ub.repr2(results, nl=1, align=':')))
    if not ub.allsame(results.values()):
        raise AssertionError('Failed consistency check')

    ti = timerit.Timerit(1000, bestof=10, verbose=2)

    basis = {
        'method': list(method_lut),
        'x': [2 ** i for i in range(14)],
    }

    # For each variation of your experiment, create a row.
    rows = []
    for params in ub.named_product(basis):
        key = ub.repr2(params, compact=1, si=1)
        call_kwargs = params.copy()
        method = method_lut[call_kwargs.pop('method')]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**call_kwargs)
        rows.append({
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **params,
        })

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    print(data)

    plot = True
    if not plot:
        return

    # import seaborn as sns
    # kwplot autosns works well for IPython and script execution.
    # not sure about notebooks.
    import kwplot
    sns = kwplot.autosns()

    # Your variables may change
    fig = kwplot.figure(fnum=1, doclf=True)
    ax = fig.gca()
    sns.lineplot(data=data, x='x', y='min', hue='method', marker='o', ax=ax)
    # ax.set_xscale('log')
    ax.set_title('Benchmark Reveral Methods ')
    ax.set_xlabel('A better x-variable description')
    ax.set_ylabel('A better y-variable description')
def benchmark_template():
    """
    Template for a parameter-grid benchmark.

    Two toy methods are timed with timerit over a grid of ``x`` / ``y`` /
    ``z`` parameters. The timings are gathered into a long-form pandas
    table, summarized (min / mean / speedups, plus openskill ratings), and
    drawn as a seaborn line plot.
    """
    import ubelt as ub
    import pandas as pd
    import timerit

    # The bodies below are what is being measured; keep them as written.

    def method1(x, y, z):
        ret = []
        for i in range((x + y) * z):
            ret.append(i)
        return ret

    def method2(x, y, z):
        ret = [i for i in range((x + y) * z)]
        return ret

    # can populate this some other way
    method_lut = {'method1': method1, 'method2': method2}

    # Change params here to modify number of trials
    ti = timerit.Timerit(100, bestof=10, verbose=1)

    # if True, record every trail run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': ['method1', 'method2'],
        'x': list(range(7)),
        'y': [0, 100],
        'z': [2, 3]
        # 'param_name': [param values],
    }
    xlabel = 'x'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = ['x', 'y', 'z']
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['y'],
        'size': ['z'],
    }
    already_grouped = set.union(*map(set, group_labels.values()))
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) - already_grouped)
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {
            gname + '_key': ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
            for gname, labels in group_labels.items()
        }
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # TODO: timerit method for this
            best_times = [min(chunk) for chunk in ub.chunks(ti.times, ti.bestof)]
            rows.extend(
                {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                for time in best_times
            )
        else:
            rows.append({
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            })

    if RECORD_ALL:
        time_key = 'time'
    else:
        time_key = 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows).sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        grouped = data.groupby('key')
        min_times = grouped.min().rename({'time': 'min'}, axis=1)
        mean_times = grouped[['time']].mean().rename({'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    # Everything that is not a measurement, a key, or the method itself
    # identifies one comparable parameter setting.
    non_param_columns = {
        'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'}
    other_keys = sorted(set(stats_data.columns) - non_param_columns)
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        # Speedup of every variant relative to the slowest one in the group
        for stat in ['mean', 'min']:
            speedup = variants[stat].max() / variants[stat]
            stats_data.loc[speedup.index, stat + '_speedup'] = speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob))
        skill_agg = skill_agg.sort_values(ascending=False)
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        # Only pass seaborn the grouping semantics that are actually used
        plotkw = {
            gname: gname + '_key'
            for gname, labels in group_labels.items()
            if labels
        }

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark Name')
        ax.set_xlabel('Size (todo: A better x-variable description)')
        ax.set_ylabel('Time (todo: A better y-variable description)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')

        # Only block on the figure when not running inside IPython
        try:
            __IPYTHON__
        except NameError:
            plt.show()
def ford_circles():
    """
    Draw Ford Circles

    This is a Ford Circle diagram of the Rationals and Float32 numbers.
    Only 163 of the 32608 rationals I generated can be exactly represented by
    a float32.

    Each rational in [0, 1] is drawn as a circle of radius
    ``1 / (2 * denominator ** 2)`` centered at ``(rational, radius)``.
    Circles whose rational round-trips exactly through ``dtype`` are blue;
    the others are red and get a horizontal bar showing the size of the
    approximation error.

    [MF 14]
    [MF 95]

    [MF 14] https://www.youtube.com/watch?v=83ZjYvkdzYI&list=PL5A714C94D40392AB&index=14
    [MF 95] https://www.youtube.com/watch?v=gATEJ3f3FBM&list=PL5A714C94D40392AB&index=95

    Examples:
        import kwplot
        kwplot.autompl()
    """
    import kwplot
    import ubelt as ub
    import matplotlib as mpl
    plt = kwplot.autoplt()
    sns = kwplot.autosns()  # NOQA

    limit = 256 * 256
    print('limit = {!r}'.format(limit))
    rats_to_plot = set()

    maxx = 1
    _iter = Rational.members(limit=limit)
    _genrat = set(ub.ProgIter(_iter, total=limit, desc='gen rats'))
    rats_to_plot |= _genrat
    # Wrap everything into [0, maxx) and add the right endpoint itself
    rats_to_plot2 = sorted({Rational(r % maxx) for r in rats_to_plot} | {maxx})

    floats = sorted(
        ub.unique(map(float, rats_to_plot2), key=lambda f: f.as_integer_ratio()))
    print(f'{len(rats_to_plot) = }')
    print(f'{len(rats_to_plot2) = }')
    print(f'{len(floats) = }')
    import numpy as np

    ax = kwplot.figure(fnum=1, doclf=True).gca()
    prog = ub.ProgIter(sorted(rats_to_plot2), verbose=1)
    dtype = np.float32
    patches = ub.ddict(list)
    errors = []
    for rat in prog:
        denominator = rat.denominator
        radius = 1 / (2 * (denominator * denominator))
        point = (rat, radius)
        # Round trip through the floating point type to measure the error
        flt = dtype(rat)
        a, b = flt.as_integer_ratio()
        flt_as_rat = Rational(a, b)
        error = abs(rat - flt_as_rat)
        if error == 0:
            new_circle = plt.Circle(point, radius, facecolor='dodgerblue',
                                    edgecolor='none', linewidth=0, alpha=0.5)
            patches['good'].append(new_circle)
        else:
            errors.append(error)
            # Plot a line for error
            new_circle = plt.Circle(point, radius, facecolor='orangered',
                                    edgecolor='none', linewidth=0, alpha=0.5)
            patches['bad'].append(new_circle)
            ax.plot((rat - error, rat + error), (radius, radius), 'x-',
                    color='darkgray')

    print(ub.map_vals(len, patches))
    total = float(sum(errors))
    print('total = {!r}'.format(total))
    if errors:
        # max / min raise on an empty sequence
        print(max(errors))
        print(min(errors))

    # Draw each group as one collection, styled like its first member
    for v in patches.values():
        first = ub.peek(v)
        prop = ub.dict_isect(first.properties(),
                             ['facecolor', 'linewidth', 'alpha', 'edgecolor'])
        col = mpl.collections.PatchCollection(v, **prop)
        ax.add_collection(col)

    # Lets look for the holes in IEEE float
    # for flt in ub.ProgIter(sorted(floats), verbose=1):

    kwplot.phantom_legend({
        f'rationals without a {dtype}': 'orangered',
        f'rationals with a {dtype}': 'dodgerblue',
        f'x-x indicates {dtype} approximation error': 'darkgray',
    })

    # Derive the title from the dtype that was actually tested
    ax.set_title('Holes in IEEE 754 {}'.format(dtype.__name__.capitalize()))
    ax.set_xlabel('A rational number')
    ax.set_ylabel('The squared rational denominator')

    # import numpy as np
    # points = np.array([c.center for c in _circles])
    # maxx, maxy = points.max(axis=0)
    # print('maxx = {!r}'.format(maxx))
    # print('maxy = {!r}'.format(maxy))
    # maxx, maxy = maxx // 2, maxy // 2
    # ax.set_xlim(0, np.sqrt(int(maxx)))
    # ax.set_ylim(0, np.sqrt(int(maxy)))
    # ax.set_aspect('equal')
    # ax.set_xlim(0.2, 0.22)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 0.1)
def benchmark_video_readers():
    """
    Benchmark frame access speed of the cv2, vi3o and decord video readers.

    A small example video is downloaded, then for each backend the time to
    open the file, read the first few frames sequentially, read frames at
    random indices, and "open + read one frame" is measured with timerit.
    The vi3o cases are skipped when vi3o cannot be imported. Finally the mean
    timings are plotted relative to the cv2 backend.

    "On My Machine" I get:

    ti.measures = {
        'mean'    : {
            'cv2 sequential access'          : 0.0137,
            'decord sequential access'       : 0.0175,
            'cv2 open + first access'        : 0.0222,
            'decord open + first access'     : 0.0565,
            'vi3o sequential access'         : 0.0642,
            'cv2 open + one random access'   : 0.0723,
            'decord open + one random access': 0.0946,
            'vi3o open + first access'       : 0.1045,
            'cv2 random access'              : 0.3316,
            'decord random access'           : 0.3472,
            'decord random batch access'     : 0.3482,
            'vi3o open + one random access'  : 0.3590,
            'vi3o random access'             : 1.6660,
        },
        'mean+std': {
            'cv2 sequential access'          : 0.0145,
            'decord sequential access'       : 0.0182,
            'cv2 open + first access'        : 0.0230,
            'vi3o sequential access'         : 0.0881,
            'decord open + first access'     : 0.1038,
            'vi3o open + first access'       : 0.1059,
            'cv2 open + one random access'   : 0.1151,
            'decord open + one random access': 0.1329,
            'cv2 random access'              : 0.3334,
            'decord random access'           : 0.3496,
            'decord random batch access'     : 0.3511,
            'vi3o open + one random access'  : 0.5215,
            'vi3o random access'             : 1.6890,
        },
        'mean-std': {
            'decord open + first access'     : 0.0091,
            'cv2 sequential access'          : 0.0130,
            'decord sequential access'       : 0.0168,
            'cv2 open + first access'        : 0.0214,
            'cv2 open + one random access'   : 0.0295,
            'vi3o sequential access'         : 0.0403,
            'decord open + one random access': 0.0563,
            'vi3o open + first access'       : 0.1032,
            'vi3o open + one random access'  : 0.1965,
            'cv2 random access'              : 0.3299,
            'decord random access'           : 0.3448,
            'decord random batch access'     : 0.3452,
            'vi3o random access'             : 1.6429,
        },
        'min'     : {
            'cv2 sequential access'          : 0.0128,
            'decord sequential access'       : 0.0166,
            'cv2 open + first access'        : 0.0210,
            'vi3o sequential access'         : 0.0233,
            'decord open + first access'     : 0.0251,
            'cv2 open + one random access'   : 0.0282,
            'decord open + one random access': 0.0527,
            'vi3o open + one random access'  : 0.1013,
            'vi3o open + first access'       : 0.1026,
            'cv2 random access'              : 0.3299,
            'decord random access'           : 0.3433,
            'decord random batch access'     : 0.3452,
            'vi3o random access'             : 1.6423,
        },
    }
    """
    # video_fpath = ub.grabdata('https://download.blender.org/peach/bigbuckbunny_movies/big_buck_bunny_720p_h264.mov')
    # vi3o is optional: every vi3o case below is guarded on this import.
    try:
        import vi3o
    except Exception:
        vi3o = None

    # video_fpath = ub.grabdata('https://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4')
    video_fpath = ub.grabdata('https://file-examples-com.github.io/uploads/2018/04/file_example_MOV_1280_1_4MB.mov')

    ti = timerit.Timerit(10, bestof=3, verbose=3, unit='ms')

    # All backends are asked for the same (seeded) random frame indices.
    video_length = len(CV2VideoReader(video_fpath))
    num_frames = min(5, video_length)
    rng = kwarray.ensure_rng(0)
    random_indices = rng.randint(0, video_length, size=num_frames).tolist()

    if True:
        with timerit.Timer(label='open cv2') as cv2_open_timer:
            cv2_video = CV2VideoReader(video_fpath)

        for timer in ti.reset('cv2 sequential access'):
            # Rewind outside of the timed region so only reads are measured
            cv2_video.seek(0)
            with timer:
                for frame, _ in zip(cv2_video, range(num_frames)):
                    pass

        for timer in ti.reset('cv2 random access'):
            with timer:
                for index in random_indices:
                    cv2_video[index]

    if vi3o is not None:
        with timerit.Timer(label='open vi3o') as vi3o_open_timer:
            vi3o_video = vi3o.Video(video_fpath)

        for timer in ti.reset('vi3o sequential access'):
            with timer:
                for frame, _ in zip(vi3o_video, range(num_frames)):
                    pass

        for timer in ti.reset('vi3o random access'):
            with timer:
                for index in random_indices:
                    vi3o_video[index]

    if True:
        # NOTE: unlike vi3o, decord is a hard requirement of this benchmark
        import decord
        with timerit.Timer(label='open decord') as decord_open_timer:
            decord_video = decord.VideoReader(video_fpath)

        for timer in ti.reset('decord sequential access'):
            with timer:
                for frame, _ in zip(decord_video, range(num_frames)):
                    pass

        for timer in ti.reset('decord random access'):
            with timer:
                for index in random_indices:
                    decord_video[index]

        for timer in ti.reset('decord random batch access'):
            with timer:
                decord_video.get_batch(random_indices)

    if True:
        # One Random Access Case

        def _work_to_clear_io_caches():
            # Runs unrelated image loading between timed trials.
            import kwimage
            # Let some caches be cleared
            for i in range(10):
                for key in kwimage.grab_test_image.keys():
                    kwimage.grab_test_image(key)

        # The rng is re-seeded before each backend so every backend is asked
        # for the same sequence of random indices.
        rng = kwarray.ensure_rng(0)
        for timer in ti.reset('cv2 open + one random access'):
            _work_to_clear_io_caches()
            with timer:
                _cv2_video = CV2VideoReader(video_fpath)
                index = rng.randint(0, video_length, size=1)[0]
                _cv2_video[index]

        if vi3o is not None:
            rng = kwarray.ensure_rng(0)
            for timer in ti.reset('vi3o open + one random access'):
                _work_to_clear_io_caches()
                with timer:
                    _vi3o_video = vi3o.Video(video_fpath)
                    index = rng.randint(0, video_length, size=1)[0]
                    _vi3o_video[index]

        rng = kwarray.ensure_rng(0)
        for timer in ti.reset('decord open + one random access'):
            _work_to_clear_io_caches()
            with timer:
                _decord_video = decord.VideoReader(video_fpath)
                index = rng.randint(0, video_length, size=1)[0]
                _decord_video[index]

        for timer in ti.reset('cv2 open + first access'):
            _work_to_clear_io_caches()
            with timer:
                _cv2_video = CV2VideoReader(video_fpath)
                _cv2_video[0]

        if vi3o is not None:
            for timer in ti.reset('vi3o open + first access'):
                _work_to_clear_io_caches()
                with timer:
                    _vi3o_video = vi3o.Video(video_fpath)
                    _vi3o_video[0]

        for timer in ti.reset('decord open + first access'):
            _work_to_clear_io_caches()
            with timer:
                _decord_video = decord.VideoReader(video_fpath)
                _decord_video[0]

    # Report each statistic with its entries sorted by value
    measures = ub.map_vals(ub.sorted_vals, ti.measures)
    print('ti.measures = {}'.format(ub.repr2(measures, nl=2, align=':', precision=4)))
    print('cv2_open_timer.elapsed = {!r}'.format(cv2_open_timer.elapsed))
    print('decord_open_timer.elapsed = {!r}'.format(decord_open_timer.elapsed))
    if vi3o:
        print('vi3o_open_timer.elapsed = {!r}'.format(vi3o_open_timer.elapsed))

    import kwplot
    import seaborn as sns
    sns.set()
    plt = kwplot.autoplt()

    # Timer labels have the form "<module> <experiment description>"; split
    # them so results can be grouped by experiment and colored by module.
    df = pd.DataFrame(ti.measures)
    df['key'] = df.index
    df['expt'] = df['key'].apply(lambda k: ' '.join(k.split(' ')[1:]))
    df['module'] = df['key'].apply(lambda k: k.split(' ')[0])

    # Module that all other timings are expressed relative to
    # relmod = 'decord'
    relmod = 'cv2'

    for k, group in df.groupby('expt'):
        measure = 'mean'
        relval = group[group['module'] == relmod][measure].values.ravel()
        # Experiments the reference module did not run (e.g. the decord-only
        # batch access) get no relative columns and are dropped from the plot
        if len(relval) > 0:
            assert len(relval) == 1
            df.loc[group.index, measure + '_rel'] = group[measure] / relval
            df.loc[group.index, measure + '_slower_than_' + relmod] = group[measure] / relval
            df.loc[group.index, measure + '_faster_than_' + relmod] = relval / group[measure]

    fig = kwplot.figure(fnum=1, doclf=True)
    ax = fig.gca()

    y_key = "mean_faster_than_" + relmod

    sub_df = df.loc[~df[y_key].isnull()]
    sns.barplot(
        x="expt", y=y_key, data=sub_df, hue='module', ax=ax)
    ax.set_title('cpu video reading benchmarks')

    plt.show()
def plot_convolutional_features(conv, limit=144, colorspace='rgb', fnum=None,
                                nCols=None, voxels=False, alpha=.2,
                                labels=False, normaxis=None,
                                _hack_2drows=False):
    """Plots the convolutional layers to a matplotlib pyplot.

    The convolutional filters (kernels) are arranged into a grid and drawn in
    a Matplotlib figure.  The convolutional filters, if it has one channel,
    will be shown as an intensity image.  If a colorspace is specified and
    there are three input channels, the convolutional filters will be
    represented as an RGB image.

    In the event that 2 or 4+ filters are displayed, the different channels will
    be flattened and showed as distinct outputs in the grid.

    TODO:
        - [ ] refactor to use make_conv_images

    Args:
        conv (torch.nn.ConvNd): torch convolutional layer with weights to draw

        limit (int, optional): the limit on the number of filters drawn in the
            figure, achieved by simply dropping any filters past the limit
            starting at the first filter.  Defaults to 144.

        colorspace (str): the colorspace seen by the convolutional filter
            (if applicable), so we can convert to rgb for display.

        fnum (int, optional): figure number to draw in. If None one is chosen
            by ``kwplot.ensure_fnum``.

        nCols (int, optional): number of subplot columns. Defaults to the
            ceiling of the square root of the number of plots.

        voxels (bool): if True, and we have a 3d conv, show the voxels

        alpha (float): only applicable if voxels=True

        labels (bool): only applicable if voxels=True. If True, the axis of
            the last subplot is left visible.

        normaxis (int, optional): if None, weights are normalized using the
            global min / max; otherwise the min / max are computed over the
            output-channel axis.

        _hack_2drows (bool): if True and voxels=False, swap the first two
            axes of the normalized weights before they are flattened.

    Returns:
        matplotlib.figure.Figure: fig - a Matplotlib figure

    References:
        https://matplotlib.org/devdocs/gallery/mplot3d/voxels.html

    Example:
        >>> # xdoctest: +REQUIRES(module:torch)
        >>> conv = torch.nn.Conv2d(3, 9, (5, 7))
        >>> plot_convolutional_features(conv, colorspace=None, fnum=None, limit=2)

    Example:
        >>> # xdoctest: +REQUIRES(--comprehensive)
        >>> # xdoctest: +REQUIRES(module:torch)
        >>> import torchvision
        >>> # 2d uncolored gray-images
        >>> conv = torch.nn.Conv3d(1, 2, (3, 4, 5))
        >>> plot_convolutional_features(conv, colorspace=None, fnum=1, limit=2)

        >>> # 2d colored rgb-images
        >>> conv = torch.nn.Conv3d(3, 2, (6, 4, 5))
        >>> plot_convolutional_features(conv, colorspace='rgb', fnum=1, limit=2)

        >>> # 2d uncolored rgb-images
        >>> conv = torch.nn.Conv3d(3, 2, (6, 4, 5))
        >>> plot_convolutional_features(conv, colorspace=None, fnum=1, limit=2)

        >>> # 3d gray voxels
        >>> conv = torch.nn.Conv3d(1, 2, (6, 4, 5))
        >>> plot_convolutional_features(conv, colorspace=None, fnum=1, voxels=True,
        >>>                             limit=2)

        >>> # 3d color voxels
        >>> conv = torch.nn.Conv3d(3, 2, (6, 4, 5))
        >>> plot_convolutional_features(conv, colorspace='rgb', fnum=1,
        >>>                             voxels=True, alpha=1, limit=3)

        >>> # hack the nice resnet weights into 3d-space
        >>> # xdoctest: +REQUIRES(--network)
        >>> import torchvision
        >>> model = torchvision.models.resnet50(pretrained=True)
        >>> conv = torch.nn.Conv3d(3, 1, (7, 7, 7))
        >>> weights_tohack = model.conv1.weight[0:7].data.numpy()
        >>> # normalize each weight for nice colors, then place in the conv3d
        >>> for w in weights_tohack:
        ...     w[:] = (w - w.min()) / (w.max() - w.min())
        >>> weights_hacked = weights_tohack.transpose(1, 0, 2, 3)[None, :]
        >>> conv.weight.data[:] = torch.FloatTensor(weights_hacked)

        >>> plot_convolutional_features(conv, colorspace='rgb', fnum=1, voxels=True, alpha=.6)

        >>> plot_convolutional_features(conv, colorspace='rgb', fnum=2, voxels=False, alpha=.9)

    Example:
        >>> # xdoctest: +REQUIRES(--network)
        >>> # xdoctest: +REQUIRES(module:torch)
        >>> import torchvision
        >>> model = torchvision.models.resnet50(pretrained=True)
        >>> conv = model.conv1
        >>> plot_convolutional_features(conv, colorspace='rgb', fnum=None)

    """
    import kwplot
    kwplot.autompl()
    import matplotlib.pyplot as plt

    # get relevant data out of pytorch module
    weights = conv.weight.data.cpu().numpy()
    in_channels = conv.in_channels
    # out_channels = conv.out_channels
    kernel_size = conv.kernel_size
    conv_dim = len(kernel_size)

    # TODO: use make_conv_images in the 2d case here

    if voxels:
        # use up to 3 spatial dimensions
        spatial_axes = list(kernel_size[-3:])
    else:
        # use only 2 spatial dimensions
        spatial_axes = list(kernel_size[-2:])
    color_axes = []

    output_axis = 0

    # If there are 3 input channels, we can visualize features in a colorspace
    if colorspace is not None and in_channels == 3:
        # Move colorable channels to the end (handle 1, 2 and 3d convolution)
        axes = [0] + list(range(2, 2 + conv_dim)) + [1]
        weights = weights.transpose(*axes)
        color_axes = [in_channels]
        output_axis = 0

    # Normalize layer weights between 0 and 1
    if normaxis is None:
        minval = weights.min()
        maxval = weights.max()
    else:
        # if normaxis=0 norm over output channels
        minval = weights.min(axis=output_axis, keepdims=True)
        maxval = weights.max(axis=output_axis, keepdims=True)

    weights_norm = (weights - minval) / (maxval - minval)

    if _hack_2drows:
        # To agree with jason's visualization for a paper figure
        if not voxels:
            weights_norm = weights_norm.transpose(1, 0, 2, 3)

    # flatten everything but the spatial and requested color dims
    weights_flat = weights_norm.reshape(-1, *(spatial_axes + color_axes))

    num_plots = min(weights_flat.shape[0], limit)
    dim = int(np.ceil(np.sqrt(num_plots)))

    if voxels:
        from mpl_toolkits.mplot3d import Axes3D  # NOQA
        # NOTE: the ``np.bool`` alias was removed in NumPy 1.24, the builtin
        # ``bool`` is the portable spelling of the same dtype.
        filled = np.ones(spatial_axes, dtype=bool)
        # np.ones(spatial_axes)
        # d, h, w = np.indices(spatial_axes)

    fnum = kwplot.ensure_fnum(fnum)
    fig = kwplot.figure(fnum=fnum)
    fig.clf()
    if nCols is None:
        nCols = dim
    pnum_ = kwplot.PlotNums(nCols=nCols, nSubplots=num_plots)

    def plot_kernel3d(i):
        # Draw the i-th flattened kernel as a grid of colored voxels
        img = weights_flat[i]

        # fig = kwplot.figure(fnum=fnum, pnum=pnum_[i])
        ax = fig.add_subplot(*pnum_[i], projection='3d')
        # ax = fig.gca(projection='3d')

        alpha_ = (filled * alpha)[..., None]
        colors = img

        if not color_axes:
            import kwimage
            # transform grays into colors
            grays = kwimage.atleast_nd(img, 4)
            colors = np.concatenate([grays, grays, grays], axis=3)

        if colorspace and color_axes:
            import kwimage
            # convert into RGB
            for d in range(len(colors)):
                colors[d] = kwimage.convert_colorspace(
                    colors[d], src_space=colorspace, dst_space='rgb')
        facecolors = np.concatenate([colors, alpha_], axis=3)

        # shuffle dims so height is upwards and depth move away from us.
        dim_labels = ['d', 'h', 'w']
        axes = [2, 0, 1]
        dim_labels = list(ub.take(dim_labels, axes))
        facecolors = facecolors.transpose(*(axes + [3]))
        filled_ = filled.transpose(*axes)
        spatial_axes_ = list(ub.take(spatial_axes, axes))

        # ax.voxels(filled_, facecolors=facecolors, edgecolors=facecolors)
        if False:
            ax.voxels(filled_, facecolors=facecolors, edgecolors='k')
        else:
            # hack to show "occluded" voxels
            # stride = [1, 3, 1]
            stride = [2, 2, 2]
            slices = tuple(slice(None, None, s) for s in stride)
            spatial_axes2 = list(np.array(spatial_axes_) * stride)
            filled2 = np.zeros(spatial_axes2, dtype=bool)
            facecolors2 = np.empty(spatial_axes2 + [4], dtype=np.float32)
            filled2[slices] = filled_
            facecolors2[slices] = facecolors
            edgecolors2 = [0, 0, 0, alpha]
            # 'k'
            # edgecolors2 = facecolors2

            # Shrink the gaps, which let you see occluded voxels
            x, y, z = np.indices(np.array(filled2.shape) + 1).astype(float) // 2
            x[0::2, :, :] += 0.05
            y[:, 0::2, :] += 0.05
            z[:, :, 0::2] += 0.05
            x[1::2, :, :] += 0.95
            y[:, 1::2, :] += 0.95
            z[:, :, 1::2] += 0.95

            ax.voxels(x, y, z, filled2, facecolors=facecolors2,
                      edgecolors=edgecolors2)

        for xyz, dlbl in zip(['x', 'y', 'z'], dim_labels):
            getattr(ax, 'set_' + xyz + 'label')(dlbl)

        for xyz in ['x', 'y', 'z']:
            getattr(ax, 'set_' + xyz + 'ticks')([])

        ax.set_aspect('equal')
        if not labels or i < num_plots - 1:
            # show axis only on the last plot
            ax.grid(False)
            plt.axis('off')

    for i in ub.ProgIter(range(num_plots), desc='plot conv layer', enabled=False):
        if voxels:
            plot_kernel3d(i)
        else:
            img = weights_flat[i]
            kwplot.imshow(img, fnum=fnum, pnum=pnum_[i],
                          interpolation='nearest', colorspace=colorspace)
    return fig
def benchmark_unpack():
    """
    What is faster unpacking items with slice syntax or tuple-unpacking

    Slice unpacking seems to be a tad faster.

    Each method is timed on lists of 1 to 64 random items (either short
    random strings or floats). Every individual timing is recorded as a row
    of a long-form table, a per-method summary is printed, and time versus
    input size is plotted with seaborn.
    """
    import ubelt as ub
    import random
    import pandas as pd
    import timerit
    import string

    def tuple_unpack(items):
        *prefix, key = items
        return prefix, key

    def slice_unpack(items):
        prefix, key = items[:-1], items[-1]
        return prefix, key

    method_lut = {
        'tuple_unpack': tuple_unpack,
        'slice_unpack': slice_unpack,
    }

    ti = timerit.Timerit(5000, bestof=3, verbose=2)

    basis = {
        'method': ['tuple_unpack', 'slice_unpack'],
        'size': list(range(1, 64 + 1)),
        'type': ['string', 'float'],
    }
    xlabel = 'size'
    group_labels = {
        'style': ['type'],
        'size': [],
    }
    # Every parameter that is neither the x-axis nor another group is a hue
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) -
        set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        size = params['size']
        # NOTE: the item type must come from the parameter grid. Comparing
        # the builtin ``type`` against a string is always False, which left
        # ``items`` undefined when the timed call was reached.
        item_type = params['type']
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Input construction is setup, keep it outside the timed region
            if item_type == 'string':
                items = [
                    ''.join(random.choices(string.printable, k=5))
                    for _ in range(size)
                ]
            elif item_type == 'float':
                items = [random.random() for _ in range(size)]
            else:
                raise KeyError(item_type)
            with timer:
                method(items)

        for time in ti.times:
            row = {
                'time': time,
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('time')

    summary_rows = []
    for method, group in data.groupby('method'):
        times = group['time']
        summary_rows.append({
            'method': method,
            'mean': times.mean(),
            'std': times.std(),
            'min': times.min(),
            'max': times.max(),
        })
    print(pd.DataFrame(summary_rows).sort_values('mean'))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='time', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        # x is the input size and y is the measured time
        ax.set_xlabel('Size of slices')
        ax.set_ylabel('Execution time')
def benchmark_pathlib_vs_fspath():
    """
    Compare the cost of joining path parts with pathlib versus os.path

    For several numbers of single-character path parts, time
    ``pathlib.Path(*parts)`` against ``os.path.join(*parts)``, print a table
    of the mean / min timings, and plot the min time versus the number of
    parts with seaborn.
    """
    import ubelt as ub
    import pathlib
    import pandas as pd
    import random
    import timerit
    import os

    def method_pathlib(inputs):
        pathlib.Path(*inputs)

    def method_ospath(inputs):
        os.path.join(*inputs)

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(10000, bestof=10, verbose=2)

    basis = {
        'method': ['method_pathlib', 'method_ospath'],
        'num_parts': [2, 4, 8, 12, 16],
    }
    xlabel = 'num_parts'
    kw_labels = []
    group_labels = {
        'style': [],
        'size': [],
    }
    # Every parameter that is neither the x-axis nor another group is a hue
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) -
        set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        # Random lowercase letters are used as the path parts
        n = params['num_parts']
        inputs = [chr(random.randint(97, 120)) for _ in range(n)]
        kwargs['inputs'] = inputs

        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        # x is the number of path parts and y is the measured time
        ax.set_xlabel('Number of parts')
        ax.set_ylabel('Time')
def pandas_plot_matrix(df, rot=90, ax=None, grid=True, label=None,
                       zerodiag=False, cmap='viridis', showvals=False,
                       logscale=True):
    """
    Draw a DataFrame as a colored matrix with labeled rows and columns

    Args:
        df (pd.DataFrame): table to draw. Rows (``df.index``) are laid out
            along the y-axis and columns (``df.columns``) along the x-axis.
        rot (float): rotation of the x tick labels in degrees
        ax (matplotlib.axes.Axes, optional): axes to draw on. If None,
            figure 1 is cleared and its axes are used.
        grid (bool): if True, draw white lines between the cells
        label (str, optional): label for the colorbar
        zerodiag (bool): if True, the diagonal is subtracted out of a copy of
            the values before drawing
        cmap (str): name of the matplotlib colormap
        showvals (bool): if True, write each value in its cell
        logscale (bool): if True, color the cells on a logarithmic scale

    Returns:
        matplotlib.axes.Axes: the axes that was drawn on
    """
    import matplotlib as mpl
    import copy
    from matplotlib import pyplot as plt
    import matplotlib.cm  # NOQA
    import kwplot
    if ax is None:
        fig = kwplot.figure(fnum=1, pnum=(1, 1, 1))
        fig.clear()
        ax = plt.gca()
    # NOTE: a user-specified ax must be respected. Previously it was always
    # overwritten by the current pyplot axes.

    values = df.values
    if zerodiag:
        values = values.copy()
        values = values - np.diag(np.diag(values))

    # aximg = ax.imshow(values, interpolation='none', cmap='viridis')
    if logscale:
        from matplotlib.colors import LogNorm
        # The smallest positive entry is the lower bound of the log scale
        vmin = df[df > 0].min().min()
        norm = LogNorm(vmin=vmin, vmax=values.max())
    else:
        norm = None

    cmap = copy.copy(mpl.cm.get_cmap(cmap))  # copy the default cmap
    cmap.set_bad((0, 0, 0))

    aximg = ax.matshow(values, interpolation='none', cmap=cmap, norm=norm)
    # aximg = ax.imshow(values, interpolation='none', cmap='viridis', norm=norm)

    # ax.imshow(values, interpolation='none', cmap='viridis')
    ax.grid(False)
    # Attach the colorbar to the figure that owns ax, which is not
    # necessarily the current pyplot figure.
    cax = ax.figure.colorbar(aximg, ax=ax)
    if label is not None:
        cax.set_label(label)

    # matshow puts the first array axis (rows / index) on the y-axis and the
    # second (columns) on the x-axis, so x ticks are labeled with columns.
    ax.set_xticks(list(range(len(df.columns))))
    ax.set_xticklabels([lbl[0:100] for lbl in df.columns])
    for lbl in ax.get_xticklabels():
        lbl.set_rotation(rot)
        lbl.set_horizontalalignment('center')

    ax.set_yticks(list(range(len(df.index))))
    ax.set_yticklabels([lbl[0:100] for lbl in df.index])
    for lbl in ax.get_yticklabels():
        lbl.set_horizontalalignment('right')
        lbl.set_verticalalignment('center')

    # Grid lines around the pixels
    if grid:
        offset = -.5
        xlim = [-.5, len(df.columns)]
        ylim = [-.5, len(df.index)]
        segments = []
        # One vertical line per column ...
        for x in range(xlim[1]):
            xdata = [x + offset, x + offset]
            ydata = ylim
            segments.append(list(zip(xdata, ydata)))
        # ... and one horizontal line per row
        for y in range(ylim[1]):
            xdata = xlim
            ydata = [y + offset, y + offset]
            segments.append(list(zip(xdata, ydata)))
        bingrid = mpl.collections.LineCollection(segments, color='w', linewidths=1)
        ax.add_collection(bingrid)

    if showvals:
        x_basis = np.arange(len(df.columns))
        y_basis = np.arange(len(df.index))
        x, y = np.meshgrid(x_basis, y_basis)

        for c, r in zip(x.flatten(), y.flatten()):
            val = df.iloc[r, c]
            ax.text(c, r, val, va='center', ha='center', color='white')
    return ax
def draw_perclass_prcurve(cx_to_info, classes=None, prefix='', fnum=1, **kw):
    """
    Draw one precision / recall curve per class on a single axes

    Curves are ordered by descending AP (classes with a nan AP come last)
    and the title reports the nan-ignoring mean of the per-class APs.

    Args:
        cx_to_info (PerClass_Measures | Dict): maps a class index (or name)
            to a measures dictionary containing ``ap`` and one of ``pr``,
            ``ppv`` / ``tpr``, or ``prec`` / ``rec``.
        classes (optional): indexed with a key to get its name when the key
            is an int.
        prefix (str): prepended to the plot title
        fnum (int): figure number
        **kw: passed through to ``kwplot.multi_plot``

    Returns:
        the axes returned by ``kwplot.multi_plot``

    Example:
        >>> # xdoctest: +REQUIRES(module:kwplot)
        >>> from kwcoco.metrics.drawing import *  # NOQA
        >>> from kwcoco.metrics import DetectionMetrics
        >>> dmet = DetectionMetrics.demo(
        >>>     nimgs=3, nboxes=(0, 10), n_fp=(0, 3), n_fn=(0, 2), classes=3, score_noise=0.1, box_noise=0.1, with_probs=False)
        >>> cfsn_vecs = dmet.confusion_vectors()
        >>> print(cfsn_vecs.data.pandas())
        >>> classes = cfsn_vecs.classes
        >>> cx_to_info = cfsn_vecs.binarize_ovr().measures()['perclass']
        >>> print('cx_to_info = {}'.format(ub.repr2(cx_to_info, nl=1)))
        >>> import kwplot
        >>> kwplot.autompl()
        >>> draw_perclass_prcurve(cx_to_info, classes)
        >>> # xdoctest: +REQUIRES(--show)
        >>> kwplot.show_if_requested()

    Ignore:
        from kwcoco.metrics.drawing import *  # NOQA
        import xdev
        globals().update(xdev.get_func_kwargs(draw_perclass_prcurve))
    """
    import kwplot

    # Order the class keys from highest to lowest AP, treating nan as worst
    all_keys = list(cx_to_info.keys())
    sort_scores = np.array([item['ap'] for item in cx_to_info.values()])
    sort_scores[np.isnan(sort_scores)] = -np.inf
    ordered_keys = [all_keys[idx] for idx in np.argsort(sort_scores)][::-1]

    ap_values = []
    xydata = ub.odict()
    for cx in ordered_keys:
        info = cx_to_info[cx]
        ap = info['ap']
        if isinstance(cx, int):
            catname = classes[cx]
        else:
            catname = cx

        # The curve can be stored under several different keys
        if 'pr' in info:
            curve = info['pr']
        elif 'ppv' in info:
            curve = (info['ppv'], info['tpr'])
        elif 'prec' in info:
            curve = (info['prec'], info['rec'])
        else:
            raise KeyError('pr, prec, or ppv not in info')

        if not np.isfinite(ap):
            # No valid AP, draw a degenerate curve at the origin
            ap_values.append(np.nan)
            precision, recall = [0], [0]
        else:
            ap_values.append(ap)
            precision, recall = curve

        if precision is None and recall is None:
            # I thought AP=nan in this case, but I missed something
            precision, recall = [0], [0]

        suffix = _realpos_label_suffix(info)
        curve_label = 'ap={:0.2f}: {} ({})'.format(ap, catname, suffix)
        xydata[curve_label] = (recall, precision)

    with warnings.catch_warnings():
        # nanmean warns if there are no finite APs
        warnings.filterwarnings('ignore', 'Mean of empty slice', RuntimeWarning)
        mAP = np.nanmean(ap_values)

    if 0:
        # Disabled seaborn variant
        import seaborn as sns
        import pandas as pd
        # sns.set()
        # TODO: deprecate multi_plot for seaborn?
        data_groups = {
            key: {'recall': r, 'precision': p}
            for key, (r, p) in xydata.items()
        }
        print('data_groups = {}'.format(ub.repr2(data_groups, nl=3)))

        longform = []
        for key, subdata in data_groups.items():
            subdata = pd.DataFrame.from_dict(subdata)
            subdata['label'] = key
            longform.append(subdata)
        data = pd.concat(longform)

        fig = kwplot.figure(fnum=fnum)
        ax = fig.gca()
        longform = []
        for key, (r, p) in xydata.items():
            subdata = pd.DataFrame.from_dict({'recall': r, 'precision': p, 'label': key})
            longform.append(subdata)
        data = pd.concat(longform)

        palette = ub.dzip(xydata.keys(), kwplot.distinct_colors(len(xydata)))
        # markers = ub.dzip(xydata.keys(), kwplot.distinct_markers(len(xydata)))

        sns.lineplot(
            data=data, x='recall', y='precision',
            hue='label', style='label', ax=ax,
            # markers=markers,
            estimator=None,
            ci=0,
            hue_order=list(xydata.keys()),
            palette=palette,
        )
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        return ax

    plot_title = prefix + 'OVR mAP={:.4f}'.format(mAP)
    ax = kwplot.multi_plot(
        xydata=xydata, fnum=fnum,
        xlim=(0, 1), ylim=(0, 1), xpad=0.01, ypad=0.01,
        xlabel='recall', ylabel='precision',
        err_style='bars',
        title=plot_title,
        legend_loc='lower right',
        color='distinct', linestyle='cycle', marker='cycle', **kw
    )
    return ax
def _devcheck_corner():
    """
    Developer scratch check that visualizes cropping a region out of a warp

    Builds a random DelayedWarp, picks a fixed region of it, maps that region
    into the space of a leaf, quantizes it to a pixel-aligned crop box, and
    builds the transforms between the cropped leaf and a new root whose
    origin is the corner of the region. The boxes, and a few points mapped
    from the new root into the crop, are drawn in a 2x2 figure.

    NOTE(review): several intermediates are computed but never used and
    ``tf_crop_to_leaf`` is rebound three times; this looks like exploratory
    code rather than a test with assertions.
    """
    self = DelayedWarp.random(rng=0)
    print(self.nesting())
    # Region of interest given as (row-slice, column-slice) in root space
    region_slices = (slice(40, 90), slice(20, 62))
    region_box = kwimage.Boxes.from_slice(region_slices, shape=self.shape)
    region_bounds = region_box.to_polygons()[0]

    # Exhaust the iterator: only the last leaf it yields is used below
    for leaf in self._optimize_paths():
        pass

    tf_leaf_to_root = leaf['transform']
    tf_root_to_leaf = np.linalg.inv(tf_leaf_to_root)

    # Map the region into leaf space, then snap it to a quantized box
    leaf_region_bounds = region_bounds.warp(tf_root_to_leaf)
    leaf_region_box = leaf_region_bounds.bounding_box().to_ltrb()
    leaf_crop_box = leaf_region_box.quantize()
    lt_x, lt_y, rb_x, rb_y = leaf_crop_box.data[0, 0:4]

    # The quantized crop mapped back into root space (only used for drawing)
    root_crop_corners = leaf_crop_box.to_polygons()[0].warp(tf_leaf_to_root)

    # leaf_crop_slices = (slice(lt_y, rb_y), slice(lt_x, rb_x))

    # Difference between the quantized corner and the exact corner
    crop_offset = leaf_crop_box.data[0, 0:2]
    corner_offset = leaf_region_box.data[0, 0:2]
    offset_xy = crop_offset - corner_offset

    # Bare expression with no effect
    tf_root_to_leaf

    # NOTE:
    # Cropping applies a translation in whatever space we do it in
    # We need to save the bounds of the crop.
    # But now we need to adjust the transform so it points to the
    # cropped-leaf-space not just the leaf-space, so we invert the implicit
    # crop

    tf_crop_to_leaf = Affine.affine(offset=crop_offset)

    # tf_newroot_to_root = Affine.affine(offset=region_box.data[0, 0:2])
    tf_root_to_newroot = Affine.affine(offset=region_box.data[0, 0:2]).inv()

    # Chain: crop -> leaf -> root -> newroot
    tf_crop_to_leaf = Affine.affine(offset=crop_offset)
    tf_crop_to_newroot = tf_root_to_newroot @ tf_leaf_to_root @ tf_crop_to_leaf
    tf_newroot_to_crop = tf_crop_to_newroot.inv()

    # tf_leaf_to_crop
    # tf_corner_offset = Affine.affine(offset=offset_xy)

    # NOTE(review): this rebinding happens after tf_crop_to_newroot was
    # built, so it does not influence anything that is drawn below.
    subpixel_offset = Affine.affine(offset=offset_xy).matrix
    tf_crop_to_leaf = subpixel_offset
    # tf_crop_to_root = tf_leaf_to_root @ tf_crop_to_leaf
    # tf_root_to_crop = np.linalg.inv(tf_crop_to_root)

    if 1:
        import kwplot
        kwplot.autoplt()

        lw, lh = leaf['sub_data_shape'][0:2]
        leaf_box = kwimage.Boxes([[0, 0, lw, lh]], 'xywh')
        root_box = kwimage.Boxes([[0, 0, self.dsize[0], self.dsize[1]]], 'xywh')

        # ax1: root space, ax2: leaf space, ax3: new root, ax4: cropped leaf
        ax1 = kwplot.figure(fnum=1, pnum=(2, 2, 1), doclf=1).gca()
        ax2 = kwplot.figure(fnum=1, pnum=(2, 2, 2)).gca()
        ax3 = kwplot.figure(fnum=1, pnum=(2, 2, 3)).gca()
        ax4 = kwplot.figure(fnum=1, pnum=(2, 2, 4)).gca()
        root_box.draw(setlim=True, ax=ax1)
        leaf_box.draw(setlim=True, ax=ax2)

        region_bounds.draw(ax=ax1, color='green', alpha=.4)
        leaf_region_bounds.draw(ax=ax2, color='green', alpha=.4)
        leaf_crop_box.draw(ax=ax2, color='purple')
        root_crop_corners.draw(ax=ax1, color='purple', alpha=.4)

        new_w = region_box.to_xywh().data[0, 2]
        new_h = region_box.to_xywh().data[0, 3]
        ax3.set_xlim(0, new_w)
        ax3.set_ylim(0, new_h)

        crop_w = leaf_crop_box.to_xywh().data[0, 2]
        crop_h = leaf_crop_box.to_xywh().data[0, 3]
        ax4.set_xlim(0, crop_w)
        ax4.set_ylim(0, crop_h)

        # A few fixed points plus random ones in new-root space, and the
        # same points mapped into crop space
        pts3_ = kwimage.Points.random(3).scale((new_w, new_h))
        pts3 = kwimage.Points(
            xy=np.vstack([[[0, 0], [5, 5], [0, 49], [40, 45]], pts3_.xy]))
        pts4 = pts3.warp(tf_newroot_to_crop.matrix)
        pts3.draw(ax=ax3)
        pts4.draw(ax=ax4)
def draw_roc(info, prefix='', fnum=1, **kw):
    """
    Draw a ROC curve (true positive rate versus false positive rate)

    The truncated measures (``trunc_fp_count``, ``trunc_fpr``, ``trunc_tpr``
    and ``trunc_auc``) are used when all of them are available, otherwise the
    untruncated ``fp_count``, ``fpr``, ``tpr`` and ``auc`` entries are used.

    Args:
        info (Measures | Dict): measures to draw; ``realpos_total`` is also
            read from it.
        prefix (str): prepended to the plot title
        fnum (int): figure number
        **kw: passed through to ``kwplot.multi_plot``

    Returns:
        the axes returned by ``kwplot.multi_plot``

    NOTE:
        There needs to be enough negative examples for using ROC to make any
        sense!

    Example:
        >>> # xdoctest: +REQUIRES(module:kwplot, module:seaborn)
        >>> from kwcoco.metrics.drawing import *  # NOQA
        >>> from kwcoco.metrics import DetectionMetrics
        >>> dmet = DetectionMetrics.demo(nimgs=30, null_pred=1, classes=3,
        >>>                              nboxes=10, n_fp=10, box_noise=0.3,
        >>>                              with_probs=False)
        >>> dmet.true_detections(0).data
        >>> cfsn_vecs = dmet.confusion_vectors(compat='mutex', prioritize='iou', bias=0)
        >>> print(cfsn_vecs.data._pandas().sort_values('score'))
        >>> classes = cfsn_vecs.classes
        >>> info = ub.peek(cfsn_vecs.binarize_ovr().measures()['perclass'].values())
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> draw_roc(info)
        >>> kwplot.show_if_requested()
    """
    import kwplot

    def _lookup(key_prefix):
        # Fetch the four curve entries that share a common key prefix
        return (
            info[key_prefix + 'fp_count'],
            info[key_prefix + 'fpr'],
            info[key_prefix + 'tpr'],
            info[key_prefix + 'auc'],
        )

    try:
        fp_count, fp_rate, tp_rate, auc = _lookup('trunc_')
    except KeyError:
        fp_count, fp_rate, tp_rate, auc = _lookup('')

    realpos_total = info['realpos_total']
    falsepos_total = fp_count[-1]
    title = prefix + 'AUC*: {:.4f}'.format(auc)

    if 0:
        # Disabled seaborn variant
        # TODO: deprecate multi_plot for seaborn?
        fig = kwplot.figure(fnum=fnum)
        ax = fig.gca()
        import seaborn as sns
        xlabel = 'fpr (count={})'.format(falsepos_total)
        ylabel = 'tpr (count={})'.format(int(realpos_total))
        data = {
            xlabel: list(fp_rate),
            ylabel: list(tp_rate),
        }
        sns.lineplot(data=data, x=xlabel, y=ylabel, markers='', ax=ax)
        ax.set_title(title)
        return ax

    realpos_total_disp = inty_display(realpos_total)
    x_axis_label = 'fpr (count={})'.format(falsepos_total)
    y_axis_label = 'tpr (count={})'.format(realpos_total_disp)
    ax = kwplot.multi_plot(
        list(fp_rate), list(tp_rate), marker='',
        # xlabel='FA count (false positive count)',
        xlabel=x_axis_label,
        ylabel=y_axis_label,
        title=title,
        ylim=(0, 1), ypad=1e-2,
        xlim=(0, 1), xpad=1e-2,
        fnum=fnum, **kw)
    return ax
def plot_matrix(matrix, index=None, columns=None, rot=90, ax=None, grid=True,
                label=None, zerodiag=False, cmap='viridis', showvals=False,
                showzero=True, logscale=False, xlabel=None, ylabel=None,
                fnum=None, pnum=None):
    """
    Helper for plotting confusion matrices

    Rows of the matrix (``index``) are laid out along the y-axis and columns
    along the x-axis.

    Args:
        matrix (ndarray | pd.DataFrame) : if a data frame then index,
            columns, xlabel, and ylabel will be defaulted to sensible values.
        index (sequence, optional): row labels (y tick labels)
        columns (sequence, optional): column labels (x tick labels)
        rot (float): rotation of the x tick labels in degrees
        ax (matplotlib.axes.Axes, optional): axes to draw on. If None, the
            figure given by fnum / pnum is cleared and used.
        grid (bool): if True, draw white lines between the cells
        label (str, optional): label for the colorbar
        zerodiag (bool): if True, the diagonal is subtracted out of a copy of
            the values before drawing
        cmap (str): name of the matplotlib colormap
        showvals (bool): if True, write each value in its cell
        showzero (bool): if False, zeros are not written when showvals is
            True, and (when not logscale) the first colormap entry is
            overwritten with black.
        logscale (bool): if True, color the cells on a logarithmic scale
        xlabel (str, optional): label of the x-axis
        ylabel (str, optional): label of the y-axis
        fnum (int, optional): figure number, used if ax is None
        pnum (tuple, optional): plot number, used if ax is None

    Returns:
        matplotlib.axes.Axes: the axes that was drawn on

    TODO:
        - [ ] Replace internals with seaborn

    Example:
        >>> from kwplot.mpl_draw import *  # NOQA
        >>> classes = ['cls1', 'cls2', 'cls3']
        >>> matrix = np.array([[2, 2, 1], [3, 1, 0], [1, 0, 0]])
        >>> matrix = pd.DataFrame(matrix, index=classes, columns=classes)
        >>> matrix.index.name = 'real'
        >>> matrix.columns.name = 'pred'
        >>> plot_matrix(matrix, showvals=True)
        >>> # xdoc: +REQUIRES(--show)
        >>> import matplotlib.pyplot as plt
        >>> import kwplot
        >>> kwplot.autompl()
        >>> plot_matrix(matrix, showvals=True)

    Example:
        >>> from kwplot.mpl_draw import *  # NOQA
        >>> matrix = np.array([[2, 2, 1], [3, 1, 0], [1, 0, 0]])
        >>> plot_matrix(matrix)
        >>> # xdoc: +REQUIRES(--show)
        >>> import matplotlib.pyplot as plt
        >>> import kwplot
        >>> kwplot.autompl()
        >>> plot_matrix(matrix)

    Example:
        >>> from kwplot.mpl_draw import *  # NOQA
        >>> matrix = np.array([[2, 2, 1], [3, 1, 0], [1, 0, 0]])
        >>> classes = ['cls1', 'cls2', 'cls3']
        >>> plot_matrix(matrix, index=classes, columns=classes)
    """
    import matplotlib as mpl
    import matplotlib.cm  # NOQA

    assert len(matrix.shape) == 2

    if isinstance(matrix, pd.DataFrame):
        values = matrix.values
        if index is None and columns is None:
            index = matrix.index
            columns = matrix.columns
        if xlabel is None and ylabel is None:
            ylabel = index.name
            xlabel = columns.name
    else:
        values = matrix

    if index is None:
        index = np.arange(matrix.shape[0])
    if columns is None:
        columns = np.arange(matrix.shape[1])

    if ax is None:
        import kwplot
        fig = kwplot.figure(fnum=fnum, pnum=pnum)
        fig.clear()
        ax = fig.gca()

    if zerodiag:
        values = values.copy()
        values = values - np.diag(np.diag(values))

    # aximg = ax.imshow(values, interpolation='none', cmap='viridis')
    if logscale:
        from matplotlib.colors import LogNorm
        # The smallest positive entry is the lower bound of the log scale
        vmin = values[values > 0].min().min()
        norm = LogNorm(vmin=vmin, vmax=values.max())
    else:
        norm = None

    cmap = copy.copy(mpl.cm.get_cmap(cmap))  # copy the default cmap
    cmap.set_bad((0, 0, 0))
    if not showzero and not logscale:
        # hack zero to be black
        cmap.colors[0] = [0, 0, 0]

    aximg = ax.matshow(values, interpolation='none', cmap=cmap, norm=norm)

    ax.grid(False)
    cax = ax.figure.colorbar(aximg, ax=ax)
    if label is not None:
        cax.set_label(label)

    # matshow puts the first array axis (rows / index) on the y-axis and the
    # second (columns) on the x-axis, so x ticks are labeled with columns.
    ax.set_xticks(list(range(len(columns))))
    ax.set_xticklabels([str(lbl)[0:100] for lbl in columns])
    for lbl in ax.get_xticklabels():
        lbl.set_rotation(rot)
        lbl.set_horizontalalignment('center')

    ax.set_yticks(list(range(len(index))))
    ax.set_yticklabels([str(lbl)[0:100] for lbl in index])
    for lbl in ax.get_yticklabels():
        lbl.set_horizontalalignment('right')
        lbl.set_verticalalignment('center')

    # Grid lines around the pixels
    if grid:
        offset = -.5
        xlim = [-.5, len(columns)]
        ylim = [-.5, len(index)]
        segments = []
        # One vertical line per column ...
        for x in range(xlim[1]):
            xdata = [x + offset, x + offset]
            ydata = ylim
            segments.append(list(zip(xdata, ydata)))
        # ... and one horizontal line per row
        for y in range(ylim[1]):
            xdata = xlim
            ydata = [y + offset, y + offset]
            segments.append(list(zip(xdata, ydata)))
        bingrid = mpl.collections.LineCollection(segments, color='w', linewidths=1)
        ax.add_collection(bingrid)

    if showvals:
        x_basis = np.arange(len(columns))
        y_basis = np.arange(len(index))
        x, y = np.meshgrid(x_basis, y_basis)

        for c, r in zip(x.flatten(), y.flatten()):
            val = values[r, c]
            if val == 0 and not showzero:
                # Zeros are only written when explicitly requested
                continue
            ax.text(c, r, val, va='center', ha='center', color='white')

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    return ax
def benchmark_nested_break():
    """
    There are several ways to do a nested break, but which one is best?

    Times each ``method*`` variant defined below over a grid of input sizes
    and input styles using ``timerit``, prints summary statistics (and, when
    ``openskill`` is importable, a skill ranking of the methods), and draws a
    seaborn line plot of time versus size.

    https://twitter.com/nedbat/status/1515345787563220996
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import itertools as it

    def method1_itertools(iter1, iter2):
        for i, j in it.product(iter1, iter2):
            if i == 20 and j == 20:
                break

    def method2_except(iter1, iter2):
        class Found(Exception):
            pass
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise Found
        except Found:
            pass

    class FoundPredef(Exception):
        pass

    def method2_5_except_predef(iter1, iter2):
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise FoundPredef
        except FoundPredef:
            pass

    def method3_gendef(iter1, iter2):
        def genfunc():
            for i in iter1:
                for j in iter2:
                    yield i, j

        for i, j in genfunc():
            if i == 20 and j == 20:
                break

    def method4_genexp(iter1, iter2):
        genexpr = ((i, j) for i in iter1 for j in iter2)
        for i, j in genexpr:
            if i == 20 and j == 20:
                break

    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(1000, bestof=10, verbose=1)

    # if True, record every trail run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    import numpy as np
    basis = {
        'method': ['method1_itertools', 'method2_except',
                   'method2_5_except_predef', 'method3_gendef',
                   'method4_genexp'],
        # 'n1': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        # 'n2': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        'size': np.logspace(1, np.log2(10000), 30, base=2).astype(int),
        'input_style': ['range', 'list', 'customized_iter'],
        # 'param_name': [param values],
    }
    xlabel = 'size'
    xinput_labels = ['n1', 'n2', 'size']

    # Set these to param labels that directly transfer to method kwargs
    kw_labels = []
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['input_style'],
        'size': [],
    }
    # Every remaining basis key (here: the method) is mapped to the hue
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel} - xinput_labels) -
        set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    def make_input(params):
        # Given the parameterization make the benchmark function input
        # n1 = params['n1']
        # n2 = params['n2']
        size = params['size']
        n1 = int(np.sqrt(size))
        n2 = int(np.sqrt(size))
        if params['input_style'] == 'list':
            iter1 = list(range(n1))
            iter2 = list(range(n2))
        elif params['input_style'] == 'range':
            iter1 = range(n1)
            iter2 = range(n2)
        elif params['input_style'] == 'customized_iter':
            import random

            # NOTE(review): these are one-shot generators. The nested-loop
            # methods exhaust iter2 during the first outer iteration, while
            # it.product consumes both inputs up front, so the methods do
            # not perform the same amount of work for this input style.
            def rando1():
                rng1 = random.Random(0)
                for _ in range(n1):
                    yield rng1.randint(0, n2)

            def rando2():
                rng2 = random.Random(1)
                for _ in range(n2):
                    yield rng2.randint(0, n2)

            iter1 = rando1()
            iter2 = rando2()
        else:
            raise KeyError
        return {'iter1': iter1, 'iter2': iter2}

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        # size = params['n1'] * params['n2']
        # params['size'] = size
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # Inputs are rebuilt on every loop because generator inputs are
            # consumed by the method call.
            kwargs.update(make_input(params))
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # chunk_iter = ub.chunks(ti.times, ti.bestof)
            # times = list(map(min, chunk_iter))  # TODO: timerit method for this
            times = ti.robust_times()
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename(
            {'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        try:
            import openskill
        except ImportError:
            # The ranking is an optional extra; keep the benchmark usable
            # when the package is not installed.
            print('openskill is not installed; skipping skill ranking')
            USE_OPENSKILL = 0
    if USE_OPENSKILL:
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    other_keys = sorted(set(stats_data.columns) - {
        'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for _group_vals, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        # Speedup is relative to the slowest method in the same setting
        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(
            ub.dzip(method_ratings.keys(), win_prob)
        ).sort_values(ascending=False)
        print('method_ratings = {}'.format(ub.repr2(method_ratings, nl=1)))
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax,
                     **plotkw)
        ax.set_title(
            f'Benchmark Nested Breaks: #Trials {ti.num}, bestof {ti.bestof}')
        ax.set_xlabel(f'{xlabel}')
        ax.set_ylabel('Time')
        ax.set_xscale('log')
        ax.set_yscale('log')

        # Only block on the figure when not running inside IPython
        try:
            __IPYTHON__
        except NameError:
            plt.show()
def _dump_measures(tb_data, out_dpath, mode=None, smoothing=0.0,
                   ignore_outliers=True):
    """
    This is its own function in case we need to modify formatting

    Renders the curves stored in ``tb_data`` to png files in ``out_dpath``
    using the matplotlib ``agg`` backend. Depending on the key names, curves
    are drawn as a combined loss plot per tag (train / vali / test), as a
    combined plot per special group, and as individual plots.

    Args:
        tb_data (dict): maps a measure key to a dict with ``'xdata'`` and
            ``'ydata'`` entries. An optional ``'meta'`` entry may provide
            ``'nice'`` (used in titles) and ``'special_groupers'`` (name
            parts whose matching curves are plotted together).

        out_dpath (str): directory the png files are written to.

        mode (str | Iterable[str] | None): ``'epoch'``, ``'iter'``, or an
            iterable of these. ``None`` dumps both.

        smoothing (float): exponential smoothing factor; the curves are
            smoothed with ``alpha = 1 - smoothing``. Values >= 1 disable
            smoothing. Keys with an ``lr`` or ``momentum`` part are never
            smoothed.

        ignore_outliers (bool): if True, the upper y-limit is estimated from
            the 5% / 95% quantiles instead of the raw maximum.

    CommandLine:
        xdoctest -m netharn.mixins _dump_measures --out_dpath=.

    Example:
        >>> # SCRIPT
        >>> # Reread a dumped pickle file
        >>> from netharn.mixins import *  # NOQA
        >>> from netharn.mixins import _dump_monitor_tensorboard, _dump_measures
        >>> import json
        >>> from os.path import join
        >>> import ubelt as ub
        >>> try:
        >>>     import seaborn as sns
        >>>     sns.set()
        >>> except ImportError:
        >>>     pass
        >>> out_dpath = ub.expandpath('~/work/project/fit/nice/nicename/monitor/tensorboard/')
        >>> out_dpath = ub.argval('--out_dpath', default=out_dpath)
        >>> mode = ['epoch', 'iter']
        >>> fpath = join(out_dpath, 'tb_data.json')
        >>> tb_data = json.load(open(fpath, 'r'))
        >>> import kwplot
        >>> kwplot.autompl()
        >>> _dump_measures(tb_data,  out_dpath, smoothing=0)
    """
    import re
    import ubelt as ub
    from os.path import join
    import numpy as np
    import kwplot
    import matplotlib as mpl
    from kwplot.auto_backends import BackendContext

    with BackendContext('agg'):
        # kwplot.autompl()

        # TODO: Is it possible to get htop to show this process with some name that
        # distinguishes it from the dataloader workers?
        # import sys
        # import multiprocessing
        # if multiprocessing.current_process().name != 'MainProcess':
        #     if sys.platform.startswith('linux'):
        #         import ctypes
        #         libc = ctypes.cdll.LoadLibrary('libc.so.6')
        #         title = 'Netharn MPL Dump Measures'
        #         libc.prctl(len(title), title, 0, 0, 0)

        # NOTE: This cause warnings when exeucted as daemon process
        # try:
        #     import seaborn as sbn
        #     sbn.set()
        # except ImportError:
        #     pass

        valid_modes = ['epoch', 'iter']
        if mode is None:
            mode = valid_modes
        if ub.iterable(mode):
            # Hack: Call with all modes
            for mode_ in mode:
                _dump_measures(tb_data, out_dpath, mode=mode_,
                               smoothing=smoothing,
                               ignore_outliers=ignore_outliers)
            return
        else:
            assert mode in valid_modes, 'mode must be one of {}'.format(
                valid_modes)

        meta = tb_data.get('meta', {})
        nice = meta.get('nice', '?nice?')
        special_groupers = meta.get('special_groupers', ['loss'])

        fig = kwplot.figure(fnum=1)

        plot_keys = [
            key for key in tb_data
            if ('train_' + mode in key or
                'vali_' + mode in key or
                'test_' + mode in key or
                mode + '_' in key)
        ]
        y01_measures = [
            '_acc', '_ap', '_mAP', '_auc', '_mcc', '_brier', '_mauc',
        ]
        y0_measures = ['error', 'loss']

        # Mutable working set; keys are removed once they were group-plotted
        keys = set(plot_keys)

        # The symlog threshold keyword was renamed in matplotlib 3.3.
        # An unparseable version string is assumed to be a modern release.
        found = re.match(r'(\d+)\.(\d+)', mpl.__version__)
        if found is None or tuple(map(int, found.groups())) >= (3, 3):
            symlog_kwarg = 'linthresh'
        else:
            symlog_kwarg = 'linthreshy'

        def smooth_curve(ydata, beta):
            """
            Curve smoothing algorithm used by tensorboard
            """
            import pandas as pd
            alpha = 1.0 - beta
            if alpha <= 0:
                # Still hand back an ndarray so callers can rely on
                # ``.min()`` and boolean masking.
                return np.asarray(ydata, dtype=float)
            ydata_smooth = pd.Series(ydata).ewm(alpha=alpha).mean().values
            return ydata_smooth

        def inlier_ylim(ydatas):
            """
            outlier removal used by tensorboard
            """
            low, high = None, None
            for ydata in ydatas:
                if len(ydata) == 0:
                    # Quantiles of an empty curve are undefined
                    continue
                q1 = 0.05
                q2 = 0.95
                low_, high_ = np.quantile(ydata, [q1, q2])

                # Extrapolate how big the entire span should be based on inliers
                inner_q = q2 - q1
                inner_extent = high_ - low_
                extrap_total_extent = inner_extent / inner_q

                # amount of padding to add to either side
                missing_p1 = q1
                missing_p2 = 1 - q2
                frac1 = missing_p1 / (missing_p2 + missing_p1)
                frac2 = missing_p2 / (missing_p2 + missing_p1)
                missing_extent = extrap_total_extent - inner_extent

                pad1 = missing_extent * frac1
                pad2 = missing_extent * frac2

                low_ = low_ - pad1
                high_ = high_ + pad2

                low = low_ if low is None else min(low_, low)
                high = high_ if high is None else max(high_, high)
            return (low, high)

        # Hack values that we don't apply smoothing to
        HACK_NO_SMOOTH = ['lr', 'momentum']

        def load_ydata(key):
            """
            Return the y values for ``key`` as an ndarray, smoothed unless
            one of the underscore-separated parts of the key is exempt.
            """
            ydata = tb_data[key]['ydata']
            if any(part in HACK_NO_SMOOTH for part in key.split('_')):
                return np.asarray(ydata, dtype=float)
            return smooth_curve(ydata, smoothing)

        def tag_grouper(k):
            # parts = ['train_epoch', 'vali_epoch', 'test_epoch']
            # parts = [p.replace('epoch', 'mode') for p in parts]
            parts = [p + mode for p in ['train_', 'vali_', 'test_']]
            for p in parts:
                if p in k:
                    return p.split('_')[0]
            return 'unknown'

        GROUP_LOSSES = True
        GROUP_AND_INDIVIDUAL = False
        INDIVIDUAL_PLOTS = True
        GROUP_SPECIAL = True

        if GROUP_LOSSES:
            # Group all losses in one plot for comparison
            loss_keys = [k for k in keys if 'loss' in k]
            tagged_losses = ub.group_items(loss_keys, tag_grouper)
            tagged_losses.pop('unknown', None)
            kw = {}
            kw['ymin'] = 0.0
            # print('tagged_losses = {!r}'.format(tagged_losses))
            for tag, losses in tagged_losses.items():

                min_abs_y = .01
                min_y = 0
                xydata = ub.odict()
                for key in sorted(losses):
                    ydata = load_ydata(key)

                    if ydata.size:
                        min_y = min(min_y, ydata.min())
                        pos_ys = ydata[ydata > 0]
                        if pos_ys.size:
                            # Smallest positive value bounds the linear
                            # region of the symlog axis
                            min_abs_y = min(min_abs_y, pos_ys.min())

                    xydata[key] = (tb_data[key]['xdata'], ydata)

                kw['ymin'] = min_y

                if ignore_outliers:
                    low, kw['ymax'] = inlier_ylim(
                        [t[1] for t in xydata.values()])

                yscales = ['symlog', 'linear']
                for yscale in yscales:
                    fig.clf()
                    ax = fig.gca()
                    title = nice + '\n' + tag + '_' + mode + ' losses'
                    kwplot.multi_plot(xydata=xydata, ylabel='loss',
                                      xlabel=mode, yscale=yscale, title=title,
                                      fnum=1, ax=ax, **kw)
                    if yscale == 'symlog':
                        ax.set_yscale('symlog', **{symlog_kwarg: min_abs_y})
                    fname = '_'.join([tag, mode, 'multiloss', yscale]) + '.png'
                    fpath = join(out_dpath, fname)
                    ax.figure.savefig(fpath)

            # don't dump losses individually if we dump them in a group
            if not GROUP_AND_INDIVIDUAL:
                keys.difference_update(set(loss_keys))
                # print('keys = {!r}'.format(keys))

        if GROUP_SPECIAL:
            tag_groups = ub.group_items(sorted(keys), tag_grouper)
            tag_groups.pop('unknown', None)
            # Group items matching these strings
            kw = {}
            for tag, tag_keys in tag_groups.items():
                for groupname in special_groupers:
                    group_keys = [
                        k for k in tag_keys if groupname in k.split('_')
                    ]
                    if len(group_keys) > 1:
                        # Gather data for this group
                        xydata = ub.odict()
                        for key in sorted(group_keys):
                            ydata = load_ydata(key)
                            xydata[key] = (tb_data[key]['xdata'], ydata)

                        if ignore_outliers:
                            low, kw['ymax'] = inlier_ylim(
                                [t[1] for t in xydata.values()])

                        # Only a linear scale is drawn for special groups
                        yscales = ['linear']
                        for yscale in yscales:
                            fig.clf()
                            ax = fig.gca()
                            title = (nice + '\n' + tag + '_' + mode + ' ' +
                                     groupname)
                            kwplot.multi_plot(xydata=xydata, ylabel=groupname,
                                              xlabel=mode, yscale=yscale,
                                              title=title, fnum=1, ax=ax,
                                              **kw)
                            fname = '_'.join([
                                tag, mode, 'group-' + groupname, yscale
                            ]) + '.png'
                            fpath = join(out_dpath, fname)
                            ax.figure.savefig(fpath)

                        if not GROUP_AND_INDIVIDUAL:
                            keys.difference_update(set(group_keys))

        if INDIVIDUAL_PLOTS:
            # print('keys = {!r}'.format(keys))
            # Sorted so files are written in a reproducible order
            for key in sorted(keys):
                d = tb_data[key]

                ydata = load_ydata(key)

                kw = {}
                if any(m.lower() in key.lower() for m in y01_measures):
                    kw['ymin'] = 0.0
                    kw['ymax'] = 1.0
                elif any(m.lower() in key.lower() for m in y0_measures):
                    kw['ymin'] = min(0.0, ydata.min()) if ydata.size else 0.0
                    if ignore_outliers:
                        low, kw['ymax'] = inlier_ylim([ydata])

                # NOTE: this is actually pretty slow
                fig.clf()
                ax = fig.gca()
                title = nice + '\n' + key
                kwplot.multi_plot(d['xdata'], ydata, ylabel=key, xlabel=mode,
                                  title=title, fnum=1, ax=ax, **kw)

                # png is slightly smaller than jpg for this kind of plot
                fpath = join(out_dpath, key + '.png')
                # print('save fpath = {!r}'.format(fpath))
                ax.figure.savefig(fpath)
def benchmark_mul_vs_pow():
    """
    Benchmark raising a value to an integer power via repeated
    multiplication versus the ``**`` operator.

    Each method in ``basis['method']`` is timed with ``timerit`` for every
    exponent ``n`` and value style ``v`` in the basis. The per-chunk minimum
    times are collected into a long-form data frame, printed, and drawn as a
    seaborn line plot with a log-scaled time axis.
    """
    import random
    import ubelt as ub
    import pandas as pd
    import timerit
    from functools import reduce
    import operator as op
    import itertools as it

    def method_pow_via_mul_raw(n):
        """
        Construct a function that does multiplication of a value n times
        """
        # The expression is built only from the literal 'v', never from
        # external input.
        return eval('lambda v: ' + ' * '.join(['v'] * n))

    def method_pow_via_mul_for(v, n):
        ret = v
        for _ in range(1, n):
            ret = ret * v
        return ret

    def method_pow_via_mul_reduce(v, n):
        """
        Alternative way to multiply a value n times
        """
        return reduce(op.mul, it.repeat(v, n))

    def method_pow_via_pow(v, n):
        return v ** n

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(500000, bestof=1000, verbose=2)

    basis = {
        'method': ['method_pow_via_mul_raw', 'method_pow_via_pow'],
        'n': list(range(1, 20)),
        'v': ['random-int', 'random-float'],
        # 'param_name': [param values],
    }
    xlabel = 'n'
    kw_labels = ['v', 'n']
    group_labels = {
        'style': ['v'],
        'size': [],
    }
    # Every remaining basis key (here: the method) is mapped to the hue
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) -
        set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # Maps a value style to a function that draws a fresh input value
    value_samplers = {
        'random': lambda: (random.randint(1, 31000)
                           if random.random() > 0.5 else random.random()),
        'random-int': lambda: random.randint(1, 31000),
        'random-float': random.random,
    }

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        if params['method'] == 'method_pow_via_mul_raw':
            # The raw variant bakes n into the generated lambda
            method = method(kwargs.pop('n'))

        # Look the style up from ``params``: ``kwargs['v']`` is overwritten
        # with a number below, so it cannot be used to select the sampler
        # after the first loop.
        sampler = value_samplers.get(params['v'])
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            if sampler is not None:
                kwargs['v'] = sampler()
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        for time in map(min, ub.chunks(ti.times, ti.bestof)):
            row = {
                # 'mean': ti.mean(),
                'time': time,
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    # data = data.sort_values('time')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='time', marker='o', ax=ax,
                     **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('N')
        ax.set_ylabel('Time')
        ax.set_yscale('log')
        plt.show()