def _load_all_scene_paths(task):
    """
    Collect image and groundtruth file paths for every (scene, key) pair.

    Organizes the paths into nested dicts keyed by scene then by
    preprocessing key, a layout suitable for scene-wise cross validation.

    Returns:
        Tuple[dict, dict]: ``(scene_im_paths, scene_gt_paths)`` where each
            maps ``scene -> key -> list of absolute .png paths``.
    """
    im_index = ub.AutoDict()
    gt_index = ub.AutoDict()
    # query the preprocessing keys once, not per scene
    prep_keys = task._preprocessing_keys()
    for scene in task.scene_ids:
        for key in prep_keys:
            im_dpath = task.datasubdir('im' + key, scene)
            gt_dpath = task.datasubdir('gt' + key, scene)
            im_index[scene][key] = [
                abspath(p)
                for p in imutil.load_image_paths(im_dpath, ext='.png')
            ]
            gt_index[scene][key] = [
                abspath(p)
                for p in imutil.load_image_paths(gt_dpath, ext='.png')
            ]
    # freeze the auto-vivifying dicts into plain dicts before returning
    return im_index.to_dict(), gt_index.to_dict()
def _all_scene_dpaths(task):
    """
    Enumerate the directories that will hold the train/test data.

    Returns:
        Tuple[ub.AutoDict, ub.AutoDict]: ``(scene_im_dpaths, scene_gt_dpaths)``
            each mapping ``scene -> key -> directory path``.
    """
    im_dirs = ub.AutoDict()
    gt_dirs = ub.AutoDict()
    # query the preprocessing keys once, not per scene
    prep_keys = task._preprocessing_keys()
    for scene in task.scene_ids:
        for key in prep_keys:
            im_dirs[scene][key] = task.datasubdir('im' + key, scene)
            gt_dirs[scene][key] = task.datasubdir('gt' + key, scene)
    return im_dirs, gt_dirs
def decollate_batch(batch):
    """
    Inverse of ``container_collate``: split a collated batch of
    BatchContainers back into a list of per-item dictionaries of
    ItemContainers.

    Example:
        >>> bsize = 5
        >>> batch_items = [
        >>>     {
        >>>         'im': ItemContainer.demo('img'),
        >>>         'label': ItemContainer.demo('labels'),
        >>>         'box': ItemContainer.demo('box'),
        >>>     }
        >>>     for _ in range(bsize)
        >>> ]
        >>> batch = container_collate(batch_items, num_devices=2)
        >>> decollated = decollate_batch(batch)
        >>> assert len(decollated) == len(batch_items)
        >>> assert (decollated[0]['im'].data == batch_items[0]['im'].data).all()
    """
    import ubelt as ub
    from kwcoco.util.util_json import IndexableWalker

    # Accumulate per-item values at [item_index] + original_path, so that
    # the top level of the result is indexed by batch position.
    accum = ub.AutoDict()
    accum_walker = IndexableWalker(accum)
    for path, value in IndexableWalker(batch):
        if isinstance(value, BatchContainer):
            # flatten the per-device nesting back into a single stream
            for index, item in enumerate(ub.flatten(value.data)):
                accum_walker[[index] + path] = ItemContainer(item)
    return list(accum.to_dict().values())
def bench_isinstance_vs_attr():
    """
    Time ``isinstance`` checks against ``getattr`` probes as ways of
    testing an object's capabilities, printing a per-instance table.

    NOTE(review): ``Base1``, ``Base2`` and ``Derived2`` are assumed to be
    defined elsewhere in this file -- they are not visible in this chunk.
    """
    instances = {
        'base1': Base1(),
        'base2': Base2(),
        'derived2': Derived2(),
    }

    import ubelt as ub
    # 100k loops, best-of-500 batches, reported in microseconds
    ti = ub.Timerit(100000, bestof=500, verbose=1, unit='us')

    # Do this twice, but keep the second measure
    data = ub.AutoDict()

    for selfname, self in instances.items():
        print(ub.color_text('--- SELF = {} ---'.format(selfname), 'blue'))
        # one result row per instance flavor; keyed by the timer label
        subdata = data[selfname] = {}

        for timer in ti.reset('isinstance(self, Base1)'):
            with timer:
                isinstance(self, Base1)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('isinstance(self, Base2)'):
            with timer:
                isinstance(self, Base2)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('isinstance(self, Derived2)'):
            with timer:
                isinstance(self, Derived2)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, "class_attr1", False)'):
            with timer:
                getattr(self, 'class_attr1', False)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, "attr1", False)'):
            with timer:
                getattr(self, 'attr1', False)
        subdata[ti.label] = ti.min()

    try:
        import pandas as pd
        # convert best-of times to nanoseconds for display
        df = pd.DataFrame(data) * 1e9
        try:
            from kwil.util.util_pandas import _to_string_monkey
            print(_to_string_monkey(df, key='minima'))
        except Exception:
            # kwil is optional; fall back to the plain pandas repr
            print(df)
    except ImportError:
        print('no pandas')

    print(ub.repr2(data, nl=2, precision=4))
def test_auto_dict():
    """An AutoDict should create nested dicts on access (auto-vivify)."""
    d = ub.AutoDict()
    assert 0 not in d
    # deep assignment implicitly creates the intermediate levels
    d[0][10][100] = None
    assert 0 in d
    assert isinstance(d[0], ub.AutoDict)
def benchmark_hash_data():
    """
    Benchmark ``ub.hash_data`` across several hashers and input sizes,
    printing per-size walltimes and speedup ratios, plus an optional plot.

    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Use json is faster or at least as fast it most cases
    # xxhash is also significantly faster than sha512
    # BUGFIX: compare against the lowercased literal 'true'. The original
    # compared `.lower()` output to 'True' (capital T), which can never
    # match, so convert was always False regardless of the flag.
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2**s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        # rank the hashers for this size, fastest first
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    # empty frame with df's index, filled with pairwise ratio columns
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds',
                          title='convert = {}'.format(convert))
        kwplot.show_if_requested()
def benchmark_attribute_access():
    """
    How fast are different methods of accessing attributes? Lets find out!

    NOTE(review): ``Simple``, ``Complex``, ``SimpleWithSlots`` and
    ``ComplexWithSlots`` are assumed to be defined elsewhere in this file;
    the names suggest ``attr1`` exists and ``attr2`` does not, so the
    try/except cases below time both the hit and miss paths -- confirm.
    """
    instances = {
        'simple': Simple(),
        'complex': Complex(),
        'slot_simple': SimpleWithSlots(),
        'slot_complex': ComplexWithSlots(),
    }

    import ubelt as ub
    # 100k loops, best-of-500 batches, reported in microseconds
    ti = ub.Timerit(100000, bestof=500, verbose=1, unit='us')

    # Do this twice, but keep the second measure
    data = ub.AutoDict()

    for selfname, self in instances.items():
        print(ub.color_text('--- SELF = {} ---'.format(selfname), 'blue'))
        # one result row per instance flavor; keyed by the timer label
        subdata = data[selfname] = {}

        for timer in ti.reset('self.attr1'):
            with timer:
                self.attr1
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, attr1)'):
            with timer:
                getattr(self, 'attr1')
        subdata[ti.label] = ti.min()

        attrs = ['attr1', 'attr2']
        for attrname in attrs:
            for timer in ti.reset('hasattr(self, {})'.format(attrname)):
                with timer:
                    hasattr(self, attrname)
            subdata[ti.label] = ti.min()

            for timer in ti.reset('getattr(self, {}, None)'.format(attrname)):
                with timer:
                    getattr(self, attrname, None)
            subdata[ti.label] = ti.min()

            # __slots__ classes have no instance __dict__, so skip them here
            if 'slot' not in selfname.lower():
                for timer in ti.reset(
                        'self.__dict__.get({}, None)'.format(attrname)):
                    with timer:
                        self.__dict__.get(attrname, None)
                subdata[ti.label] = ti.min()

        for timer in ti.reset('try/except: self.attr2'):
            with timer:
                try:
                    x = self.attr2
                except AttributeError:
                    x = None
        subdata[ti.label] = ti.min()

        for timer in ti.reset('try/except: self.attr1'):
            with timer:
                try:
                    x = self.attr1
                except AttributeError:
                    x = None
        subdata[ti.label] = ti.min()
        del x

    try:
        import pandas as pd
        # convert best-of times to nanoseconds for display
        df = pd.DataFrame(data) * 1e9
        try:
            from kwil.util.util_pandas import _to_string_monkey
            print(_to_string_monkey(df, key='minima'))
        except Exception:
            # kwil is optional; fall back to the plain pandas repr
            print(df)
    except ImportError:
        print('no pandas')

    print(ub.repr2(data, nl=2, precision=4))
def benchmark_hash_file():
    """
    Benchmark ``ub.hash_file`` across several hashers and file sizes,
    printing per-size walltimes and speedup ratios, plus an optional plot.

    NOTE(review): ``_random_data`` and ``_write_random_file`` are helpers
    assumed to be defined elsewhere in this file. Writes scratch files
    under ``$HOME/tmp``.

    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --show
        python ~/code/ubelt/dev/bench_hash.py --show
    """
    import ubelt as ub
    import random
    # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
    dpath = ub.ensuredir(ub.expandpath('$HOME/tmp'))

    # seeded RNG so the benchmark input is reproducible
    rng = random.Random(0)
    # Create a pool of random chunks of data
    chunksize = int(2 ** 20)
    pool_size = 8
    part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)]

    #ITEM = 'JUST A STRING' * 100
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 10))
    import os

    results = ub.AutoDict()
    # Use json is faster or at least as fast it most cases
    # xxhash is also significantly faster than sha512
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2 ** s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        # Write a big file
        size_pool = [N]
        fpath = _write_random_file(dpath, part_pool, size_pool, rng)
        megabytes = os.stat(fpath).st_size / (2 ** 20)
        print('megabytes = {!r}'.format(megabytes))
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_file(fpath, hasher=hasher)
            results[hasher].update({N: ti.mean()})
        # rank the hashers for this size, fastest first
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    # empty frame with df's index, filled with pairwise ratio columns
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh64'), ('sha1', 'xxh64'),
                   ('xxh32', 'xxh64'), ('blake3', 'xxh64')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds')
        kwplot.show_if_requested()
def 灵活字典():
    """Return a new auto-vivifying dictionary (``ub.AutoDict``)."""
    # NOTE(review): the identifier is non-ASCII ("flexible dict"); it is
    # part of the public interface, so it is kept unchanged.
    flexible = ub.AutoDict()
    return flexible
def build_pyproject(self):
    """
    Construct the contents of a ``pyproject.toml`` for this repo.

    Reads ``self.config['tags']`` and ``self.repo_name``.

    Returns:
        str: the serialized TOML text.

    NOTE(review): the original indentation was lost; the nesting below
    (cibuildwheel config living inside the ``binpy`` branch) is the most
    plausible reconstruction -- confirm against upstream history.
    """
    # data = toml.loads((self.template_dpath / 'pyproject.toml').read_text())
    # print('data = {}'.format(ub.repr2(data, nl=5)))
    pyproj_config = ub.AutoDict()  # {'tool': {}}
    if 'binpy' in self.config['tags']:
        # Binary wheels: build via scikit-build and cibuildwheel
        pyproj_config['build-system']['requires'] = [
            "setuptools>=41.0.1",
            # setuptools_scm[toml]
            "wheel",
            "scikit-build>=0.9.0",
            "numpy",
            "ninja"
        ]
        pyproj_config['tool']['cibuildwheel'].update({
            'build': "cp37-* cp38-* cp39-* cp310-*",
            'build-frontend': "build",
            'skip': "pp* cp27-* cp34-* cp35-* cp36-* *-musllinux_*",
            'build-verbosity': 1,
            'test-requires': ["-r requirements/tests.txt"],
            'test-command': "python {project}/run_tests.py"
        })
        if True:
            cibw = pyproj_config['tool']['cibuildwheel']
            # per-platform shell commands to install the native lz4 dep
            req_commands = {
                'linux': [
                    'yum install epel-release lz4 lz4-devel -y',
                ],
                'windows': [
                    'choco install lz4 -y',
                ],
                'macos': [
                    'brew install lz4',
                ]
            }
            for plat in req_commands.keys():
                cmd = ' && '.join(req_commands[plat])
                cibw[plat]['before-all'] = cmd

    WITH_PYTEST_INI = 1
    if WITH_PYTEST_INI:
        # route doctests through xdoctest instead of the builtin plugin
        pytest_ini_opts = pyproj_config['tool']['pytest']['ini_options']
        pytest_ini_opts[
            'addopts'] = "-p no:doctest --xdoctest --xdoctest-style=google --ignore-glob=setup.py"
        pytest_ini_opts[
            'norecursedirs'] = ".git ignore build __pycache__ dev _skbuild"
        pytest_ini_opts['filterwarnings'] = [
            "default",
            "ignore:.*No cfgstr given in Cacher constructor or call.*:Warning",
            "ignore:.*Define the __nice__ method for.*:Warning",
            "ignore:.*private pytest class or function.*:Warning",
        ]

    WITH_COVERAGE = 1
    if WITH_COVERAGE:
        # merge a literal coverage config (parsed from TOML text) into the
        # tool table; {REPO_NAME} is substituted via str.format below
        pyproj_config['tool']['coverage'].update(
            toml.loads(
                ub.codeblock('''
                [run]
                branch = true

                [report]
                exclude_lines =[
                    "pragma: no cover",
                    ".* # pragma: no cover",
                    ".* # nocover",
                    "def __repr__",
                    "raise AssertionError",
                    "raise NotImplementedError",
                    "if 0:",
                    "if trace is not None",
                    "verbose = .*",
                    "^ *raise",
                    "^ *pass *$",
                    "if _debug:",
                    "if __name__ == .__main__.:",
                    ".*if six.PY2:"
                ]
                omit=[
                    "{REPO_NAME}/__main__.py",
                    "*/setup.py"
                ]
                ''').format(REPO_NAME=self.repo_name)))

    text = toml.dumps(pyproj_config)
    return text