def main(cls, cmdline=True, **kw):
    """
    Example:
        >>> # xdoctest: +SKIP
        >>> kw = {'src': 'special:shapes8'}
        >>> cmdline = False
        >>> cls = CocoConformCLI
        >>> cls.main(cmdline, **kw)
    """
    import kwcoco
    config = cls.CLIConfig(kw, cmdline=cmdline)
    print('config = {}'.format(ub.repr2(dict(config), nl=1)))

    if config['src'] is None:
        raise Exception('must specify source: {}'.format(config['src']))
    if config['dst'] is None:
        raise Exception('must specify dest: {}'.format(config['dst']))

    dset = kwcoco.CocoDataset.coerce(config['src'])

    config_ = ub.dict_diff(config, {'src', 'dst'})
    dset.conform(**config_)

    dset.fpath = config['dst']
    print('dump dset.fpath = {!r}'.format(dset.fpath))
    dset.dump(dset.fpath, newlines=True)
def main(cls, cmdline=True, **kw):
    """
    Example:
        >>> from kwcoco.cli.coco_validate import *  # NOQA
        >>> kw = {'src': 'special:shapes8'}
        >>> cmdline = False
        >>> cls = CocoValidateCLI
        >>> cls.main(cmdline, **kw)
    """
    import kwcoco
    config = cls.CLIConfig(kw, cmdline=cmdline)
    print('config = {}'.format(ub.repr2(dict(config), nl=1)))

    if config['src'] is None:
        raise Exception('must specify source: {}'.format(config['src']))

    if isinstance(config['src'], str):
        fpaths = [config['src']]
    else:
        fpaths = config['src']

    if config['dst']:
        if len(fpaths) != 1:
            raise Exception('can only specify 1 dataset in fix mode')

    fix_strat = set()
    if config['fix'] is not None:
        fix_strat = {c.lower() for c in config['fix'].split('+')}

    for fpath in ub.ProgIter(fpaths, desc='reading datasets', verbose=1):
        print('reading fpath = {!r}'.format(fpath))
        dset = kwcoco.CocoDataset.coerce(fpath)

        config_ = ub.dict_diff(config, {'src', 'dst', 'fix'})
        result = dset.validate(**config_)

        if 'missing' in result:
            if 'remove' in fix_strat:
                missing = result['missing']
                bad_gids = [t[2] for t in missing]
                status = dset.remove_images(bad_gids, verbose=1)
                print('status = {}'.format(ub.repr2(status, nl=1)))

        if 'corrupted' in result:
            if 'remove' in fix_strat:
                corrupted = result['corrupted']
                bad_gids = [t[2] for t in corrupted]
                status = dset.remove_images(bad_gids, verbose=1)
                print('status = {}'.format(ub.repr2(status, nl=1)))

        if config['dst']:
            if len(fpaths) != 1:
                raise Exception('can only specify 1 dataset in fix mode')
            dset.dump(config['dst'], newlines=True)

        errors = result['errors']
        if errors:
            print('result = {}'.format(ub.repr2(result, nl=-1)))
            raise Exception('\n'.join(errors))
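# A minimal usage sketch for the validator above, assuming main() is exposed
# as a classmethod on kwcoco.cli.coco_validate.CocoValidateCLI (as the doctest
# suggests). The 'special:shapes8' source and the 'fix' key are taken from the
# code above; any other config keys follow whatever CLIConfig defines.
def _example_validate_and_fix():
    from kwcoco.cli.coco_validate import CocoValidateCLI
    kw = {
        'src': 'special:shapes8',  # demo dataset, as in the doctest above
        'fix': 'remove',           # maps into the fix_strat set in main()
    }
    CocoValidateCLI.main(cmdline=False, **kw)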
def populate_from(self, dset):
    from sqlalchemy import inspect
    session = self.session
    inspector = inspect(self.engine)
    for key in self.engine.table_names():
        colinfo = inspector.get_columns(key)
        colnames = {c['name'] for c in colinfo}
        # TODO: is there a better way to grab this information?
        cls = TBLNAME_TO_CLASS[key]
        for item in dset.dataset.get(key, []):
            item_ = ub.dict_isect(item, colnames)
            # Everything else is foreign (non-column) data. Attach it to the
            # row kwargs so it is passed to the constructor (assuming the
            # table defines a 'foreign' column).
            item_['foreign'] = ub.dict_diff(item, item_)
            if key == 'annotations':
                # Need custom code to translate list-based properties
                x, y, w, h = item['bbox']
                item_['bbox_x'] = x
                item_['bbox_y'] = y
                item_['bbox_w'] = w
                item_['bbox_h'] = h
            row = cls(**item_)
            session.add(row)
    session.commit()
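# A small standalone sketch of the column-splitting pattern used above:
# ub.dict_isect keeps the keys that map to real table columns, and
# ub.dict_diff collects whatever is left over. The column names and the
# example row here are made up for illustration.
def _example_split_row():
    import ubelt as ub
    colnames = {'id', 'image_id', 'category_id', 'bbox_x', 'bbox_y'}
    item = {'id': 1, 'image_id': 2, 'category_id': 3,
            'segmentation': [[0, 0, 1, 1]]}
    item_ = ub.dict_isect(item, colnames)   # known columns
    foreign = ub.dict_diff(item, item_)     # everything else
    assert item_ == {'id': 1, 'image_id': 2, 'category_id': 3}
    assert foreign == {'segmentation': [[0, 0, 1, 1]]}
    return item_, foreign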
def main():
    # TODO: progressive hashing data structure
    inv1 = Inventory('/media/joncrall/raid/', blocklist)
    inv2 = Inventory('/media/joncrall/media', blocklist)

    # inv1 = Inventory('/media/joncrall/raid/Applications/NotGames', blocklist)
    # inv2 = Inventory('/media/joncrall/media/Applications/NotGames', blocklist)

    # inv1 = Inventory('/media/joncrall/raid/Applications', blocklist)
    # inv2 = Inventory('/media/joncrall/media/Applications', blocklist)

    self = inv1  # NOQA
    inv1.build()
    inv2.build()

    thresh = {
        'frac': 0.5,
        'byte': 100 * int(2 ** 20)  # only use the first few mb to determine overlap
    }
    verbose = 1
    pfiles1 = inv1.pfiles
    pfiles2 = inv2.pfiles
    overlap, only1, only2 = ProgressiveFile.likely_overlaps(
        pfiles1, pfiles2, thresh=thresh, verbose=verbose)

    stats = {
        'overlap': len(overlap),
        'only1': len(only1),
        'only2': len(only2),
    }
    print('stats = {}'.format(ub.repr2(stats, nl=1)))
    only2_list = sorted([p.fpath for group in only2.values() for p in group])
    print('only2_list = {}'.format(ub.repr2(only2_list, nl=1)))
    print('stats = {}'.format(ub.repr2(stats, nl=1)))

    # for pfile in inv1.pfiles:
    #     pfile._check_integrity()

    import numpy as np
    mb_read = np.array([
        pfile._parts[-1][1] / int(2 ** 20)
        for pfile in ub.ProgIter(inv2.pfiles)
    ])
    mb_read.max()
    mb_read.min()

    # Build all hashes up to a reasonable degree
    inv1.build_hashes(max_workers=0)

    maybe_dups = inv1.likely_duplicates(thresh=0.2)
    len(maybe_dups)

    maybe_dups = ub.sorted_keys(maybe_dups, key=lambda x: x[2])

    import networkx as nx
    import itertools as it
    # Check which directories are most likely to be duplicates
    graph = nx.Graph()
    for key, group in ub.ProgIter(maybe_dups.items(), total=len(maybe_dups),
                                  desc='build dup dir graph'):
        if key[0] == '':
            continue
        dpaths = [dirname(pfile.fpath) for pfile in group]
        for d1, d2 in it.combinations(dpaths, 2):
            graph.add_edge(d1, d2)
            edge = graph.edges[(d1, d2)]
            if 'dups' not in edge:
                edge['dups'] = 0
            edge['dups'] += 1

    edge_data = list(graph.edges(data=True))

    for dpath in ub.ProgIter(graph.nodes, desc='find lens'):
        num_children = len(os.listdir(dpath))
        graph.nodes[dpath]['num_children'] = num_children

    for d1, d2, dat in edge_data:
        nc1 = graph.nodes[d1]['num_children']
        nc2 = graph.nodes[d2]['num_children']
        ndups = dat['dups']
        dup_score = (dat['dups'] / min(nc1, nc2))
        dat['dup_score'] = dup_score
        if dup_score > 0.9:
            print('dup_score = {!r}'.format(dup_score))
            print('d1 = {!r}'.format(d1))
            print('d2 = {!r}'.format(d2))
            print('nc1 = {!r}'.format(nc1))
            print('nc2 = {!r}'.format(nc2))
            print('ndups = {!r}'.format(ndups))

    print('edge_data = {}'.format(ub.repr2(edge_data, nl=2)))

    print('maybe_dups = {}'.format(ub.repr2(maybe_dups.keys(), nl=3)))
    for key, group in maybe_dups.items():
        if key[0] == '':
            continue
        print('key = {!r}'.format(key))
        print('group = {}'.format(ub.repr2(group, nl=1)))
        for pfile in group:
            pfile.refined_to(float('inf'))
        print('key = {!r}'.format(key))

    inv2.build_hashes(max_workers=6, mode='thread')

    inv1.pfiles = [
        p for p in ub.ProgIter(inv1.pfiles, desc='exist check')
        if exists(p.fpath)
    ]
    inv2.pfiles = [
        p for p in ub.ProgIter(inv2.pfiles, desc='exist check')
        if exists(p.fpath)
    ]

    pfiles1 = inv1.pfiles
    pfiles2 = inv2.pfiles

    def compute_likely_overlaps(pfiles1, pfiles2):
        step_idx1 = ProgressiveFile.compatible_step_idx(pfiles1)
        step_idx2 = ProgressiveFile.compatible_step_idx(pfiles2)
        step_idx = min(step_idx1, step_idx2)
        grouped1 = ProgressiveFile.group_pfiles(pfiles1, step_idx=step_idx)
        grouped2 = ProgressiveFile.group_pfiles(pfiles2, step_idx=step_idx)

        thresh = 0.2
        verbose = 1

        # TODO: it would be nice if we didn't have to care about internal
        # deduplication when we attempt to find cross-set overlaps
        dups1 = ProgressiveFile.likely_duplicates(inv1.pfiles, thresh=thresh, verbose=verbose)
        dups2 = ProgressiveFile.likely_duplicates(inv2.pfiles, thresh=thresh, verbose=verbose)

        pfiles = inv1.pfiles + inv2.pfiles
        dups3 = ProgressiveFile.likely_duplicates(pfiles, thresh=thresh, verbose=verbose)

        only_on_inv2 = {}
        for key, group in dups3.items():
            if not any(item.fpath.startswith(inv1.root_fpath) for item in group):
                only_on_inv2[key] = group

        for p1 in inv1.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p1.fpath:
                break

        for p2 in inv2.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p2.fpath:
                break

        look = list(ub.flatten(only_on_inv2.values()))
        takealook = sorted([p.fpath for p in look])
        print('takealook = {}'.format(ub.repr2(takealook, nl=1)))

        keys1 = set(grouped1)
        keys2 = set(grouped2)

        missing_keys2 = keys2 - keys1
        missing_groups2 = ub.dict_subset(grouped2, missing_keys2)

        missing_fpaths2 = []
        for key, values in missing_groups2.items():
            print('key = {!r}'.format(key))
            print('values = {}'.format(ub.repr2(values, nl=1)))
            missing_fpaths2.extend(values)

        missing_fpaths2 = sorted([p.fpath for p in missing_fpaths2])
        print('missing_fpaths2 = {}'.format(ub.repr2(missing_fpaths2, nl=1)))
        # pass

        import xdev
        set_overlaps = xdev.set_overlaps(keys1, keys2)
        print('set_overlaps = {}'.format(ub.repr2(set_overlaps, nl=1)))

    # We want to know what files in set2 do not exist in set1

    if 0:
        fpath = inv1.all_fpaths[0]
        pfile = ProgressiveFile(fpath)

        fpath1 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Transfer/Zebras/DownloadedLibraries/lightspeed/solve_triu.m'
        fpath2 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Zebras/downloaded_libraries/lightspeed/solve_triu.m'

        fpath1 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Falco/DarkFalco02.pcs'
        fpath2 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Ivysaur/Kraid-v2-Ivy.pcs'

        pfile = pfile1 = ProgressiveFile(fpath1)
        pfile2 = ProgressiveFile(fpath2)

        pfile.maybe_equal(pfile2, thresh=0.1)

    fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500]
    # fpaths = hash_groups1_dup['ef46db3751d8e999']
    pfiles_demodata = [ProgressiveFile(f) for f in fpath_demodata]

    def progressive_duplicates(pfiles, idx=1):
        step_ids = [pfile.refined_to(idx) for pfile in ub.ProgIter(pfiles)]
        final_groups = {}
        grouped = ub.group_items(pfiles, step_ids)
        for key, group in grouped.items():
            if len(group) > 1:
                if all(not g.can_refine for g in group):
                    # Group is ~100% a real duplicate
                    final_groups[key] = group
                else:
                    pfiles = group
                    deduped = progressive_duplicates(pfiles, idx=idx + 1)
                    final_groups.update(deduped)
            else:
                final_groups[key] = group
        return final_groups

    pfiles = pfiles_demodata
    final_groups = progressive_duplicates(pfiles)

    for key, group in final_groups.items():
        if len(group) > 1:
            print('key = {!r}'.format(key))
            print('group = {}'.format(ub.repr2(group, nl=1)))

    inv1.build_hashes()
    inv2.build_hashes()

    hash_groups1 = ub.group_items(inv1.all_fpaths, inv1.all_hashes)
    hash_groups2 = ub.group_items(inv2.all_fpaths, inv2.all_hashes)

    hash_groups1_dup = {k: v for k, v in hash_groups1.items() if len(v) > 1}
    hash_groups2_dup = {k: v for k, v in hash_groups2.items() if len(v) > 1}
    len(hash_groups1_dup)
    len(hash_groups2_dup)

    # common = set(hash_groups1) & set(hash_groups2)
    # xdev.set_overlaps(hash_groups1, hash_groups2)

    fnames1 = ub.group_items(inv1.all_fpaths, key=basename)
    fnames2 = ub.group_items(inv2.all_fpaths, key=basename)

    missing = ub.dict_diff(fnames2, fnames1)
    sorted(ub.flatten(missing.values()))
    len(missing)

    fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500]

    def internal_deduplicate(self):
        hash_groups = ub.group_items(self.all_fpaths, self.all_hashes)
        hash_groups_dup = {k: v for k, v in hash_groups.items() if len(v) > 1}

        from os.path import dirname
        hash_groups_dup['ef46db3751d8e999']
        for key, values in hash_groups_dup.items():
            for v in values:
                if v.endswith('.avi'):
                    break
            [basename(v) for v in values]
            [dirname(v) for v in values]
def 字典_差集(*args):
    # "dict difference" (字典_差集): a thin wrapper around ub.dict_diff
    # Usage: 字典_差集({'a': 1, 'b': 1}, {'a'}, {'c'})
    data = ub.dict_diff(*args)
    return data
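# For reference, a quick sketch of what ub.dict_diff returns for the wrapper
# above: a new dict with the items of the first argument whose keys do not
# appear in any of the later mappings or sets (insertion order preserved).
def _example_dict_diff():
    import ubelt as ub
    result = ub.dict_diff({'a': 1, 'b': 1}, {'a'}, {'c'})
    assert result == {'b': 1}
    return result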
def warp_image_test(image, transform, dsize=None):
    """
    Ad-hoc benchmark comparing antialiased warp strategies. Example setup:

        from kwimage.transform import Affine
        import kwimage
        image = kwimage.grab_test_image('checkerboard', dsize=(2048, 2048)).astype(np.float32)
        image = kwimage.grab_test_image('astro', dsize=(2048, 2048))
        transform = Affine.random() @ Affine.scale(0.01)
    """
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import ubelt as ub

    # Choose a random affine transform that probably has a small scale
    # transform = Affine.random() @ Affine.scale((0.3, 2))
    # transform = Affine.scale((0.1, 1.2))
    # transform = Affine.scale(0.05)
    transform = Affine.random() @ Affine.scale(0.01)
    # transform = Affine.random()

    image = kwimage.grab_test_image('astro')
    image = kwimage.grab_test_image('checkerboard')
    image = kwimage.ensure_float01(image)

    from kwimage import im_cv2
    import kwarray
    import cv2
    transform = Affine.coerce(transform)

    if 1 or dsize is None:
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    import timerit
    ti = timerit.Timerit(10, bestof=3, verbose=2)

    def _full_gauss_kernel(k0, sigma0, scale):
        num_downscales = np.log2(1 / scale)
        if num_downscales < 0:
            return 1, 0
        # Define b0 = kernel size for one downsample operation
        b0 = 5
        # Define sigma0 = sigma for one downsample operation
        sigma0 = 1
        # The kernel size and sigma doubles for each 2x downsample
        k = int(np.ceil(b0 * (2 ** (num_downscales - 1))))
        sigma = sigma0 * (2 ** (num_downscales - 1))
        if k % 2 == 0:
            k += 1
        return k, sigma

    def pyrDownK(a, k=1):
        assert k >= 0
        for _ in range(k):
            a = cv2.pyrDown(a)
        return a

    for timer in ti.reset('naive'):
        with timer:
            interpolation = 'nearest'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v5 = cv2.warpAffine(image, transform.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 1
    #
    for timer in ti.reset('resize+warp'):
        with timer:
            params = transform.decompose()
            sx, sy = params['scale']

            noscale_params = ub.dict_diff(params, {'scale'})
            noscale_warp = Affine.affine(**noscale_params)

            h, w = image.shape[0:2]
            resize_dsize = (int(np.ceil(sx * w)), int(np.ceil(sy * h)))
            downsampled = cv2.resize(image, dsize=resize_dsize, fx=sx, fy=sy,
                                     interpolation=cv2.INTER_AREA)

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v1 = cv2.warpAffine(downsampled, noscale_warp.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 2
    for timer in ti.reset('fullblur+warp'):
        with timer:
            k_x, sigma_x = _full_gauss_kernel(k0=5, sigma0=1, scale=sx)
            k_y, sigma_y = _full_gauss_kernel(k0=5, sigma0=1, scale=sy)
            image_ = image.copy()
            image_ = cv2.GaussianBlur(image_, (k_x, k_y), sigma_x, sigma_y)
            image_ = kwarray.atleast_nd(image_, 3)
            # image_ = image_.clip(0, 1)

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v2 = cv2.warpAffine(image_, transform.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 3
    for timer in ti.reset('pyrDown+blur+warp'):
        with timer:
            temp = image.copy()

            params = transform.decompose()
            sx, sy = params['scale']
            biggest_scale = max(sx, sy)
            # The -2 allows the gaussian to be a little bigger. This
            # seems to help with border effects at only a small runtime cost
            num_downscales = max(int(np.log2(1 / biggest_scale)) - 2, 0)
            pyr_scale = 1 / (2 ** num_downscales)

            # Does the gaussian downsampling
            temp = pyrDownK(image, num_downscales)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            partial_scale = Affine.scale((rest_sx, rest_sy))
            rest_warp = noscale_warp @ partial_scale

            k_x, sigma_x = _full_gauss_kernel(k0=5, sigma0=1, scale=rest_sx)
            k_y, sigma_y = _full_gauss_kernel(k0=5, sigma0=1, scale=rest_sy)
            temp = cv2.GaussianBlur(temp, (k_x, k_y), sigma_x, sigma_y)
            temp = kwarray.atleast_nd(temp, 3)

            interpolation = 'cubic'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v3 = cv2.warpAffine(temp, rest_warp.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 4 - don't do the final blur
    for timer in ti.reset('pyrDown+warp'):
        with timer:
            temp = image.copy()

            params = transform.decompose()
            sx, sy = params['scale']
            biggest_scale = max(sx, sy)
            num_downscales = max(int(np.log2(1 / biggest_scale)), 0)
            pyr_scale = 1 / (2 ** num_downscales)

            # Does the gaussian downsampling
            temp = pyrDownK(image, num_downscales)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            partial_scale = Affine.scale((rest_sx, rest_sy))
            rest_warp = noscale_warp @ partial_scale

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v4 = cv2.warpAffine(temp, rest_warp.matrix[0:2], dsize=dsize, flags=flags)

    if 1:
        def get_title(key):
            from ubelt.timerit import _choose_unit
            value = ti.measures['mean'][key]
            suffix, mag = _choose_unit(value)
            unit_val = value / mag
            return key + ' ' + ub.repr2(unit_val, precision=2) + ' ' + suffix

        final_v2 = final_v2.clip(0, 1)
        final_v1 = final_v1.clip(0, 1)
        final_v3 = final_v3.clip(0, 1)
        final_v4 = final_v4.clip(0, 1)
        final_v5 = final_v5.clip(0, 1)
        import kwplot
        kwplot.autompl()
        kwplot.imshow(final_v5, pnum=(1, 5, 1), title=get_title('naive'))
        kwplot.imshow(final_v2, pnum=(1, 5, 2), title=get_title('fullblur+warp'))
        kwplot.imshow(final_v1, pnum=(1, 5, 3), title=get_title('resize+warp'))
        kwplot.imshow(final_v3, pnum=(1, 5, 4), title=get_title('pyrDown+blur+warp'))
        kwplot.imshow(final_v4, pnum=(1, 5, 5), title=get_title('pyrDown+warp'))
def warp_affine(image, transform, dsize=None, antialias=True,
                interpolation='linear'):
    """
    Applies an affine transformation to an image with optional antialiasing.

    Args:
        image (ndarray): the input image

        transform (ndarray | Affine): a coercable affine matrix

        dsize (Tuple[int, int] | None | str):
            width and height of the resulting image. If "auto", it is computed
            such that the positive coordinates of the warped image will fit in
            the new canvas. If None, then the image size will not change.

        antialias (bool, default=True):
            if True, checks whether the transform is downsampling and, if so,
            applies antialiasing via a gaussian blur.

    TODO:
        - [ ] This will be moved to kwimage.im_cv2

    Example:
        >>> import kwimage
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale(0.05)
        >>> transform = Affine.scale(0.02)
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1, interpolation='nearest')
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()

    Example:
        >>> import kwimage
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale((.1, 1.2))
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1)
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()
    """
    from kwimage import im_cv2
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import cv2
    import ubelt as ub
    transform = Affine.coerce(transform)
    flags = im_cv2._coerce_interpolation(interpolation)

    # TODO: expose these params
    # borderMode = cv2.BORDER_DEFAULT
    # borderMode = cv2.BORDER_CONSTANT
    borderMode = None
    borderValue = None

    """
    Variations that could change in the future:

        * In _gauss_params I'm not sure if we want to compute integer or
          fractional "number of downsamples".

        * The fudge factor bothers me, but seems necessary
    """

    def _gauss_params(scale, k0=5, sigma0=1, fractional=True):
        # Compute a gaussian to mitigate aliasing for a requested downsample
        # Args:
        #     scale: requested downsample factor
        #     k0 (int): kernel size for one downsample operation
        #     sigma0 (float): sigma for one downsample operation
        #     fractional (bool): controls if we compute params for integer downsample ops
        num_downs = np.log2(1 / scale)
        if not fractional:
            num_downs = max(int(num_downs), 0)
        if num_downs <= 0:
            k = 1
            sigma = 0
        else:
            # The kernel size and sigma doubles for each 2x downsample
            sigma = sigma0 * (2 ** (num_downs - 1))
            k = int(np.ceil(k0 * (2 ** (num_downs - 1))))
            k = k + int(k % 2 == 0)
        return k, sigma

    def _pyrDownK(a, k=1):
        # Downsamples by (2 ** k)x with antialiasing
        if k == 0:
            a = a.copy()
        for _ in range(k):
            a = cv2.pyrDown(a)
        return a

    if dsize is None:
        dsize = tuple(image.shape[0:2][::-1])
    elif dsize == 'auto':
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    if not antialias:
        M = np.asarray(transform)
        result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                borderMode=borderMode, borderValue=borderValue)
    else:
        # Decompose the affine matrix into its 6 core parameters
        params = transform.decompose()
        sx, sy = params['scale']

        if sx >= 1 and sy > 1:
            # No downsampling detected, no need to antialias
            M = np.asarray(transform)
            result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                    borderMode=borderMode, borderValue=borderValue)
        else:
            # At least one dimension is downsampled

            # Compute the transform with all scaling removed
            noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))

            max_scale = max(sx, sy)
            # The "fudge" factor limits the number of downsampled pyramid
            # operations. A bigger fudge factor means that the final gaussian
            # kernel for the antialiasing operation will be bigger. It
            # essentially says that at most "fudge" downsampling ops will be
            # handled by the final blur rather than the pyramid downsample.
            # It seems to help with border effects at only a small runtime
            # cost. I don't entirely understand why the border artifact is
            # introduced when this is enabled though.
            # TODO: should we allow for this fudge factor?
            # TODO: what is the real name of this? num_down_prevent?
            # skip_final_downs?
            fudge = 2
            # TODO: should final antialiasing be on?
            # Note, if fudge is non-zero it is important to do this.
            do_final_aa = 1
            # TODO: should fractional be True or False by default?
            # If fudge is 0 and fractional=0, then I think it is the same as
            # do_final_aa=0.
            fractional = 0

            num_downs = max(int(np.log2(1 / max_scale)) - fudge, 0)
            pyr_scale = 1 / (2 ** num_downs)

            # Downsample iteratively with antialiasing
            downscaled = _pyrDownK(image, num_downs)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            # Compute the transform from the downsampled image to the destination
            rest_warp = noscale_warp @ Affine.scale((rest_sx, rest_sy))

            # Do a final small blur to account for the potential aliasing
            # in any remaining scaling operations.
            if do_final_aa:
                # Computed as the closest sigma to the [1, 4, 6, 4, 1] approx
                # used in cv2.pyrDown
                aa_sigma0 = 1.0565137190917149
                aa_k0 = 5
                k_x, sigma_x = _gauss_params(scale=rest_sx, k0=aa_k0,
                                             sigma0=aa_sigma0,
                                             fractional=fractional)
                k_y, sigma_y = _gauss_params(scale=rest_sy, k0=aa_k0,
                                             sigma0=aa_sigma0,
                                             fractional=fractional)

                # Note: when k=1, no blur occurs
                # blurBorderType = cv2.BORDER_REPLICATE
                # blurBorderType = cv2.BORDER_CONSTANT
                blurBorderType = cv2.BORDER_DEFAULT
                downscaled = cv2.GaussianBlur(
                    downscaled, (k_x, k_y), sigma_x, sigma_y,
                    borderType=blurBorderType
                )

            result = cv2.warpAffine(downscaled, rest_warp.matrix[0:2],
                                    dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
    return result
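# A standalone re-derivation of the _gauss_params rule used above, for
# intuition only (the actual helper is the nested function inside
# warp_affine). For a downsample by 8x (scale = 1/8) with k0=5, sigma0=1:
#   num_downs = log2(8) = 3
#   sigma     = sigma0 * 2 ** (3 - 1) = 4.0
#   k         = ceil(k0 * 2 ** (3 - 1)) = 20 -> 21 (forced odd)
import numpy as np


def _gauss_params_sketch(scale, k0=5, sigma0=1):
    num_downs = np.log2(1 / scale)
    if num_downs <= 0:
        return 1, 0
    sigma = sigma0 * (2 ** (num_downs - 1))
    k = int(np.ceil(k0 * (2 ** (num_downs - 1))))
    k = k + int(k % 2 == 0)  # force an odd kernel size
    return k, sigma


assert _gauss_params_sketch(1 / 8) == (21, 4.0)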
def warp_affine(image, transform, dsize=None, antialias=False,
                interpolation='linear'):
    """
    Applies an affine transformation to an image with optional antialiasing.

    Args:
        image (ndarray): the input image

        transform (ndarray | Affine): a coercable affine matrix

        dsize (Tuple[int, int] | None | str):
            width and height of the resulting image. If "auto", it is computed
            such that the positive coordinates of the warped image will fit in
            the new canvas. If None, then the image size will not change.

        antialias (bool, default=False):
            if True, checks whether the transform is downsampling and, if so,
            applies antialiasing via a gaussian blur.

        interpolation (str):
            interpolation code or cv2 integer. Interpolation codes are linear,
            nearest, cubic, lanczos, and area.

    Example:
        >>> from kwimage.im_cv2 import *  # NOQA
        >>> import kwimage
        >>> from kwimage.transform import Affine
        >>> image = kwimage.grab_test_image('astro')
        >>> #image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale(0.05)
        >>> transform = Affine.scale(0.02)
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1, interpolation='nearest')
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()

    Example:
        >>> from kwimage.im_cv2 import *  # NOQA
        >>> import kwimage
        >>> from kwimage.transform import Affine
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale((.1, 1.2))
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1)
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()
    """
    from kwimage import im_cv2
    from kwimage.transform import Affine
    import kwimage
    # These are module-level imports in the original file; added here so the
    # snippet is self-contained.
    import numpy as np
    import cv2
    import ubelt as ub
    transform = Affine.coerce(transform)
    flags = im_cv2._coerce_interpolation(interpolation)

    # TODO: expose these params
    # borderMode = cv2.BORDER_DEFAULT
    # borderMode = cv2.BORDER_CONSTANT
    borderMode = None
    borderValue = None

    """
    Variations that could change in the future:

        * In _gauss_params I'm not sure if we want to compute integer or
          fractional "number of downsamples".

        * The fudge factor bothers me, but seems necessary
    """

    if dsize is None:
        dsize = tuple(image.shape[0:2][::-1])
    elif dsize == 'auto':
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    if not antialias:
        M = np.asarray(transform)
        result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                borderMode=borderMode, borderValue=borderValue)
    else:
        # Decompose the affine matrix into its 6 core parameters
        params = transform.decompose()
        sx, sy = params['scale']

        if sx >= 1 and sy > 1:
            # No downsampling detected, no need to antialias
            M = np.asarray(transform)
            result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                    borderMode=borderMode, borderValue=borderValue)
        else:
            # At least one dimension is downsampled

            # Compute the transform with all scaling removed
            noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))

            # Execute part of the downscale with iterative pyramid downs
            downscaled, residual_sx, residual_sy = _prepare_downscale(
                image, sx, sy)

            # Compute the transform from the downsampled image to the destination
            rest_warp = noscale_warp @ Affine.scale((residual_sx, residual_sy))

            result = cv2.warpAffine(downscaled, rest_warp.matrix[0:2],
                                    dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
    return result
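# The refactored warp_affine above delegates the pyramid portion of the
# downscale to a _prepare_downscale helper that is not shown in this snippet.
# The sketch below is a guess at its shape, reconstructed from the inline
# logic of the earlier (non-refactored) warp_affine: pyrDown as far as the
# largest scale allows, holding back a fudge factor, then return the residual
# scale for the final warp. Names, defaults, and the fudge/blur policy are
# assumptions, not the actual kwimage implementation; the real helper likely
# also applies the residual gaussian blur, which is omitted here for brevity.
import cv2
import numpy as np


def _prepare_downscale_sketch(image, sx, sy, fudge=2):
    """Returns (downscaled_image, residual_sx, residual_sy)."""
    max_scale = max(sx, sy)
    # Number of 2x pyramid downsamples, holding back `fudge` of them
    num_downs = max(int(np.log2(1 / max_scale)) - fudge, 0)
    pyr_scale = 1 / (2 ** num_downs)

    downscaled = image.copy()
    for _ in range(num_downs):
        downscaled = cv2.pyrDown(downscaled)

    # The scale that still needs to be applied by the final warp
    residual_sx = sx / pyr_scale
    residual_sy = sy / pyr_scale
    return downscaled, residual_sx, residual_sy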
def benchmark_dict_diff_impl():
    import ubelt as ub
    import pandas as pd
    import timerit
    import random

    def method_diffkeys(*args):
        first_dict = args[0]
        keys = set(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_diffkeys_list(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        keep_keys = [k for k in first_dict.keys() if k not in remove_keys]
        new = dict((k, first_dict[k]) for k in keep_keys)
        return new

    def method_diffkeys_oset(*args):
        first_dict = args[0]
        keys = ub.oset(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_ifkeys_setcomp(*args):
        first_dict = args[0]
        remove_keys = {k for ks in args[1:] for k in ks}
        new1 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new1

    def method_ifkeys_setunion(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new2 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new2

    def method_ifkeys_getitem(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new3 = dict((k, first_dict[k]) for k in first_dict.keys() if k not in remove_keys)
        return new3

    def method_ifkeys_dictcomp(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: v for k, v in first_dict.items() if k not in remove_keys}
        return new4

    def method_ifkeys_dictcomp_getitem(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: first_dict[k] for k in first_dict.keys() if k not in remove_keys}
        return new4

    method_lut = locals()  # can populate this some other way

    def make_data(num_items, num_other, remove_fraction, keytype):
        if keytype == 'str':
            keytype = str
        if keytype == 'int':
            keytype = int
        first_keys = [random.randint(0, 1000) for _ in range(num_items)]
        k = int(remove_fraction * len(first_keys))
        remove_sets = [
            list(ub.unique(random.choices(first_keys, k=k) +
                           [random.randint(0, 1000) for _ in range(num_items)]))
            for _ in range(num_other)
        ]
        first_dict = {keytype(k): k for k in first_keys}
        args = [first_dict] + [{keytype(k): k for k in ks} for ks in remove_sets]
        return args

    ti = timerit.Timerit(200, bestof=1, verbose=2)

    basis = {
        'method': [
            # Can't use because unordered
            # 'method_diffkeys',

            # Can't use until python 3.6 is dropped
            'method_ifkeys_dictcomp',
            'method_ifkeys_dictcomp_getitem',

            'method_ifkeys_setunion',
            'method_ifkeys_getitem',
            'method_diffkeys_list',

            # Probably not good
            # 'method_ifkeys_setcomp',
            # 'method_diffkeys_oset',
        ],
        'num_items': [10, 100, 1000],
        'num_other': [1, 3, 5],
        # 'num_other': [1],
        'remove_fraction': [0, 0.2, 0.5, 0.7, 1.0],
        # 'remove_fraction': [0.2, 0.8],
        'keytype': ['str', 'int'],
        # 'keytype': ['str'],
        # 'param_name': [param values],
    }
    xlabel = 'num_items'
    kw_labels = ['num_items', 'num_other', 'remove_fraction', 'keytype']
    group_labels = {
        'style': ['num_other', 'keytype'],
        'size': ['remove_fraction'],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of the experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)

        kwargs = ub.dict_isect(params.copy(), kw_labels)
        args = make_data(**kwargs)

        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(*args)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    # For each parameter setting, group all methods that were run with those
    # exact comparable params. Then rank how well each method did. That gives
    # a preference profile. We give each preference profile a weight (e.g.
    # based on the fastest method in the bunch) and then aggregate them with
    # some voting method.

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Let's try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    weighted_rankings = ub.ddict(lambda: ub.ddict(float))
    for params, variants in data.groupby(['num_other', 'keytype', 'remove_fraction', 'num_items']):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their rating according to the Weng-Lin Bayes ranking
            # model. This does not account for the fact that some "games"
            # (i.e. parameter settings) are more important than others, but it
            # should be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

        # Choose a ranking weight scheme
        weight = variants['mean'].min()
        # weight = 1
        for rank, method in enumerate(ranking):
            weighted_rankings[method][rank] += weight
            weighted_rankings[method]['total'] += weight

    # There is probably a more robust voting method to do this
    weight_rank_rows = []
    for method_name, ranks in weighted_rankings.items():
        weights = ub.dict_diff(ranks, ['total'])
        p_rank = ub.map_vals(lambda w: w / ranks['total'], weights)
        for rank, w in p_rank.items():
            weight_rank_rows.append({'rank': rank, 'weight': w, 'name': method_name})
    weight_rank_df = pd.DataFrame(weight_rank_rows)
    piv = weight_rank_df.pivot(['name'], ['rank'], ['weight'])
    print(piv)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('skill_agg =\n{}'.format(skill_agg))

    aggregated = (piv * piv.columns.levels[1].values).sum(axis=1).sort_values()
    print('weight aggregated =\n{}'.format(aggregated))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # Not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('A better x-variable description')
        ax.set_ylabel('A better y-variable description')