Example #1
    def main(cls, cmdline=True, **kw):
        """
        Example:
            >>> # xdoctest: +SKIP
            >>> kw = {'src': 'special:shapes8'}
            >>> cmdline = False
            >>> cls = CocoConformCLI
            >>> cls.main(cmdline, **kw)
        """
        import kwcoco

        config = cls.CLIConfig(kw, cmdline=cmdline)
        print('config = {}'.format(ub.repr2(dict(config), nl=1)))

        if config['src'] is None:
            raise Exception('must specify source: {}'.format(config['src']))
        if config['dst'] is None:
            raise Exception('must specify dest: {}'.format(config['dst']))

        dset = kwcoco.CocoDataset.coerce(config['src'])

        config_ = ub.dict_diff(config, {'src', 'dst'})
        dset.conform(**config_)

        dset.fpath = config['dst']
        print('dump dset.fpath = {!r}'.format(dset.fpath))
        dset.dump(dset.fpath, newlines=True)
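The pattern worth noting above: the CLI config dict mixes I/O-only keys ('src', 'dst') with keyword arguments meant for dset.conform, and ub.dict_diff strips the former so the remainder can be splatted in. A minimal sketch of just that step, assuming ubelt is installed (the non-I/O keys are made up for illustration):

import ubelt as ub

config = {'src': 'in.kwcoco.json', 'dst': 'out.kwcoco.json',
          'legacy': True, 'workers': 0}  # 'legacy'/'workers' are illustrative
# Drop the I/O-only keys; whatever remains is safe to pass as **kwargs
config_ = ub.dict_diff(config, {'src', 'dst'})
print(ub.repr2(config_, nl=1))
# {
#     'legacy': True,
#     'workers': 0,
# }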
Example #2
    def main(cls, cmdline=True, **kw):
        """
        Example:
            >>> from kwcoco.cli.coco_validate import *  # NOQA
            >>> kw = {'src': 'special:shapes8'}
            >>> cmdline = False
            >>> cls = CocoValidateCLI
            >>> cls.main(cmdline, **kw)
        """
        import kwcoco
        config = cls.CLIConfig(kw, cmdline=cmdline)
        print('config = {}'.format(ub.repr2(dict(config), nl=1)))

        if config['src'] is None:
            raise Exception('must specify source: {}'.format(config['src']))

        if isinstance(config['src'], str):
            fpaths = [config['src']]
        else:
            fpaths = config['src']

        if config['dst']:
            if len(fpaths) != 1:
                raise Exception('can only specify 1 dataset in fix mode')

        fix_strat = set()
        if config['fix'] is not None:
            fix_strat = {c.lower() for c in config['fix'].split('+')}

        for fpath in ub.ProgIter(fpaths, desc='reading datasets', verbose=1):
            print('reading fpath = {!r}'.format(fpath))
            dset = kwcoco.CocoDataset.coerce(fpath)

            config_ = ub.dict_diff(config, {'src', 'dst', 'fix'})
            result = dset.validate(**config_)

            if 'missing' in result:
                if 'remove' in fix_strat:
                    missing = result['missing']
                    bad_gids = [t[2] for t in missing]
                    status = dset.remove_images(bad_gids, verbose=1)
                    print('status = {}'.format(ub.repr2(status, nl=1)))

            if 'corrupted' in result:
                if 'remove' in fix_strat:
                    corrupted = result['corrupted']
                    bad_gids = [t[2] for t in corrupted]
                    status = dset.remove_images(bad_gids, verbose=1)
                    print('status = {}'.format(ub.repr2(status, nl=1)))

            if config['dst']:
                if len(fpaths) != 1:
                    raise Exception('can only specify 1 dataset in fix mode')
                dset.dump(config['dst'], newlines=True)

            errors = result['errors']
            if errors:
                print('result = {}'.format(ub.repr2(result, nl=-1)))
                raise Exception('\n'.join(errors))
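One detail that is easy to miss above: the fix option is a '+'-separated string that is lowered into a set of strategy names before the per-file loop. A tiny sketch of that parsing (the value is illustrative; the loop above only reacts to 'remove'):

fix = 'remove+warn'   # illustrative value of config['fix']
fix_strat = {c.lower() for c in fix.split('+')} if fix is not None else set()
assert fix_strat == {'remove', 'warn'}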
Example #3
 def populate_from(self, dset):
     from sqlalchemy import inspect
     session = self.session
     inspector = inspect(self.engine)
     for key in self.engine.table_names():
         colinfo = inspector.get_columns(key)
         colnames = {c['name'] for c in colinfo}
         # TODO: is there a better way to grab this information?
         cls = TBLNAME_TO_CLASS[key]
         for item in dset.dataset.get(key, []):
             item_ = ub.dict_isect(item, colnames)
             # Everything else is a foreign key
             item_['foreign'] = ub.dict_diff(item, item_)
             if key == 'annotations':
                 # Need custom code to translate list-based properties
                 x, y, w, h = item['bbox']
                 item_['bbox_x'] = x
                 item_['bbox_y'] = y
                 item_['bbox_w'] = w
                 item_['bbox_h'] = h
             row = cls(**item_)
             session.add(row)
     session.commit()
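The core idiom here is splitting each COCO dictionary into the part the SQL table knows about and the leftover, which goes into a single 'foreign' column. A standalone sketch of that split, assuming ubelt is available (column names and the item are made up):

import ubelt as ub

colnames = {'id', 'image_id', 'category_id'}          # illustrative SQL columns
item = {'id': 1, 'image_id': 2, 'category_id': 3,
        'segmentation': [], 'score': 0.9}
item_ = ub.dict_isect(item, colnames)                 # known columns
item_['foreign'] = ub.dict_diff(item, item_)          # everything else, as one blob
print(ub.repr2(item_, nl=1))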
Example #4
def main():
    # TODO: progressive hashing data structure
    inv1 = Inventory('/media/joncrall/raid/', blocklist)
    inv2 = Inventory('/media/joncrall/media', blocklist)

    # inv1 = Inventory('/media/joncrall/raid/Applications/NotGames', blocklist)
    # inv2 = Inventory('/media/joncrall/media/Applications/NotGames', blocklist)
    # inv1 = Inventory('/media/joncrall/raid/Applications', blocklist)
    # inv2 = Inventory('/media/joncrall/media/Applications', blocklist)

    self = inv1  # NOQA

    inv1.build()
    inv2.build()

    thresh = {
        'frac': 0.5,
        'byte': 100 * int(2 ** 20),  # only use the first few MB to determine overlap
    }
    verbose = 1
    pfiles1 = inv1.pfiles
    pfiles2 = inv2.pfiles
    overlap, only1, only2 = ProgressiveFile.likely_overlaps(pfiles1,
                                                            pfiles2,
                                                            thresh=thresh,
                                                            verbose=verbose)

    stats = {
        'overlap': len(overlap),
        'only1': len(only1),
        'only2': len(only2),
    }
    print('stats = {}'.format(ub.repr2(stats, nl=1)))
    only2_list = sorted([p.fpath for group in only2.values() for p in group])
    print('only2_list = {}'.format(ub.repr2(only2_list, nl=1)))
    print('stats = {}'.format(ub.repr2(stats, nl=1)))

    # for pfile in inv1.pfiles:
    #     pfile._check_integrity()

    import numpy as np
    mb_read = np.array([
        pfile._parts[-1][1] / int(2**20) for pfile in ub.ProgIter(inv2.pfiles)
    ])
    mb_read.max()
    mb_read.min()

    # Build all hashes up to a reasonable degree
    inv1.build_hashes(max_workers=0)

    maybe_dups = inv1.likely_duplicates(thresh=0.2)
    len(maybe_dups)

    maybe_dups = ub.sorted_keys(maybe_dups, key=lambda x: x[2])

    import networkx as nx
    import itertools as it
    # Check which directories are most likely to be duplicates
    graph = nx.Graph()

    for key, group in ub.ProgIter(maybe_dups.items(),
                                  total=len(maybe_dups),
                                  desc='build dup dir graph'):
        if key[0] == '':
            continue
        dpaths = [dirname(pfile.fpath) for pfile in group]
        for d1, d2 in it.combinations(dpaths, 2):
            graph.add_edge(d1, d2)
            edge = graph.edges[(d1, d2)]
            if 'dups' not in edge:
                edge['dups'] = 0
            edge['dups'] += 1

    edge_data = list(graph.edges(data=True))

    for dpath in ub.ProgIter(graph.nodes, desc='find lens'):
        num_children = len(os.listdir(dpath))
        graph.nodes[dpath]['num_children'] = num_children

    for d1, d2, dat in edge_data:
        nc1 = graph.nodes[d1]['num_children']
        nc2 = graph.nodes[d2]['num_children']
        ndups = dat['dups']
        dup_score = (dat['dups'] / min(nc1, nc2))
        dat['dup_score'] = dup_score
        if dup_score > 0.9:
            print('dup_score = {!r}'.format(dup_score))
            print('d1 = {!r}'.format(d1))
            print('d2 = {!r}'.format(d2))
            print('nc1 = {!r}'.format(nc1))
            print('nc2 = {!r}'.format(nc2))
            print('ndups = {!r}'.format(ndups))

    print('edge_data = {}'.format(ub.repr2(edge_data, nl=2)))

    print('maybe_dups = {}'.format(ub.repr2(maybe_dups.keys(), nl=3)))
    for key, group in maybe_dups.items():
        if key[0] == '':
            continue
        print('key = {!r}'.format(key))
        print('group = {}'.format(ub.repr2(group, nl=1)))
        for pfile in group:
            pfile.refined_to(float('inf'))

        print('key = {!r}'.format(key))

    inv2.build_hashes(max_workers=6, mode='thread')

    inv1.pfiles = [
        p for p in ub.ProgIter(inv1.pfiles, desc='exist check')
        if exists(p.fpath)
    ]
    inv2.pfiles = [
        p for p in ub.ProgIter(inv2.pfiles, desc='exist check')
        if exists(p.fpath)
    ]

    pfiles1 = inv1.pfiles
    pfiles2 = inv2.pfiles

    def compute_likely_overlaps(pfiles1, pfiles2):
        step_idx1 = ProgressiveFile.compatible_step_idx(pfiles1)
        step_idx2 = ProgressiveFile.compatible_step_idx(pfiles2)
        step_idx = min(step_idx1, step_idx2)
        grouped1 = ProgressiveFile.group_pfiles(pfiles1, step_idx=step_idx)
        grouped2 = ProgressiveFile.group_pfiles(pfiles2, step_idx=step_idx)

        thresh = 0.2
        verbose = 1

        # TODO: it would be nice if we didn't have to care about internal
        # deduplication when we attempt to find cross-set overlaps
        dups1 = ProgressiveFile.likely_duplicates(inv1.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)
        dups2 = ProgressiveFile.likely_duplicates(inv2.pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        pfiles = inv1.pfiles + inv2.pfiles
        dups3 = ProgressiveFile.likely_duplicates(pfiles,
                                                  thresh=thresh,
                                                  verbose=verbose)

        only_on_inv2 = {}
        for key, group in dups3.items():
            if not any(
                    item.fpath.startswith(inv1.root_fpath) for item in group):
                only_on_inv2[key] = group

        for p1 in inv1.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p1.fpath:
                break

        for p2 in inv2.pfiles:
            if 'Chase HQ 2 (JUE) [!].zip' in p2.fpath:
                break

        look = list(ub.flatten(only_on_inv2.values()))
        takealook = sorted([p.fpath for p in look])
        print('takealook = {}'.format(ub.repr2(takealook, nl=1)))

        keys1 = set(grouped1)
        keys2 = set(grouped2)

        missing_keys2 = keys2 - keys1
        missing_groups2 = ub.dict_subset(grouped2, missing_keys2)

        missing_fpaths2 = []
        for key, values in missing_groups2.items():
            print('key = {!r}'.format(key))
            print('values = {}'.format(ub.repr2(values, nl=1)))
            missing_fpaths2.extend(values)

        missing_fpaths2 = sorted([p.fpath for p in missing_fpaths2])
        print('missing_fpaths2 = {}'.format(ub.repr2(missing_fpaths2, nl=1)))
        # pass

        import xdev
        set_overlaps = xdev.set_overlaps(keys1, keys2)
        print('set_overlaps = {}'.format(ub.repr2(set_overlaps, nl=1)))
        # We want to know what files in set2 do not exist in set1

    if 0:
        fpath = inv1.all_fpaths[0]
        pfile = ProgressiveFile(fpath)

        fpath1 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Transfer/Zebras/DownloadedLibraries/lightspeed/solve_triu.m'
        fpath2 = '/media/joncrall/raid/unsorted/yet-another-backup/card-usb-drive/Zebras/downloaded_libraries/lightspeed/solve_triu.m'

        fpath1 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Falco/DarkFalco02.pcs'
        fpath2 = '/media/joncrall/raid/Applications/Wii/WiiHacksAndStuff/CurrentHacks/Ivysaur/Kraid-v2-Ivy.pcs'

        pfile = pfile1 = ProgressiveFile(fpath1)
        pfile2 = ProgressiveFile(fpath2)

        pfile.maybe_equal(pfile2, thresh=0.1)

        fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500]
        # fpaths = hash_groups1_dup['ef46db3751d8e999']
        pfiles_demodata = [ProgressiveFile(f) for f in fpath_demodata]

        def progressive_duplicates(pfiles, idx=1):
            step_ids = [pfile.refined_to(idx) for pfile in ub.ProgIter(pfiles)]
            final_groups = {}
            grouped = ub.group_items(pfiles, step_ids)
            for key, group in grouped.items():
                if len(group) > 1:
                    if all(not g.can_refine for g in group):
                        # Group is ~100% a real duplicate
                        final_groups[key] = group
                    else:
                        pfiles = group
                        deduped = progressive_duplicates(pfiles, idx=idx + 1)
                        final_groups.update(deduped)
                else:
                    final_groups[key] = group
            return final_groups

        pfiles = pfiles_demodata
        final_groups = progressive_duplicates(pfiles)

        for key, group in final_groups.items():
            if len(group) > 1:
                print('key = {!r}'.format(key))
                print('group = {}'.format(ub.repr2(group, nl=1)))

        inv1.build_hashes()
        inv2.build_hashes()

        hash_groups1 = ub.group_items(inv1.all_fpaths, inv1.all_hashes)
        hash_groups2 = ub.group_items(inv2.all_fpaths, inv2.all_hashes)

        hash_groups1_dup = {
            k: v
            for k, v in hash_groups1.items() if len(v) > 1
        }
        hash_groups2_dup = {
            k: v
            for k, v in hash_groups2.items() if len(v) > 1
        }
        len(hash_groups1_dup)
        len(hash_groups2_dup)

        # common = set(hash_groups1) & set(hash_groups2)
        # xdev.set_overlaps(hash_groups1, hash_groups2)

        fnames1 = ub.group_items(inv1.all_fpaths, key=basename)
        fnames2 = ub.group_items(inv2.all_fpaths, key=basename)

        missing = ub.dict_diff(fnames2, fnames1)
        sorted(ub.flatten(missing.values()))
        len(missing)

        fpath_demodata = inv1.all_fpaths[::len(inv1.all_fpaths) // 500]

        def internal_deduplicate(self):
            hash_groups = ub.group_items(self.all_fpaths, self.all_hashes)
            hash_groups_dup = {
                k: v
                for k, v in hash_groups.items() if len(v) > 1
            }

            from os.path import dirname

            hash_groups_dup['ef46db3751d8e999']

            for key, values in hash_groups_dup.items():
                for v in values:
                    if v.endswith('.avi'):
                        break

                [basename(v) for v in values]
                [dirname(v) for v in values]
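Much of the script above reduces to one idiom: group file paths by a key (a content hash, a progressive-hash step id, or a basename) and treat groups with more than one member as likely duplicates. A standalone sketch of that grouping, assuming ubelt is available (paths and hashes are made up):

import ubelt as ub

all_fpaths = ['a/x.avi', 'b/x.avi', 'a/y.txt']         # illustrative paths
all_hashes = ['ef46db37', 'ef46db37', '12ab34cd']       # illustrative hashes
hash_groups = ub.group_items(all_fpaths, all_hashes)
dups = {k: v for k, v in hash_groups.items() if len(v) > 1}
print(ub.repr2(dups, nl=1))
# {
#     'ef46db37': ['a/x.avi', 'b/x.avi'],
# }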
Example #5
def 字典_差集(*args):
    # e.g. 字典_差集({'a': 1, 'b': 1}, {'a'}, {'c'}) -> {'b': 1}
    import ubelt as ub
    data = ub.dict_diff(*args)
    return data
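A quick check of what the wrapper above returns (a minimal sketch assuming the function is defined as shown): ub.dict_diff keeps only the keys of the first dict that do not appear in any of the later arguments.

result = 字典_差集({'a': 1, 'b': 1}, {'a'}, {'c'})
print(result)  # -> {'b': 1}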
Example #6
def warp_image_test(image, transform, dsize=None):
    """

    from kwimage.transform import Affine
    import kwimage
    image = kwimage.grab_test_image('checkerboard', dsize=(2048, 2048)).astype(np.float32)
    image = kwimage.grab_test_image('astro', dsize=(2048, 2048))
    transform = Affine.random() @ Affine.scale(0.01)

    """
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import ubelt as ub

    # Choose a random affine transform that probably has a small scale
    # transform = Affine.random() @ Affine.scale((0.3, 2))
    # transform = Affine.scale((0.1, 1.2))
    # transform = Affine.scale(0.05)
    transform = Affine.random() @ Affine.scale(0.01)
    # transform = Affine.random()

    image = kwimage.grab_test_image('astro')
    image = kwimage.grab_test_image('checkerboard')

    image = kwimage.ensure_float01(image)

    from kwimage import im_cv2
    import kwarray
    import cv2
    transform = Affine.coerce(transform)

    if 1 or dsize is None:
        h, w = image.shape[0:2]

        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    import timerit
    ti = timerit.Timerit(10, bestof=3, verbose=2)

    def _full_gauss_kernel(k0, sigma0, scale):
        # k0 / sigma0 are the kernel size and sigma for one 2x downsample
        num_downscales = np.log2(1 / scale)
        if num_downscales < 0:
            return 1, 0

        # The kernel size and sigma double for each 2x downsample
        k = int(np.ceil(k0 * (2 ** (num_downscales - 1))))
        sigma = sigma0 * (2 ** (num_downscales - 1))

        if k % 2 == 0:
            k += 1
        return k, sigma

    def pyrDownK(a, k=1):
        assert k >= 0
        for _ in range(k):
            a = cv2.pyrDown(a)
        return a

    for timer in ti.reset('naive'):
        with timer:
            interpolation = 'nearest'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v5 = cv2.warpAffine(image, transform.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 1
    #
    for timer in ti.reset('resize+warp'):
        with timer:
            params = transform.decompose()

            sx, sy = params['scale']
            noscale_params = ub.dict_diff(params, {'scale'})
            noscale_warp = Affine.affine(**noscale_params)

            h, w = image.shape[0:2]
            resize_dsize = (int(np.ceil(sx * w)), int(np.ceil(sy * h)))

            downsampled = cv2.resize(image, dsize=resize_dsize, fx=sx, fy=sy,
                                     interpolation=cv2.INTER_AREA)

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v1 = cv2.warpAffine(downsampled, noscale_warp.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 2
    for timer in ti.reset('fullblur+warp'):
        with timer:
            k_x, sigma_x = _full_gauss_kernel(k0=5, sigma0=1, scale=sx)
            k_y, sigma_y = _full_gauss_kernel(k0=5, sigma0=1, scale=sy)
            image_ = image.copy()
            image_ = cv2.GaussianBlur(image_, (k_x, k_y), sigma_x, sigma_y)
            image_ = kwarray.atleast_nd(image_, 3)
            # image_ = image_.clip(0, 1)

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v2 = cv2.warpAffine(image_, transform.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 3

    for timer in ti.reset('pyrDown+blur+warp'):
        with timer:
            temp = image.copy()
            params = transform.decompose()
            sx, sy = params['scale']

            biggest_scale = max(sx, sy)
            # The -2 allows the gaussian to be a little bigger. This
            # seems to help with border effects at only a small runtime cost
            num_downscales = max(int(np.log2(1 / biggest_scale)) - 2, 0)
            pyr_scale = 1 / (2 ** num_downscales)

            # Does the gaussian downsampling
            temp = pyrDownK(image, num_downscales)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            partial_scale = Affine.scale((rest_sx, rest_sy))
            rest_warp = noscale_warp @ partial_scale

            k_x, sigma_x = _full_gauss_kernel(k0=5, sigma0=1, scale=rest_sx)
            k_y, sigma_y = _full_gauss_kernel(k0=5, sigma0=1, scale=rest_sy)
            temp = cv2.GaussianBlur(temp, (k_x, k_y), sigma_x, sigma_y)
            temp = kwarray.atleast_nd(temp, 3)

            interpolation = 'cubic'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v3 = cv2.warpAffine(temp, rest_warp.matrix[0:2], dsize=dsize,
                                      flags=flags)

    # --------------------
    # METHOD 4 - dont do the final blur

    for timer in ti.reset('pyrDown+warp'):
        with timer:
            temp = image.copy()
            params = transform.decompose()
            sx, sy = params['scale']

            biggest_scale = max(sx, sy)
            num_downscales = max(int(np.log2(1 / biggest_scale)), 0)
            pyr_scale = 1 / (2 ** num_downscales)

            # Does the gaussian downsampling
            temp = pyrDownK(image, num_downscales)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            partial_scale = Affine.scale((rest_sx, rest_sy))
            rest_warp = noscale_warp @ partial_scale

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v4 = cv2.warpAffine(temp, rest_warp.matrix[0:2], dsize=dsize, flags=flags)

    if 1:

        def get_title(key):
            from ubelt.timerit import _choose_unit
            value = ti.measures['mean'][key]
            suffix, mag = _choose_unit(value)
            unit_val = value / mag

            return key + ' ' + ub.repr2(unit_val, precision=2) + ' ' + suffix

        final_v2 = final_v2.clip(0, 1)
        final_v1 = final_v1.clip(0, 1)
        final_v3 = final_v3.clip(0, 1)
        final_v4 = final_v4.clip(0, 1)
        final_v5 = final_v5.clip(0, 1)
        import kwplot
        kwplot.autompl()
        kwplot.imshow(final_v5, pnum=(1, 5, 1), title=get_title('naive'))
        kwplot.imshow(final_v2, pnum=(1, 5, 2), title=get_title('fullblur+warp'))
        kwplot.imshow(final_v1, pnum=(1, 5, 3), title=get_title('resize+warp'))
        kwplot.imshow(final_v3, pnum=(1, 5, 4), title=get_title('pyrDown+blur+warp'))
        kwplot.imshow(final_v4, pnum=(1, 5, 5), title=get_title('pyrDown+warp'))
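The essence of the faster methods above is "remove the high frequencies before the warp shrinks them". A self-contained sketch of that idea using only numpy and OpenCV, with the kernel/sigma doubling rule from _full_gauss_kernel (the image and scale are illustrative):

import cv2
import numpy as np

image = np.random.rand(512, 512).astype(np.float32)   # illustrative image
scale = 0.1                                            # heavy downsample -> aliasing risk
num_downscales = np.log2(1 / scale)
sigma = 1.0 * (2 ** (num_downscales - 1))
k = int(np.ceil(5 * (2 ** (num_downscales - 1))))
k += int(k % 2 == 0)                                   # kernel size must be odd
blurred = cv2.GaussianBlur(image, (k, k), sigma)
M = np.array([[scale, 0, 0], [0, scale, 0]], dtype=np.float32)
small = cv2.warpAffine(blurred, M, dsize=(51, 51), flags=cv2.INTER_LINEAR)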
Example #7
def warp_affine(image, transform, dsize=None, antialias=True,
                interpolation='linear'):
    """
    Applies an affine transformation to an image with optional antialiasing.

    Args:
        image (ndarray): the input image

        transform (ndarray | Affine): a coercible affine matrix

        dsize (Tuple[int, int] | None | str):
            width and height of the resulting image. If "auto", it is computed
            such that the positive coordinates of the warped image will fit in
            the new canvas. If None, then the image size will not change.

        antialias (bool, default=True):
            if True, determines whether the transform is downsampling and, if
            so, applies antialiasing via a Gaussian blur.

    TODO:
        - [ ] This will be moved to kwimage.im_cv2

    Example:
        >>> import kwimage
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale(0.05)
        >>> transform = Affine.scale(0.02)
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1, interpolation='nearest')
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()

    Example:
        >>> import kwimage
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale((.1, 1.2))
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1)
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()
    """
    from kwimage import im_cv2
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import cv2
    import ubelt as ub
    transform = Affine.coerce(transform)
    flags = im_cv2._coerce_interpolation(interpolation)

    # TODO: expose these params
    # borderMode = cv2.BORDER_DEFAULT
    # borderMode = cv2.BORDER_CONSTANT
    borderMode = None
    borderValue = None

    """
    Variations that could change in the future:

        * In _gauss_params I'm not sure if we want to compute integer or
            fractional "number of downsamples".

        * The fudge factor bothers me, but seems necessary
    """

    def _gauss_params(scale, k0=5, sigma0=1, fractional=True):
        # Compute a gaussian to mitigate aliasing for a requested downsample
        # Args:
        # scale: requested downsample factor
        # k0 (int): kernel size for one downsample operation
        # sigma0 (float): sigma for one downsample operation
        # fractional (bool): controls if we compute params for integer downsample
        # ops
        num_downs = np.log2(1 / scale)
        if not fractional:
            num_downs = max(int(num_downs), 0)
        if num_downs <= 0:
            k = 1
            sigma = 0
        else:
            # The kernel size and sigma doubles for each 2x downsample
            sigma = sigma0 * (2 ** (num_downs - 1))
            k = int(np.ceil(k0 * (2 ** (num_downs - 1))))
            k = k + int(k % 2 == 0)
        return k, sigma

    def _pyrDownK(a, k=1):
        # Downsamples by (2 ** k)x with antialiasing
        if k == 0:
            a = a.copy()
        for _ in range(k):
            a = cv2.pyrDown(a)
        return a

    if dsize is None:
        dsize = tuple(image.shape[0:2][::-1])
    elif dsize == 'auto':
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    if not antialias:
        M = np.asarray(transform)
        result = cv2.warpAffine(image, M[0:2],
                                dsize=dsize, flags=flags,
                                borderMode=borderMode,
                                borderValue=borderValue)
    else:
        # Decompose the affine matrix into its 6 core parameters
        params = transform.decompose()
        sx, sy = params['scale']

        if sx >= 1 and sy >= 1:
            # No downsampling detected, no need to antialias
            M = np.asarray(transform)
            result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
        else:
            # At least one dimension is downsampled

            # Compute the transform with all scaling removed
            noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))

            max_scale = max(sx, sy)
            # The "fudge" factor limits the number of downsampled pyramid
            # operations. A bigger fudge factor means that the final
            # gaussian kernel for the antialiasing operation will be bigger.
            # It essentially says that at most "fudge" downsampling ops will
            # be handled by the final blur rather than the pyramid downsample.
            # It seems to help with border effects at only a small runtime cost
            # I don't entirely understand why the border artifact is introduced
            # when this is enabled though

            # TODO: should we allow for this fudge factor?
            # TODO: what is the real name of this? num_down_prevent ?
            # skip_final_downs?
            fudge = 2
            # TODO: should final antialiasing be on?
            # Note, if fudge is non-zero it is important to do this.
            do_final_aa = 1
            # TODO: should fractional be True or False by default?
            # If fudge is 0 and fractional=0, then I think is the same as
            # do_final_aa=0.
            fractional = 0

            num_downs = max(int(np.log2(1 / max_scale)) - fudge, 0)
            pyr_scale = 1 / (2 ** num_downs)

            # Downsample iteratively with antialiasing
            downscaled = _pyrDownK(image, num_downs)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            # Compute the transform from the downsampled image to the destination
            rest_warp = noscale_warp @ Affine.scale((rest_sx, rest_sy))

            # Do a final small blur to account for the potential aliasing
            # in any remaining scaling operations.
            if do_final_aa:
                # Computed as the closest sigma to the [1, 4, 6, 4, 1] approx
                # used in cv2.pyrDown
                aa_sigma0 = 1.0565137190917149
                aa_k0 = 5
                k_x, sigma_x = _gauss_params(scale=rest_sx, k0=aa_k0,
                                             sigma0=aa_sigma0,
                                             fractional=fractional)
                k_y, sigma_y = _gauss_params(scale=rest_sy, k0=aa_k0,
                                             sigma0=aa_sigma0,
                                             fractional=fractional)

                # Note: when k=1, no blur occurs
                # blurBorderType = cv2.BORDER_REPLICATE
                # blurBorderType = cv2.BORDER_CONSTANT
                blurBorderType = cv2.BORDER_DEFAULT
                downscaled = cv2.GaussianBlur(
                    downscaled, (k_x, k_y), sigma_x, sigma_y,
                    borderType=blurBorderType
                )

            result = cv2.warpAffine(downscaled, rest_warp.matrix[0:2],
                                    dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)

    return result
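The step that makes the antialiased branch work is factoring the scale out of the affine decomposition, so the pyramid handles the shrink and the residual warp handles everything else. A minimal sketch of that factoring, assuming kwimage and ubelt are available:

import ubelt as ub
from kwimage.transform import Affine

transform = Affine.random() @ Affine.scale(0.05)
params = transform.decompose()
sx, sy = params['scale']
# The warp with the scaling removed; composing it back with the scale should
# approximately recover the original transform.
noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))
recomposed = noscale_warp @ Affine.scale((sx, sy))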
Example #8
def warp_affine(image,
                transform,
                dsize=None,
                antialias=False,
                interpolation='linear'):
    """
    Applies an affine transformation to an image with optional antialiasing.

    Args:
        image (ndarray): the input image

        transform (ndarray | Affine): a coercible affine matrix

        dsize (Tuple[int, int] | None | str):
            width and height of the resulting image. If "auto", it is computed
            such that the positive coordinates of the warped image will fit in
            the new canvas. If None, then the image size will not change.

        antialias (bool, default=False):
            if True, determines whether the transform is downsampling and, if
            so, applies antialiasing via a Gaussian blur.

        interpolation (str):
            interpolation code or cv2 integer. Interpolation codes are linear,
            nearest, cubic, lanczos, and area.

    Example:
        >>> from kwimage.im_cv2 import *  # NOQA
        >>> import kwimage
        >>> from kwimage.transform import Affine
        >>> image = kwimage.grab_test_image('astro')
        >>> #image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale(0.05)
        >>> transform = Affine.scale(0.02)
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1, interpolation='nearest')
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()

    Example:
        >>> from kwimage.im_cv2 import *  # NOQA
        >>> import kwimage
        >>> from kwimage.transform import Affine
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale((.1, 1.2))
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1)
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()
    """
    from kwimage import im_cv2
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import cv2
    import ubelt as ub
    transform = Affine.coerce(transform)
    flags = im_cv2._coerce_interpolation(interpolation)

    # TODO: expose these params
    # borderMode = cv2.BORDER_DEFAULT
    # borderMode = cv2.BORDER_CONSTANT
    borderMode = None
    borderValue = None
    """
    Variations that could change in the future:

        * In _gauss_params I'm not sure if we want to compute integer or
            fractional "number of downsamples".

        * The fudge factor bothers me, but seems necessary
    """

    if dsize is None:
        dsize = tuple(image.shape[0:2][::-1])
    elif dsize == 'auto':
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    if not antialias:
        M = np.asarray(transform)
        result = cv2.warpAffine(image,
                                M[0:2],
                                dsize=dsize,
                                flags=flags,
                                borderMode=borderMode,
                                borderValue=borderValue)
    else:
        # Decompose the affine matrix into its 6 core parameters
        params = transform.decompose()
        sx, sy = params['scale']

        if sx >= 1 and sy >= 1:
            # No downsampling detected, no need to antialias
            M = np.asarray(transform)
            result = cv2.warpAffine(image,
                                    M[0:2],
                                    dsize=dsize,
                                    flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
        else:
            # At least one dimension is downsampled

            # Compute the transform with all scaling removed
            noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))

            # Execute part of the downscale with iterative pyramid downs
            downscaled, residual_sx, residual_sy = _prepare_downscale(
                image, sx, sy)

            # Compute the transform from the downsampled image to the destination
            rest_warp = noscale_warp @ Affine.scale((residual_sx, residual_sy))

            result = cv2.warpAffine(downscaled,
                                    rest_warp.matrix[0:2],
                                    dsize=dsize,
                                    flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)

    return result
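For completeness, the dsize='auto' branch shared by both variants can be exercised on its own: it warps the image's bounding box and takes the quantized width/height of the result as the output canvas. A sketch assuming kwimage and numpy are available:

import numpy as np
import kwimage
from kwimage.transform import Affine

image = kwimage.grab_test_image('astro')
transform = Affine.scale(0.5)
h, w = image.shape[0:2]
boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
warped_box = boxes.to_polygons()[0].warp(transform.matrix).to_boxes().to_ltrb().quantize()
dsize = tuple(map(int, warped_box.data[0, 2:4]))
print(dsize)  # width/height of a canvas that fits the warped image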
Example #9
def benchmark_dict_diff_impl():
    import ubelt as ub
    import pandas as pd
    import timerit
    import random

    def method_diffkeys(*args):
        first_dict = args[0]
        keys = set(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_diffkeys_list(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        keep_keys = [k for k in first_dict.keys() if k not in remove_keys]
        new = dict((k, first_dict[k]) for k in keep_keys)
        return new

    def method_diffkeys_oset(*args):
        first_dict = args[0]
        keys = ub.oset(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_ifkeys_setcomp(*args):
        first_dict = args[0]
        remove_keys = {k for ks in args[1:] for k in ks}
        new1 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new1

    def method_ifkeys_setunion(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new2 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new2

    def method_ifkeys_getitem(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new3 = dict((k, first_dict[k]) for k in first_dict.keys() if k not in remove_keys)
        return new3

    def method_ifkeys_dictcomp(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: v for k, v in first_dict.items() if k not in remove_keys}
        return new4

    def method_ifkeys_dictcomp_getitem(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: first_dict[k] for k in first_dict.keys() if k not in remove_keys}
        return new4

    method_lut = locals()  # can populate this some other way

    def make_data(num_items, num_other, remove_fraction, keytype):
        if keytype == 'str':
            keytype = str
        if keytype == 'int':
            keytype = int
        first_keys = [random.randint(0, 1000) for _ in range(num_items)]
        k = int(remove_fraction * len(first_keys))
        remove_sets = [list(ub.unique(random.choices(first_keys, k=k) + [random.randint(0, 1000) for _ in range(num_items)])) for _ in range(num_other)]
        first_dict = {keytype(k): k for k in first_keys}
        args = [first_dict] + [{keytype(k): k for k in ks} for ks in remove_sets]
        return args

    ti = timerit.Timerit(200, bestof=1, verbose=2)

    basis = {
        'method': [
            # Can't use because unordered
            # 'method_diffkeys',

            # Can't use until Python 3.6 is dropped
            'method_ifkeys_dictcomp',
            'method_ifkeys_dictcomp_getitem',

            'method_ifkeys_setunion',
            'method_ifkeys_getitem',
            'method_diffkeys_list',

            # Probably not good
            # 'method_ifkeys_setcomp',
            # 'method_diffkeys_oset',
        ],
        'num_items': [10, 100, 1000],
        'num_other': [1, 3, 5],
        # 'num_other': [1],
        'remove_fraction': [0, 0.2, 0.5, 0.7, 1.0],
        # 'remove_fraction': [0.2, 0.8],
        'keytype': ['str', 'int'],
        # 'keytype': ['str'],
        # 'param_name': [param values],
    }
    xlabel = 'num_items'
    kw_labels = ['num_items', 'num_other', 'remove_fraction', 'keytype']
    group_labels = {
        'style': ['num_other', 'keytype'],
        'size': ['remove_fraction'],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(),  kw_labels)
        args = make_data(**kwargs)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(*args)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    # For each parameter setting, group all methods that used those exact
    # comparable params. Then rank how well each method did. That will be a
    # preference profile. We will give that preference profile a weight (e.g.
    # based on the fastest method in the bunch) and then aggregate them with
    # some voting method.

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    weighted_rankings = ub.ddict(lambda: ub.ddict(float))
    for params, variants in data.groupby(['num_other', 'keytype', 'remove_fraction', 'num_items']):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take into account the fact that some "games"
            # (i.e. parameter settings) are more important than others, but it
            # should be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

        # Choose a ranking weight scheme
        weight = variants['mean'].min()
        # weight = 1
        for rank, method in enumerate(ranking):
            weighted_rankings[method][rank] += weight
            weighted_rankings[method]['total'] += weight

    # Probably a more robust voting method to do this
    weight_rank_rows = []
    for method_name, ranks in weighted_rankings.items():
        weights = ub.dict_diff(ranks, ['total'])
        p_rank = ub.map_vals(lambda w: w / ranks['total'], weights)

        for rank, w in p_rank.items():
            weight_rank_rows.append({'rank': rank, 'weight': w, 'name': method_name})
    weight_rank_df = pd.DataFrame(weight_rank_rows)
    piv = weight_rank_df.pivot(['name'], ['rank'], ['weight'])
    print(piv)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('skill_agg =\n{}'.format(skill_agg))

    aggregated = (piv * piv.columns.levels[1].values).sum(axis=1).sort_values()
    print('weight aggregated =\n{}'.format(aggregated))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('A better x-variable description')
        ax.set_ylabel('A better y-variable description')
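A stripped-down version of the same measurement, for readers who just want to compare ub.dict_diff against the raw dict comprehension for a single parameter setting (a sketch assuming ubelt and timerit are installed):

import ubelt as ub
import timerit

first = {str(i): i for i in range(1000)}
others = [{str(i) for i in range(0, 1000, 3)}]

ti = timerit.Timerit(100, bestof=3, verbose=2)
for timer in ti.reset('ub.dict_diff'):
    with timer:
        ub.dict_diff(first, *others)

remove_keys = set.union(*map(set, others))
for timer in ti.reset('dictcomp'):
    with timer:
        {k: v for k, v in first.items() if k not in remove_keys}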