Example #1
def test_timerit_verbose():
    from xdoctest.utils import CaptureStdout
    with CaptureStdout() as cap:
        ub.Timerit(3, label='foo', verbose=0).call(lambda: None)
    assert cap.text == ''

    with CaptureStdout() as cap:
        ub.Timerit(3, label='foo', verbose=1).call(lambda: None)
    assert cap.text.count('\n') == 2
    assert cap.text.count('foo') == 1

    with CaptureStdout() as cap:
        ub.Timerit(3, label='foo', verbose=2).call(lambda: None)
    assert cap.text.count('\n') == 3
    assert cap.text.count('foo') == 2

    with CaptureStdout() as cap:
        ub.Timerit(3, label='foo', verbose=3).call(lambda: None)
    assert cap.text.count('\n') == 4
    assert cap.text.count('foo') == 2

    with CaptureStdout() as cap:
        ub.Timerit(3, label='foo', verbose=4).call(lambda: None)
    assert cap.text.count('\n') == 4
    assert cap.text.count('foo') == 2
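Most of these snippets either import ubelt inside the function or assume `import ubelt as ub` at module level. For reference, a minimal sketch of the Timerit loop pattern that the remaining examples repeat (using only calls that appear throughout this page):

# Minimal sketch of the basic Timerit pattern used throughout these examples
import ubelt as ub

ti = ub.Timerit(num=100, bestof=10, verbose=1)
for timer in ti.reset('some-label'):
    with timer:
        sum(range(1000))  # code under test goes here
print('best = {}, mean = {}'.format(ti.min(), ti.mean()))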
Example #2
File: focal.py  Project: Kitware/netharn
def _benchmark_focal_loss():
    import torch
    import ubelt as ub
    import torch.nn.functional as F
    import netharn as nh
    B, C = 16, 37
    DIMS = (128, 128)
    dim = 1
    inputs = torch.rand(B, C, *DIMS)
    inputs.requires_grad = True
    log_probs = F.log_softmax(inputs, dim=dim)
    targets = (torch.rand(B, *DIMS) * C).long()
    #
    ti = ub.Timerit(20, bestof=3, verbose=1, unit='us')
    #
    devices = [
        nh.XPU.coerce('cuda0'),
        nh.XPU.coerce('cpu'),
    ]
    #
    # Forward
    for xpu in devices:
        log_probs = xpu.move(log_probs)
        targets = xpu.move(targets)
        print(' --- FORWARD ---')
        print('\n\n--- xpu = {!r} ---\n'.format(xpu))
        for timer in ti.reset('F.nll_loss'):
            with timer:
                loss1 = F.nll_loss(log_probs, targets, reduction='none')
                torch.cuda.synchronize()
        for timer in ti.reset('nll_focal_loss(focus=0)'):
            with timer:
                loss2 = nll_focal_loss(log_probs, targets, focus=0, dim=dim)
                torch.cuda.synchronize()
        for timer in ti.reset('nll_focal_loss(focus=2)'):
            with timer:
                loss3 = nll_focal_loss(log_probs, targets, focus=2, dim=dim)
                torch.cuda.synchronize()
    #
    # Backward
    ti = ub.Timerit(5, bestof=1, verbose=1, unit='ms')
    log_probs = F.log_softmax(inputs, dim=dim)
    for xpu in devices:
        print(' --- BACKWARD ---')
        print('\n\n--- xpu = {!r} ---\n'.format(xpu))
        for timer in ti.reset('F.nll_loss'):
            with timer:
                loss1 = F.nll_loss(log_probs, targets, reduction='none')
            loss1.mean().backward(retain_graph=True)
            torch.cuda.synchronize()
        for timer in ti.reset('nll_focal_loss(focus=0)'):
            with timer:
                loss2 = nll_focal_loss(log_probs, targets, focus=0.0, dim=dim)
            loss2.mean().backward(retain_graph=True)
            torch.cuda.synchronize()
        for timer in ti.reset('nll_focal_loss(focus=2)'):
            with timer:
                loss3 = nll_focal_loss(log_probs, targets, focus=2.0, dim=dim)
            loss3.mean().backward(retain_graph=True)
            torch.cuda.synchronize()
Example #3
def compare_loss_speed():
    """
    python ~/code/netharn/netharn/models/yolo2/light_region_loss.py compare_loss_speed

    Example:
        >>> compare_loss_speed()
    """
    from netharn.models.yolo2.light_yolo import Yolo
    import netharn.models.yolo2.light_region_loss
    import lightnet.network
    import ubelt as ub
    import numpy as np
    import torch
    from netharn import util
    torch.random.manual_seed(0)
    network = Yolo(num_classes=2, conf_thresh=4e-2)

    self1 = netharn.models.yolo2.light_region_loss.RegionLoss(
        num_classes=network.num_classes, anchors=network.anchors)
    self2 = lightnet.network.RegionLoss(num_classes=network.num_classes,
                                        anchors=network.anchors)

    # Win, Hin = 416, 416
    Win, Hin = 96, 96

    # ----- More targets -----
    rng = util.ensure_rng(0)
    import netharn as nh

    bsize = 4
    # Make a random semi-realistic set of groundtruth items
    n_targets = [rng.randint(0, 10) for _ in range(bsize)]
    target_list = [
        torch.FloatTensor(
            np.hstack([
                rng.randint(0, network.num_classes, nT)[:, None],
                util.Boxes.random(nT, scale=1.0, rng=rng).data
            ])) for nT in n_targets
    ]
    target = nh.data.collate.padded_collate(target_list)

    im_data = torch.randn(len(target), 3, Hin, Win)
    output = network.forward(im_data)

    self1.iou_mode = 'c'
    for timer in ub.Timerit(100, bestof=10, label='cython_ious'):
        with timer:
            loss_cy = float(self1(output, target))

    self1.iou_mode = 'py'
    for timer in ub.Timerit(100, bestof=10, label='python_ious'):
        with timer:
            loss_py = float(self1(output, target))

    for timer in ub.Timerit(100, bestof=10, label='original'):
        with timer:
            loss_orig = float(self2(output, target))

    print('loss_cy   = {!r}'.format(loss_cy))
    print('loss_py   = {!r}'.format(loss_py))
    print('loss_orig = {!r}'.format(loss_orig))
Example #4
File: test_time.py  Project: timmr99/ubelt
def test_timerit_default_verbosity():
    from xdoctest.utils import CaptureStdout

    with CaptureStdout() as cap:
        ub.Timerit(10, '').call(lambda: None)
    assert cap.text == '', 'should be quiet by default when label is not given'

    with CaptureStdout() as cap:
        ub.Timerit(10, 'alabel').call(lambda: None)
    assert cap.text != '', 'should be verbose by default when label is given'
Example #5
def benchmark_alphablend_impls():
    """
    Ignore:
        from kwimage.imutil.im_alphablend import *
    """
    from kwimage.im_alphablend import overlay_alpha_images
    from kwimage.im_alphablend import _prep_rgb_alpha
    from kwimage.im_alphablend import _alpha_blend_inplace
    from kwimage.im_alphablend import _alpha_blend_simple
    from kwimage.im_alphablend import _alpha_blend_numexpr1
    from kwimage.im_alphablend import _alpha_blend_numexpr2
    import kwimage
    import xdev
    import ubelt as ub
    import numpy as np
    H = W = 32
    rng = np.random.RandomState(0)

    rgb1, rgb2 = rng.rand(H, W, 3), rng.rand(H, W, 3)
    alpha1, alpha2 = rng.rand(H, W), rng.rand(H, W)

    dtype = np.float32
    # dtype = np.float64
    rgb1 = rgb1.astype(dtype)
    rgb2 = rgb2.astype(dtype)
    alpha1 = alpha1.astype(dtype)
    alpha2 = alpha2.astype(dtype)

    # If num is set too low it may seem like simple beats inplace, but that is
    # actually not the case. inplace is slightly faster as expected.
    ti = ub.Timerit(num=2000, bestof=100, unit='us', verbose=1)
    ti.reset(label='simple').call(lambda: _alpha_blend_simple(rgb1, alpha1, rgb2, alpha2))
    ti.reset(label='inplace').call(lambda: _alpha_blend_inplace(rgb1, alpha1, rgb2, alpha2))
    ti.reset(label='numexpr1').call(lambda: _alpha_blend_numexpr1(rgb1, alpha1, rgb2, alpha2))
    ti.reset(label='numexpr2').call(lambda: _alpha_blend_numexpr2(rgb1, alpha1, rgb2, alpha2))

    # It looks like the simple algorithm is winning ATM
    ub.Timerit(label='inplace', unit='us').call(
        lambda: overlay_alpha_images(rgb1, rgb2, impl='inplace'))
    ub.Timerit(label='simple', unit='us').call(
        lambda: overlay_alpha_images(rgb1, rgb2, impl='simple'))

    _ = xdev.profile_now(overlay_alpha_images)(rgb1, rgb2, impl='simple')
    _ = xdev.profile_now(overlay_alpha_images)(rgb1, rgb2, impl='inplace')

    _ = xdev.profile_now(kwimage.ensure_float01)(rgb1)
    _ = xdev.profile_now(_prep_rgb_alpha)(rgb1)
    _ = xdev.profile_now(_prep_rgb_alpha)(rgb2)

    _ = xdev.profile_now(_alpha_blend_simple)(rgb1, alpha1, rgb2, alpha2)
    _ = xdev.profile_now(_alpha_blend_inplace)(rgb1, alpha1, rgb2, alpha2)
    _ = xdev.profile_now(_alpha_blend_numexpr1)(rgb1, alpha1, rgb2, alpha2)
    _  # NOQA
Example #6
def bench_platform_test():
    """
    This is textbook premature optimization, but I was curious

    Results:
        Timed best=477.768 ns, mean=709.491 ± 128.4 ns for == win32
        Timed best=585.802 ns, mean=864.347 ± 191.0 ns for startswith(win32)
        Timed best=494.998 ns, mean=771.782 ± 135.9 ns for == linux
        Timed best=592.787 ns, mean=933.651 ± 177.2 ns for startswith(linux)
    """
    import ubelt as ub
    import sys
    ti = ub.Timerit(10000, bestof=100, verbose=1, unit='ns')

    for timer in ti.reset('== win32'):
        with timer:
            sys.platform == 'win32'

    for timer in ti.reset('startswith(win32)'):
        with timer:
            sys.platform.startswith('win32')

    for timer in ti.reset('== linux'):
        with timer:
            sys.platform == 'linux'

    for timer in ti.reset('startswith(linux)'):
        with timer:
            sys.platform.startswith('linux')
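The numbers in the docstring above come from Timerit's verbose printout. If the measurements are needed programmatically, `ti.min()`, `ti.mean()`, and `ti.label` can be read after each reset, as in this small sketch (it reuses the `ti` object and one of the cases from bench_platform_test):

# Sketch: collecting measurements instead of parsing the verbose output
measurements = {}
for timer in ti.reset('== win32'):
    with timer:
        sys.platform == 'win32'
measurements[ti.label] = {'best': ti.min(), 'mean': ti.mean()}
print(measurements)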
Example #7
def bench_typecheck():
    import numpy as np
    datas = {
        'u': np.random.rand(10).astype(np.uint32),
        'i': np.random.rand(10).astype(np.int32),
        'f': np.random.rand(10).astype(np.float32),
    }

    import ubelt as ub
    ti = ub.Timerit(50000, bestof=200, label='time')

    for timer in ti.reset('in list'):
        with timer:
            for data in datas.values():
                data.dtype.kind in ['i', 'u']

    for timer in ti.reset('in set'):
        with timer:
            for data in datas.values():
                data.dtype.kind in {'i', 'u'}

    for timer in ti.reset('in tuple'):
        with timer:
            for data in datas.values():
                data.dtype.kind in ('i', 'u')

    for timer in ti.reset('two =='):
        with timer:
            for data in datas.values():
                data.dtype.kind == 'i' or data.dtype.kind == 'u'
Example #8
def bench_imread():
    import ubelt as ub
    # fpath = ub.grabdata('http://www.topcoder.com/contest/problem/UrbanMapper3D/JAX_Tile_043_DTM.tif')

    import kwimage
    fpath = kwimage.grab_test_image_fpath('airport')

    # A color-table geotiff
    # https://download.osgeo.org/geotiff/samples/
    # fpath = ub.grabdata('https://download.osgeo.org/geotiff/samples/usgs/c41078a1.tif')

    ti = ub.Timerit(100, bestof=5, verbose=2)

    results = {}

    # NOTE: these hardcoded local paths override the grabbed test image above
    fpath = '/home/joncrall/data/sample_ptif.ptif'
    fpath = '/home/joncrall/data/sample_cog.cog.tif'

    for timer in ti.reset('gdal-v1'):
        with timer:
            image = _read_gdal_v1(fpath)
    results[ti.label] = image.sum()

    for timer in ti.reset('gdal-v2'):
        with timer:
            image = _read_gdal_v2(fpath)
    results[ti.label] = image.sum()

    for timer in ti.reset('rasterio'):
        with timer:
            image = _read_rasterio(fpath)
    results[ti.label] = image.sum()

    import skimage.io
    """
    pip install tifffile
    pip install imagecodecs

    """
    for timer in ti.reset('skimage'):
        with timer:
            image = skimage.io.imread(fpath)
    results[ti.label] = image.sum()

    import kwimage
    for timer in ti.reset('kwimage'):
        with timer:
            image = kwimage.imread(fpath)
    results[ti.label] = image.sum()

    import cv2
    for timer in ti.reset('cv2'):
        with timer:
            image = cv2.imread(fpath)
    results[ti.label] = image.sum()

    for timer in ti.reset('pil'):
        with timer:
            image = _read_pil(fpath)
    results[ti.label] = image.sum()
Example #9
def benchmark_region_loss():
    """
    CommandLine:
        python ~/code/netharn/netharn/models/yolo2/light_region_loss.py benchmark_region_loss --profile

    Benchmark:
        >>> benchmark_region_loss()
    """
    from netharn.models.yolo2.light_yolo import Yolo
    from netharn.models.yolo2 import light_region_loss
    import torch
    torch.random.manual_seed(0)
    network = Yolo(num_classes=2, conf_thresh=4e-2)
    self = light_region_loss.RegionLoss(num_classes=network.num_classes,
                                        anchors=network.anchors)
    Win, Hin = 96, 96
    # true boxes for each item in the batch
    # each box encodes class, center, width, and height
    # coordinates are normalized in the range 0 to 1
    # items in each batch are padded with dummy boxes with class_id=-1
    target = torch.FloatTensor([
        # boxes for batch item 1
        [[0, 0.50, 0.50, 1.00, 1.00],
         [1, 0.32, 0.42, 0.22, 0.12]],
        # boxes for batch item 2 (it has no objects, note the pad!)
        [[-1, 0, 0, 0, 0],
         [-1, 0, 0, 0, 0]],
    ])
    im_data = torch.randn(len(target), 3, Hin, Win)
    output = network.forward(im_data)
    import ubelt
    for timer in ubelt.Timerit(250, bestof=10, label='time'):
        with timer:
            loss = float(self.forward(output, target))
    print('loss = {!r}'.format(loss))
Example #10
def bench_sort_dictionary():
    """
    CommandLine:
        xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_sort_dictionary

    Results:
        Timed best=25.484 µs, mean=25.701 ± 0.1 µs for itemgetter
        Timed best=28.810 µs, mean=29.138 ± 0.3 µs for lambda
    """
    import operator as op
    import ubelt as ub

    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1

    ti = ub.Timerit(1000, bestof=10, verbose=1)
    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]

    for timer in ti.reset('lambda'):
        with timer:
            key_order = [key for (key, value) in sorted(hist_.items(), key=lambda x: x[1])]

    del key_order
Example #11
def bench_isinstance_vs_attr():
    instances = {
        'base1': Base1(),
        'base2': Base2(),
        'derived2': Derived2(),
    }

    import ubelt as ub
    ti = ub.Timerit(100000, bestof=500, verbose=1, unit='us')

    # Do this twice, but keep the second measure
    data = ub.AutoDict()

    for selfname, self in instances.items():

        print(ub.color_text('--- SELF = {} ---'.format(selfname), 'blue'))

        subdata = data[selfname] = {}

        for timer in ti.reset('isinstance(self, Base1)'):
            with timer:
                isinstance(self, Base1)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('isinstance(self, Base2)'):
            with timer:
                isinstance(self, Base2)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('isinstance(self, Derived2)'):
            with timer:
                isinstance(self, Derived2)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, "class_attr1", False)'):
            with timer:
                getattr(self, 'class_attr1', False)
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, "attr1", False)'):
            with timer:
                getattr(self, 'attr1', False)
        subdata[ti.label] = ti.min()

    try:
        import pandas as pd
        df = pd.DataFrame(data) * 1e9
        try:
            from kwil.util.util_pandas import _to_string_monkey
            print(_to_string_monkey(df, key='minima'))
        except Exception:
            print(df)
    except ImportError:
        print('no pandas')
        print(ub.repr2(data, nl=2, precision=4))
Example #12
File: test_io.py  Project: Erotemic/misc
        def test_disk_io(self):
            # Function that runs different strategies
            key = self.name()

            print('\n# --- {} ---'.format(key))
            write_ti = ub.Timerit(N_ITERS, label='{} write time'.format(self.ext))
            for timer in write_ti:
                with timer:
                    self.write()

            read_ti = ub.Timerit(N_ITERS, label='{} read time'.format(self.ext))
            for timer in read_ti:
                with timer:
                    for _ in self.read():
                        pass

            record.loc[key, 'write'] = write_ti.ave_secs
            record.loc[key, 'read'] = read_ti.ave_secs
            record.loc[key, 'MB'] = self.n_bytes() * 1e-6
            print('\n')
            print(record)
            print('-------')
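This method relies on names that are not part of the snippet: N_ITERS, the shared record table, and the self.name / self.ext / self.write / self.read / self.n_bytes helpers defined elsewhere in test_io.py. A hypothetical, minimal stand-in for that surrounding state, only for illustration:

# Hypothetical stand-ins for the outer-scope state used by test_disk_io;
# the real test_io.py defines these elsewhere.
import pandas as pd
import ubelt as ub

N_ITERS = 10
record = pd.DataFrame(columns=['write', 'read', 'MB'])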
Example #13
def _benchmark():
    """
    On 64-bit processors sha512 may be faster than sha256

    References:
        https://crypto.stackexchange.com/questions/26336/sha512-faster-than-sha256
    """
    result = ub.AutoOrderedDict()
    algos = ['sha1', 'sha256', 'sha512']
    for n in ub.ProgIter([1, 10, 100, 1000, 10000, 100000], desc='time'):
        # for key in hashlib.algorithms_guaranteed:
        for key in algos:
            hashtype = _rectify_hasher(key)
            t1 = ub.Timerit(100, bestof=10, label=key, verbose=0)
            for timer in t1:
                data = b'8' * n
                with timer:
                    hasher = hashtype()
                    hasher.update(data)
            result[key][n] = t1.min()
    import pandas as pd
    print(pd.DataFrame(result))

    result = ub.AutoOrderedDict()
    for n in ub.ProgIter([1, 10, 100, 1000, 10000, 100000], desc='time'):
        # for key in hashlib.algorithms_guaranteed:
        for key in algos:
            hashtype = _rectify_hasher(key)
            t1 = ub.Timerit(100, bestof=10, label=key, verbose=0)
            for timer in t1:
                data = b'8' * n
                hasher = hashtype()
                hasher.update(data)
                with timer:
                    hasher.hexdigest()
            result[key][n] = t1.min()
    import pandas as pd
    print(pd.DataFrame(result))
    """
Example #14
    def benchmark_write(xxd, proto):
        dpath = dpaths[xxd]
        args = {'proto': proto, 'xxd': xxd}

        fpath = join(dpath, 'test_{}.pkl'.format(proto))
        for timer in ub.Timerit(10, label='save {}'.format(args)):
            ub.delete(fpath)
            ub.writeto(fpath, 'junkdata')
            ub.delete(fpath)
            with timer:
                with open(fpath, 'wb') as file:
                    pickle.dump(data, file, protocol=proto)

        result = args.copy()
        result['write_time'] = timer.ellapsed

        for timer in ub.Timerit(10, label='read {}'.format(args)):
            with timer:
                with open(fpath, 'rb') as file:
                    pickle.load(file)

        result['read_time'] = timer.ellapsed
        return result
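benchmark_write is nested inside a larger benchmark script, so dpaths, data, pickle, ub, and join come from the enclosing scope. A hypothetical driver with those names made explicit, treating benchmark_write as if it were top-level (the paths and payload here are illustrative assumptions, not part of the original script):

# Hypothetical driver for benchmark_write; `dpaths` and `data` are assumptions
import pickle
import tempfile
import ubelt as ub
from os.path import join

data = {'payload': list(range(100000))}
dpaths = {'tmp': tempfile.mkdtemp()}

results = [benchmark_write(xxd, proto)
           for xxd in dpaths
           for proto in range(pickle.HIGHEST_PROTOCOL + 1)]
print(ub.repr2(results, nl=1))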
Example #15
def bench_closures():
    """
    Is it faster to use a closure or pass in the variables explicitly?
    """
    import ubelt as ub
    import numpy as np

    # Test a nested func with vs without a closure
    def rand_complex(*shape):
        # use the builtin complex type (the np.complex alias was removed in numpy >= 1.24)
        real = np.random.rand(*shape).astype(complex)
        imag = np.random.rand(*shape).astype(complex) * 1j
        mat = real + imag
        return mat

    s = int(ub.argval('--s', default='1'))
    mat1 = rand_complex(s, s)
    mat2 = rand_complex(s, s)
    N = 1000
    offset = 100

    def nested_closure():
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    def nested_explicit(mat1, mat2, N, offset):
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    ti = ub.Timerit(int(2 ** 11), bestof=int(2 ** 8), verbose=int(ub.argval('--verbose', default='1')))

    for timer in ti.reset('nested_explicit'):
        with timer:
            nested_explicit(mat1, mat2, N, offset)

    for timer in ti.reset('nested_closure'):
        with timer:
            nested_closure()

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
Example #16
def bench_dict_hist():

    import operator as op
    import ubelt as ub

    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1

    OrderedDict = ub.odict

    ti = ub.Timerit(1000, bestof=10, verbose=2)

    for timer in ti.reset('time'):
        with timer:
            getval = op.itemgetter(1)
            key_order = (key for (key, value) in sorted(hist_.items(), key=getval))
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('time'):
        with timer:
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            hist = OrderedDict([
                (key, value)
                for (key, value) in sorted(hist_.items(), key=getval)
            ])

    # -----------------

    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]

    for timer in ti.reset('lambda'):
        with timer:
            key_order = [key for (key, value) in sorted(hist_.items(), key=lambda x: x[1])]
Example #17
def test_imresize_multi_channel():
    """
    Test that imresize works with multiple channels in various configurations
    """

    resize_kw_basis = {
        'dsize': [(10, 10), (60, 60)],
        'interpolation': ['area', 'linear', 'cubic', 'nearest']
    }

    image_basis = {
        'dims': [(32, 32), (37, 41), (53, 31)],
        'channels': [None, 1, 3, 4, 20, 1024],
        'dtype': ['uint8', 'float32', 'float64'],
    }

    resize_kw_list = list(basis_product(resize_kw_basis))

    failures = []
    success = []
    ti = ub.Timerit(1, bestof=1, verbose=1)

    for imgkw, img in image_variations(image_basis):
        for resize_kw in resize_kw_list:
            params = dict(resize_kw=resize_kw, imgkw=imgkw)
            try:
                label = ub.repr2(params,
                                 nl=0,
                                 nobr=True,
                                 si=1,
                                 sv=1,
                                 kvsep='=',
                                 itemsep='')
                for timer in ti.reset(label):
                    with timer:
                        kwimage.imresize(img, **resize_kw)
            except Exception:
                failures.append(label)
                print('FAILED = {!r}'.format(label))
                raise
            else:
                success.append(label)

    print('n_pass = {}'.format(len(success)))
    print('n_fail = {}'.format(len(failures)))
    print('failures = {}'.format(ub.repr2(failures, nl=1)))

    print('ti.rankings = {}'.format(
        ub.repr2(ti.rankings, nl=2, align=':', precision=6)))
Example #18
def benchmark_all_mask_conversions():
    import kwimage
    import ubelt as ub

    base_mask = kwimage.Mask.random(shape=(256, 256))
    ti = ub.Timerit(1000, bestof=50, verbose=1, unit='us')

    from kwimage.structs.mask import MaskFormat  # NOQA
    for format1 in MaskFormat.cannonical:
        print('--- {} ---'.format(format1))
        mask1 = base_mask.toformat(format1)
        for format2 in MaskFormat.cannonical:
            for timer in ti.reset('{} -> {}'.format(format1, format2)):
                with timer:
                    mask1.toformat(format2)
Example #19
def bench_dict_hist():
    """
    CommandLine:
        xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_dict_hist

    Results:
        Timed best=48.330 µs, mean=49.437 ± 1.0 µs for dict_subset_iter
        Timed best=59.392 µs, mean=63.395 ± 11.9 µs for dict_subset_list
        Timed best=47.203 µs, mean=47.632 ± 0.2 µs for direct_itemgetter
    """

    import operator as op
    import ubelt as ub

    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1

    OrderedDict = ub.odict

    ti = ub.Timerit(1000, bestof=10, verbose=1)

    for timer in ti.reset('dict_subset_iter'):
        with timer:
            getval = op.itemgetter(1)
            key_order = (key for (key, value) in sorted(hist_.items(), key=getval))
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('dict_subset_list'):
        with timer:
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('direct_itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            hist = OrderedDict([
                (key, value)
                for (key, value) in sorted(hist_.items(), key=getval)
            ])

    del hist
Example #20
def test_startup_time():
    import ubelt as ub
    cmdlist = [
        '/usr/bin/python2',
        '/usr/bin/python3',
        # '/home/joncrall/venv3.6/bin/python3',
        '/home/joncrall/.local/conda/envs/py36/bin/python',
    ]
    for exe in cmdlist:
        print('\n==========\nexe = {!r}'.format(exe))
        for opts in ['', '-B', '-S', '-OO']:
            args = ' -c "import argparse"'
            command = exe + ' ' + opts + args
            for timer in ub.Timerit(50, bestof=5, label=repr(command), verbose=1):
                with timer:
                    ub.cmd(command)
Example #21
def main():
    import ubelt as ub
    from ubelt import util_list
    from ubelt.util_list import take
    import random
    from math import e

    # # Data
    N = 100
    array = [random.random() for _ in range(N)]
    indices = [random.randint(0, N - 1) for _ in range(int(N // e))]

    ti = ub.Timerit(2 ** 11, bestof=2 ** 8, verbose=1)

    for timer in ti.reset('take'):
        with timer:
            list(take(array, indices))

    for timer in ti.reset('util_list.take'):
        with timer:
            list(util_list.take(array, indices))

    for timer in ti.reset('ub.take'):
        with timer:
            list(ub.take(array, indices))

    print('---')

    # import pandas as pd
    # df = pd.DataFrame(rankings)
    # print('df =\n{}'.format(df))

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
Example #22
def fastfill_multipolygon():
    import kwplot
    import kwimage
    import numpy as np
    import ubelt as ub
    kwplot.autompl()
    shape = (1208, 1208)
    self = kwimage.MultiPolygon.random(10).scale(shape)

    ti = ub.Timerit(3, bestof=1, verbose=2, unit='us')
    for timer in ti.reset('draw_on'):
        with timer:
            mask = np.zeros(shape, dtype=np.uint8)
            mask = self.draw_on(mask)

    for timer in ti.reset('custom'):
        with timer:
            mask = np.zeros(shape, dtype=np.uint8)
            for p in self.data:
                if p is not None:
                    p.fill(mask, value=255)

    for timer in ti.reset('to_mask'):
        with timer:
            self.to_mask(shape)

    kwplot.imshow(mask)
Example #23
def bench_memoize():
    import ubelt as ub
    @ub.memoize
    def memoized_func():
        return object()

    def raw_func():
        return object()

    class Foo(object):
        @ub.memoize_property
        def a_memoized_property(self):
            return object()

        @ub.memoize_method
        def a_memoized_method(self):
            return object()

        @property
        def a_raw_property(self):
            return object()

        def a_raw_method(self):
            return object()

    self = Foo()
    ti = ub.Timerit(1000, bestof=100, verbose=1, unit='ns')

    ti.reset('memoized method').call(lambda: self.a_memoized_method())
    ti.reset('raw method').call(lambda: self.a_raw_method())

    ti.reset('memoized func').call(lambda: memoized_func())
    ti.reset('raw func').call(lambda: raw_func())

    ti.reset('memoized property').call(lambda: self.a_memoized_property)
    ti.reset('raw property').call(lambda: self.a_raw_property)
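As with the other examples, `ti.label` and `ti.min()` can be read right after each `.call()` to collect the results, a minimal sketch following the `subdata[ti.label] = ti.min()` idiom used elsewhere on this page:

# Sketch: collecting the best time per label after each .call()
timings = {}
ti.reset('memoized func').call(lambda: memoized_func())
timings[ti.label] = ti.min()
ti.reset('raw func').call(lambda: raw_func())
timings[ti.label] = ti.min()
print(ub.repr2(timings, precision=9, nl=1))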
Example #24
def benchmark_select_rle_conversions():
    """
    Check what is the fastest way to encode an RLE
    """
    import kwimage
    import ubelt as ub
    c_mask = kwimage.Mask.random(shape=(256, 256))
    f_mask = c_mask.to_fortran_mask(copy=True)

    img = c_mask.data

    ti = ub.Timerit(1000, bestof=50, verbose=1)

    for timer in ti.reset('img -> encode_run_length(non-binary)'):
        with timer:
            kwimage.encode_run_length(img, binary=False)

    for timer in ti.reset('img -> encode_run_length(binary)'):
        with timer:
            kwimage.encode_run_length(img, binary=True)

    for timer in ti.reset('c_mask -> to_array_rle'):
        with timer:
            c_mask.to_array_rle()

    for timer in ti.reset('c_mask -> to_bytes_rle'):
        with timer:
            c_mask.to_bytes_rle()

    for timer in ti.reset('f_mask -> to_array_rle'):
        with timer:
            f_mask.to_array_rle()

    for timer in ti.reset('f_mask -> to_bytes_rle'):
        with timer:
            f_mask.to_bytes_rle()
Example #25
def time_grid_create_methods():
    import numpy as np
    import ubelt
    N = 10
    K = 1001
    for timer in ubelt.Timerit(100, bestof=10, label='hv-stack'):
        with timer:
            ns = np.hstack([np.arange(N)[:, None]] * K).ravel()
            ks = np.vstack([np.arange(K)[None, :]] * N).ravel()

    for timer in ubelt.Timerit(100, bestof=10, label='tile'):
        with timer:
            ns = np.tile(np.arange(N)[:, None], (1, K)).ravel()
            ks = np.tile(np.arange(K), (N, 1)).ravel()

    for timer in ubelt.Timerit(100, bestof=10, label='repeat+arange'):
        with timer:
            ns = np.repeat(np.arange(N), K, axis=0).ravel()
            ks = np.repeat(np.arange(K)[None, :], N, axis=0).ravel()

    for timer in ubelt.Timerit(100, bestof=10, label='mgrid'):
        with timer:
            ns, ks = np.mgrid[0:N, 0:K]
            ns = ns.ravel()
            ks = ks.ravel()

    for timer in ubelt.Timerit(100, bestof=10, label='meshgrid'):
        with timer:
            ks, ns = np.meshgrid(np.arange(K), np.arange(N))
            ns = ns.ravel()
            ks = ks.ravel()

    for timer in ubelt.Timerit(100, bestof=10, label='ogrid+repeat'):
        with timer:
            ns_basis, ks_basis = np.ogrid[0:N, 0:K]
            ns = np.repeat(ns_basis, K, axis=0).ravel()
            ks = np.repeat(ks_basis, N, axis=0).ravel()
Example #26
def benchmark_hash_data():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json conversion is faster, or at least as fast, in most cases
    # xxhash is also significantly faster than sha512
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2**s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                # Timerit times the whole loop body when `with timer` is not used
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata,
                          ydata,
                          xlabel='N',
                          ylabel='seconds',
                          title='convert = {}'.format(convert))
        kwplot.show_if_requested()
Example #27
def benchmark_nms_version():
    """
        xdoctset netharn.models.yolo2.light_postproc benchmark_nms_version
    """
    # Build random test boxes and scores
    from lightnet.data.transform._postprocess import NonMaxSupression
    import netharn as nh
    import numpy as np
    import torch
    import ubelt as ub
    num = 16 * 16 * 5
    rng = nh.util.ensure_rng(0)
    cpu_boxes = nh.util.Boxes.random(num, scale=416.0, rng=rng, format='tlbr', tensor=True)
    cpu_tlbr = cpu_boxes.to_tlbr().data
    # cpu_scores = torch.Tensor(rng.rand(len(cpu_tlbr)))
    # make all scores unique to ensure comparability
    cpu_scores = torch.Tensor(np.linspace(0, 1, len(cpu_tlbr)))
    cpu_cls = torch.LongTensor(rng.randint(0, 20, len(cpu_tlbr)))

    # Format boxes in lightnet format
    cxywh_score_cls = torch.cat([cpu_boxes.to_cxywh().data,
                                 cpu_scores[:, None],
                                 cpu_cls.float()[:, None]], dim=-1)

    gpu = torch.device('cuda', 0)
    gpu_ln_boxes = cxywh_score_cls.to(gpu)

    thresh = .5

    def _ln_output_to_keep(ln_output, ln_boxes):
        keep = []
        for row in ln_output:
            # Find the index that we kept
            idxs = np.where(np.all(np.isclose(ln_boxes, row), axis=1))[0]
            assert len(idxs) == 1
            keep.append(idxs[0])
        assert np.all(np.isclose(ln_boxes[keep], ln_output))
        return keep

    N = 12
    bestof = 3

    t1 = ub.Timerit(N, bestof=bestof, label='lightnet()')
    for timer in t1:
        with timer:
            ln_output = NonMaxSupression._nms(gpu_ln_boxes, nms_thresh=thresh,
                                              class_nms=True, fast=False)
            torch.cuda.synchronize()
    ln_keep = _ln_output_to_keep(ln_output, gpu_ln_boxes)

    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)])
    self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.01, nms_thresh=thresh)

    t1 = ub.Timerit(N, bestof=bestof, label='netharn(mode0)')
    for timer in t1:
        with timer:
            nh_output = self._nms(gpu_ln_boxes, nms_mode=0)
            torch.cuda.synchronize()
    nh_keep_0 = _ln_output_to_keep(nh_output, gpu_ln_boxes)

    t1 = ub.Timerit(N, bestof=bestof, label='netharn(mode1)')
    for timer in t1:
        with timer:
            nh_output = self._nms(gpu_ln_boxes, nms_mode=1)
            torch.cuda.synchronize()
    nh_keep_1 = _ln_output_to_keep(nh_output, gpu_ln_boxes)

    t1 = ub.Timerit(N, bestof=bestof, label='netharn(mode2)')
    for timer in t1:
        with timer:
            nh_output = self._nms(gpu_ln_boxes, nms_mode=2)
            torch.cuda.synchronize()
    nh_keep_2 = _ln_output_to_keep(nh_output, gpu_ln_boxes)

    t1 = ub.Timerit(N, bestof=bestof, label='netharn(mode3)')
    for timer in t1:
        with timer:
            nh_output = self._nms(gpu_ln_boxes, nms_mode=3)
            torch.cuda.synchronize()
    nh_keep_3 = _ln_output_to_keep(nh_output, gpu_ln_boxes)

    t1 = ub.Timerit(N, bestof=bestof, label='netharn(mode4)')
    for timer in t1:
        with timer:
            nh_output = self._nms(gpu_ln_boxes, nms_mode=4)
            torch.cuda.synchronize()
    nh_keep_4 = _ln_output_to_keep(nh_output, gpu_ln_boxes)

    nh_keep_0 == nh_keep_2
    nh_keep_0 == nh_keep_3

    print('len(ln_keep)   = {!r}'.format(len(ln_keep)))
    print('len(nh_keep_0) = {!r}'.format(len(nh_keep_0)))
    print('len(nh_keep_1) = {!r}'.format(len(nh_keep_1)))
    print('len(nh_keep_2) = {!r}'.format(len(nh_keep_2)))
    print('len(nh_keep_3) = {!r}'.format(len(nh_keep_3)))
    print('len(nh_keep_4) = {!r}'.format(len(nh_keep_4)))
Example #28
File: torch_nms.py  Project: jcfr/netharn
def _benchmark():
    """
    python -m netharn.util.nms.torch_nms _benchmark --show

    SeeAlso:
        PJR Darknet NonMax supression
        https://github.com/pjreddie/darknet/blob/master/src/box.c

        Lightnet NMS
        https://gitlab.com/EAVISE/lightnet/blob/master/lightnet/data/transform/_postprocess.py#L116

    """
    import torch
    import numpy as np
    import netharn as nh
    from netharn.util.nms.torch_nms import torch_nms
    from netharn.util import non_max_supression
    import ubelt as ub
    import itertools as it

    N = 100
    bestof = 10

    ydata = ub.ddict(list)
    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500, 2000]

    # max number of boxes yolo will spit out at a time
    max_boxes = 19 * 19 * 5

    xdata = [
        10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500,
        max_boxes
    ]
    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500]
    xdata = [10, 100, 500]

    rng = nh.util.ensure_rng(0)

    thresh = 0.5

    for num in xdata:
        print('\n\n---- number of boxes = {} ----\n'.format(num))

        outputs = {}

        # Build random test boxes and scores
        cpu_boxes = nh.util.Boxes.random(num,
                                         scale=10.0,
                                         rng=rng,
                                         format='tlbr',
                                         tensor=True)
        cpu_tlbr = cpu_boxes.to_tlbr().data
        # cpu_scores = torch.Tensor(rng.rand(len(cpu_tlbr)))
        # make all scores unique to ensure comparability
        cpu_scores = torch.Tensor(np.linspace(0, 1, len(cpu_tlbr)))
        cpu_cls = torch.LongTensor(rng.randint(0, 10, len(cpu_tlbr)))

        # Format boxes in lightnet format
        cpu_ln_boxes = torch.cat([
            cpu_boxes.to_cxywh().data, cpu_scores[:, None],
            cpu_cls.float()[:, None]
        ],
                                 dim=-1)

        # Move boxes to numpy
        np_tlbr = cpu_tlbr.numpy()
        np_scores = cpu_scores.numpy()
        np_cls = cpu_cls.numpy()  # NOQA

        gpu = torch.device('cuda', 0)

        measure_gpu = torch.cuda.is_available()
        measure_cpu = False or not torch.cuda.is_available()

        def _ln_output_to_keep(ln_output, ln_boxes):
            keep = []
            for row in ln_output:
                # Find the index that we kept
                idxs = np.where(np.all(np.isclose(ln_boxes, row), axis=1))[0]
                assert len(idxs) == 1
                keep.append(idxs[0])
            assert np.all(np.isclose(ln_boxes[keep], ln_output))
            return keep

        if measure_gpu:
            # Move boxes to the GPU
            gpu_tlbr = cpu_tlbr.to(gpu)
            gpu_scores = cpu_scores.to(gpu)
            gpu_cls = cpu_cls.to(gpu)  # NOQA
            gpu_ln_boxes = cpu_ln_boxes.to(gpu)

            t1 = ub.Timerit(N, bestof=bestof, label='torch(gpu)')
            for timer in t1:
                with timer:
                    keep = torch_nms(gpu_tlbr, gpu_scores, thresh=thresh)
                    torch.cuda.synchronize()
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = np.where(keep.cpu().numpy())[0]

            t1 = ub.Timerit(N, bestof=bestof, label='cython(gpu)')
            for timer in t1:
                with timer:
                    keep = non_max_supression(np_tlbr,
                                              np_scores,
                                              thresh=thresh,
                                              impl='gpu')
                    torch.cuda.synchronize()
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = sorted(keep)

            from lightnet.data.transform._postprocess import NonMaxSupression
            t1 = ub.Timerit(N, bestof=bestof, label='lightnet-slow(gpu)')
            for timer in t1:
                with timer:
                    ln_output = NonMaxSupression._nms(gpu_ln_boxes,
                                                      nms_thresh=thresh,
                                                      class_nms=False,
                                                      fast=False)
                    torch.cuda.synchronize()
            # convert lightnet NMS output to keep for consistency
            keep = _ln_output_to_keep(ln_output, gpu_ln_boxes)
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = sorted(keep)

            if False:
                t1 = ub.Timerit(N, bestof=bestof, label='lightnet-fast(gpu)')
                for timer in t1:
                    with timer:
                        ln_output = NonMaxSupression._nms(gpu_ln_boxes,
                                                          nms_thresh=thresh,
                                                          class_nms=False,
                                                          fast=True)
                        torch.cuda.synchronize()
                # convert lightnet NMS output to keep for consistency
                keep = _ln_output_to_keep(ln_output, gpu_ln_boxes)
                ydata[t1.label].append(t1.min())
                outputs[t1.label] = sorted(keep)

        if measure_cpu:
            t1 = ub.Timerit(N, bestof=bestof, label='torch(cpu)')
            for timer in t1:
                with timer:
                    keep = torch_nms(cpu_tlbr, cpu_scores, thresh=thresh)
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = np.where(keep.cpu().numpy())[0]

        if True:
            t1 = ub.Timerit(N, bestof=bestof, label='cython(cpu)')
            for timer in t1:
                with timer:
                    keep = non_max_supression(np_tlbr,
                                              np_scores,
                                              thresh=thresh,
                                              impl='cpu')
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = sorted(keep)

            t1 = ub.Timerit(N, bestof=bestof, label='numpy(cpu)')
            for timer in t1:
                with timer:
                    keep = non_max_supression(np_tlbr,
                                              np_scores,
                                              thresh=thresh,
                                              impl='py')
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = sorted(keep)

        # Check that all kept boxes do not have more than `threshold` ious
        for key, idxs in outputs.items():
            ious = nh.util.box_ious(np_tlbr[idxs], np_tlbr[idxs])
            max_iou = (np.tril(ious) - np.eye(len(ious))).max()
            if max_iou > thresh:
                print('{} produced a bad result with max_iou={}'.format(
                    key, max_iou))

        # Check result consistency:
        print('\nResult stats:')
        for key in sorted(outputs.keys()):
            print('    * {:<20}: num={}'.format(key, len(outputs[key])))

        print('\nResult overlaps (method1, method2: jaccard):')
        datas = []
        for k1, k2 in it.combinations(sorted(outputs.keys()), 2):
            idxs1 = set(outputs[k1])
            idxs2 = set(outputs[k2])
            jaccard = len(idxs1 & idxs2) / len(idxs1 | idxs2)
            datas.append((k1, k2, jaccard))
        datas = sorted(datas, key=lambda x: -x[2])
        for k1, k2, jaccard in datas:
            print('    * {:<20}, {:<20}: {:0.4f}'.format(k1, k2, jaccard))

    nh.util.mplutil.autompl()
    nh.util.mplutil.multi_plot(xdata,
                               ydata,
                               xlabel='num boxes',
                               ylabel='seconds')
    nh.util.show_if_requested()
Example #29
def benchmark_attribute_access():
    """
    How fast are different methods of accessing attributes? Lets find out!
    """

    instances = {
        'simple': Simple(),
        'complex': Complex(),
        'slot_simple': SimpleWithSlots(),
        'slot_complex': ComplexWithSlots(),
    }

    import ubelt as ub

    ti = ub.Timerit(100000, bestof=500, verbose=1, unit='us')

    # Do this twice, but keep the second measure
    data = ub.AutoDict()

    for selfname, self in instances.items():

        print(ub.color_text('--- SELF = {} ---'.format(selfname), 'blue'))

        subdata = data[selfname] = {}

        for timer in ti.reset('self.attr1'):
            with timer:
                self.attr1
        subdata[ti.label] = ti.min()

        for timer in ti.reset('getattr(self, attr1)'):
            with timer:
                getattr(self, 'attr1')
        subdata[ti.label] = ti.min()

        attrs = ['attr1', 'attr2']

        for attrname in attrs:
            for timer in ti.reset('hasattr(self, {})'.format(attrname)):
                with timer:
                    hasattr(self, attrname)
            subdata[ti.label] = ti.min()

            for timer in ti.reset('getattr(self, {}, None)'.format(attrname)):
                with timer:
                    getattr(self, attrname, None)
            subdata[ti.label] = ti.min()

            if 'slot' not in selfname.lower():
                for timer in ti.reset(
                        'self.__dict__.get({}, None)'.format(attrname)):
                    with timer:
                        self.__dict__.get(attrname, None)
                subdata[ti.label] = ti.min()

        for timer in ti.reset('try/except: self.attr2'):
            with timer:
                try:
                    x = self.attr2
                except AttributeError:
                    x = None
        subdata[ti.label] = ti.min()

        for timer in ti.reset('try/except: self.attr1'):
            with timer:
                try:
                    x = self.attr1
                except AttributeError:
                    x = None
        subdata[ti.label] = ti.min()

        del x

    try:
        import pandas as pd
        df = pd.DataFrame(data) * 1e9
        try:
            from kwil.util.util_pandas import _to_string_monkey
            print(_to_string_monkey(df, key='minima'))
        except Exception:
            print(df)
    except ImportError:
        print('no pandas')
        print(ub.repr2(data, nl=2, precision=4))
Example #30
def _benchmark_cog_conversions():
    """
    CommandLine:
        xdoctest -m ~/code/ndsampler/ndsampler/utils/util_gdal.py _benchmark_cog_conversions
    """
    # Benchmark
    # xdoc: +REQUIRES(--bench)
    from ndsampler.utils.validate_cog import validate
    from os.path import join
    import numpy as np
    import ubelt as ub
    import xdev
    import kwimage
    # Prepare test data
    shape = (8000, 8000, 1)
    print('Test data shape = {!r}'.format(shape))
    data = np.random.randint(0, 255, shape, dtype=np.uint16)
    print('Test data size = {}'.format(
        xdev.byte_str(data.size * data.dtype.itemsize)))

    dpath = ub.ensure_app_cache_dir('ndsampler', 'cog_benchmark')
    src_fpath = join(dpath, 'src.png')
    kwimage.imwrite(src_fpath, data)

    # Benchmark conversions
    dst_api_fpath = join(dpath, 'dst_api.tiff')
    dst_cli_fpath = join(dpath, 'dst_cli.tiff')
    dst_data_fpath = join(dpath, 'dst_data.tiff')

    ti = ub.Timerit(3, bestof=3, verbose=3, unit='s')

    compress = 'RAW'
    compress = 'DEFLATE'
    blocksize = 256

    if 1:

        for timer in ti.reset('cov-convert-data'):
            ub.delete(dst_data_fpath)
            with timer:
                _imwrite_cloud_optimized_geotiff(dst_data_fpath,
                                                 data,
                                                 compress=compress,
                                                 blocksize=blocksize)
        assert not len(validate(dst_data_fpath)[1])

        for timer in ti.reset('cog-convert-api2'):
            ub.delete(dst_api_fpath)
            with timer:
                _api_convert_cloud_optimized_geotiff2(src_fpath,
                                                      dst_api_fpath,
                                                      compress=compress,
                                                      blocksize=blocksize)
        assert not len(validate(dst_api_fpath)[1])

        for timer in ti.reset('cog-convert-api'):
            ub.delete(dst_api_fpath)
            with timer:
                _api_convert_cloud_optimized_geotiff(src_fpath,
                                                     dst_api_fpath,
                                                     compress=compress,
                                                     blocksize=blocksize)
        assert not len(validate(dst_api_fpath)[1])

    for timer in ti.reset('cog-convert-cli'):
        ub.delete(dst_cli_fpath)
        with timer:
            _cli_convert_cloud_optimized_geotiff(src_fpath,
                                                 dst_cli_fpath,
                                                 compress=compress,
                                                 blocksize=blocksize)
    assert not len(validate(dst_cli_fpath)[1])

    if ub.find_exe('cog'):
        # requires pip install cogeotiff
        for timer in ti.reset('cogeotiff cli --compress {}'.format(compress)):
            ub.delete(dst_cli_fpath)
            with timer:
                info = ub.cmd(
                    'cog create {} {} --compress {} --block-size {}'.format(
                        src_fpath, dst_cli_fpath, compress, blocksize),
                    verbose=0)
                assert info['ret'] == 0
        assert not len(validate(dst_cli_fpath)[1])