Example #1
    def difference(self, other):
        """
        Set difference

        Example:
            >>> self = ChannelSpec('rgb|disparity,flowx|flowy')
            >>> other = ChannelSpec('rgb')
            >>> self.difference(other)
            >>> other = ChannelSpec('flowx')
            >>> self.difference(other)
        """
        assert len(list(other.keys())) == 1, 'can take diff with one stream'
        other_norm = ub.oset(ub.peek(other.normalize().values()))
        self_norm = self.normalize()

        new_streams = []
        for key, parts in self_norm.items():
            new_parts = ub.oset(parts) - ub.oset(other_norm)
            # shrink the representation of a complex r|g|b to an alias if
            # possible.
            # TODO: make this more efficient
            for alias, alias_spec in self._known.items():
                alias_parts = ub.oset(alias_spec.split('|'))
                index = subsequence_index(new_parts, alias_parts)
                if index is not None:
                    oset_delitem(new_parts, index)
                    oset_insert(new_parts, index.start, alias)
            new_stream = '|'.join(new_parts)
            new_streams.append(new_stream)
        new_spec = ','.join(new_streams)
        new = self.__class__(new_spec)
        return new
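
Note: `difference` leans on `ub.oset` subtraction preserving the left operand's order, so the surviving channel parts keep their original sequence before being re-joined with '|'. A minimal standalone sketch of that behavior (the channel names here are illustrative):

import ubelt as ub

parts = ub.oset(['r', 'g', 'b', 'disparity'])
remaining = parts - ub.oset(['disparity'])
assert list(remaining) == ['r', 'g', 'b']   # order of the left operand survives
assert '|'.join(remaining) == 'r|g|b'       # ready to re-join into a stream spec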
Example #2
File: test_oset.py Project: Kulbear/ubelt
def test_equality():
    def check(a, b):
        # Self checks
        assert a == a
        assert a >= a
        assert a <= a
        assert not a < a
        assert not a > a
        assert not a != a

        # Lesser checks
        assert a < b
        assert a <= b
        assert a != b
        assert not a == b

        # Greater checks
        assert b > a
        assert b >= a
        assert b != a
        assert not b == a

    a = ub.oset([])
    b = ub.oset([1])
    c = ub.oset([1, 2])
    d = ub.oset([1, 2, 3])

    check(a, b)
    check(b, c)
    check(c, d)
    check(a, d)
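
For reference, the comparisons exercised above follow ordinary subset/superset semantics while keeping order; a condensed sketch, assuming the same `ub.oset` used throughout these examples:

import ubelt as ub

a = ub.oset([1, 2])
b = ub.oset([1, 2, 3])
assert a < b and a <= b            # proper subset
assert b > a and b != a            # proper superset
assert a == [1, 2]                 # equality against a list is order-sensitive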
Example #3
File: fit_harn.py Project: hjweide/netharn
        def _epochs_to_remove(existing_epochs, num_keep):
            """
            Doctest:
                >>> import netharn as nh
                >>> harn = FitHarn({})
                >>> rng = np.random.RandomState(0)
                >>> harn.monitor = nh.Monitor(minimize=['loss'], maximize=['miou'])
                >>> for epoch in range(200):
                >>>     harn.monitor.update(epoch, {'loss': rng.rand(),
                >>>                                 'miou': rng.rand()})
                >>> existing_epochs = list(range(0, 200, 4))
                >>> num_keep = 10
            """
            keep = set()

            recent = existing_epochs[-num_keep:]
            keep.update(recent)

            if harn.monitor:
                # keep the top-ranked epochs for every monitored metric
                for best_epochs in harn.monitor.best_epochs().values():
                    best = ub.oset(best_epochs).intersection(existing_epochs)
                    keep.update(best[:num_keep])

            to_remove = set(existing_epochs) - keep
            return to_remove
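
The intersection above is what keeps the ranking intact: `ub.oset(best_epochs)` preserves the best-first order of `best_epochs`, so `best[:num_keep]` still selects the top-ranked epochs after filtering down to the ones that exist on disk. A minimal sketch with made-up epoch numbers:

import ubelt as ub

best_epochs = [42, 7, 19, 3]       # ranked best-first by the monitor
existing_epochs = [3, 7, 42]       # snapshots actually on disk
best = ub.oset(best_epochs).intersection(existing_epochs)
assert list(best) == [42, 7, 3]    # ranking order survives the filter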
Example #4
File: fit_harness.py Project: afcarl/clab
        def _epochs_to_remove(epochs):
            """
            Doctest:
                >>> harn = FitHarness()
                >>> rng = np.random.RandomState(0)
                >>> for epoch in range(200):
                >>>     harn.monitor.update(epoch, {'loss': rng.rand(),
                >>>                                 'miou': rng.rand()})
                >>> epochs = list(range(0, 200, 4))
            """
            num_keep_recent = 10
            num_keep_best = 10

            keep = set()

            recent = epochs[-num_keep_recent:]
            keep.update(recent)

            if harn.monitor:
                best_epochs = harn.monitor.best_epochs()
                best = ub.oset(best_epochs).intersection(epochs)
                keep.update(best[-num_keep_best:])

            to_remove = set(epochs) - keep
            return to_remove
Example #5
    def _epochs_to_remove(harn, existing_epochs, num_keep_recent,
                          num_keep_best, keep_freq):
        """
        Unit testable helper for `cleanup_snapshots`. Determines which epochs
        to remove given which epochs exist.

        Keeps the `num_keep_recent` most recent epochs, the `num_keep_best`
        best epochs, and one epoch every `keep_freq` epochs.

        Doctest:
            >>> import netharn as nh
            >>> harn = FitHarn({})
            >>> rng = np.random.RandomState(0)
            >>> harn.monitor = nh.Monitor(minimize=['loss'], maximize=['miou'])
            >>> for epoch in range(200):
            >>>     harn.monitor.update(epoch, {'loss': rng.rand(),
            >>>                                 'miou': rng.rand()})
            >>> existing_epochs = list(range(0, 200, 4))
            >>> num_keep_best = 10
            >>> num_keep_recent = 10
            >>> keep_freq = 10
            >>> to_remove = harn._epochs_to_remove(existing_epochs,
            >>>                                    num_keep_recent, num_keep_best,
            >>>                                    keep_freq)
            >>> assert len(existing_epochs) - len(to_remove) < 40
        """
        keep = set()

        recent = existing_epochs[-num_keep_recent:]
        keep.update(recent)

        # TODO: add a config for always keeping specific iterations in
        # multiples of X.

        if harn.monitor:
            # keep the top-ranked epochs for every monitored metric
            for best_epochs in harn.monitor.best_epochs().values():
                best = ub.oset(best_epochs).intersection(existing_epochs)
                keep.update(best[:num_keep_best])

        # Keep a strided sampling of epochs
        epoch_arr = np.array(existing_epochs)
        flags = ((epoch_arr % keep_freq) == 0)
        sampled = epoch_arr[flags]
        keep.update(sampled)

        to_remove = set(existing_epochs) - keep
        return to_remove
Example #6
    def argparse(self, parser=None, special_options=False):
        """
        construct or update an argparse.ArgumentParser CLI parser

        Args:
            parser (None | argparse.ArgumentParser): if specified this
                parser is updated with options from this config.

            special_options (bool, default=False):
                adds special scriptconfig options, namely: --config, --dumps,
                and --dump.

        Returns:
            argparse.ArgumentParser : a new or updated argument parser

        CommandLine:
            xdoctest -m scriptconfig.config Config.argparse:0
            xdoctest -m scriptconfig.config Config.argparse:1

        TODO:
            A good CLI spec for lists might be

            # In the case where ``key`` ends with an ``=``, assume the list is
            # given as a comma separated string with optional square brackets
            # at each end.

            --key=[f]

            # In the case where ``key`` does not end with an equals sign and
            # we know the value is supposed to be a list, then we consume
            # arguments until we hit the next one that starts with '--'
            # (which means that list items cannot start with -- but they can
            # contain commas)

        FIXME:

            * In the case where we have an nargs='+' action, specify the
              option with an `=`, and then give positional args after it,
              there is no way to modify the behavior of the action to just
              look at the data in the string without modifying the
              ArgumentParser itself. The action object has no control over
              it. For example `--foo=bar baz biz` will parse as `[baz, biz]`,
              which is really not what we want. We may be able to overload
              ArgumentParser to fix this.

        Example:
            >>> # You can now make instances of this class
            >>> import scriptconfig
            >>> self = scriptconfig.Config.demo()
            >>> parser = self.argparse()
            >>> parser.print_help()
            >>> # xdoctest: +REQUIRES(PY3)
            >>> # Python2 argparse does a hard sys.exit instead of raise
            >>> ns, extra = parser.parse_known_args()

        Example:
            >>> # You can now make instances of this class
            >>> import scriptconfig as scfg
            >>> class MyConfig(scfg.Config):
            >>>     description = 'my CLI description'
            >>>     default = {
            >>>         'path1':  scfg.Value(None, position=1, alias='src'),
            >>>         'path2':  scfg.Value(None, position=2, alias='dst'),
            >>>         'dry':  scfg.Value(False, isflag=True),
            >>>         'approx':  scfg.Value(False, isflag=False, alias=['a1', 'a2']),
            >>>     }
            >>> self = MyConfig()
            >>> special_options = True
            >>> parser = None
            >>> parser = self.argparse(special_options=special_options)
            >>> parser.print_help()
            >>> self._read_argv(argv=['objection', '42', '--path1=overruled!'])
            >>> print('self = {!r}'.format(self))

        Ignore:
            >>> self._read_argv(argv=['hi','--path1=foobar'])
            >>> self._read_argv(argv=['hi', 'hello', '--path1=foobar'])
            >>> self._read_argv(argv=['hi', 'hello', '--path1=foobar', '--help'])
            >>> self._read_argv(argv=['--path1=foobar', '--path1=baz'])
            >>> print('self = {!r}'.format(self))
        """
        import argparse

        if parser is None:
            parserkw = self._parserkw()
            parser = argparse.ArgumentParser(**parserkw)

        # Use a custom action to mark which values were explicitly set on
        # the command line
        parser._explicitly_given = set()

        parent = self

        class ParseAction(argparse.Action):
            def __init__(self, *args, **kwargs):
                super(ParseAction, self).__init__(*args, **kwargs)
                # With scriptconfig nothing should be required by default;
                # all positional arguments should have keyword-arg variants.
                # Setting required=False here prevents positional args from
                # erroring if they are not specified. I don't think there are
                # other side effects, but we should make sure that is
                # actually the case.
                self.required = False

                if self.type is None:
                    # Is this the right place to put this?
                    def _mytype(value):
                        key = self.dest
                        template = parent.default[key]
                        if not isinstance(template, Value):
                            # smartcast non-valued params from commandline
                            value = smartcast.smartcast(value)
                        else:
                            value = template.cast(value)
                        return value

                    self.type = _mytype

                # print('self.type = {!r}'.format(self.type))

            def __call__(action, parser, namespace, values, option_string=None):
                # print('CALL action = {!r}'.format(action))
                # print('option_string = {!r}'.format(option_string))
                # print('values = {!r}'.format(values))

                if isinstance(values, list) and len(values):
                    # We got a list of lists, which we hack into a flat list
                    if isinstance(values[0], list):
                        import itertools as it
                        values = list(it.chain(*values))

                setattr(namespace, action.dest, values)
                parser._explicitly_given.add(action.dest)

        # IIRC: this ensures each key has a real Value class
        _metadata = {
            key: self._data[key]
            for key, value in self._default.items()
            if isinstance(self._data[key], Value)
        }  # :type: Dict[str, Value]
        _positions = {k: v.position for k, v in _metadata.items()
                      if v.position is not None}
        if _positions:
            if ub.find_duplicates(_positions.values()):
                raise Exception('two values have the same position')
            _keyorder = ub.oset(ub.argsort(_positions))
            _keyorder |= (ub.oset(self._default) - _keyorder)
        else:
            _keyorder = list(self._default.keys())

        def _add_arg(parser, name, key, argkw, positional, isflag, isalias):
            _argkw = argkw.copy()

            if isalias:
                _argkw['help'] = 'alias of {}'.format(key)
                _argkw.pop('default', None)
                # flags cannot have flag aliases
                isflag = False

            elif positional:
                parser.add_argument(name, **_argkw)

            if isflag:
                # Can we support both flag and setitem methods of cli
                # parsing?
                if not isinstance(_argkw.get('default', None), bool):
                    raise ValueError('can only use isflag with bools')
                _argkw.pop('type', None)
                _argkw.pop('choices', None)
                _argkw.pop('action', None)
                _argkw.pop('nargs', None)
                _argkw['dest'] = key

                _argkw_true = _argkw.copy()
                _argkw_true['action'] = 'store_true'

                _argkw_false = _argkw.copy()
                _argkw_false['action'] = 'store_false'
                _argkw_false.pop('help', None)

                parser.add_argument('--' + name, **_argkw_true)
                parser.add_argument('--no-' + name, **_argkw_false)
            else:
                parser.add_argument('--' + name, **_argkw)

        mode = 1

        alias_registry = []
        for key, value in self._data.items():
            # key: str
            # value: Any | Value
            argkw = {}
            argkw['help'] = ''
            positional = None
            isflag = False
            if key in _metadata:
                # Use the metadata in the Value class to enhance argparse
                _value = _metadata[key]
                argkw.update(_value.parsekw)
                value = _value.value
                isflag = _value.isflag
                positional = _value.position
            else:
                _value = value if isinstance(value, Value) else None

            if not argkw['help']:
                argkw['help'] = '<undocumented>'

            argkw['default'] = value
            argkw['action'] = ParseAction

            name = key
            _add_arg(parser, name, key, argkw, positional, isflag, isalias=False)

            if _value is not None:
                if _value.alias:
                    alts = _value.alias
                    alts = alts if ub.iterable(alts) else [alts]
                    for alias in alts:
                        tup = (alias, key, argkw)
                        alias_registry.append(tup)
                        if mode == 0:
                            name = alias
                            _add_arg(parser, name, key, argkw, positional, isflag, isalias=True)

        if mode == 1:
            for tup in alias_registry:
                (alias, key, argkw) = tup
                name = alias
                dest = key
                _add_arg(parser, name, dest, argkw, positional, isflag, isalias=True)

        if special_options:
            parser.add_argument('--config', default=None, help=ub.codeblock(
                '''
                special scriptconfig option that accepts the path to an on-disk
                configuration file, and loads that into this {!r} object.
                ''').format(self.__class__.__name__))

            parser.add_argument('--dump', default=None, help=ub.codeblock(
                '''
                If specified, dump this config to disk.
                ''').format(self.__class__.__name__))

            parser.add_argument('--dumps', action='store_true', help=ub.codeblock(
                '''
                If specified, dump this config to stdout.
                ''').format(self.__class__.__name__))

        return parser
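
The `_keyorder` construction in the body above is a compact ordered-set idiom: sort the positional keys by their position, then append every remaining default exactly once. A standalone sketch with hypothetical position metadata:

import ubelt as ub

_default = {'path1': None, 'path2': None, 'dry': False, 'approx': False}
_positions = {'path2': 2, 'path1': 1}            # hypothetical Value.position data

_keyorder = ub.oset(ub.argsort(_positions))      # keys sorted by their position
_keyorder |= (ub.oset(_default) - _keyorder)     # then the rest, in declaration order
assert list(_keyorder) == ['path1', 'path2', 'dry', 'approx']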
Example #7
def benchmark_nested_break():
    """
    There are several ways to do a nested break, but which one is best?

    https://twitter.com/nedbat/status/1515345787563220996
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import itertools as it

    def method1_itertools(iter1, iter2):
        for i, j in it.product(iter1, iter2):
            if i == 20 and j == 20:
                break

    def method2_except(iter1, iter2):
        class Found(Exception):
            pass
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise Found
        except Found:
            pass

    class FoundPredef(Exception):
        pass

    def method2_5_except_predef(iter1, iter2):
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise FoundPredef
        except FoundPredef:
            pass

    def method3_gendef(iter1, iter2):
        def genfunc():
            for i in iter1:
                for j in iter2:
                    yield i, j

        for i, j in genfunc():
            if i == 20 and j == 20:
                break

    def method4_genexp(iter1, iter2):
        genexpr = ((i, j) for i in iter1 for j in iter2)
        for i, j in genexpr:
            if i == 20 and j == 20:
                break

    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(1000, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    import numpy as np
    basis = {
        'method': ['method1_itertools', 'method2_except', 'method2_5_except_predef', 'method3_gendef', 'method4_genexp'],
        # 'n1': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        # 'n2': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        'size': np.logspace(1, np.log2(10000), 30, base=2).astype(int),
        'input_style': ['range', 'list', 'customized_iter'],
        # 'param_name': [param values],
    }
    xlabel = 'size'
    xinput_labels = ['n1', 'n2', 'size']

    # Set these to param labels that directly transfer to method kwargs
    kw_labels = []
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['input_style'],
        'size': [],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel} - xinput_labels) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    def make_input(params):
        # Given the parameterization make the benchmark function input
        # n1 = params['n1']
        # n2 = params['n2']
        size = params['size']
        n1 = int(np.sqrt(size))
        n2 = int(np.sqrt(size))
        if params['input_style'] == 'list':
            iter1 = list(range(n1))
            iter2 = list(range(n2))
        elif params['input_style'] == 'range':
            iter1 = range(n1)
            iter2 = range(n2)
        elif params['input_style'] == 'customized_iter':
            import random
            def rando1():
                rng1 = random.Random(0)
                for _ in range(n1):
                    yield rng1.randint(0, n2)

            def rando2():
                rng2 = random.Random(1)
                for _ in range(n2):
                    yield rng2.randint(0, n2)

            iter1 = rando1()
            iter2 = rando2()
        else:
            raise KeyError
        return {'iter1': iter1, 'iter2': iter2}

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        # size = params['n1'] * params['n2']
        # params['size'] = size
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(),  kw_labels)

        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            kwargs.update(make_input(params))
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # chunk_iter = ub.chunks(ti.times, ti.bestof)
            # times = list(map(min, chunk_iter))  # TODO: timerit method for this
            times = ti.robust_times()
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    other_keys = sorted(set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take into account the fact that some
            # "games" (i.e. parameter settings) are more important than
            # others, but it should be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('method_ratings = {}'.format(ub.repr2(method_ratings, nl=1)))
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title(f'Benchmark Nested Breaks: #Trials {ti.num}, bestof {ti.bestof}')
        ax.set_xlabel(f'{xlabel}')
        ax.set_ylabel('Time')
        ax.set_xscale('log')
        ax.set_yscale('log')

        try:
            __IPYTHON__
        except NameError:
            plt.show()
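
The `group_labels['hue']` computation used here (and repeated in the later benchmarks) is plain ordered-set arithmetic: take every basis key, drop the x-axis keys, and drop anything already claimed by another plot group. A reduced sketch:

import ubelt as ub

basis = {'method': [], 'size': [], 'input_style': []}
xlabel = 'size'
claimed = {'input_style'}                  # e.g. already assigned to 'style'
hue = list(ub.oset(basis) - {xlabel} - claimed)
assert hue == ['method']                   # only the unclaimed keys become hues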
Example #8
def run_pvpoke_ultra_experiment():
    """
    https://pvpoke.com/battle/matrix/

    !pip install selenium
    """
    """
    Relevant page items:

    <button class="add-poke-btn button">+ Add Pokemon</button>
    '//*[@id="main"]/div[3]/div[3]/div/div[1]/button[1]'
    '/html/body/div[1]/div/div[3]/div[3]/div/div[1]/button[1]'

    <input class="poke-search" type="text" placeholder="Search name">
    /html/body/div[5]/div/div[3]/div[1]/input


    /html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/a/span[1]


    Level Cap
    /html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[5]


    # IV GROUP
    ivs-group

    save-poke

    import sys, ubelt
    sys.path.append(ubelt.expandpath('~/code/pypogo'))
    from pypogo.pvpoke_experiment import *  # NOQA
    from pypogo.pvpoke_experiment import _oldstuff
    """
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support.ui import Select
    import ubelt as ub
    import os
    import pathlib
    import time
    import pandas as pd
    import pypogo

    # Requires the driver be in the PATH
    fpath = ensure_selenium_chromedriver()
    os.environ['PATH'] = os.pathsep.join(
        ub.oset(os.environ['PATH'].split(os.pathsep))
        | ub.oset([str(fpath.parent)]))

    url = 'https://pvpoke.com/battle/matrix/'
    # chrome_exe = ub.find_exe("google-chrome")
    driver = webdriver.Chrome()
    driver.get(url)

    league = 'Great'
    # league = 'Master40'
    if league == 'Great':
        league_box_target = 'Great League (CP 1500)'

        have_ivs = list(
            ub.oset([
                tuple([int(x) for x in p.strip().split(',') if x])
                for p in ub.codeblock('''
            10, 10, 12,
            10, 12, 14,
            10, 12, 14,
            10, 13, 10,
            10, 13, 12,
            10, 14, 14,
            11, 12, 14,
            11, 14, 12,
            11, 14, 15,
            11, 15, 11,
            11, 15, 11,
            11, 15, 12,
            11, 15, 12,
            12, 10, 12,
            12, 11, 12,
            12, 12, 15,
            12, 14, 11,
            12, 14, 15,
            12, 15, 11,
            12, 15, 12
            12, 15, 12,
            13, 11, 13
            13, 12, 10
            13, 12, 13,
            13, 13, 10,
            13, 13, 11,
            13, 15, 10,
            13, 15, 11,
            13, 15, 11,
            14, 10, 12,
            14, 11, 10,
            14, 11, 10,
            14, 13, 11
            14, 13, 14,
            15, 10, 12
            15, 11, 10,
            15, 11, 11,
            15, 12, 11
            ''').split('\n')
            ]))
        to_check_mons = [
            pypogo.Pokemon('Deoxys',
                           form='defense',
                           ivs=ivs,
                           moves=['Counter', 'Rock Slide',
                                  'Psycho Boost']).maximize(1500)
            for ivs in have_ivs
        ]
        meta_text = 'Great League Meta'
    elif league == 'Master40':
        league_box_target = 'Master League (Level 40)'
        meta_text = 'Master League Meta'
        # Test the effect of best buddies vs the master league
        to_check_mons = [
            pypogo.Pokemon('Mewtwo', ivs=[15, 15, 15], level=40),
            pypogo.Pokemon('Mewtwo', ivs=[15, 15, 15], level=41),
            pypogo.Pokemon('Garchomp', ivs=[15, 15, 15], level=40),
            pypogo.Pokemon('Garchomp', ivs=[15, 15, 15], level=41),
            pypogo.Pokemon('Dragonite', ivs=[15, 14, 15], level=40),
            pypogo.Pokemon('Dragonite', ivs=[15, 14, 15], level=41),
            pypogo.Pokemon('Giratina',
                           form='origin',
                           ivs=[15, 14, 15],
                           level=40),
            pypogo.Pokemon('Giratina',
                           form='origin',
                           ivs=[15, 14, 15],
                           level=41),
            pypogo.Pokemon('Kyogre', ivs=[15, 15, 14], level=40),
            pypogo.Pokemon('Kyogre', ivs=[15, 15, 14], level=41),
            pypogo.Pokemon('Groudon', ivs=[14, 14, 13], level=40),
            pypogo.Pokemon('Groudon', ivs=[14, 14, 13], level=41),
            pypogo.Pokemon('Togekiss', ivs=[15, 15, 14], level=40),
            pypogo.Pokemon('Togekiss', ivs=[15, 15, 14], level=41),
        ]
        for mon in to_check_mons:
            mon.populate_all()
    else:
        pass

    league_select = driver.find_elements_by_class_name('league-select')[0]
    league_select.click()
    league_select.send_keys(league_box_target)
    league_select.click()

    league_select.text.split('\n')
    league_select.send_keys('\n')
    league_select.send_keys('\n')

    def add_pokemon(mon):
        add_poke1_button = driver.find_elements_by_class_name(
            'add-poke-btn')[0]
        add_poke1_button.click()

        select_drop = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/select')

        if 1:
            import xdev
            all_names = select_drop.text.split('\n')
            distances = xdev.edit_distance(mon.display_name(), all_names)
            chosen_name = all_names[ub.argmin(distances)]
        else:
            chosen_name = mon.name

        search_box = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/input')
        search_box.send_keys(chosen_name)

        advanced_ivs_arrow = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/a/span[1]')
        advanced_ivs_arrow.click()

        level40_cap = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[2]'
        )
        level41_cap = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[3]'
        )
        level50_cap = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[4]'
        )
        level51_cap = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[5]'
        )

        if mon.level >= 51:
            level51_cap.click()
        elif mon.level >= 50:
            level50_cap.click()
        elif mon.level >= 41:
            level41_cap.click()
        elif mon.level >= 40:
            level40_cap.click()

        level_box = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[1]/input'
        )
        level_box.click()
        level_box.clear()
        level_box.clear()
        level_box.send_keys(str(mon.level))

        iv_a = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[1]/div/input[1]'
        )
        iv_d = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[1]/div/input[2]'
        )
        iv_s = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[1]/div/input[3]'
        )

        # TODO
        # driver.find_elements_by_class_name('move-select')

        iv_a.clear()
        iv_a.send_keys(str(mon.ivs[0]))

        iv_d.clear()
        iv_d.send_keys(str(mon.ivs[1]))

        iv_s.clear()
        iv_s.send_keys(str(mon.ivs[2]))

        # USE_MOVES = 1
        if mon.moves is not None:
            # mon.populate_all()

            fast_select = driver.find_element_by_xpath(
                '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[10]/select[1]')
            fast_select.click()
            fast_select.send_keys(mon.pvp_fast_move['name'])
            fast_select.send_keys(Keys.ENTER)

            charge1_select = driver.find_element_by_xpath(
                '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[10]/select[2]')
            charge1_select.click()
            charge1_select.send_keys(mon.pvp_charge_moves[0]['name'])
            charge1_select.send_keys(Keys.ENTER)

            charge2_select = driver.find_element_by_xpath(
                '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[10]/select[3]')
            charge2_select.click()
            charge2_select.send_keys(mon.pvp_charge_moves[1]['name'])
            charge2_select.send_keys(Keys.ENTER)

        save_button = driver.find_elements_by_class_name('save-poke')[0]
        save_button.click()

    quickfills = driver.find_elements_by_class_name('quick-fill-select')
    quickfill = quickfills[1]
    quickfill.text.split('\n')
    quickfill.click()
    quickfill.send_keys(meta_text)
    quickfill.click()

    import pypogo
    # mon1 = pypogo.Pokemon('Mewtwo', ivs=[15, 15, 15], level=40)
    # mon2 = pypogo.Pokemon('Mewtwo', ivs=[15, 15, 15], level=41)

    if 1:
        for mon in to_check_mons:
            add_pokemon(mon)

    shield_selectors = driver.find_elements_by_class_name('shield-select')
    shield_selectors[2].click()
    shield_selectors[2].send_keys('No shields')
    shield_selectors[2].send_keys(Keys.ENTER)

    shield_selectors[3].click()
    shield_selectors[3].send_keys('No shields')
    shield_selectors[3].send_keys(Keys.ENTER)

    shield_selectors[0].click()

    battle_btn = driver.find_elements_by_class_name('battle-btn')[0]
    battle_btn.click()

    # Clear previous downloaded files
    dlfolder = pathlib.Path(ub.expandpath('$HOME/Downloads'))
    for old_fpath in list(dlfolder.glob('_vs*.csv')):
        old_fpath.unlink()

    time.sleep(2.0)

    # Download new data
    dl_btn = driver.find_element_by_xpath(
        '//*[@id="main"]/div[4]/div[9]/div/a')
    dl_btn.click()

    while len(list(dlfolder.glob('_vs*.csv'))) < 1:
        time.sleep(0.1)  # poll for the download instead of busy-waiting

    new_fpaths = list(dlfolder.glob('_vs*.csv'))
    assert len(new_fpaths) == 1
    fpath = new_fpaths[0]

    data = pd.read_csv(fpath, header=0, index_col=0)

    if 1:
        # GROUP ANALYSIS
        data.sum(axis=1).sort_values()
        (data > 500).sum(axis=1).sort_values()

        flipped = []
        for key, col in data.T.iterrows():
            if not ub.allsame(col > 500):
                flipped.append(key)

        flip_df = data.loc[:, flipped]

        def color(x):
            if x > 500:
                return ub.color_text(str(x), 'green')
            else:
                return ub.color_text(str(x), 'red')

        print(flip_df.applymap(color))
        print(flip_df.columns.tolist())

        (data > 500)
    else:
        # PAIR ANALYSIS
        pairs = list(ub.iter_window(range(len(data)), step=2))
        for i, j in pairs:
            print('-----')
            matchup0 = data.iloc[i]
            matchup1 = data.iloc[j]
            delta = matchup1 - matchup0
            print(delta[delta != 0])

            wins0 = matchup0 > 500
            wins1 = matchup1 > 500
            flips = (wins0 != wins1)
            flipped_vs = matchup0.index[flips]
            num_flips = sum(flips)
            print('flipped_vs = {!r}'.format(flipped_vs))
            print('num_flips = {!r}'.format(num_flips))
            print(matchup0.mean())
            print(matchup1.mean())
            print(matchup1.mean() / matchup0.mean())
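
One reusable detail from the setup above is the PATH manipulation: joining two `ub.oset`s de-duplicates entries while keeping their first-seen order, which is exactly what you want when appending a driver directory. A minimal sketch ('/opt/chromedriver' stands in for the hypothetical driver directory):

import os
import ubelt as ub

path = os.pathsep.join(['/usr/bin', '/usr/local/bin', '/usr/bin'])
merged = os.pathsep.join(
    ub.oset(path.split(os.pathsep)) | ub.oset(['/opt/chromedriver']))
assert merged == os.pathsep.join(
    ['/usr/bin', '/usr/local/bin', '/opt/chromedriver'])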
Example #9
def benchmark_pathlib_vs_fspath():
    import ubelt as ub
    import pathlib
    import pandas as pd
    import random
    import timerit
    import os

    def method_pathlib(inputs):
        p = pathlib.Path(*inputs)

    def method_ospath(inputs):
        p = os.path.join(*inputs)

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(10000, bestof=10, verbose=2)

    basis = {
        'method': ['method_pathlib', 'method_ospath'],
        'num_parts': [2, 4, 8, 12, 16],
    }
    xlabel = 'num_parts'
    kw_labels = []
    group_labels = {
        'style': [],
        'size': [],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(ub.dict_isect(
                params, labels),
                                                  compact=1,
                                                  si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        n = params['num_parts']
        inputs = [chr(random.randint(97, 120)) for _ in range(n)]
        kwargs['inputs'] = inputs
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('Number of parts')
        ax.set_ylabel('Time')
Example #10
def test_extend():
    self = ub.oset()
    self.extend([3, 1, 2, 3])
    assert self == [3, 1, 2]
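
A hedged companion check: union de-duplicates the same way `extend` does while preserving first-seen order, so either spelling works for building an ordered pool:

import ubelt as ub

def test_union_order():
    self = ub.oset([3, 1]) | ub.oset([2, 3])
    assert self == [3, 1, 2]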
Example #11
def benchmark_repeat_vs_reduce_mul():
    import ubelt as ub
    import numpy as np
    import pandas as pd
    import timerit

    def reduce_daq_rec(func, arrs):
        if len(arrs) == 1:
            return arrs[0]
        if len(arrs) == 2:
            return func(arrs[0], arrs[1])
        elif len(arrs) == 3:
            return func(func(arrs[0], arrs[1]), arrs[2])
        else:
            arrs1 = arrs[0::2]
            arrs2 = arrs[1::2]
            res1 = reduce_daq_rec(func, arrs1)
            res2 = reduce_daq_rec(func, arrs2)
            res = func(res1, res2)
        return res

    def reduce_daq_iter(func, arrs):
        """
        https://www.baeldung.com/cs/convert-recursion-to-iteration
        https://stackoverflow.com/questions/159590/way-to-go-from-recursion-to-iteration
        arrs = [2, 3, 5, 7, 11, 13, 17, 21]
        """
        raise NotImplementedError
        # TODO: make the iterative version
        from collections import deque
        empty_result = None
        stack = deque([(arrs, empty_result)])
        idx = 0
        while stack:
            print('----')
            print('stack = {}'.format(ub.repr2(list(stack), nl=1)))
            arrs0, result = stack.pop()
            if len(arrs0) == 0:
                raise Exception
            if result is not None:
                # raise Exception
                results = [result]
                while stack:
                    next_arrs0, next_result = stack.pop()
                    if next_result is None:
                        break
                    else:
                        results.append(next_result)
                if results:
                    if len(results) == 1:
                        stack.append((results, results[0]))
                    else:
                        stack.append((results, None))
                if next_result is None:
                    stack.append((next_arrs0, None))
            elif result is None:
                if len(arrs0) == 1:
                    result = arrs0[0]
                    stack.append((arrs0, result))
                    # return arrs0[0]
                if len(arrs0) == 2:
                    result = func(arrs0[0], arrs0[1])
                    stack.append((arrs0, result))
                elif len(arrs0) == 3:
                    result = func(func(arrs0[0], arrs0[1]), arrs0[2])
                    stack.append((arrs0, result))
                else:
                    arrs01 = arrs0[0::2]
                    arrs02 = arrs0[1::2]
                    stack.append((arrs0, empty_result))
                    stack.append((arrs01, empty_result))
                    stack.append((arrs02, empty_result))
                    # res1 = reduce_daq_rec(func, arrs01)
                    # res2 = reduce_daq_rec(func, arrs2)
                    # res = func(res1, res2)
            idx += 1
            if idx > 10:
                raise Exception
        return res

    def method_daq_rec(arrs):
        return reduce_daq_rec(np.multiply, arrs)

    def method_repeat(arrs):
        """
        helper code:
            arr_names = ['a{:02d}'.format(idx) for idx in range(1, 32 + 1)]
            lhs = ', '.join(arr_names)
            rhs = ' * '.join(arr_names)
            print(f'{lhs} = arrs')
            print(f'ret = {rhs}')
        """
        # Hard coded pure python syntax for multiplying
        if len(arrs) == 4:
            a01, a02, a03, a04 = arrs
            ret = a01 * a02 * a03 * a04
        elif len(arrs) == 8:
            a01, a02, a03, a04, a05, a06, a07, a08 = arrs
            ret = a01 * a02 * a03 * a04 * a05 * a06 * a07 * a08
        elif len(arrs) == 32:
            a01, a02, a03, a04, a05, a06, a07, a08, a09, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 = arrs
            ret = a01 * a02 * a03 * a04 * a05 * a06 * a07 * a08 * a09 * a10 * a11 * a12 * a13 * a14 * a15 * a16 * a17 * a18 * a19 * a20 * a21 * a22 * a23 * a24 * a25 * a26 * a27 * a28 * a29 * a30 * a31 * a32
        return ret

    def method_reduce(arrs):
        ret = np.multiply.reduce(arrs)
        return ret

    def method_stack(arrs):
        stacked = np.stack(arrs)
        ret = stacked.prod(axis=0)
        return ret

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(10000, bestof=10, verbose=2)

    basis = {
        'method':
        ['method_repeat', 'method_reduce', 'method_stack', 'method_daq_rec'],
        'arr_size': [10, 100, 1000, 10000],
        'num_arrs': [4, 8, 32],
    }
    xlabel = 'arr_size'
    kw_labels = []
    group_labels = {
        'style': ['num_arrs'],
        'size': [],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(ub.dict_isect(
                params, labels),
                                                  compact=1,
                                                  si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        arr_size = params['arr_size']
        num_arrs = params['num_arrs']

        arrs = []
        for _ in range(num_arrs):
            arr = np.random.rand(arr_size)
            arrs.append(arr)
        kwargs['arrs'] = arrs
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('Array Size')
        ax.set_ylabel('Time')
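
As a sanity check on the divide-and-conquer reduction (with the `arrs[2]` index fix applied above), this standalone sketch compares it against `functools.reduce` on plain integers:

import operator
from functools import reduce

def reduce_daq_rec(func, arrs):
    if len(arrs) == 1:
        return arrs[0]
    if len(arrs) == 2:
        return func(arrs[0], arrs[1])
    elif len(arrs) == 3:
        return func(func(arrs[0], arrs[1]), arrs[2])
    # split into even/odd strides and recurse on each half
    return func(reduce_daq_rec(func, arrs[0::2]),
                reduce_daq_rec(func, arrs[1::2]))

for arrs in [[2], [2, 3], [2, 3, 5], [2, 3, 5, 7, 11, 13, 17, 21]]:
    assert reduce_daq_rec(operator.mul, arrs) == reduce(operator.mul, arrs)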
Example #12
def test_frames_are_in_order():
    import kwcoco
    import ubelt as ub
    import random

    def is_sorted(x):
        return x == sorted(x)

    total_frames = 30
    total_videos = 4
    max_frames_per_video = 20

    # Seed rng for reproducibility
    rng = random.Random(926960862)

    # Initialize empty dataset
    dset = kwcoco.CocoDataset()

    # Add some number of videos
    vidid_pool = [
        dset.add_video('vid_{:03d}'.format(vididx))
        for vididx in range(total_videos)
    ]
    vidid_to_frame_pool = {
        vidid: ub.oset(range(max_frames_per_video))
        for vidid in vidid_pool
    }

    # Add some number of frames to the videos in a random order
    for imgidx in range(total_frames):
        vidid = rng.choice(vidid_pool)
        frame_pool = vidid_to_frame_pool[vidid]
        assert frame_pool, 'ran out of frames'
        frame_index = rng.choice(frame_pool)
        frame_pool.remove(frame_index)

        name = 'img_{:03d}'.format(imgidx)
        dset.add_image(video_id=vidid, frame_index=frame_index, name=name)

    # Test that our image ids are always ordered by frame ids
    vidid_to_gids = dset.index.vidid_to_gids
    gids_were_in_order = []
    for vidid, gids in vidid_to_gids.items():
        gids_were_in_order.append(is_sorted(gids))
        frame_idxs = [dset.imgs[gid]['frame_index'] for gid in gids]

        # Note: this check is always valid
        assert is_sorted(frame_idxs), (
            'images in vidid_to_gids must be sorted by frame_index')

    # Note: this check has a chance of failing for other params / seeds
    assert not all(gids_were_in_order), (
        'the probability we randomly have ordered image ids is low, '
        'and 0 when we seed the rng')

    try:
        import sqlalchemy  # NOQA
    except Exception:
        pass
    else:
        # Test that the sql view works too
        sql_dset = dset.view_sql(memory=True)

        vidid_to_gids = dict(sql_dset.index.vidid_to_gids)
        gids_were_in_order = []
        for vidid, gids in vidid_to_gids.items():
            gids_were_in_order.append(is_sorted(gids))
            frame_idxs = [dset.imgs[gid]['frame_index'] for gid in gids]

            # Note: this check is always valid
            assert is_sorted(frame_idxs), (
                'images in vidid_to_gids must be sorted by frame_index')

        # Note: this check has a chance of failing for other params / seeds
        assert not all(gids_were_in_order), (
            'the probability we randomly have ordered image ids is low, '
            'and 0 when we seed the rng')
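
The frame pools above are `ub.oset`s rather than plain sets because `random.Random.choice` needs an indexable sequence; an ordered set supports integer indexing and set-style removal at the same time. A minimal sketch:

import random
import ubelt as ub

rng = random.Random(0)
pool = ub.oset(range(5))
frame = rng.choice(pool)     # ok: oset supports integer indexing
pool.remove(frame)           # ok: oset also supports set-style removal
assert frame not in pool and len(pool) == 4
# rng.choice(set(range(5))) would raise TypeError: sets are not indexable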
Example #13
def auto_argparse(func):
    """
    Transform a function with a Google Style Docstring into an
    `argparse.ArgumentParser`.

    TODO:
        - [ ] Handle booleans consistently, allow --flag=True and --flag=False

    Args:
        func (callable): function with kwargs
    """
    from xdoctest.docstr import docscrape_google as scrape
    import ast
    import argparse
    import ubelt as ub
    import inspect
    spec = inspect.getfullargspec(func)  # getargspec was removed in Python 3.11

    # Parse default values from the function dynamically
    try:
        import xinspect
        kwdefaults = xinspect.get_func_kwargs(func)
    except Exception:
        # Fall back to deriving the defaults directly from the signature
        kwdefaults = dict(zip(spec.args[-len(spec.defaults):], spec.defaults))

    # Parse help and description information from a google-style docstring
    docstr = func.__doc__
    description = scrape.split_google_docblocks(docstr)[0][1][0].strip()

    # TODO: allow scraping from the kwargs block as well
    google_args = {
        argdict['name']: argdict
        for argdict in scrape.parse_google_args(docstr)
    }

    argnames = ub.oset(spec.args) | ub.oset(kwdefaults)
    argnames = (ub.oset(google_args) & argnames) | argnames

    # DEBUG = 1
    # if DEBUG:
    #     print(ub.repr2(google_args))

    # Create the argument parser and register each argument
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for arg in argnames:
        argkw = {}
        if arg in kwdefaults:
            argkw['default'] = kwdefaults[arg]
        if arg in google_args:
            garg = google_args[arg]
            argkw['help'] = garg['desc']
            # print('-----')
            # print('argkw = {}, {}'.format(arg, ub.repr2(argkw)))
            try:
                if garg['type'] == 'PathLike':
                    argkw['type'] = str
                elif garg['type'] == 'bool':

                    def _parse_bool(s):
                        return bool(ast.literal_eval(s))

                    argkw['type'] = _parse_bool
                else:
                    argkw['type'] = eval(garg['type'], {})
                    # literal_eval doesn't handle types
                    # argkw['type'] = ast.literal_eval(garg['type'])
            except Exception as ex:
                # print('{}, ex = {!r}'.format(arg, ex))
                pass
        # print('-----')
        # print('argkw = {}, {!r}'.format(arg, argkw))
        parser.add_argument('--' + arg, **argkw)
    return parser
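
The two `ub.oset` lines in the middle of `auto_argparse` first collect every known argument name, then rotate the documented ones to the front without dropping the rest. A sketch with hypothetical names:

import ubelt as ub

spec_args = ['src', 'dst', 'verbose']           # from the function signature
kwdefaults = {'verbose': False, 'dry': True}    # parsed default values
google_args = {'dst': {}, 'dry': {}}            # names found in the docstring

argnames = ub.oset(spec_args) | ub.oset(kwdefaults)
argnames = (ub.oset(google_args) & argnames) | argnames
# documented names lead; everything else keeps its previous order
assert list(argnames) == ['dst', 'dry', 'src', 'verbose']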
Example #14
    def make_optimizer(hyper, named_parameters):
        """
        Instantiate the optimizer defined by the hyperparams

        Contains special logic to create param groups

        Example:
            >>> import netharn as nh
            >>> config = {'optimizer': 'sgd', 'params': [
            >>>     {'lr': 3e-3, 'params': '.*\\.bias'},
            >>>     {'lr': 1e-3, 'params': '.*\\.weight'},
            >>>     #{'lr': 100, 'params': '.*\\.doesnotmatch'},
            >>> ]}
            >>> optim_ = nh.api.Optimizer.coerce(config)
            >>> hyper = nh.HyperParams(optimizer=optim_)
            >>> model = nh.models.ToyNet1d()
            >>> named_parameters = list(model.named_parameters())
            >>> optimizer = hyper.make_optimizer(named_parameters)
            >>> print('optimizer = {!r}'.format(optimizer))
        """
        if hyper._optimizer_info['instance'] is not None:
            return hyper._optimizer_info['instance']
        # What happens if we want to group parameters
        optim_kw = hyper.optimizer_params.copy()
        params = optim_kw.pop('params', None)
        if params is None:
            param_groups = [p for (name, p) in named_parameters]
        else:
            import re
            named_parameters = list(named_parameters)
            name_to_param = dict(named_parameters)
            param_groups = []
            if isinstance(params, dict):
                # remember the group key
                groups = [{'key': k, **g} for k, g in params.items()]
            elif isinstance(params, list):
                groups = params

            PREVENT_DUPLICATES = 1

            seen_ = set()
            for group in groups:
                # Transform param grouping specifications into real params
                group = group.copy()
                spec = group.pop('params')
                if isinstance(spec, list):
                    if len(spec):
                        first = ub.peek(spec)
                        if isinstance(first, str):
                            real_params = [name_to_param[k] for k in spec]
                        elif isinstance(first, torch.nn.Parameter):
                            real_params = spec
                        else:
                            raise TypeError(type(first))
                    else:
                        real_params = []

                # Python 3.6 doesn't have re.Pattern
                elif isinstance(spec, str) or hasattr(spec, 'match'):
                    if hasattr(spec, 'match'):
                        pat = spec
                    else:
                        pat = re.compile(spec)
                    real_params = [
                        p for name, p in name_to_param.items()
                        if pat.match(name)
                    ]
                else:
                    raise TypeError(type(spec))

                if PREVENT_DUPLICATES:
                    # give priority to earlier params
                    # This is Python 3.6+ only
                    real_params = list(ub.oset(real_params) - seen_)
                    seen_.update(real_params)

                group['params'] = real_params
                param_groups.append(group)

            CHECK = 1
            if CHECK:
                # Determine if we are using the same param more than once
                # or if we are not using a param at all.
                # NOTE: torch does do a duplicate check.
                param_group_ids = []
                for group in param_groups:
                    ids = list(map(id, group['params']))
                    param_group_ids.append(ids)

                all_param_ids = [id(p) for n, p in named_parameters]
                flat_ids = list(ub.flatten(param_group_ids))
                freq = ub.dict_hist(flat_ids, labels=all_param_ids)
                num_unused = sum(v == 0 for v in freq.values())
                num_dups = sum(v > 1 for v in freq.values())
                if num_unused:
                    warnings.warn(
                        'There are {} unused params'.format(num_unused))
                if num_dups:
                    warnings.warn(
                        'There are {} duplicate params'.format(num_dups))

        optimizer = hyper.optimizer_cls(param_groups, **optim_kw)
        return optimizer
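
The `PREVENT_DUPLICATES` branch gives earlier param groups priority by subtracting a running `seen_` set inside an ordered set, so each parameter lands in exactly one group. A sketch with strings standing in for `torch` parameters:

import ubelt as ub

groups = [['bias1', 'weight1'], ['weight1', 'weight2']]
seen_ = set()
deduped = []
for matched in groups:
    real_params = list(ub.oset(matched) - seen_)   # earlier groups win ties
    seen_.update(real_params)
    deduped.append(real_params)
assert deduped == [['bias1', 'weight1'], ['weight2']]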
Example #15
def benchmark_unpack():
    """
    What is faster: unpacking items with slice syntax or tuple unpacking?

    Slice unpacking seems to be a tad faster.
    """
    import ubelt as ub
    import random
    import pandas as pd
    import timerit
    import string

    def tuple_unpack(items):
        *prefix, key = items
        return prefix, key

    def slice_unpack(items):
        prefix, key = items[:-1], items[-1]
        return prefix, key

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(5000, bestof=3, verbose=2)

    basis = {
        'method': ['tuple_unpack', 'slice_unpack'],
        'size': list(range(1, 64 + 1)),
        'type': ['string', 'float'],
    }
    xlabel = 'size'
    kw_labels = []
    group_labels = {
        'style': ['type'],
        'size': [],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(ub.dict_isect(
                params, labels),
                                                  compact=1,
                                                  si=1)
        key = ub.repr2(params, compact=1, si=1)
        size = params['size']
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            if params['type'] == 'string':
                items = [
                    ''.join(random.choices(string.printable, k=5))
                    for _ in range(size)
                ]
            elif params['type'] == 'float':
                items = [random.random() for _ in range(size)]
            with timer:
                method(items)
        for time in ti.times:
            row = {
                'time': time,
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('time')
    summary_rows = []
    for method, group in data.groupby('method'):
        row = {}
        row['method'] = method
        row['mean'] = group['time'].mean()
        row['std'] = group['time'].std()
        row['min'] = group['time'].min()
        row['max'] = group['time'].max()
        summary_rows.append(row)
    print(pd.DataFrame(summary_rows).sort_values('mean'))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data,
                     x=xlabel,
                     y='time',
                     marker='o',
                     ax=ax,
                     **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('Number of items')
        ax.set_ylabel('Execution time')
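As a quick cross-check outside the full grid, the two styles can also be timed directly with the stdlib timeit; a minimal sketch (absolute numbers depend on the machine):

import timeit

items = list(range(32))

def tuple_unpack():
    *prefix, key = items
    return prefix, key

def slice_unpack():
    prefix, key = items[:-1], items[-1]
    return prefix, key

print('tuple:', timeit.timeit(tuple_unpack, number=100000))
print('slice:', timeit.timeit(slice_unpack, number=100000))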
Example #16
def load_partial_state(model,
                       model_state_dict,
                       leftover=None,
                       ignore_unset=False,
                       verbose=2,
                       mangle=True,
                       association=None,
                       initializer=None):
    """
    CommandLine:
        python -m netharn.initializers.nninit_base load_partial_state

    Args:
        model (torch.nn.Module): module to initialize

        model_state_dict (dict): state dict we wish to transfer

        leftover (callable): fallback method for initializing incompatible
            areas; if None, those areas are left as-is.

        association (str): controls how we search for the association between
            the two model states. Can be strict, module-hack, prefix-hack, or
            embedding. Default is: module-hack.

        mangle (bool, default=True): If True, mangles tensors that have the
            same key but different shapes, forcing them to fit. This might
            destroy information when forcing a larger tensor into a smaller
            one, or leave extra uninitialized room when a small tensor is
            placed in a larger one. Note: be careful when mangling a
            classification layer if class indexes are not aligned.

        verbose (int): verbosity level

    Returns:
        Dict: info - summary of actions taken

    TODO:
        - [ ] Allow user to specify how incompatible layers are handled.

    Notes:

        Have you ever had the scenario where you have a torch model with a
        state dict with keys that look like:
        `mymodel.detector.layer1.conv.weight`, but a pretrained weight file
        with keys that look like: `module.layer1.conv.weight`?

        The latest version of
        `netharn.initializers.functional.load_partial_state` can handle this
        by solving a maximum-common-subtree-isomorphism problem. This computes
        the largest possible mapping between the two state dictionaries that
        share consistent suffixes.

        >>> # This means you can load an off-the-shelf unmodified pretrained resnet50
        >>> # where the keys might look something like this:
        >>> resnet_keys = {
        >>>     'conv1.weight',
        >>>     'layer1.0.conv1.weight',
        >>>     'layer1.0.conv2.weight',
        >>>     'layer1.0.conv3.weight',
        >>>     'layer1.0.downsample.0.weight',
        >>>     'layer2.0.conv1.weight',
        >>>     'layer2.0.conv2.weight',
        >>>     'layer2.0.conv3.weight',
        >>>     'layer3.0.conv1.weight',
        >>>     'layer4.0.conv1.weight',
        >>>     'fc.weight',
        >>>     'fc.bias',
        >>> }
        >>> #
        >>> # And perhaps you have a model that has a state dict where keys
        >>> # look like this:
        >>> model_keys = {
        >>>     'preproc.conv1.weight'
        >>>     'backbone.layer1.0.conv1.weight',
        >>>     'backbone.layer1.0.conv2.weight',
        >>>     'backbone.layer1.0.conv3.weight',
        >>>     'backbone.layer1.0.downsample.0.weight',
        >>>     'backbone.layer2.0.conv1.weight',
        >>>     'backbone.layer2.0.conv2.weight',
        >>>     'backbone.layer2.0.conv3.weight',
        >>>     'backbone.layer3.0.conv1.weight',
        >>>     'backbone.layer4.0.conv1.weight',
        >>>     'head.conv1'
        >>>     'head.conv2'
        >>>     'head.fc.weight'
        >>>     'head.fc.bias'
        >>> }
        >>> #
        >>> # We can compute a partial mapping between them
        >>> subpaths1, subpaths2 = maximum_common_ordered_subpaths(resnet_keys, model_keys)
        >>> print(ub.repr2(ub.dzip(subpaths1, subpaths2)))
        {
            'layer1.0.conv2.weight':        'backbone.layer1.0.conv2.weight',
            'layer1.0.conv3.weight':        'backbone.layer1.0.conv3.weight',
            'layer1.0.downsample.0.weight': 'backbone.layer1.0.downsample.0.weight',
            'layer2.0.conv1.weight':        'backbone.layer2.0.conv1.weight',
            'layer2.0.conv2.weight':        'backbone.layer2.0.conv2.weight',
            'layer2.0.conv3.weight':        'backbone.layer2.0.conv3.weight',
            'layer3.0.conv1.weight':        'backbone.layer3.0.conv1.weight',
            'layer4.0.conv1.weight':        'backbone.layer4.0.conv1.weight',
        }

        Also, if the sizes of the tensors don't quite fit, they will be
        mangled, i.e. "shoved-in" as best as possible.


    Example:
        >>> import netharn as nh
        >>> self1 = nh.models.ToyNet2d(input_channels=1, num_classes=10)
        >>> self2 = nh.models.ToyNet2d(input_channels=3, num_classes=2)
        >>> self1.hack_param1 = torch.nn.Parameter(torch.rand(1))
        >>> self2.hack_param1 = torch.nn.Parameter(torch.rand(3))
        >>> self2.hack_param2 = torch.nn.Parameter(torch.rand(3))
        >>> model_state_dict = self1.state_dict()
        >>> load_partial_state(self2, model_state_dict)
        >>> load_partial_state(self2, model_state_dict, leftover=torch.nn.init.kaiming_normal_)

    Example:
        >>> import netharn as nh
        >>> xpu = nh.XPU(None)
        >>> self1 = nh.models.ToyNet2d()
        >>> self2 = xpu.mount(self1)
        >>> load_partial_state(self2, self1.state_dict())
        >>> load_partial_state(self1, self2.state_dict())
        >>> # Add extra nonsense to state-dict
        >>> extra_state_dict = {'extra.' + k: v for k, v in self1.state_dict().items()}
        >>> extra_state_dict['stats'] = ub.peek(extra_state_dict.values()).clone()
        >>> model = self2
        >>> model_state_dict = extra_state_dict
        >>> load_partial_state(self2, extra_state_dict)

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> from netharn.initializers.functional import *  # NOQA
        >>> import torchvision
        >>> import torch
        >>> resnet50 = torchvision.models.resnet50()
        >>> class CustomModel(torch.nn.Module):
        >>>     def __init__(self):
        >>>         super().__init__()
        >>>         self.module = resnet50
        >>>         self.extra = torch.nn.Linear(1, 1)
        >>> model = CustomModel()
        >>> model_state_dict = resnet50.state_dict()
        >>> model_state_dict2 = {'prefix.' + k: v for k, v in model_state_dict.items()}
        >>> import ubelt as ub
        >>> with ub.Timer(verbose=2, label='strict'):
        >>>     load_partial_state(model, model_state_dict, association='strict', verbose=0)
        >>> with ub.Timer(verbose=2, label='prefix-hack'):
        >>>     load_partial_state(model, model_state_dict, association='prefix-hack', verbose=0)
        >>> with ub.Timer(verbose=2, label='module-hack'):
        >>>     load_partial_state(model, model_state_dict, association='module-hack', verbose=0)
        >>> with ub.Timer(verbose=2, label='embedding'):
        >>>     load_partial_state(model, model_state_dict, association='embedding', verbose=0)

        >>> load_partial_state(model, model_state_dict, association='prefix-hack', verbose=1)
        >>> load_partial_state(model, model_state_dict, association='module-hack', verbose=1)

    CommandLine:
        xdoctest -m /home/joncrall/code/netharn/netharn/initializers/functional.py load_partial_state:2 --slow

    """
    if association is None:
        association = 'module-hack'  # old default
        # association = 'prefix-hack'  # new default

    if initializer is not None:
        import warnings
        warnings.warn('initializer is deprecated, use leftover instead')
        leftover = initializer

    self_state = model.state_dict()

    def _fix_keys(model_state_dict):
        """
        Hack around DataParallel wrapper. If there is nothing in common between
        the two models check to see if prepending 'module.' to other keys fixes
        it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)
        common_keys = other_keys.intersection(self_keys)
        if not common_keys:
            if association == 'strict':
                pass
            elif association == 'module-hack':
                # If there are no common keys try a hack
                prefix = 'module.'

                def smap(f, ss):
                    return set(map(f, ss))

                def fix1(k):
                    return prefix + k

                def fix2(k):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                if smap(fix1, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix1, model_state_dict)
                elif smap(fix2, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix2, model_state_dict)
            elif association == 'prefix-hack':
                import functools

                def add_prefix(k, prefix):
                    return prefix + k

                def remove_prefix(k, prefix):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                # set1 = other_keys
                # target_set2 = self_keys
                found = _best_prefix_transform(other_keys, self_keys)
                if found is not None:
                    for action, prefix in found['transform']:
                        if action == 'add':
                            func = functools.partial(add_prefix, prefix=prefix)
                        elif action == 'remove':
                            func = functools.partial(remove_prefix,
                                                     prefix=prefix)
                        else:
                            raise AssertionError
                        model_state_dict = ub.map_keys(func, model_state_dict)
            elif association == 'embedding':
                if verbose > 1:
                    print(
                        'Using subpath embedding association, may take some time'
                    )
                # I believe this is the correct way to solve the problem
                paths1 = sorted(other_keys)
                paths2 = sorted(self_state)
                subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                    paths1, paths2)
                mapping = ub.dzip(subpaths1, subpaths2)
                if verbose > 1:
                    print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                model_state_dict = ub.map_keys(lambda k: mapping.get(k, k),
                                               model_state_dict)
            else:
                raise KeyError(association)
        return model_state_dict

    other_state = _fix_keys(model_state_dict)

    # keys in our model that will end up unset
    self_unset_keys = set(self_state.keys())
    # keys in the other model that will end up unused
    other_unused_keys = set(other_state.keys())

    seen_keys = ub.ddict(set)

    for key, other_value in other_state.items():
        if key not in self_state:
            if verbose > 0:
                print('Skipping {} because it does not exist'.format(key))
            seen_keys['skipped'].add(key)
        else:
            self_value = self_state[key]
            if other_value.size() == self_value.size():
                self_state[key] = other_value
                self_unset_keys.remove(key)
                other_unused_keys.remove(key)
                seen_keys['full_add'].add(key)
            elif len(other_value.size()) == len(self_value.size()):
                if not mangle:
                    if verbose > 0:
                        print(
                            'Skipping {} due to incompatible size and mangle=False'
                            .format(key))
                        print(' * self  = {!r}'.format(self_value.size()))
                        print(' * other = {!r}'.format(other_value.size()))
                    seen_keys['skipped'].add(key)
                elif key.endswith('bias'):
                    if verbose > 0:
                        print(
                            'Skipping {} due to incompatible size'.format(key))
                        print(' * self  = {!r}'.format(self_value.size()))
                        print(' * other = {!r}'.format(other_value.size()))
                    seen_keys['skipped'].add(key)
                else:
                    if leftover is None:
                        if verbose > 0:
                            print(
                                'Skipping {} due to incompatible size and no default initializer'
                                .format(key))
                            print(' * self  = {!r}'.format(self_value.size()))
                            print(' * other = {!r}'.format(other_value.size()))
                        seen_keys['skipped'].add(key)
                    else:
                        if verbose > 0:
                            print('Partially add {} with incompatible size'.
                                  format(key))
                            print(' * self  = {!r}'.format(self_value.size()))
                            print(' * other = {!r}'.format(other_value.size()))
                        # Initialize all weights in case any are unspecified
                        try:
                            leftover(self_state[key])
                        except Exception:
                            if verbose > 0:
                                print('Unable to init {} with {}'.format(
                                    key, leftover))

                        # Transfer as much as possible
                        min_size = np.minimum(self_state[key].shape,
                                              other_value.shape)
                        sl = tuple([slice(0, s) for s in min_size])
                        self_state[key][sl] = other_value[sl]

                        # if shock_partial:
                        #     # Shock weights because we are doing something weird
                        #     # might help the network recover in case this is
                        #     # not a good idea
                        #     shock(self_state[key], func=leftover)
                        self_unset_keys.remove(key)
                        other_unused_keys.remove(key)

                        if self_state[key].numel() < other_value.numel():
                            seen_keys['partial_add_some'].add(key)
                        else:
                            seen_keys['partial_add_all'].add(key)
            else:
                if verbose > 0:
                    print('Skipping {} due to incompatible size'.format(key))
                    print(' * self  = {!r}'.format(self_value.size()))
                    print(' * other = {!r}'.format(other_value.size()))
                seen_keys['skipped'].add(key)

    if ignore_unset is True:
        self_unset_keys = []
    elif ignore_unset:
        self_unset_keys = list(ub.oset(self_unset_keys) - set(ignore_unset))

    if (self_unset_keys or other_unused_keys or seen_keys['partial_add_some']
            or seen_keys['partial_add_all']):
        if verbose > 0:
            if seen_keys:
                print('Pretrained weights are a partial fit')
            else:
                print('Pretrained weights do not fit!')
        if verbose > 1:
            print('Seen Keys: {}'.format(ub.repr2(seen_keys, nl=2)))
            print('Self Unset Keys: {}'.format(ub.repr2(self_unset_keys,
                                                        nl=1)))
            print('Other Unused keys: {}'.format(
                ub.repr2(other_unused_keys, nl=1)))
            print('summary:')
            seen_sum = ub.map_vals(len, seen_keys)
            print('Seen Num: {}'.format(ub.repr2(seen_sum, nl=2)))
            print('Self Unset Num: {}'.format(
                ub.repr2(len(self_unset_keys), nl=1)))
            print('Other Unused Num: {}'.format(
                ub.repr2(len(other_unused_keys), nl=1)))
        if leftover:
            if verbose > 0:
                print('Initializing unused keys using {}'.format(leftover))
            for key in self_unset_keys:
                if key.endswith('.num_batches_tracked'):
                    pass  # ignore num_batches_tracked
                elif key.endswith('.bias'):
                    self_state[key].fill_(0)
                else:
                    try:
                        leftover(self_state[key])
                    except Exception:
                        if verbose > 0:
                            print('Unable to init {} with {}'.format(
                                key, leftover))

    else:
        if verbose > 0:
            print('Pretrained weights are a perfect fit')
    model.load_state_dict(self_state)

    info = {
        'seen': seen_keys,
        'self_unset': self_unset_keys,
        'other_unused': other_unused_keys
    }
    return info
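A minimal usage sketch for the common wrapped-backbone case. The Wrapped class is a hypothetical stand-in, and this assumes torchvision is installed:

import torch
import torchvision

class Wrapped(torch.nn.Module):
    # hypothetical model whose state-dict keys carry a 'module.' prefix
    def __init__(self):
        super().__init__()
        self.module = torchvision.models.resnet18()

model = Wrapped()
pretrained = torchvision.models.resnet18().state_dict()
info = load_partial_state(model, pretrained, association='module-hack',
                          leftover=torch.nn.init.kaiming_normal_, verbose=0)
print(ub.map_vals(len, info['seen']))  # counts per action, e.g. {'full_add': N}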
Example #17
def benchamrk_det_nms():
    """
    Benchmarks different implementations of non-max-suppression on the CPU,
    GPU, and using cython / numpy / torch.

    CommandLine:
        xdoctest -m ~/code/kwimage/dev/bench_nms.py benchamrk_det_nms --show

    SeeAlso:
        PJR Darknet NonMax suppression
        https://github.com/pjreddie/darknet/blob/master/src/box.c

        Lightnet NMS
        https://gitlab.com/EAVISE/lightnet/blob/master/lightnet/data/transform/_postprocess.py#L116
    """

    # N = 200
    # bestof = 50
    N = 1
    bestof = 1

    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500, 2000]

    # max number of boxes yolo will spit out at a time
    max_boxes = 19 * 19 * 5

    xdata = [
        10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500,
        max_boxes
    ]
    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500]

    # Demo values
    xdata = [0, 1, 2, 3, 10, 100, 200, 300, 500]

    if ub.argflag('--small'):
        xdata = [10, 100, 500, 1000, 1500, 2000, 5000, 10000]

    if ub.argflag('--medium'):
        xdata = [
            1000,
            5000,
            10000,
            20000,
            50000,
        ]

    if ub.argflag('--large'):
        xdata = [
            1000,
            5000,
            10000,
            20000,
            50000,
            100000,
        ]

    if ub.argflag('--extra-large'):
        xdata = [
            1000,
            2000,
            10000,
            20000,
            40000,
            100000,
            200000,
        ]

    title_parts = []

    SMALL_BOXES = ub.argflag('--small-boxes')
    if SMALL_BOXES:
        title_parts.append('small boxes')
    else:
        title_parts.append('large boxes')

    # NOTE: for large images we may have up to 21,850,753 detections!

    thresh = float(ub.argval('--thresh', default=0.4))
    title_parts.append('thresh={:.2f}'.format(thresh))

    from kwimage.algo.algo_nms import available_nms_impls
    valid_impls = available_nms_impls()
    print('valid_impls = {!r}'.format(valid_impls))

    basis = {
        'type': ['ndarray', 'tensor', 'tensor0'],
        # 'daq': [True, False],
        # 'daq': [False],
        # 'device': [None],
        # 'impl': valid_impls,
        'impl': valid_impls + ['auto'],
    }

    if ub.argflag('--daq'):
        basis['daq'] = [True, False]

    # if torch.cuda.is_available():
    #     basis['device'].append(0)

    combos = [
        ub.dzip(basis.keys(), vals) for vals in it.product(*basis.values())
    ]

    def is_valid_combo(combo):
        # if combo['impl'] in {'py', 'cython_cpu'} and combo['device'] is not None:
        #     return False
        # if combo['type'] == 'ndarray' and combo['impl'] == 'cython_gpu':
        #     if combo['device'] is None:
        #         return False
        # if combo['type'] == 'ndarray' and combo['impl'] != 'cython_gpu':
        #     if combo['device'] is not None:
        #         return False

        # if combo['type'].endswith('0'):
        #     if combo['impl'] in {'numpy', 'cython_gpu', 'cython_cpu'}:
        #         return False

        # if combo['type'] == 'ndarray':
        #     if combo['impl'] in {'torch'}:
        #         return False

        REMOVE_SLOW = True
        if REMOVE_SLOW:
            known_bad = [
                {
                    'impl': 'torch',
                    'type': 'tensor'
                },
                {
                    'impl': 'numpy',
                    'type': 'tensor'
                },
                # {'impl': 'cython_gpu', 'type': 'tensor'},
                {
                    'impl': 'cython_cpu',
                    'type': 'tensor'
                },

                # {'impl': 'torch', 'type': 'tensor0'},
                {
                    'impl': 'numpy',
                    'type': 'tensor0'
                },
                # {'impl': 'cython_gpu', 'type': 'tensor0'},
                # {'impl': 'cython_cpu', 'type': 'tensor0'},
                {
                    'impl': 'torchvision',
                    'type': 'ndarray'
                },
            ]
            for known in known_bad:
                if all(combo[key] == val for key, val in known.items()):
                    return False

        return True

    combos = list(filter(is_valid_combo, combos))

    times = ub.ddict(list)
    for num in xdata:

        if num > 10000:
            N = 1
            bestof = 1
        elif num > 1000:
            N = 3
            bestof = 1
        elif num > 100:
            N = 10
            bestof = 3
        elif num > 10:
            N = 100
            bestof = 10
        else:
            N = 1000
            bestof = 10
        print('\n\n---- number of boxes = {} ----\n'.format(num))

        outputs = {}

        ti = ub.Timerit(N, bestof=bestof, verbose=1)

        # Build random test boxes and scores
        np_dets1 = kwimage.Detections.random(num // 2, scale=1000.0, rng=0)
        np_dets1.data['boxes'] = np_dets1.boxes.to_xywh()

        if SMALL_BOXES:
            max_dim = 100
            np_dets1.boxes.data[..., 2] = np.minimum(np_dets1.boxes.width,
                                                     max_dim).ravel()
            np_dets1.boxes.data[..., 3] = np.minimum(np_dets1.boxes.height,
                                                     max_dim).ravel()

        np_dets2 = copy.deepcopy(np_dets1)
        np_dets2.boxes.translate(10, inplace=True)
        # add boxes that will definitely be removed
        np_dets = kwimage.Detections.concatenate([np_dets1, np_dets2])

        # make all scores unique to ensure comparability
        np_dets.scores[:] = np.linspace(0, 1, np_dets.num_boxes())

        np_dets.data['scores'] = np_dets.scores.astype(np.float32)
        np_dets.boxes.data = np_dets.boxes.data.astype(np.float32)

        typed_data = {}
        # ----------------------------------

        import netharn as nh
        for combo in combos:
            print('combo = {}'.format(ub.repr2(combo, nl=0)))

            label = nh.util.make_idstr(combo)
            mode = combo.copy()

            # if mode['impl'] == 'cython_gpu':
            #     mode['device_id'] = mode['device']

            mode_type = mode.pop('type')

            if mode_type in typed_data:
                dets = typed_data[mode_type]
            else:
                if mode_type == 'ndarray':
                    dets = np_dets.numpy()
                elif mode_type == 'tensor':
                    dets = np_dets.tensor(None)
                elif mode_type == 'tensor0':
                    dets = np_dets.tensor(0)
                else:
                    raise KeyError
                typed_data[mode_type] = dets

            for timer in ti.reset(label):
                with timer:
                    keep = dets.non_max_supression(thresh=thresh, **mode)
                    torch.cuda.synchronize()
            times[ti.label].append(ti.min())
            outputs[ti.label] = ensure_numpy_indices(keep)

        # ----------------------------------

        # Check that all kept boxes do not have more than `threshold` ious
        if 0:
            for key, keep_idxs in outputs.items():
                kept = np_dets.take(keep_idxs).boxes
                ious = kept.ious(kept)
                max_iou = (np.tril(ious) - np.eye(len(ious))).max()
                if max_iou > thresh:
                    print('{} produced a bad result with max_iou={}'.format(
                        key, max_iou))

        # Check result consistency:
        print('\nResult stats:')
        for key in sorted(outputs.keys()):
            print('    * {:<20}: num={}'.format(key, len(outputs[key])))

        print('\nResult overlaps (method1, method2: jaccard):')
        datas = []
        for k1, k2 in it.combinations(sorted(outputs.keys()), 2):
            idxs1 = set(outputs[k1])
            idxs2 = set(outputs[k2])
            jaccard = len(idxs1 & idxs2) / max(len(idxs1 | idxs2), 1)
            datas.append((k1, k2, jaccard))

        datas = sorted(datas, key=lambda x: -x[2])
        for k1, k2, jaccard in datas:
            print('    * {:<20}, {:<20}: {:0.4f}'.format(k1, k2, jaccard))

    if True:
        ydata = {key: 1.0 / np.array(vals) for key, vals in times.items()}
        ylabel = 'Hz'
        reverse = True
        yscale = 'symlog'
    else:
        ydata = {key: np.array(vals) for key, vals in times.items()}
        ylabel = 'seconds'
        reverse = False
        yscale = 'linear'
    scores = {key: vals[-1] for key, vals in ydata.items()}
    ydata = ub.dict_subset(ydata, ub.argsort(scores, reverse=reverse))

    ###
    times_of_interest = [0, 10, 100, 200, 1000]
    times_of_interest = xdata

    lines = []
    record = lines.append
    record('### times_of_interest = {!r}'.format(times_of_interest))
    for x in times_of_interest:

        if times_of_interest[-1] == x:
            record('else:')
        elif times_of_interest[0] == x:
            record('if num <= {}:'.format(x))
        else:
            record('elif num <= {}:'.format(x))

        if x in xdata:
            pos = xdata.index(x)
            score_wrt_x = {}
            for key, vals in ydata.items():
                score_wrt_x[key] = vals[pos]

            typekeys = ['tensor0', 'tensor', 'ndarray']
            type_groups = dict([(b,
                                 ub.group_items(score_wrt_x,
                                                lambda y: y.endswith(b))[True])
                                for b in typekeys])
            # print('\n=========')
            # print('x = {!r}'.format(x))
            record('    if code not in {!r}:'.format(set(typekeys)))
            record('        raise KeyError(code)')
            for typekey, group in type_groups.items():
                # print('-------')
                record('    if code == {!r}:'.format(typekey))
                # print('typekey = {!r}'.format(typekey))
                # print('group = {!r}'.format(group))
                group_x = ub.dict_isect(score_wrt_x, group)
                valid_keys = ub.argsort(group_x, reverse=True)
                valid_x = ub.dict_subset(group_x, valid_keys)
                # parts = [','.split(k) for k in valid_keys]
                ordered_impls = []
                ordered_impls2 = ub.odict()
                for k in valid_keys:
                    vals = valid_x[k]
                    p = k.split(',')
                    d = dict(i.split('=') for i in p)
                    ordered_impls2[d['impl']] = vals
                    ordered_impls.append(d['impl'])

                ordered_impls = list(ub.oset(ordered_impls) - {'auto'})
                ordered_impls2.pop('auto')
                record('        # {}'.format(
                    ub.repr2(ordered_impls2, precision=1, nl=0,
                             explicit=True)))
                record('        preference = {}'.format(
                    ub.repr2(ordered_impls, nl=0)))
    record('### end times of interest ')
    print(ub.indent('\n'.join(lines), ' ' * 8))
    ###

    markers = {
        key: 'o' if 'auto' in key else ''
        for key, score in scores.items()
    }

    if ub.argflag('--daq'):
        markers = {
            key: '+' if 'daq=True' in key else ''
            for key, score in scores.items()
        }

    labels = {
        key: '{:.2f} {} - {}'.format(score, ylabel[0:3], key)
        for key, score in scores.items()
    }

    title = 'NMS-impl speed: ' + ', '.join(title_parts)

    import kwplot
    kwplot.autompl()
    kwplot.multi_plot(
        xdata,
        ydata,
        xlabel='num boxes',
        ylabel=ylabel,
        label=labels,
        yscale=yscale,
        title=title,
        marker=markers,
        # xscale='symlog',
    )

    kwplot.show_if_requested()
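The core call pattern the benchmark times can be reproduced in isolation; a minimal sketch using the same kwimage methods exercised above:

import kwimage

dets = kwimage.Detections.random(100, scale=512.0, rng=0).numpy()
keep = dets.non_max_supression(thresh=0.4, impl='auto')
kept = dets.take(keep)  # detections surviving suppression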
Example #18
def ensure_selenium_chromedriver():
    """
    os.environ['webdriver.chrome.driver'] = str(ensure_selenium_chromedriver())
    """
    import requests
    import zipfile
    timeout = 5.0

    def latest_version():
        rsp = requests.get(
            'http://chromedriver.storage.googleapis.com/LATEST_RELEASE',
            timeout=timeout)
        if rsp.status_code != 200:
            raise Exception
        version = rsp.text.strip()
        return version

    # version = latest_version()
    # version = '91.0.4472.19'
    # version = '90.0.4430.24'
    version = '92.0.4515.107'

    known_hashs = {
        '91.0.4472.19': '49622b740b1c7e66b87179a2642f6c57f21a97fc844c84b30a48',
        '90.0.4430.24': 'b85313de6abc1b44f26a0e12e20cb66657b840417f5ac6018946',
        '92.0.4515.107': '844c0e04bbbfd286617af2d7facd3d6cf7d3491b1e78120f8e0',
    }
    url = 'http://chromedriver.storage.googleapis.com/{}/chromedriver_linux64.zip'.format(
        version)
    bin_dpath = pathlib.Path(ub.expandpath('~/.local/bin'))
    download_dpath = bin_dpath / f'chromedriver_{version}'
    download_dpath.mkdir(exist_ok=True, parents=True)

    zip_fpath = ub.grabdata(
        url,
        hash_prefix=known_hashs.get(version, 'unknown-version'),
        dpath=download_dpath,
    )
    zip_fpath = pathlib.Path(zip_fpath)
    # dpath = zip_fpath.parent

    # TODO: version the binary
    chromedriver_fpath_real = download_dpath / 'chromedriver'
    chromedriver_fpath_link = bin_dpath / 'chromedriver'

    if (not chromedriver_fpath_real.exists()
            or not chromedriver_fpath_link.exists()):
        # Also check hash?

        zfile = zipfile.ZipFile(str(zip_fpath))
        try:
            fpath = zfile.extract('chromedriver',
                                  path=chromedriver_fpath_real.parent)
        finally:
            zfile.close()

        chromedriver_fpath_real_ = pathlib.Path(fpath)
        assert chromedriver_fpath_real_.exists()
        ub.symlink(chromedriver_fpath_real_,
                   chromedriver_fpath_link,
                   overwrite=True)

        if not ub.WIN32:
            print('add permission chromedriver_fpath_real_ = {!r}'.format(
                chromedriver_fpath_real_))
            st = os.stat(chromedriver_fpath_real_)
            os.chmod(chromedriver_fpath_real_, st.st_mode | stat.S_IEXEC)

        os.environ['PATH'] = os.pathsep.join(
            ub.oset(os.environ['PATH'].split(os.pathsep))
            | ub.oset([str(chromedriver_fpath_link.parent)]))
    return chromedriver_fpath_link
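A minimal sketch of how the returned driver path might be consumed with selenium's Service API (selenium 4 style; treat the exact invocation as an assumption):

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

driver_fpath = ensure_selenium_chromedriver()
driver = webdriver.Chrome(service=Service(str(driver_fpath)))
try:
    driver.get('https://example.com')
    print(driver.title)
finally:
    driver.quit()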
Example #19
def _heuristic_auto_nms_impl(code, num, valid=None):
    """
    Defined with help from ``~/code/kwimage/dev/bench_nms.py``

    Args:
        code (str): text that indicates which type of data you have:
            'tensor0' is a tensor on a cuda device, 'tensor' is a tensor on
            the cpu, and 'ndarray' is a numpy array.

        num (int): number of boxes you have to suppress.

        valid (List[str]): the list of valid implementations, an error will be
            raised if heuristic preferences do not intersect with this list.

    Ignore:
        _impls._funcs
        valid_pref = ub.oset(preference) & set(_impls._funcs.keys())
        python ~/code/kwimage/dev/bench_nms.py --show --small-boxes --thresh=0.6
    """
    if code not in {'tensor0', 'tensor', 'ndarray'}:
        raise KeyError(code)

    if num <= 10:
        if code == 'tensor0':
            # dict(cython_cpu=4118.4, torchvision=3042.5, cython_gpu=2244.4, torch=841.9)
            preference = ['cython_cpu', 'torchvision', 'cython_gpu', 'torch']
        if code == 'tensor':
            # dict(torchvision=5857.1, cython_gpu=3058.1)
            preference = ['torchvision', 'cython_gpu', 'torch', 'numpy']
        if code == 'ndarray':
            # dict(cython_cpu=12226.1, numpy=7759.1, cython_gpu=3679.0, torch=1786.2)
            preference = ['cython_cpu', 'numpy', 'cython_gpu', 'torch']
    elif num <= 100:
        if code == 'tensor0':
            # dict(cython_cpu=4160.7, torchvision=3089.9, cython_gpu=2261.8, torch=846.8)
            preference = [
                'cython_cpu', 'torchvision', 'cython_gpu', 'torch', 'numpy'
            ]
        if code == 'tensor':
            # dict(torchvision=5875.3, cython_gpu=3076.9)
            preference = ['torchvision', 'cython_gpu', 'torch', 'numpy']
        if code == 'ndarray':
            # dict(cython_cpu=12256.7, cython_gpu=3702.9, numpy=2311.3, torch=1738.0)
            preference = ['cython_cpu', 'cython_gpu', 'numpy', 'torch']
    elif num <= 200:
        if code == 'tensor0':
            # dict(cython_cpu=3460.8, torchvision=2912.9, cython_gpu=2125.2, torch=782.4)
            preference = ['cython_cpu', 'torchvision', 'cython_gpu', 'torch']
        if code == 'tensor':
            # dict(torchvision=3394.6, cython_gpu=2641.2)
            preference = ['torchvision', 'cython_gpu', 'torch', 'numpy']
        if code == 'ndarray':
            # dict(cython_cpu=8220.6, cython_gpu=3114.5, torch=1240.7, numpy=309.5)
            preference = ['cython_cpu', 'cython_gpu', 'torch', 'numpy']
    elif num <= 300:
        if code == 'tensor0':
            # dict(torchvision=2647.1, cython_cpu=2264.9, cython_gpu=1915.5, torch=672.0)
            preference = ['torchvision', 'cython_cpu', 'cython_gpu', 'torch']
        if code == 'tensor':
            # dict(cython_gpu=2496.9, torchvision=1781.1)
            preference = ['cython_gpu', 'torchvision', 'torch', 'numpy']
        if code == 'ndarray':
            # dict(cython_cpu=4085.6, cython_gpu=2944.4, torch=799.8, numpy=173.0)
            preference = ['cython_cpu', 'cython_gpu', 'torch', 'numpy']
    else:
        if code == 'tensor0':
            # dict(torchvision=2585.5, cython_gpu=1868.7, cython_cpu=1650.6, torch=623.1)
            preference = ['torchvision', 'cython_gpu', 'cython_cpu', 'torch']
        if code == 'tensor':
            # dict(cython_gpu=2463.1, torchvision=1126.2)
            preference = ['cython_gpu', 'torchvision', 'torch', 'numpy']
        if code == 'ndarray':
            # dict(cython_gpu=2880.2, cython_cpu=2432.5, torch=511.9, numpy=114.0)
            preference = ['cython_gpu', 'cython_cpu', 'torch', 'numpy']

    if valid:
        valid_pref = ub.oset(preference) & valid
    else:
        valid_pref = preference

    if not valid_pref:
        raise Exception(
            'no valid nms algo: code={}, num={}, valid={}, preference={}, valid_pref={}'
            .format(code, num, valid, preference, valid_pref))

    impl = valid_pref[0]
    return impl
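A minimal usage sketch; the valid list below is a hypothetical set of available backends:

# 5000 boxes held as a numpy array, with only two backends compiled
impl = _heuristic_auto_nms_impl('ndarray', num=5000, valid=['numpy', 'torch'])
# the num > 300 ndarray preference is [cython_gpu, cython_cpu, torch, numpy],
# so the first entry that is also valid wins
assert impl == 'torch'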
Example #20
def benchmark_dict_diff_impl():
    import ubelt as ub
    import pandas as pd
    import timerit
    import random

    def method_diffkeys(*args):
        first_dict = args[0]
        keys = set(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_diffkeys_list(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        keep_keys = [k for k in first_dict.keys() if k not in remove_keys]
        new = dict((k, first_dict[k]) for k in keep_keys)
        return new

    def method_diffkeys_oset(*args):
        first_dict = args[0]
        keys = ub.oset(first_dict)
        keys.difference_update(*map(set, args[1:]))
        new0 = dict((k, first_dict[k]) for k in keys)
        return new0

    def method_ifkeys_setcomp(*args):
        first_dict = args[0]
        remove_keys = {k for ks in args[1:] for k in ks}
        new1 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new1

    def method_ifkeys_setunion(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new2 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys)
        return new2

    def method_ifkeys_getitem(*args):
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new3 = dict((k, first_dict[k]) for k in first_dict.keys() if k not in remove_keys)
        return new3

    def method_ifkeys_dictcomp(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: v for k, v in first_dict.items() if k not in remove_keys}
        return new4

    def method_ifkeys_dictcomp_getitem(*args):
        # Cannot use until 3.6 is dropped (it is faster)
        first_dict = args[0]
        remove_keys = set.union(*map(set, args[1:]))
        new4 = {k: first_dict[k] for k in first_dict.keys() if k not in remove_keys}
        return new4

    method_lut = locals()  # can populate this some other way

    def make_data(num_items, num_other, remove_fraction, keytype):
        if keytype == 'str':
            keytype = str
        if keytype == 'int':
            keytype = int
        first_keys = [random.randint(0, 1000) for _ in range(num_items)]
        k = int(remove_fraction * len(first_keys))
        remove_sets = [list(ub.unique(random.choices(first_keys, k=k) + [random.randint(0, 1000) for _ in range(num_items)])) for _ in range(num_other)]
        first_dict = {keytype(k): k for k in first_keys}
        args = [first_dict] + [{keytype(k): k for k in ks} for ks in remove_sets]
        return args

    ti = timerit.Timerit(200, bestof=1, verbose=2)

    basis = {
        'method': [
            # Can't use because unordered
            # 'method_diffkeys',

            # Can't use until Python 3.6 support is dropped
            'method_ifkeys_dictcomp',
            'method_ifkeys_dictcomp_getitem',

            'method_ifkeys_setunion',
            'method_ifkeys_getitem',
            'method_diffkeys_list',

            # Probably not good
            # 'method_ifkeys_setcomp',
            # 'method_diffkeys_oset',
        ],
        'num_items': [10, 100, 1000],
        'num_other': [1, 3, 5],
        # 'num_other': [1],
        'remove_fraction': [0, 0.2, 0.5, 0.7, 1.0],
        # 'remove_fraction': [0.2, 0.8],
        'keytype': ['str', 'int'],
        # 'keytype': ['str'],
        # 'param_name': [param values],
    }
    xlabel = 'num_items'
    kw_labels = ['num_items', 'num_other', 'remove_fraction', 'keytype']
    group_labels = {
        'style': ['num_other', 'keytype'],
        'size': ['remove_fraction'],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(),  kw_labels)
        args = make_data(**kwargs)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(*args)
        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    # for each parameter setting, group all methods with that used those exact
    # comparable params. Then rank how good each method did.  That will be a
    # preference profile. We will give that preference profile a weight (e.g.
    # based on the fastest method in the bunch) and then aggregate them with
    # some voting method.

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    weighted_rankings = ub.ddict(lambda: ub.ddict(float))
    for params, variants in data.groupby(['num_other', 'keytype', 'remove_fraction', 'num_items']):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

        # Choose a ranking weight scheme
        weight = variants['mean'].min()
        # weight = 1
        for rank, method in enumerate(ranking):
            weighted_rankings[method][rank] += weight
            weighted_rankings[method]['total'] += weight

    # Probably a more robust voting method to do this
    weight_rank_rows = []
    for method_name, ranks in weighted_rankings.items():
        weights = ub.dict_diff(ranks, ['total'])
        p_rank = ub.map_vals(lambda w: w / ranks['total'], weights)

        for rank, w in p_rank.items():
            weight_rank_rows.append({'rank': rank, 'weight': w, 'name': method_name})
    weight_rank_df = pd.DataFrame(weight_rank_rows)
    piv = weight_rank_df.pivot(index=['name'], columns=['rank'], values=['weight'])
    print(piv)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('skill_agg =\n{}'.format(skill_agg))

    aggregated = (piv * piv.columns.levels[1].values).sum(axis=1).sort_values()
    print('weight aggregated =\n{}'.format(aggregated))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark of dict-difference implementations')
        ax.set_xlabel('num_items')
        ax.set_ylabel('min time (seconds)')
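Outside the harness, the dict-comprehension variant this benchmark tends to favor can be used directly; a minimal sketch:

first = {'a': 1, 'b': 2, 'c': 3}
others = [{'b': 0}, {'c': 0, 'd': 0}]
remove_keys = set.union(*map(set, others))
new = {k: v for k, v in first.items() if k not in remove_keys}
assert new == {'a': 1}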
Example #21
def benchmark_template():
    import ubelt as ub
    import pandas as pd
    import timerit

    def method1(x, y, z):
        ret = []
        for i in range((x + y) * z):
            ret.append(i)
        return ret

    def method2(x, y, z):
        ret = [i for i in range((x + y) * z)]
        return ret

    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(100, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': ['method1', 'method2'],
        'x': list(range(7)),
        'y': [0, 100],
        'z': [2, 3]
        # 'param_name': [param values],
    }
    xlabel = 'x'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = ['x', 'y', 'z']
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['y'],
        'size': ['z'],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(ub.dict_isect(
                params, labels),
                                                  compact=1,
                                                  si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            chunk_iter = ub.chunks(ti.times, ti.bestof)
            times = list(map(min, chunk_iter))  # TODO: timerit method for this
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename(
            {'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    other_keys = sorted(
        set(stats_data.columns) -
        {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(),
                                      win_prob)).sort_values(ascending=False)
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data,
                     x=xlabel,
                     y=time_key,
                     marker='o',
                     ax=ax,
                     **plotkw)
        ax.set_title('Benchmark Name')
        ax.set_xlabel('Size (todo: A better x-variable description)')
        ax.set_ylabel('Time (todo: A better y-variable description)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')

        try:
            __IPYTHON__
        except NameError:
            plt.show()
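To adapt the template, usually only the methods, the basis, and the label lists need to change; a hypothetical sketch comparing two string-building strategies:

def method_join(n):
    return ''.join('x' for _ in range(n))

def method_concat(n):
    s = ''
    for _ in range(n):
        s += 'x'
    return s

basis = {
    'method': ['method_join', 'method_concat'],
    'n': [10, 100, 1000, 10000],
}
xlabel = 'n'
kw_labels = ['n']
group_labels = {'style': [], 'size': []}
# the rest of the template (timing loop, plotting) stays the same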
Example #22
def benchmark_mul_vs_pow():
    import ubelt as ub
    import pandas as pd
    import timerit

    from functools import reduce
    import operator as op
    import itertools as it

    def method_pow_via_mul_raw(n):
        """ Construct a function that does multiplication of a value n times """
        return eval('lambda v: ' + ' * '.join(['v'] * n))

    def method_pow_via_mul_for(v, n):
        ret = v
        for _ in range(1, n):
            ret = ret * v
        return ret

    def method_pow_via_mul_reduce(v, n):
        """ Alternative way to multiply a value n times """
        return reduce(op.mul, it.repeat(v, n))

    def method_pow_via_pow(v, n):
        return v ** n

    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(500000, bestof=1000, verbose=2)

    basis = {
        'method': ['method_pow_via_mul_raw', 'method_pow_via_pow'],
        'n': list(range(1, 20)),
        'v': ['random-int', 'random-float'],
        # 'param_name': [param values],
    }
    xlabel = 'n'
    kw_labels = ['v', 'n']
    group_labels = {
        'style': ['v'],
        'size': [],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(),  kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit

        if params['method'] == 'method_pow_via_mul_raw':
            method = method(kwargs.pop('n'))

        import random
        vtype = kwargs.pop('v')
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # Draw a fresh random operand for each trial; testing the
            # original param name keeps the draw from being overwritten.
            if vtype == 'random':
                kwargs['v'] = (random.randint(1, 31000)
                               if random.random() > 0.5 else random.random())
            elif vtype == 'random-int':
                kwargs['v'] = random.randint(1, 31000)
            elif vtype == 'random-float':
                kwargs['v'] = random.random()
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        for time in map(min, ub.chunks(ti.times, ti.bestof)):
            row = {
                # 'mean': ti.mean(),
                'time': time,
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    # data = data.sort_values('time')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='time', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('N')
        ax.set_ylabel('Time')
        ax.set_yscale('log')

        plt.show()
Example #23
def load_partial_state(model,
                       model_state_dict,
                       leftover=None,
                       ignore_unset=False,
                       verbose=2,
                       mangle=True,
                       association=None,
                       initializer=None):
    """
    CommandLine:
        python -m netharn.initializers.nninit_base load_partial_state

    Args:
        model (torch.nn.Module): module to initialize

        model_state_dict (dict): state dict we wish to transfer

        leftover (callable): fallback method for initializing incompatible
             areas, if none then those areas are left as-is.

        association (str): controls how we search for the association between
            the two model states. Can be strict, module-hack, prefix-hack,
            embedding, or isomorphism. Defaults to module-hack.

        mangle (bool, default=True): If True, mangles tensors that have the
            same key, but different shapes forcing them to fit. This might
            destroy information when forcing a larger tensor into a smaller
            tensor, or leave extra uninitialized room when a small tensor is
            placed in a larger one. Note: be careful when mangling a
            classification layer if class indexes are not aligned.

        verbose (int): verbosity level

    Returns:
        Dict: info - summary of actions taken

    TODO:
        - [ ] Allow user to specify how incompatible layers are handled.

    Notes:

        Has anyone ever had a problem where you had a torch model with a state
        dict with keys that looked like: `mymodel.detector.layer1.conv.weight`,
        but you had a pretrained weight file with keys that looked like:
        `module.layer1.conv.weight`?

        The latest version of
        `netharn.initializers.functional.load_partial_state` can handle this by
        solving a maximum-common-subtree-isomorphism problem. This computes the
        largest possible mapping between the two state dictionaries that share
        consistent suffixes.

        >>> # This means you can load an off-the-shelf unmodified pretrained resnet50
        >>> # where the keys might look something like this:
        >>> resnet_keys = {
        >>>     'conv1.weight',
        >>>     'layer1.0.conv1.weight',
        >>>     'layer1.0.conv2.weight',
        >>>     'layer1.0.conv3.weight',
        >>>     'layer1.0.downsample.0.weight',
        >>>     'layer2.0.conv1.weight',
        >>>     'layer2.0.conv2.weight',
        >>>     'layer2.0.conv3.weight',
        >>>     'layer3.0.conv1.weight',
        >>>     'layer4.0.conv1.weight',
        >>>     'fc.weight',
        >>>     'fc.bias',
        >>> }
        >>> #
        >>> # And perhaps you have a model that has a state dict where keys
        >>> # look like this:
        >>> model_keys = {
        >>>     'preproc.conv1.weight',
        >>>     'backbone.layer1.0.conv1.weight',
        >>>     'backbone.layer1.0.conv2.weight',
        >>>     'backbone.layer1.0.conv3.weight',
        >>>     'backbone.layer1.0.downsample.0.weight',
        >>>     'backbone.layer2.0.conv1.weight',
        >>>     'backbone.layer2.0.conv2.weight',
        >>>     'backbone.layer2.0.conv3.weight',
        >>>     'backbone.layer3.0.conv1.weight',
        >>>     'backbone.layer4.0.conv1.weight',
        >>>     'head.conv1',
        >>>     'head.conv2',
        >>>     'head.fc.weight',
        >>>     'head.fc.bias',
        >>> }
        >>> #
        >>> # We can compute a partial mapping between them
        >>> subpaths1, subpaths2 = maximum_common_ordered_subpaths(resnet_keys, model_keys)
        >>> print(ub.repr2(ub.dzip(subpaths1, subpaths2)))
        {
            'layer1.0.conv2.weight':        'backbone.layer1.0.conv2.weight',
            'layer1.0.conv3.weight':        'backbone.layer1.0.conv3.weight',
            'layer1.0.downsample.0.weight': 'backbone.layer1.0.downsample.0.weight',
            'layer2.0.conv1.weight':        'backbone.layer2.0.conv1.weight',
            'layer2.0.conv2.weight':        'backbone.layer2.0.conv2.weight',
            'layer2.0.conv3.weight':        'backbone.layer2.0.conv3.weight',
            'layer3.0.conv1.weight':        'backbone.layer3.0.conv1.weight',
            'layer4.0.conv1.weight':        'backbone.layer4.0.conv1.weight',
        }

        Also, if the sizes of the tensors don't quite fit, they will be
        mangled, i.e. "shoved-in" as best as possible.
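
        A minimal sketch of that "shove-in" (hypothetical tensors; the real
        code slices both sides by the elementwise minimum of the shapes):

        >>> import torch
        >>> dst = torch.zeros(3, 3)   # e.g. a parameter of the model
        >>> src = torch.ones(2, 2)    # a smaller pretrained tensor
        >>> dst[:2, :2] = src         # only the overlapping region is copied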


    Example:
        >>> import netharn as nh
        >>> # ---
        >>> model_other = nh.models.ToyNet2d(input_channels=1, num_classes=10)
        >>> model_other.hack_param1 = torch.nn.Parameter(torch.rand(1))
        >>> model_other.hack_param3 = torch.nn.Parameter(torch.rand(3))
        >>> model_other.hack_param5 = torch.nn.Parameter(torch.rand(3))
        >>> # ---
        >>> model_self = nh.models.ToyNet2d(input_channels=3, num_classes=2)
        >>> model_self.hack_param1 = torch.nn.Parameter(torch.rand(3))
        >>> model_self.hack_param2 = torch.nn.Parameter(torch.rand(3))
        >>> model_self.hack_param4 = torch.nn.Parameter(torch.rand(3))
        >>> # ---
        >>> model_state_dict = model_other.state_dict()
        >>> load_partial_state(model_self, model_state_dict)
        >>> load_partial_state(model_self, model_state_dict, leftover=torch.nn.init.kaiming_normal_)
        >>> _ = load_partial_state(model_self, model_state_dict, leftover=torch.nn.init.kaiming_normal_, association='embedding')

    Example:
        >>> from netharn.initializers.functional import *  # NOQA
        >>> import netharn as nh
        >>> xpu = nh.XPU(None)
        >>> self1 = nh.models.ToyNet2d()
        >>> self2 = xpu.mount(self1)
        >>> load_partial_state(self2, self1.state_dict())
        >>> load_partial_state(self1, self2.state_dict())
        >>> # Add extra nonsense to state-dict
        >>> extra_state_dict = {'extra.' + k: v for k, v in self1.state_dict().items()}
        >>> extra_state_dict['stats'] = ub.peek(extra_state_dict.values()).clone()
        >>> model = self2
        >>> model_state_dict = extra_state_dict
        >>> load_partial_state(self2, extra_state_dict, association='embedding')

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> from netharn.initializers.functional import *  # NOQA
        >>> import torchvision
        >>> import torch
        >>> resnet50 = torchvision.models.resnet50()
        >>> class CustomModel(torch.nn.Module):
        >>>     def __init__(self):
        >>>         super().__init__()
        >>>         self.module = resnet50
        >>>         self.extra = torch.nn.Linear(1, 1)
        >>> model = CustomModel()
        >>> model_state_dict = resnet50.state_dict()
        >>> model_state_dict2 = {'prefix.' + k: v for k, v in model_state_dict.items()}
        >>> import ubelt as ub
        >>> with ub.Timer(verbose=2, label='strict'):
        >>>     load_partial_state(model, model_state_dict, association='strict', verbose=0)
        >>> with ub.Timer(verbose=2, label='prefix-hack'):
        >>>     load_partial_state(model, model_state_dict, association='prefix-hack', verbose=0)
        >>> with ub.Timer(verbose=2, label='module-hack'):
        >>>     load_partial_state(model, model_state_dict, association='module-hack', verbose=0)
        >>> with ub.Timer(verbose=2, label='embedding'):
        >>>     load_partial_state(model, model_state_dict, association='embedding', verbose=0)

        >>> load_partial_state(model, model_state_dict, association='prefix-hack', verbose=1)
        >>> load_partial_state(model, model_state_dict, association='module-hack', verbose=1)

    Ignore:
        >>> from bioharn.models.new_models_v1 import *  # NOQA
        >>> channels = ChannelSpec.coerce('rgb')
        >>> input_stats = None
        >>> self = MM_HRNetV2_w18_MaskRCNN(classes=3, channels=channels)
        >>> filename = self.pretrained_url
        >>> self._init_backbone_from_pretrained(self.pretrained_url)
        >>> from bioharn.models.mm_models import _load_mmcv_weights
        >>> model_state = _load_mmcv_weights(filename)
        >>> self.detector.backbone.chan_backbones.rgb
        >>> model = self
        >>> model_state_dict = model_state

        from netharn.initializers.functional import *  # NOQA
        import xdev
        globals().update(**xdev.get_func_kwargs(load_partial_state))

    CommandLine:
        xdoctest -m /home/joncrall/code/netharn/netharn/initializers/functional.py load_partial_state:2 --slow

    """
    if association is None:
        association = 'module-hack'  # old default
        # association = 'prefix-hack'  # new default

    if initializer is not None:
        warnings.warn('initializer is deprecated, use leftover instead')
        leftover = initializer

    self_state = model.state_dict()

    def _fix_keys(model_state_dict):
        """
        Hack around DataParallel wrapper. If there is nothing in common between
        the two models check to see if prepending 'module.' to other keys fixes
        it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)

        if 0:
            # Automatic way to reduce nodes in the trees?
            # If node b always follows node a, can we contract it?
            nodes1 = [n for p in other_keys for n in p.split('.')]
            nodes2 = [n for p in self_keys for n in p.split('.')]
            tups1 = list(tup for key in other_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            tups2 = list(tup for key in self_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            x = ub.ddict(list)
            for a, b in tups1:
                x[a].append(b)
            for a, b in tups2:
                x[a].append(b)

            nodehist = ub.dict_hist(nodes1 + nodes2)

            for k, v in x.items():
                print('----')
                print(k)
                print(nodehist[k])
                follow_hist = ub.dict_hist(v)
                print(follow_hist)
                total = sum(follow_hist.values())
                if ub.allsame(follow_hist.values()) and total == nodehist[k]:
                    print('CONTRACT')

            # pair_freq = ub.dict_hist(ub.flatten([tups1, tups2]))
            # print(forest_str(paths_to_otree(other_keys, '.')))

        # common_keys = other_keys.intersection(self_keys)
        # if not common_keys:
        if not other_keys.issubset(self_keys):
            if association == 'strict':
                pass
            elif association == 'module-hack':
                # If there are no common keys try a hack
                prefix = 'module.'

                def smap(f, ss):
                    return set(map(f, ss))

                def fix1(k):
                    return prefix + k

                def fix2(k):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                if smap(fix1, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix1, model_state_dict)
                elif smap(fix2, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix2, model_state_dict)
            elif association == 'prefix-hack':
                import functools

                def add_prefix(k, prefix):
                    return prefix + k

                def remove_prefix(k, prefix):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                # set1 = other_keys
                # target_set2 = self_keys
                found = _best_prefix_transform(other_keys, self_keys)
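                # found['transform'], if not None, is a sequence of
                # (action, prefix) pairs, e.g. [('remove', 'module.')]
                # to strip a DataParallel-style prefix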
                if found is not None:
                    for action, prefix in found['transform']:
                        if action == 'add':
                            func = functools.partial(add_prefix, prefix=prefix)
                        elif action == 'remove':
                            func = functools.partial(remove_prefix,
                                                     prefix=prefix)
                        else:
                            raise AssertionError
                        model_state_dict = ub.map_keys(func, model_state_dict)
            elif association in {'embedding', 'isomorphism'}:
                if verbose > 1:
                    print('Using subpath {} association, may take some time'.
                          format(association))
                # I believe this is the correct way to solve the problem
                paths1 = sorted(other_keys)
                paths2 = sorted(self_state)

                if 1:
                    # hack to filter to reduce tree size in embedding problem
                    def shrink_paths(paths):
                        # Fuse common leaf components into their parents so
                        # the trees are shallower, e.g.
                        # 'layer1.0.conv1.weight' -> 'layer1:0.conv1:weight'
                        # (fusing conv1-3 / bn1-3 was considered, but is left
                        # disabled)
                        fuse_parts = [
                            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                            'weight', 'bias', 'num_batches_tracked',
                            'running_mean', 'running_var',
                        ]
                        new_paths = []
                        for p in paths:
                            for part in fuse_parts:
                                p = p.replace('.' + part, ':' + part)
                            new_paths.append(p)
                        return new_paths

                    # Reducing the depth saves a lot of time
                    paths1_ = shrink_paths(paths1)
                    paths2_ = shrink_paths(paths2)

                subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                    paths1_, paths2_, sep='.', mode=association)
                subpaths1 = [p.replace(':', '.') for p in subpaths1]
                subpaths2 = [p.replace(':', '.') for p in subpaths2]
                mapping = ub.dzip(subpaths1, subpaths2)
                if verbose > 1:
                    other_unmapped = sorted(other_keys - set(mapping.keys()))
                    self_unmapped = sorted(self_keys - set(mapping.values()))
                    print('-- embed association (other -> self) --')
                    print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                    print('self_unmapped = {}'.format(
                        ub.repr2(self_unmapped, nl=1)))
                    print('other_unmapped = {}'.format(
                        ub.repr2(other_unmapped, nl=1)))
                    print('len(mapping) = {}'.format(
                        ub.repr2(len(mapping), nl=1)))
                    print('len(self_unmapped) = {}'.format(
                        ub.repr2(len(self_unmapped), nl=1)))
                    print('len(other_unmapped) = {}'.format(
                        ub.repr2(len(other_unmapped), nl=1)))
                    print('-- end embed association --')

                # HACK: something might be wrong, there was an instance with
                # HRNet_w32 where multiple keys mapped to the same key
                # bad keys were incre_modules.3.0.conv1.weight and conv1.weight
                #
                # This will not error, but may produce bad output
                try:
                    model_state_dict = ub.map_keys(lambda k: mapping.get(k, k),
                                                   model_state_dict)
                except Exception as ex:
                    HACK = 1
                    if HACK:
                        new_state_dict_ = {}
                        for k, v in model_state_dict.items():
                            new_state_dict_[mapping.get(k, k)] = v
                        model_state_dict = new_state_dict_
                        warnings.warn('ex = {!r}'.format(ex))
                    else:
                        raise
            else:
                raise KeyError(association)
        return model_state_dict

    other_state = _fix_keys(model_state_dict)

    # will end up as the keys in our model that were not set
    self_unset_keys = set(self_state.keys())
    # will end up as the keys in the other model that were not used
    other_unused_keys = set(other_state.keys())

    seen_keys = ub.ddict(set)

    for key, other_value in other_state.items():
        if key not in self_state:
            if verbose > 0:
                print('Skipping {} because it does not exist'.format(key))
            seen_keys['skipped'].add(key)
        else:
            self_value = self_state[key]
            if other_value.size() == self_value.size():
                self_state[key] = other_value
                self_unset_keys.remove(key)
                other_unused_keys.remove(key)
                seen_keys['full_add'].add(key)
            elif len(other_value.size()) == len(self_value.size()):
                if not mangle:
                    if verbose > 0:
                        print(
                            'Skipping {} due to incompatible size and mangle=False'
                            .format(key))
                        print(' * self  = {!r}'.format(self_value.size()))
                        print(' * other = {!r}'.format(other_value.size()))
                    seen_keys['skipped'].add(key)
                elif key.endswith('bias'):
                    if verbose > 0:
                        print(
                            'Skipping {} due to incompatible size'.format(key))
                        print(' * self  = {!r}'.format(self_value.size()))
                        print(' * other = {!r}'.format(other_value.size()))
                    seen_keys['skipped'].add(key)
                else:
                    if leftover is None:
                        if verbose > 0:
                            print(
                                'Skipping {} due to incompatible size and no default initializer'
                                .format(key))
                            print(' * self  = {!r}'.format(self_value.size()))
                            print(' * other = {!r}'.format(other_value.size()))
                        seen_keys['skipped'].add(key)
                    else:
                        if verbose > 0:
                            print('Partially add {} with incompatible size'.
                                  format(key))
                            print(' * self  = {!r}'.format(self_value.size()))
                            print(' * other = {!r}'.format(other_value.size()))
                        # Initialize the full weight first in case the
                        # partial copy below leaves part of it unspecified
                        try:
                            leftover(self_state[key])
                        except Exception:
                            if verbose > 0:
                                print('Unable to init {} with {}'.format(
                                    key, leftover))

                        # Transfer as much as possible
                        min_size = np.minimum(self_state[key].shape,
                                              other_value.shape)
                        sl = tuple([slice(0, s) for s in min_size])
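                        # e.g. shapes (8, 3) and (10, 5) overlap on the
                        # leading (8, 3) region, which is what gets copied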
                        self_state[key][sl] = other_value[sl]

                        # if shock_partial:
                        #     # Shock weights because we are doing something weird
                        #     # might help the network recover in case this is
                        #     # not a good idea
                        #     shock(self_state[key], func=leftover)
                        self_unset_keys.remove(key)
                        other_unused_keys.remove(key)

                        if self_state[key].numel() < other_value.numel():
                            seen_keys['partial_add_some'].add(key)
                        else:
                            seen_keys['partial_add_all'].add(key)
            else:
                if verbose > 0:
                    print('Skipping {} due to incompatible size'.format(key))
                    print(' * self  = {!r}'.format(self_value.size()))
                    print(' * other = {!r}'.format(other_value.size()))
                seen_keys['skipped'].add(key)

    if ignore_unset is True:
        self_unset_keys = []
    elif ignore_unset:
        self_unset_keys = list(ub.oset(self_unset_keys) - set(ignore_unset))

    if (self_unset_keys or other_unused_keys or seen_keys['partial_add_some']
            or seen_keys['partial_add_all']):
        if verbose > 0:
            if seen_keys:
                print('Pretrained weights are a partial fit')
            else:
                print('Pretrained weights do not fit!')
        if verbose > 1:
            print('Seen Keys: {}'.format(ub.repr2(seen_keys, nl=2)))
            print('Self Unset Keys: {}'.format(ub.repr2(self_unset_keys,
                                                        nl=1)))
            print('Other Unused keys: {}'.format(
                ub.repr2(other_unused_keys, nl=1)))
            print('summary:')
            seen_sum = ub.map_vals(len, seen_keys)
            print('Seen Num: {}'.format(ub.repr2(seen_sum, nl=2)))
            print('Self Unset Num: {}'.format(
                ub.repr2(len(self_unset_keys), nl=1)))
            print('Other Unused Num: {}'.format(
                ub.repr2(len(other_unused_keys), nl=1)))
        if leftover:
            if verbose > 0:
                print('Initializing unused keys using {}'.format(leftover))
            for key in self_unset_keys:
                if key.endswith('.num_batches_tracked'):
                    pass  # ignore num_batches_tracked
                elif key.endswith('.bias'):
                    self_state[key].fill_(0)
                else:
                    try:
                        leftover(self_state[key])
                    except Exception:
                        if verbose > 0:
                            print('Unable to init {} with {}'.format(
                                key, leftover))

    else:
        if verbose > 0:
            print('Pretrained weights are a perfect fit')
    model.load_state_dict(self_state)

    info = {
        'seen': seen_keys,
        'self_unset': self_unset_keys,
        'other_unused': other_unused_keys
    }
    return info
Example #24
import ubelt as ub


def method_diffkeys_oset(*args):
    first_dict = args[0]
    keys = ub.oset(first_dict)
    keys.difference_update(*map(set, args[1:]))
    new0 = dict((k, first_dict[k]) for k in keys)
    return new0
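
A minimal usage sketch of the function above, with hypothetical
dictionaries:

d1 = {'a': 1, 'b': 2, 'c': 3}
d2 = {'b': 0}
assert method_diffkeys_oset(d1, d2) == {'a': 1, 'c': 3}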
Example #25
        np.logspace(0, np.log(10000) / np.log(base), num=10,
                    base=base).round().astype(int),
        'subsize': [2, 8, 32, 64],
    }
    data_kwkeys = ub.compatible(basis, generate_data)
    func_kwkeys = ub.compatible(basis, method_lut[basis['method'][0]])

    # These variables influence what is plotted on the x-axis and y-axis,
    # and with different line types
    xlabel = 'size'
    ylabel = 'time'
    group_labels = {
        'size': ['subsize'],
        'style': ['subsize'],
    }
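    # Whatever is left in the basis after removing the x-axis variable and
    # the keys claimed by other groups becomes a hue dimension.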
    hue_labels = ub.oset(basis) - {xlabel}
    if group_labels:
        hue_labels = hue_labels - set.union(*map(set, group_labels.values()))
    group_labels['hue'] = list(hue_labels)
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
Example #26
def load_partial_state(model,
                       model_state_dict,
                       initializer=None,
                       ignore_unset=False,
                       verbose=2):
    """
    CommandLine:
        python -m netharn.initializers.nninit_base load_partial_state

    Example:
        >>> import netharn as nh
        >>> self1 = nh.models.ToyNet2d(input_channels=1, num_classes=10)
        >>> self2 = nh.models.ToyNet2d(input_channels=3, num_classes=2)
        >>> model_state_dict = self1.state_dict()
        >>> load_partial_state(self2, model_state_dict)

    Example:
        >>> import netharn as nh
        >>> xpu = nh.XPU(None)
        >>> self1 = nh.models.ToyNet2d()
        >>> self2 = xpu.mount(self1)
        >>> load_partial_state(self2, self1.state_dict())
        >>> load_partial_state(self1, self2.state_dict())
    """
    self_state = model.state_dict()

    def _fix_keys(model_state_dict):
        """
        Hack around DataParallel wrapper. If there is nothing in common between
        the two models check to see if prepending 'module.' to other keys fixes
        it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)

        if not other_keys.intersection(self_keys):
            prefix = 'module.'

            def smap(f, ss):
                return set(map(f, ss))

            def fix1(k):
                return prefix + k

            def fix2(k):
                if k.startswith(prefix):
                    return k[len(prefix):]

            if smap(fix1, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix1, model_state_dict)
            elif smap(fix2, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix2, model_state_dict)

        return model_state_dict

    other_state = _fix_keys(model_state_dict)

    # will end up as the keys in our model that were not set
    self_unset_keys = set(self_state.keys())
    # will end up as the keys in the other model that were not used
    other_unused_keys = set(other_state.keys())

    seen_keys = ub.ddict(set)

    for key, other_value in other_state.items():
        if key not in self_state:
            if verbose > 0:
                print('Skipping {} because it does not exist'.format(key))
            seen_keys['skipped'].add(key)
        else:
            self_value = self_state[key]
            if other_value.size() == self_value.size():
                self_state[key] = other_value
                self_unset_keys.remove(key)
                other_unused_keys.remove(key)
                seen_keys['full_add'].add(key)
            elif len(other_value.size()) == len(self_value.size()):
                if key.endswith('bias'):
                    if verbose > 0:
                        print(
                            'Skipping {} due to incompatible size'.format(key))
                        print(' * self  = {!r}'.format(self_value.size()))
                        print(' * other = {!r}'.format(other_value.size()))
                    seen_keys['skipped'].add(key)
                else:
                    if initializer is None:
                        if verbose > 0:
                            print(
                                'Skipping {} due to incompatible size and no default initializer'
                                .format(key))
                            print(' * self  = {!r}'.format(self_value.size()))
                            print(' * other = {!r}'.format(other_value.size()))
                        seen_keys['skipped'].add(key)
                    else:
                        if verbose > 0:
                            print('Partially add {} with incompatible size'.
                                  format(key))
                            print(' * self  = {!r}'.format(self_value.size()))
                            print(' * other = {!r}'.format(other_value.size()))
                        # Initialize all weights in case any are unspecified
                        if initializer is not None:
                            initializer(self_state[key])

                        # Transfer as much as possible
                        min_size = np.minimum(self_state[key].shape,
                                              other_value.shape)
                        sl = tuple([slice(0, s) for s in min_size])
                        self_state[key][sl] = other_value[sl]

                        # if shock_partial:
                        #     # Shock weights because we are doing something weird
                        #     # might help the network recover in case this is
                        #     # not a good idea
                        #     shock(self_state[key], func=initializer)
                        self_unset_keys.remove(key)
                        other_unused_keys.remove(key)
                        seen_keys['partial_add'].add(key)
            else:
                if verbose > 0:
                    print('Skipping {} due to incompatible size'.format(key))
                    print(' * self  = {!r}'.format(self_value.size()))
                    print(' * other = {!r}'.format(other_value.size()))
                seen_keys['skipped'].add(key)

    if ignore_unset is True:
        self_unset_keys = []
    elif ignore_unset:
        self_unset_keys = list(ub.oset(self_unset_keys) - set(ignore_unset))

    if self_unset_keys or other_unused_keys:
        if verbose > 0:
            if seen_keys:
                print('Pretrained weights are a partial fit')
            else:
                print('Pretrained weights do not fit!')
        if verbose > 1:
            print('Seen Keys: {}'.format(ub.repr2(seen_keys, nl=2)))
            print('Self Unset Keys: {}'.format(ub.repr2(self_unset_keys,
                                                        nl=1)))
            print('Other Unused keys: {}'.format(
                ub.repr2(other_unused_keys, nl=1)))
        if initializer:
            if verbose > 0:
                print('Initializing unused keys using {}'.format(initializer))
            for key in self_unset_keys:
                if key.endswith('.bias'):
                    self_state[key].fill_(0)
                else:
                    initializer(self_state[key])
    else:
        if verbose > 0:
            print('Pretrained weights are a perfect fit')
    model.load_state_dict(self_state)