Example #1
def _build_typed_params_kwargs_docstr_block(typed_params):
    r"""
    Args:
        typed_params (List[Tuple]): sequence of ``(ctype, name, default)``
            triples, e.g. ``HESAFF_TYPED_PARAMS``

    CommandLine:
        python -m pyhesaff _build_typed_params_kwargs_docstr_block

    Example:
        >>> # DISABLE_DOCTEST
        >>> from pyhesaff._pyhesaff import *  # NOQA
        >>> typed_params = HESAFF_TYPED_PARAMS
        >>> result = _build_typed_params_kwargs_docstr_block(typed_params)
        >>> print(result)
    """
    kwargs_lines = []
    for tup in typed_params:
        type_, name, default = tup
        typestr = str(type_).replace('<class \'ctypes.c_',
                                     '').replace('\'>', '')
        line_fmtstr = '{name} ({typestr}): default={default}'
        line = line_fmtstr.format(name=name, typestr=typestr, default=default)
        kwargs_lines.append(line)
    kwargs_docstr_block = ('Kwargs:\n' +
                           ub.indent('\n'.join(kwargs_lines), '    '))
    return ub.indent(kwargs_docstr_block, '    ')
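
A minimal, runnable sketch of the nested-indent pattern above (the parameter triples here are illustrative stand-ins, not the real HESAFF defaults):

import ubelt as ub

# stand-ins for HESAFF_TYPED_PARAMS: (ctype, name, default) triples
typed_params = [(int, 'affine_invariance', 1), (int, 'rotation_invariance', 0)]
kwargs_lines = ['{} ({}): default={}'.format(name, type_.__name__, default)
                for type_, name, default in typed_params]
block = 'Kwargs:\n' + ub.indent('\n'.join(kwargs_lines), '    ')
print(ub.indent(block, '    '))
# Each ub.indent call nests the text four more spaces:
#     Kwargs:
#         affine_invariance (int): default=1
#         rotation_invariance (int): default=0
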
Example #2

def google_to_numpy_docstr(docstr):
    """
    Convert a google-style docstring to a numpy-style docstring

    Args:
        docstr (str): contents of ``func.__doc__`` for some ``func``, assumed
            to be in google-style.

    Returns:
        str: numpy style docstring
    """
    import ubelt as ub
    from xdoctest.docstr import docscrape_google
    docblocks = docscrape_google.split_google_docblocks(docstr)
    new_parts = []
    for key, block in docblocks:
        old_body, relpos = block
        new_key = key
        new_body = old_body

        if key == '__DOC__':
            new_key = None
            new_text = new_body
        elif key in {'Args'}:
            new_key = 'Parameters'
            arginfos = list(docscrape_google.parse_google_argblock(old_body))
            parts = []
            for info in arginfos:
                info['desc'] = ub.indent(info['desc'])
                p = '{name}: {type}\n{desc}'.format(**info)
                parts.append(p)
                parts.append('')
            new_body = '\n'.join(parts)
        elif key in {'Returns', 'Yields'}:
            retinfos = list(docscrape_google.parse_google_retblock(old_body))
            parts = []
            for info in retinfos:
                info['desc'] = ub.indent(info['desc'])
                info['name'] = info.get('name', '')
                parts.append('{name}: {type}\n{desc}'.format(**info))
                parts.append('')
            new_body = '\n'.join(parts)

        if new_key is not None:
            new_text = '\n'.join([new_key, '-' * len(new_key), new_body])

        if new_text.strip():
            new_parts.append(new_text)

    new_docstr = '\n'.join(new_parts)
    new_docstr = new_docstr.strip('\n')
    return new_docstr
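
A quick usage sketch (requires ubelt and xdoctest; the docstring content is made up):

docstr = '''
Adds two numbers.

Args:
    a (int): first operand
    b (int): second operand

Returns:
    int: the sum of ``a`` and ``b``
'''
print(google_to_numpy_docstr(docstr))
# The Args section comes back under a dashed "Parameters" header and the
# Returns section under a dashed "Returns" header, numpy-style.
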
Example #3
def auto_cmdline():
    import ubelt as ub
    from xdoctest import static_analysis as static
    import vim
    # import imp
    # imp.reload(static)
    modname, moddir = get_current_modulename()
    funcname, searchlines, pos, foundline = find_pyfunc_above_cursor()
    if static.is_modname_importable(modname, exclude=['.']):
        text = ub.codeblock(
            '''
            CommandLine:
                python -m {modname} {funcname}
            ''').format(funcname=funcname, modname=modname)
    else:
        modpath = ub.compressuser(vim.current.buffer.name)
        text = ub.codeblock(
            '''
            CommandLine:
                python {modpath} {funcname}
            ''').format(funcname=funcname, modpath=modpath)

    def get_indent(line):
        """
        returns the preceding whitespace
        """
        n_whitespace = len(line) - len(line.lstrip())
        prefix = line[:n_whitespace]
        return prefix

    prefix = get_indent(foundline)

    text = ub.indent(text, prefix + '    ')
    return text
Example #4
def _join_itemstrs(itemstrs,
                   itemsep,
                   newlines,
                   _leaf_info,
                   nobraces,
                   trailing_sep,
                   compact_brace,
                   lbr,
                   rbr,
                   align=False):
    """
    Joins stringified items with separators, newlines, and container-braces.
    """
    # positive newlines means start counting from the root
    use_newline = newlines > 0

    # negative countdown values mean start counting from the leafs
    # if compact_brace < 0:
    #     compact_brace = (-compact_brace) >= _leaf_info['max_height']
    if newlines < 0:
        use_newline = (-newlines) < _leaf_info['max_height']

    if use_newline:
        sep = ',\n'
        if nobraces:
            body_str = sep.join(itemstrs)
            if trailing_sep and len(itemstrs) > 0:
                body_str += ','
            retstr = body_str
        else:
            if compact_brace:
                # Why must we modify the indentation below and not here?
                # prefix = ''
                # rest = [ub.indent(s, prefix) for s in itemstrs[1:]]
                # indented = itemstrs[0:1] + rest
                indented = itemstrs
            else:
                import ubelt as ub
                prefix = ' ' * 4
                indented = [ub.indent(s, prefix) for s in itemstrs]

            if align:
                indented = _align_lines(indented, character=align)

            body_str = sep.join(indented)
            if trailing_sep and len(itemstrs) > 0:
                body_str += ','
            if compact_brace:
                # Why can we modify the indentation here but not above?
                braced_body_str = (lbr + body_str.replace('\n', '\n ') + rbr)
            else:
                braced_body_str = (lbr + '\n' + body_str + '\n' + rbr)
            retstr = braced_body_str
    else:
        sep = ',' + itemsep
        body_str = sep.join(itemstrs)
        if trailing_sep and len(itemstrs) > 0:
            body_str += ','
        retstr = (lbr + body_str + rbr)
    return retstr
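
A small illustration of the non-compact newline branch above (the item strings are made up):

itemstrs = ["'a': 1", "'b': 2"]
text = _join_itemstrs(itemstrs, ' ', newlines=1,
                      _leaf_info={'max_height': 2}, nobraces=False,
                      trailing_sep=True, compact_brace=False, lbr='{', rbr='}')
print(text)
# Each item is indented four spaces under the braces:
# {
#     'a': 1,
#     'b': 2,
# }
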
Example #5
    def idstr(self, nl=None, thresh=80):
        assert len(self._items) == len(self._children)
        assert len(self._params) == 0
        child_part = ['{}'.format(child.idstr(nl, thresh=thresh - 2))
                      for key, child in self.children()]
        body, nl = self._make_body([], child_part, nl, thresh - 2)
        if nl:
            body = ub.indent(body, '  ')
        return '[{}]'.format(body.rstrip(' '))
Example #6
    def idstr(self, nl=None, thresh=80):
        """
        Example:
            >>> self = TruncNormal()
            >>> self.idstr()
            >>> #
            >>> #
            >>> class Dummy(Distribution):
            >>>     def __init__(self):
            >>>         super(Dummy, self).__init__()
            >>>         self._setparam('a', 3)
            >>>         self.b = Normal()
            >>>         self.c = Uniform()
            >>> self = Dummy()
            >>> print(self.idstr())
            >>> #
            >>> class Tail5(Distribution):
            >>>     def __init__(self):
            >>>         super(Tail5, self).__init__()
            >>>         self._setparam('a_parameter', 3)
            >>>         for i in range(5):
            >>>             self._setparam(chr(i + 97), i)
            >>> #
            >>> class Tail6(Distribution):
            >>>     def __init__(self):
            >>>         super(Tail6, self).__init__()
            >>>         for i in range(9):
            >>>             self._setparam(chr(i + 97) + '_parameter', i)
            >>> #
            >>> class Dummy2(Distribution):
            >>>     def __init__(self):
            >>>         super(Dummy2, self).__init__()
            >>>         self._setparam('x', 3)
            >>>         self._setparam('y', 3)
            >>>         self.d = Dummy()
            >>>         self.f = Tail6()
            >>>         self.y = Tail5()
            >>> self = Dummy2()
            >>> print(self.idstr())
            >>> print(ub.repr2(self.json_id()))
        """
        classname = self.__class__.__name__
        self_part = ['{}={}'.format(key, ub.repr2(value, precision=2, si=True, nl=0))
                     for key, value in self._params.items()]
        child_part = ['{}={}'.format(key, child.idstr(nl, thresh=thresh - 2))
                      for key, child in self.children()]

        body, nl = self._make_body(self_part, child_part, nl, thresh - len(classname) - 2)
        if nl:
            body = ub.indent(body, '  ')
        return '{}({})'.format(classname, body.rstrip(' '))
Example #7
    def prompt(iiter):
        def _or_phrase(list_):
            return util.conj_phrase(list(map(repr, map(str, list_))), 'or')

        msg_list = [
            'enter %s to %s' % (_or_phrase(tup[1]), tup[2])
            for tup in iiter.action_tuples
        ]
        msg = ub.indent('\n'.join(msg_list), ' | * ')
        msg = ''.join([' +-----------', msg, '\n L-----------\n'])
        # TODO: timeout, help message
        print(msg)
        ans = iiter.wait_for_input()
        return ans
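
The menu formatting in isolation (the action messages are made up; a newline is added after the top rule here so the box renders on separate lines):

import ubelt as ub

msg_list = ["enter 'q' or 'quit' to stop iterating",
            "enter 'n' or 'next' to move on"]
msg = ub.indent('\n'.join(msg_list), ' | * ')
print(''.join([' +-----------\n', msg, '\n L-----------\n']))
#  +-----------
#  | * enter 'q' or 'quit' to stop iterating
#  | * enter 'n' or 'next' to move on
#  L-----------
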
Example #8
def count_ubelt_usage():
    """
    import sys, ubelt
    sys.path.append(ubelt.expandpath('~/code/ubelt/dev'))
    from gen_api_for_docs import *  # NOQA
    """
    from count_usage_freq import count_ubelt_usage
    usage = count_ubelt_usage()

    import numpy as np
    import kwarray
    import ubelt as ub

    guard = ('=' * 64 + ' ' + '=' * 16)
    print(guard)
    print('{:<64} {:>8}'.format(' Function name ', 'Usefulness'))
    print(guard)
    for key, value in usage.items():
        print('{:<64} {:>16}'.format(':func:`ubelt.' + key + '`', value))
    print(guard)

    raw_scores = np.array(list(usage.values()))

    print('\n.. code:: python\n')
    print(
        ub.indent('usage stats = ' + ub.repr2(
            kwarray.stats_dict(raw_scores, median=True, sum=True), nl=1)))

    for attrname in ub.__all__:
        member = getattr(ub, attrname)

        submembers = getattr(member, '__all__', None)

        if attrname.startswith('util_'):
            if not submembers:
                from mkinit.static_mkinit import _extract_attributes
                submembers = _extract_attributes(member.__file__)

        if submembers:
            title = ':mod:`ubelt.{}`'.format(attrname)
            print('\n' + title)
            print('-' * len(title))
            for subname in submembers:
                if not subname.startswith('_'):
                    print(':func:`ubelt.{}`'.format(subname))
            submembers = dir(member)
Example #9
    def grep_diff(repo, pattern, inverse=False):
        matching_fpaths = []
        print('Matching Files:')
        fpath_list = [f for f in repo.modified_files() if exists(f)]
        for fpath in fpath_list:
            text = repo.git.diff(fpath)
            matches = list(grep_text(pattern, text, fpath))

            if inverse:
                if not matches:
                    matching_fpaths.append(fpath)
            else:
                if matches:
                    for gmatch in matches:
                        print(gmatch.highlighted())
                    matching_fpaths.append(fpath)

        print('Total files matched = {}'.format(len(matching_fpaths)))
        print(ub.indent('\n'.join(matching_fpaths), ' * '))
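
Just the bullet-list reporting pattern, runnable on its own (the file names are illustrative):

import ubelt as ub

matching_fpaths = ['kwimage/im_core.py', 'kwimage/im_cv2.py']
print('Total files matched = {}'.format(len(matching_fpaths)))
print(ub.indent('\n'.join(matching_fpaths), ' * '))
# Total files matched = 2
#  * kwimage/im_core.py
#  * kwimage/im_cv2.py
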
Example #10
def _join_itemstrs(itemstrs, itemsep, newlines, nobraces, trailing_sep,
                   compact_brace, lbr, rbr):
    """
    Joins stringified items with separators, newlines, and container-braces.
    """
    import ubelt as ub

    if newlines > 0:
        sep = ',\n'
        if nobraces:
            body_str = sep.join(itemstrs)
            if trailing_sep and len(itemstrs) > 0:
                body_str += ','
            retstr = body_str
        else:
            if compact_brace:
                # Why must we modify the indentation below and not here?
                # prefix = ''
                # rest = [ub.indent(s, prefix) for s in itemstrs[1:]]
                # indented = itemstrs[0:1] + rest
                indented = itemstrs
            else:
                prefix = ' ' * 4
                indented = [ub.indent(s, prefix) for s in itemstrs]

            body_str = sep.join(indented)
            if trailing_sep and len(itemstrs) > 0:
                body_str += ','
            if compact_brace:
                # Why can we modify the indentation here but not above?
                braced_body_str = (lbr + body_str.replace('\n', '\n ') + rbr)
            else:
                braced_body_str = (lbr + '\n' + body_str + '\n' + rbr)
            retstr = braced_body_str
    else:
        sep = ',' + itemsep
        body_str = sep.join(itemstrs)
        if trailing_sep and len(itemstrs) > 0:
            body_str += ','
        retstr = (lbr + body_str + rbr)
    return retstr
Example #11
def print_facts():
    """
    Print facts with rich
    """
    from rich.panel import Panel
    from rich.console import Console

    fact_data = load_facts()

    console = Console()
    for fact in fact_data['facts']:
        text = ub.codeblock('''
            {}

            References:
            {}
            ''').format(
            ub.paragraph(fact['text']),
            ub.indent(fact['references']),
        )
        fact_panel = Panel(text, title='FACT')
        console.print(fact_panel)
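
The text-assembly step on its own, without the rich panel (the fact dict is made up):

import ubelt as ub

fact = {
    'text': '''
        The ubelt.indent function simply prepends a prefix
        to every line of the input string.
        ''',
    'references': 'https://ubelt.readthedocs.io',
}
text = ub.codeblock(
    '''
    {}

    References:
    {}
    ''').format(ub.paragraph(fact['text']), ub.indent(fact['references']))
print(text)
# ub.paragraph collapses the body into one line; ub.indent pushes the
# references four spaces under the "References:" header.
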
Example #12
def _gen_cluttered_func(n=100):
    lines = []
    import ubelt as ub
    import kwarray
    rng = kwarray.ensure_rng(0)

    varnames = []
    for i in range(n):
        mode = rng.choice(['int', 'float', 'str'])
        if mode == 'int':
            value = rng.randint(0, 100000)
        if mode == 'str':
            value = ub.hash_data(rng.randint(0, 100000))[0:10]
        if mode == 'float':
            value = rng.randn() * 1000
        varname = 'var{:03d}'.format(i)
        line = '{} = {!r}'.format(varname, value)
        lines.append(line)
        varnames.append(varname)

    clutter_vars = ub.indent('\n'.join(lines))

    template = ub.codeblock('''
        def {FUNCNAME}():
        {CLUTTER}
            ignore_inf_loss_parts = d['ignore_inf_loss_parts']
            for i in range(num_inner_loops):
                if ignore_inf_loss_parts:
                    pass
            # return {RETVAL}
        ''')

    retval = '[{}]'.format(','.join(varnames))
    funcname = 'clutter_{}'.format(n)

    text = template.format(FUNCNAME=funcname,
                           CLUTTER=clutter_vars,
                           RETVAL=retval)
    return text, funcname
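
Usage sketch (requires kwarray for the seeded rng; the generated function references a global ``d`` defined by the benchmark harness, as in Example #19 below):

text, funcname = _gen_cluttered_func(n=3)
print(funcname)
# clutter_3
print(text)  # a def with three deterministic clutter assignments inside
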
Example #13
def auto_cmdline():
    import ubelt as ub
    modname, moddir = get_current_modulename()
    funcname, searchlines, pos, foundline = find_pyfunc_above_cursor()
    text = ub.codeblock(
        '''
        CommandLine:
            python -m {modname} {funcname}
        ''').format(funcname=funcname, modname=modname)

    def get_indent(line):
        """
        returns the preceding whitespace
        """
        n_whitespace = len(line) - len(line.lstrip())
        prefix = line[:n_whitespace]
        return prefix

    prefix = get_indent(foundline)

    text = ub.indent(text, prefix + '    ')
    return text
Example #14
def convert_argparse(parser):
    """
    Helper for converting an existing argparse object to scriptconfig
    definition.
    """
    import argparse
    import ubelt as ub
    value_template1 = '{dest!r}: scfg.Value({default!r}, help={help!r})'
    value_template2 = '{dest!r}: scfg.Value({default!r})'

    lines = []
    for action in parser._actions:
        if action.default == argparse.SUPPRESS:
            continue
        if action.help is None:
            value_text = value_template2.format(
                dest=action.dest,
                default=action.default,
            )
        else:
            value_text = value_template1.format(dest=action.dest,
                                                default=action.default,
                                                help=ub.paragraph(action.help))
        lines.append(value_text + ',')

    class_template = ub.codeblock('''
        import scriptconfig as scfg
        class MyConfig(scfg.Config):
            """{desc}"""
            default = {{
        {body}
            }}
        ''')

    body = ub.indent('\n'.join(lines), ' ' * 8)
    text = class_template.format(body=body, desc=parser.description)
    print(text)
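
Usage sketch on a throwaway argparse parser (the arguments are made up):

import argparse

parser = argparse.ArgumentParser(description='demo converter input')
parser.add_argument('--count', type=int, default=1, help='number of repetitions')
parser.add_argument('--name', default='x')
convert_argparse(parser)
# Prints a scriptconfig class whose default dict has one scfg.Value per action;
# the builtin -h action is skipped because its default is argparse.SUPPRESS.
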
Example #15
def _complete_source(line, state_indent, line_iter):
    """
    helper
    remove lines from the iterator if they are needed to complete source
    """
    norm_line = line[state_indent:]  # Normalize line indentation
    prefix = norm_line[:4]
    suffix = norm_line[4:]
    assert prefix.strip() in {'>>>', '...'}, '{}'.format(prefix)
    yield line, norm_line

    source_parts = [suffix]

    # These hacks actually modify the input doctest slightly
    HACK_TRIPLE_QUOTE_FIX = True

    try:
        while not static.is_balanced_statement(source_parts, only_tokens=True):
            line_idx, next_line = next(line_iter)
            norm_line = next_line[state_indent:]
            prefix = norm_line[:4]
            suffix = norm_line[4:]

            if prefix.strip() not in {'>>>', '...', ''}:  # nocover
                error = True
                if HACK_TRIPLE_QUOTE_FIX:
                    # TODO: make a more robust patch
                    if any("'''" in s or '"""' in s for s in source_parts):
                        # print('HACK FIXING TRIPLE QUOTE')
                        next_line = next_line[:state_indent] + '... ' + norm_line
                        norm_line = '... ' + norm_line
                        prefix = ''
                        suffix = norm_line
                        error = False

                if error:
                    if DEBUG:
                        print(' * !!!ERROR!!!')
                        print(' * source_parts = {!r}'.format(source_parts))
                        print(' * prefix = {!r}'.format(prefix))
                        print(' * norm_line = {!r}'.format(norm_line))
                        print(' * !!!!!!!!!!!!!')

                    raise SyntaxError(
                        'Bad indentation in doctest on line {}: {!r}'.format(
                            line_idx, next_line))
            source_parts.append(suffix)
            yield next_line, norm_line
    except StopIteration:
        if DEBUG:
            import ubelt as ub
            print('<FAIL DID NOT COMPLETE SOURCE>')
            import traceback
            tb_text = traceback.format_exc()
            tb_text = ub.highlight_code(tb_text)
            tb_text = ub.indent(tb_text)
            print(tb_text)
            # print(' * line_iter = {!r}'.format(line_iter))
            print(' * state_indent = {!r}'.format(state_indent))
            print(' * line = {!r}'.format(line))
            # print('source =\n{}'.format('\n'.join(source_parts)))
            print('# Ensure that the following line should actually fail')
            print('source_parts = {}'.format(ub.repr2(source_parts, nl=2)))
            print(
                ub.codeblock(r'''
                from xdoctest import static_analysis as static
                static.is_balanced_statement(source_parts, only_tokens=False)
                static.is_balanced_statement(source_parts, only_tokens=True)
                text = '\n'.join(source_parts)
                print(text)
                static.six_axt_parse(text)
                '''))
            print('</FAIL DID NOT COMPLETE SOURCE>')
            # sys.exit(1)
        # TODO: use AST to reparse all doctest parts to discover where the
        # syntax error in the doctest is and then raise it.
        raise exceptions.IncompleteParseError(
            'ill-formed doctest: all parts have been processed '
            'but the doctest source is not balanced')
    else:
        if DEBUG > 1:
            import ubelt as ub
            print('<SUCCESS COMPLETED SOURCE>')
            # print(' * line_iter = {!r}'.format(line_iter))
            print('source_parts = {}'.format(ub.repr2(source_parts, nl=2)))
            print('</SUCCESS COMPLETED SOURCE>')
Example #16
def classification_report(y_true,
                          y_pred,
                          target_names=None,
                          sample_weight=None,
                          verbose=False):
    """
    Computes a classification report, which is a collection of various metrics
    commonly used to evaluate classification quality. This can handle binary
    and multiclass settings.

    Note that this function does not accept probabilities or scores and must
    instead act on final decisions. See ovr_classification_report for a
    probability based report function using a one-vs-rest strategy.

    This emulates the bm(cm) Matlab script written by David Powers that is used
    for computing bookmaker, markedness, and various other scores.

    References:
        https://csem.flinders.edu.au/research/techreps/SIE07001.pdf
        https://www.mathworks.com/matlabcentral/fileexchange/5648-bm-cm-?requestedDomain=www.mathworks.com
        Jurman, Riccadonna, Furlanello, (2012). A Comparison of MCC and CEN
            Error Measures in MultiClass Prediction

    Example:
        >>> # xdoctest: +IGNORE_WANT
        >>> # xdoctest: +REQUIRES(module:sklearn)
        >>> y_true = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3]
        >>> y_pred = [1, 2, 1, 3, 1, 2, 2, 3, 2, 2, 3, 3, 2, 3, 3, 3, 1, 3]
        >>> target_names = None
        >>> sample_weight = None
        >>> report = classification_report(y_true, y_pred, verbose=0)
        >>> print(report['confusion'])
        pred  1  2  3  Σr
        real
        1     3  1  1   5
        2     0  4  1   5
        3     1  1  6   8
        Σp    4  6  8  18
        >>> print(report['metrics'])
        metric    precision  recall    fpr  markedness  bookmaker    mcc  support
        class
        1            0.7500  0.6000 0.0769      0.6071     0.5231 0.5635        5
        2            0.6667  0.8000 0.1538      0.5833     0.6462 0.6139        5
        3            0.7500  0.7500 0.2000      0.5500     0.5500 0.5500        8
        combined     0.7269  0.7222 0.1530      0.5751     0.5761 0.5758       18

    Ignore:
        >>> size = 100
        >>> rng = np.random.RandomState(0)
        >>> p_classes = np.array([.90, .05, .05][0:2])
        >>> p_classes = p_classes / p_classes.sum()
        >>> p_wrong   = np.array([.03, .01, .02][0:2])
        >>> y_true = testdata_ytrue(p_classes, p_wrong, size, rng)
        >>> rs = []
        >>> for x in range(17):
        >>>     p_wrong += .05
        >>>     y_pred = testdata_ypred(y_true, p_wrong, rng)
        >>>     report = classification_report(y_true, y_pred, verbose='hack')
        >>>     rs.append(report)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> import pandas as pd
        >>> df = pd.DataFrame(rs).drop(['raw'], axis=1)
        >>> delta = df.subtract(df['target'], axis=0)
        >>> sqrd_error = np.sqrt((delta ** 2).sum(axis=0))
        >>> print('Error')
        >>> print(sqrd_error.sort_values())
        >>> ys = df.to_dict(orient='list')
        >>> kwplot.multi_plot(ydata_list=ys)
    """
    import numpy as np
    import pandas as pd
    import scipy as sp
    import sklearn.metrics
    import ubelt as ub
    from sklearn.preprocessing import LabelEncoder

    if target_names is None:
        unique_labels = np.unique(np.hstack([y_true, y_pred]))
        if len(unique_labels) == 1 and (unique_labels[0] == 0
                                        or unique_labels[0] == 1):
            target_names = np.array([False, True])
            y_true_ = y_true
            y_pred_ = y_pred
        else:
            lb = LabelEncoder()
            lb.fit(unique_labels)
            y_true_ = lb.transform(y_true)
            y_pred_ = lb.transform(y_pred)
            target_names = lb.classes_
    else:
        y_true_ = y_true
        y_pred_ = y_pred

    # Real data is on the rows,
    # Pred data is on the cols.

    cm = sklearn.metrics.confusion_matrix(y_true_,
                                          y_pred_,
                                          sample_weight=sample_weight,
                                          labels=np.arange(len(target_names)))
    confusion = cm  # NOQA

    k = len(cm)  # number of classes
    N = cm.sum()  # number of examples

    real_total = cm.sum(axis=1)
    pred_total = cm.sum(axis=0)

    # the number of "positive" cases **per class**
    n_pos = real_total  # NOQA
    # the number of "negative" cases **per class**
    n_neg = N - n_pos  # NOQA

    # number of true positives per class
    n_tps = np.diag(cm)
    # number of false positives per class
    n_fps = (cm - np.diagflat(np.diag(cm))).sum(axis=0)

    import warnings
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message='invalid .* true_divide')
        warnings.filterwarnings('ignore', message='divide by zero')

        tprs = n_tps / real_total  # true pos rate (recall)
        tpas = n_tps / pred_total  # true pos accuracy (precision)

        unused = (real_total + pred_total) == 0

        fprs = n_fps / n_neg  # false positive rate
        fprs[unused] = np.nan

        rprob = real_total / N
        pprob = pred_total / N

        # if len(cm) == 2:
        #     [[A, B],
        #      [C, D]] = cm
        #     (A * D - B * C) / np.sqrt((A + C) * (B + D) * (A + B) * (C + D))

        # bookmaker is analogous to recall, but unbiased by class frequency
        rprob_mat = np.tile(rprob, [k, 1]).T - (1 - np.eye(k))
        bmcm = cm.T / rprob_mat
        bms = np.sum(bmcm.T, axis=0) / N

        # markedness is analogous to precision, but unbiased by class frequency
        pprob_mat = np.tile(pprob, [k, 1]).T - (1 - np.eye(k))
        mkcm = cm / pprob_mat
        mks = np.sum(mkcm.T, axis=0) / N

        mccs = np.sign(bms) * np.sqrt(np.abs(bms * mks))

        import scipy
        # https://en.wikipedia.org/wiki/F1_score
        # f1_scores = scipy.stats.hmean(np.hstack([
        #     tpas[:, None],
        #     tprs[:, None]
        # ]), axis=1)
        f1_scores = 2 * (tpas * tprs) / (tpas + tprs)
        g1_scores = scipy.stats.gmean(np.hstack([tpas[:, None], tprs[:,
                                                                     None]]),
                                      axis=1)

    perclass_data = ub.odict([
        ('precision', tpas),
        ('recall', tprs),
        ('fpr', fprs),
        ('markedness', mks),
        ('bookmaker', bms),
        ('mcc', mccs),
        ('f1', f1_scores),
        ('g1', g1_scores),
        ('support', real_total),
    ])

    tpa = np.nansum(tpas * rprob)
    tpr = np.nansum(tprs * rprob)

    fpr = np.nansum(fprs * rprob)

    mk = np.nansum(mks * rprob)
    bm = np.nansum(bms * pprob)

    # The simple mean seems to do the best
    mccs_ = mccs[~np.isnan(mccs)]
    if len(mccs_) == 0:
        mcc_combo = np.nan
    else:
        mcc_combo = np.nanmean(mccs_)

    combined_data = ub.odict([
        ('precision', tpa),
        ('recall', tpr),
        ('fpr', fpr),
        ('markedness', mk),
        ('bookmaker', bm),
        # ('mcc', np.sign(bm) * np.sqrt(np.abs(bm * mk))),
        ('mcc', mcc_combo),
        # np.sign(bm) * np.sqrt(np.abs(bm * mk))),
        ('f1', np.nanmean(f1_scores)),
        ('g1', np.nanmean(g1_scores)),
        ('support', real_total.sum()),
    ])

    # Not sure how to compute this. Should it agree with the sklearn impl?
    if verbose == 'hack':
        verbose = False
        mcc_known = sklearn.metrics.matthews_corrcoef(
            y_true, y_pred, sample_weight=sample_weight)
        mcc_raw = np.sign(bm) * np.sqrt(np.abs(bm * mk))

        def gmean(x, w=None):
            if w is None:
                return sp.stats.gmean(x)
            return np.exp(np.nansum(w * np.log(x)) / np.nansum(w))

        def hmean(x, w=None):
            if w is None:
                return sp.stats.hmean(x)
            return 1 / (np.nansum(w * (1 / x)) / np.nansum(w))

        def amean(x, w=None):
            if w is None:
                return np.mean(x)
            return np.nansum(w * x) / np.nansum(w)

        report = {
            'target': mcc_known,
            'raw': mcc_raw,
        }

        # print('%r <<<' % (mcc_known,))
        means = {
            'a': amean,
            # 'h': hmean,
            'g': gmean,
        }
        weights = {
            'p': pprob,
            'r': rprob,
            '': None,
        }
        for mean_key, mean in means.items():
            for w_key, w in weights.items():
                # Hack of very wrong items
                if mean_key == 'g':
                    if w_key in ['r', 'p', '']:
                        continue
                if mean_key == 'g':
                    if w_key in ['r']:
                        continue
                m = mean(mccs, w)
                r_key = '{} {}'.format(mean_key, w_key)
                report[r_key] = m
                # print(r_key)
                # print(np.abs(m - mcc_known))

        # print(ut.repr4(report, precision=8))
        return report
        # print('mcc_known = %r' % (mcc_known,))
        # print('mcc_combo1 = %r' % (mcc_combo1,))
        # print('mcc_combo2 = %r' % (mcc_combo2,))
        # print('mcc_combo3 = %r' % (mcc_combo3,))

    # if len(target_names) > len(perclass_data['precision']):
    #     target_names = target_names[:len(perclass_data['precision'])]

    index = pd.Index(target_names, name='class')

    perclass_df = pd.DataFrame(perclass_data, index=index)
    # combined_df = pd.DataFrame(combined_data, index=['ave/sum'])
    combined_df = pd.DataFrame(combined_data, index=['combined'])

    metric_df = pd.concat([perclass_df, combined_df])
    metric_df.index.name = 'class'
    metric_df.columns.name = 'metric'

    pred_id = ['%s' % m for m in target_names]
    real_id = ['%s' % m for m in target_names]
    confusion_df = pd.DataFrame(confusion, columns=pred_id, index=real_id)

    confusion_df = pd.concat([
        confusion_df,
        pd.DataFrame([confusion.sum(axis=0)], columns=pred_id, index=['Σp'])])
    confusion_df['Σr'] = np.hstack([confusion.sum(axis=1), [0]])
    confusion_df.index.name = 'real'
    confusion_df.columns.name = 'pred'

    _residual = (confusion_df - np.floor(confusion_df)).values
    _thresh = 1e-6
    if np.all(_residual < _thresh):
        confusion_df = confusion_df.astype(int)
    confusion_df.iloc[(-1, -1)] = N
    _residual = (confusion_df - np.floor(confusion_df)).values
    if np.all(_residual < _thresh):
        confusion_df = confusion_df.astype(int)

    if verbose:
        cfsm_str = confusion_df.to_string(
            float_format=lambda x: '%.1f' % (x, ))
        print('Confusion Matrix (real × pred):')
        print(ub.indent(cfsm_str))

        # ut.cprint('\nExtended Report', 'turquoise')
        print('\nEvaluation Metric Report:')
        float_precision = 2
        float_format = '%.' + str(float_precision) + 'f'
        ext_report = metric_df.to_string(float_format=float_format)
        print(ub.indent(ext_report))

    report = {
        'metrics': metric_df,
        'confusion': confusion_df,
    }

    # TODO: What is the difference between sklearn multiclass-MCC
    # and BM * MK MCC?

    try:
        mcc = sklearn.metrics.matthews_corrcoef(y_true,
                                                y_pred,
                                                sample_weight=sample_weight)
        # mcc = matthews_corrcoef(y_true, y_pred, sample_weight=sample_weight)
        # These scales are chosen somewhat arbitrarily in the context of a
        # computer vision application with relatively reasonable quality data
        # https://stats.stackexchange.com/questions/118219/how-to-interpret
        mcc_significance_scales = ub.odict([
            (1.0, 'perfect'),
            (0.9, 'very strong'),
            (0.7, 'strong'),
            (0.5, 'significant'),
            (0.3, 'moderate'),
            (0.2, 'weak'),
            (0.0, 'negligible'),
        ])
        for k, v in mcc_significance_scales.items():
            if np.abs(mcc) >= k:
                if verbose:
                    print('classifier correlation is %s' % (v, ))
                break
        if verbose:
            float_precision = 2
            print(('MCC\' = %.' + str(float_precision) + 'f') % (mcc, ))
        report['mcc'] = mcc
    except ValueError:
        report['mcc'] = None
    return report
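
The verbose table printing above boils down to indenting a DataFrame's to_string output; a minimal sketch with made-up numbers:

import pandas as pd
import ubelt as ub

df = pd.DataFrame({'precision': [0.75, 0.67], 'recall': [0.60, 0.80]},
                  index=pd.Index(['1', '2'], name='class'))
print('Evaluation Metric Report:')
print(ub.indent(df.to_string(float_format=lambda x: '%.2f' % x)))
# The whole table shifts four spaces right, keeping column alignment intact.
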
Example #17
    def __nice__(self):
        data_repr = repr(self.data)
        if '\n' in data_repr:
            data_repr = ub.indent('\n' + data_repr.lstrip('\n'), '    ')
        return 'data={}'.format(data_repr)
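
Sketch of how a __nice__ hook like this plugs into ubelt.NiceRepr (the Wrapper class is made up for illustration):

import ubelt as ub

class Wrapper(ub.NiceRepr):
    def __init__(self, data):
        self.data = data

    def __nice__(self):
        data_repr = repr(self.data)
        if '\n' in data_repr:
            # indent multiline reprs so they nest under the class name
            data_repr = ub.indent('\n' + data_repr.lstrip('\n'), '    ')
        return 'data={}'.format(data_repr)

print(Wrapper([1, 2, 3]))
# <Wrapper(data=[1, 2, 3])>
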
Example #18
def do_fine_graine_level_sets(self, mapping):

    inverted = ub.invert_dict(mapping, False)
    for sup, subs in inverted.items():
        print('sup = {!r}'.format(sup))
        for sub in subs:
            if sub in self.name_to_cat:
                cat = self.name_to_cat[sub]
                n = len(self.cid_to_aids[cat['id']])
                if n:
                    print('  * {} = {}'.format(sub, n))

    mapping = get_coarse_mapping()
    inverted = ub.invert_dict(mapping, False)

    fine_grained_map = {}

    custom_fine_grained_map = {v: k for k, vs in {
        'unidentified roundfish': [
            'unidentified roundfish',
            'unidentified roundfish (less than half)',
            'unknown roundfish',
            'Rockfish Unid.',
        ],

        'unidentified sebastomus': [
            'sebastes_2species',
            'unknown sebastomus',
            'unknown rockfish',
            'Thornyhead Unid.',
            'Hexagrammidae sp.',
        ],

        'prickleback': [
            'Prickleback',
            'Stichaeidae',
        ],

        'Flatfish Unid.': [
            'Flatfish Unid.',
            'unknown flatfish',
        ]
    }.items() for v in vs}

    catnames = [cat['name'] for cat in self.cats.values()]
    catnames = list(mapping.keys())

    for name in catnames:
        # normalize the name
        norm = normalize_name(name)
        fine_grained_map[name] = norm

    fine_grained_level_set = ub.invert_dict(fine_grained_map, False)
    print(ub.repr2(fine_grained_level_set))

    for sup, subs in inverted.items():
        print('* COARSE-CLASS = {!r}'.format(sup))
        for norm in sorted(set([normalize_name(sub) for sub in subs])):
            raws = fine_grained_level_set.get(norm, [])
            if raws:
                print('    * fine-class = {!r}'.format(norm))
                if len(raws) > 1:
                    # or list(raws)[0] != norm:
                    print(ub.indent('* raw-classes = {}'.format(ub.repr2(raws, nl=1)), ' ' * 8))

    import networkx as nx
    G = nx.DiGraph()
    for norm in fine_grained_map.values():
        G.add_node(norm)

    for sup, subs in inverted.items():
        G.add_node(sup)
        for norm in sorted(set([normalize_name(sub) for sub in subs])):
            G.add_edge(norm, sup)
    if False:
        import plottool as pt
        pt.show_nx(G, layoutkw=dict(prog='neato'), arrow_width=.1, sep=10)
Example #19
def main():
    import ubelt as ub
    header = ub.codeblock('''
        import ubelt as ub
        ti = ub.Timerit(100, bestof=10, verbose=2)

        d = {
            'keyboard_debug': False,
            'snapshot_after_error': True,  # Try to checkpoint before crashing
            'show_prog': True,
            'use_tqdm': None,
            'prog_backend': 'progiter',
            'ignore_inf_loss_parts': False,
            'use_tensorboard': True,
            'export_modules': [],
            'large_loss': 1000,
            'num_keep': 2,
            'keep_freq': 20,
        }

        num_inner_loops = 10000

        def access_dict_direct():
            for i in range(num_inner_loops):
                if d['ignore_inf_loss_parts']:
                    pass
        for timer in ti.reset('access_dict_direct'):
            with timer:
                access_dict_direct()

        ''')

    parts = [header]

    for n in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100, 1000]:
        func_text, funcname = _gen_cluttered_func(n=n)
        time_text = ub.codeblock('''
            {func_text}
            for timer in ti.reset('{funcname}'):
                with timer:
                    {funcname}()
            ''').format(func_text=func_text, funcname=funcname)
        parts.append(time_text)

    block = '\n'.join(parts)

    prog_text = ub.codeblock('''
        import ubelt as ub
        def main():
        {block}

        if __name__ == '__main__':
            main()
        ''').format(block=ub.indent(block))

    # prog_text = 'def main():\n' + ub.indent(block) + 'if __name__ == "__main__":\n    main()'
    fpath = 'bench_local_clutter.py'
    with open(fpath, 'w') as file:
        file.write(prog_text)

    ub.cmd('python ' + fpath, verbose=3)
Example #20
    def parse(self, string, info=None):
        """
        Divide the given string into examples and interleaving text.

        Args:
            string (str): string representing the doctest
            info (dict): info about where the string came from in case of an
                error

        Returns:
            list : a list of `DoctestPart` objects

        CommandLine:
            python -m xdoctest.parser DoctestParser.parse

        Example:
            >>> s = 'I am a dummy example with two parts'
            >>> x = 10
            >>> print(s)
            I am a dummy example with two parts
            >>> s = 'My purpose is to demonstrate how wants work here'
            >>> print('The new want applies ONLY to stdout')
            >>> print('given before the last want')
            >>> '''
                this wont hurt the test at all
                even though its multiline '''
            >>> y = 20
            The new want applies ONLY to stdout
            given before the last want
            >>> # Parts from previous examples are executed in the same context
            >>> print(x + y)
            30

            this is simply text, and doesn't apply to the previous doctest; the
            <BLANKLINE> directive is still in effect.

        Example:
            >>> from xdoctest import parser
            >>> from xdoctest.docstr import docscrape_google
            >>> from xdoctest import core
            >>> self = parser.DoctestParser()
            >>> docstr = self.parse.__doc__
            >>> blocks = docscrape_google.split_google_docblocks(docstr)
            >>> doclineno = self.parse.__func__.__code__.co_firstlineno
            >>> key, (string, offset) = blocks[-2]
            >>> self._label_docsrc_lines(string)
            >>> doctest_parts = self.parse(string)
            >>> # each part with a want-string needs to be broken in two
            >>> assert len(doctest_parts) == 6
        """
        if DEBUG > 1:
            print('\n===== PARSE ====')
        if sys.version_info.major == 2:  # nocover
            string = utils.ensure_unicode(string)

        if not isinstance(string, six.string_types):
            raise TypeError('Expected string but got {!r}'.format(string))

        string = string.expandtabs()
        # If all lines begin with the same indentation, then strip it.
        min_indent = _min_indentation(string)
        if min_indent > 0:
            string = '\n'.join([l[min_indent:] for l in string.splitlines()])

        labeled_lines = None
        grouped_lines = None
        all_parts = None
        try:
            labeled_lines = self._label_docsrc_lines(string)
            grouped_lines = self._group_labeled_lines(labeled_lines)
            all_parts = list(self._package_groups(grouped_lines))
        except Exception as orig_ex:

            if labeled_lines is None:
                failpoint = '_label_docsrc_lines'
            elif grouped_lines is None:
                failpoint = '_group_labeled_lines'
            elif all_parts is None:
                failpoint = '_package_groups'
            if DEBUG:
                print('<FAILPOINT>')
                print('!!! FAILED !!!')
                print('failpoint = {!r}'.format(failpoint))

                import ubelt as ub
                import traceback
                tb_text = traceback.format_exc()
                tb_text = ub.highlight_code(tb_text)
                tb_text = ub.indent(tb_text)
                print(tb_text)

                print('Failed to parse string = <{[<{[<{[')
                print(string)
                print(']}>]}>]}>  # end string')

                print('info = {}'.format(ub.repr2(info)))
                print('-----')
                print('orig_ex = {}'.format(orig_ex))
                print('labeled_lines = {}'.format(ub.repr2(labeled_lines)))
                print('grouped_lines = {}'.format(ub.repr2(grouped_lines,
                                                           nl=3)))
                print('all_parts = {}'.format(ub.repr2(all_parts)))
                print('</FAILPOINT>')
                # sys.exit(1)
            raise exceptions.DoctestParseError(
                'Failed to parse doctest in {}'.format(failpoint),
                string=string,
                info=info,
                orig_ex=orig_ex)
        if DEBUG > 1:
            print('\n===== FINISHED PARSE ====')
        return all_parts
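
Usage sketch (xdoctest only; the doctest string is trivial and the `self =` style mirrors the doctests above):

from xdoctest import parser

self = parser.DoctestParser()
parts = self.parse('>>> x = 1\n>>> print(x)\n1')
for part in parts:
    print(part)
# Each element is a DoctestPart; the final part carries the want string '1'.
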
Example #21
def export_model_code(dpath, model, initkw=None):
    """
    Exports the class used to define a pytorch model as a new python module.

    Exports the minimum amount of code needed to make a self-contained Python
    module defining the pytorch model class. This exports the actual source
    code. The advantage of using this over pickle is that the original code can
    change arbitrarily because all dependencies on the original code are
    removed in the exported code.

    Args:
        dpath (str): directory to dump the model
        model (tuple or type or object): class or class instance (e.g. torch.nn.Module)
        initkw (dict): if specified, creates the function `make`, which
            initializes the network with the specific arguments.

    Returns:
        str: static_modpath: path to the saved model file.
            While you could put the output path in your PYTHONPATH, it is best
            to use `ub.import_module_from_path` to "load" the model instead.

    CommandLine:
        xdoctest -m netharn.export.exporter export_model_code

    Example:
        >>> from torchvision.models import densenet
        >>> from os.path import basename
        >>> initkw = {'growth_rate': 16}
        >>> model = densenet.DenseNet(**initkw)
        >>> dpath = ub.ensure_app_cache_dir('netharn/tests')
        >>> static_modpath = export_model_code(dpath, model, initkw)
        >>> print('static_modpath = {!r}'.format(static_modpath))
        ...
        >>> print(basename(static_modpath))
        DenseNet_256629.py
        >>> # now the module can be loaded
        >>> module = ub.import_module_from_path(static_modpath)
        >>> loaded = module.make()
        >>> assert model.features.denseblock1.denselayer1.conv2.out_channels == 16
        >>> assert loaded.features.denseblock1.denselayer1.conv2.out_channels == 16
        >>> assert model is not loaded
    """
    if isinstance(model, type):
        model_class = model
    else:
        model_class = model.__class__
    classname = model_class.__name__

    if initkw is None:
        raise NotImplementedError(
            'ERROR: The params passed to the model __init__ must be available')
        footer = ''
    else:
        # First see if we can get away with a simple encoding of initkw
        try:
            # Do not use repr. The text produced is non-deterministic for
            # dictionaries. Instead, use ub.repr2, which is deterministic.
            init_text = ub.repr2(initkw, nl=1)
            eval(init_text, {})
            init_code = ub.codeblock('initkw = {}').format(init_text)
        except Exception:
            # fallback to pickle
            warnings.warn('Initialization params might not be serialized '
                          'deterministically')
            init_bytes = repr(pickle.dumps(initkw, protocol=0))
            init_code = ub.codeblock('''
                import pickle
                initkw = pickle.loads({})
                ''').format(init_bytes)
        init_code = ub.indent(init_code).lstrip()
        # create a function to instantiate the class
        footer = '\n\n' + ub.codeblock('''
            __pt_export_version__ = '{__pt_export_version__}'


            def get_initkw():
                """ creates the keyword arguments used to construct the model """
                {init_code}
                return initkw


            def get_model_cls():
                model_cls = {classname}
                return model_cls


            def make():
                """ creates an instance of the model """
                initkw = get_initkw()
                model_cls = get_model_cls()
                model = model_cls(**initkw)
                return model
            ''').format(classname=classname,
                        init_code=init_code,
                        __pt_export_version__=__pt_export_version__)

        # TODO: assert that the name "make" is not used in the model body

    body = closer.source_closure(model_class)

    body_footer = body + footer + '\n'
    # dont need to hash the header, because comments are removed anyway
    hashid = hash_code(body_footer)

    header = ub.codeblock('''
        """
        This module was autogenerated by netharn/export/exporter.py
        original_module={}
        classname={}
        timestamp={}
        hashid={}
        """
        ''').format(model_class.__module__, classname, ub.timestamp(), hashid)

    sourcecode = header + '\n' + body_footer

    static_modname = classname + '_' + hashid[0:6]
    static_modpath = join(dpath, static_modname + '.py')
    with open(static_modpath, 'w') as file:
        file.write(sourcecode)
    return static_modpath
Example #22
def count_ubelt_usage():
    """
    import sys, ubelt
    sys.path.append(ubelt.expandpath('~/code/ubelt/dev'))
    from gen_api_for_docs import *  # NOQA
    """
    from count_usage_freq import count_ubelt_usage
    usage = count_ubelt_usage()

    # Reorganize data to contain more information
    rows = []
    unseen = usage.copy()
    import ubelt as ub
    for attrname in ub.__all__:
        member = getattr(ub, attrname)
        submembers = getattr(member, '__all__', None)
        if attrname.startswith('util_'):
            if not submembers:
                from mkinit.static_mkinit import _extract_attributes
                submembers = _extract_attributes(member.__file__)
        if submembers:
            for subname in submembers:
                parent_module = 'ubelt.{}'.format(attrname)
                short_name = 'ubelt.{subname}'.format(**locals())
                full_name = '{parent_module}.{subname}'.format(**locals())
                url = 'https://ubelt.readthedocs.io/en/latest/{parent_module}.html#{full_name}'.format(
                    **locals())
                rst_ref = ':func:`{short_name}<{full_name}>`'.format(
                    **locals())
                url_ref = '`{short_name} <{url}>`__'.format(**locals())
                rows.append({
                    'attr': subname,
                    'parent_module': parent_module,
                    'usage': unseen.pop(subname, 0),
                    'short_name': short_name,
                    'full_name': full_name,
                    'url': url,
                    'rst_ref': rst_ref,
                    'url_ref': url_ref,
                })

    attr_to_infos = ub.group_items(rows, lambda x: x['attr'])

    import numpy as np
    import kwarray
    import ubelt as ub

    if ub.argflag('--url-mode'):
        ref_key = 'url_ref'
    else:
        ref_key = 'rst_ref'

    name_len = max(len(row[ref_key]) for row in rows) + 1
    num_len = 16

    guard = ('=' * name_len + ' ' + '=' * num_len)
    print(guard)
    column_fmt = '{:<' + str(name_len) + '} {:>' + str(num_len) + '}'
    print(column_fmt.format(' Function name ', 'Usefulness'))
    print(guard)
    for key, value in usage.items():
        infos = attr_to_infos[key]
        if len(infos) == 0:
            print(column_fmt.format(':func:`ubelt.' + key + '`', value))
        else:
            assert len(infos) == 1
            info = infos[0]
            print(column_fmt.format(info[ref_key], value))
    print(guard)

    raw_scores = np.array(list(usage.values()))

    print('\n.. code:: python\n')
    print(
        ub.indent('usage stats = ' + ub.repr2(
            kwarray.stats_dict(raw_scores, median=True, sum=True), nl=1)))

    for attrname in ub.__all__:
        member = getattr(ub, attrname)

        submembers = getattr(member, '__all__', None)

        if attrname.startswith('util_'):
            if not submembers:
                from mkinit.static_mkinit import _extract_attributes
                submembers = _extract_attributes(member.__file__)

        if submembers:
            parent_module = 'ubelt.{}'.format(attrname)

            title = ':mod:`{}`'.format(parent_module)
            print('\n' + title)
            print('-' * len(title))
            for subname in submembers:
                if not subname.startswith('_'):
                    rst_ref = (
                        ':func:`ubelt.{subname} <{parent_module}.{subname}>`'
                    ).format(subname=subname, parent_module=parent_module)
                    print(rst_ref)
            submembers = dir(member)
Example #23
def benchmark_det_nms():
    """
    Benchmarks different implementations of non-max-suppression on the CPU,
    GPU, and using cython / numpy / torch.

    CommandLine:
        xdoctest -m ~/code/kwimage/dev/bench_nms.py benchmark_det_nms --show

    SeeAlso:
        PJR Darknet NonMax suppression
        https://github.com/pjreddie/darknet/blob/master/src/box.c

        Lightnet NMS
        https://gitlab.com/EAVISE/lightnet/blob/master/lightnet/data/transform/_postprocess.py#L116
    """

    # N = 200
    # bestof = 50
    N = 1
    bestof = 1

    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500, 2000]

    # max number of boxes yolo will spit out at a time
    max_boxes = 19 * 19 * 5

    xdata = [
        10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500,
        max_boxes
    ]
    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500]

    # Demo values
    xdata = [0, 1, 2, 3, 10, 100, 200, 300, 500]

    if ub.argflag('--small'):
        xdata = [10, 100, 500, 1000, 1500, 2000, 5000, 10000]

    if ub.argflag('--medium'):
        xdata = [
            1000,
            5000,
            10000,
            20000,
            50000,
        ]

    if ub.argflag('--large'):
        xdata = [
            1000,
            5000,
            10000,
            20000,
            50000,
            100000,
        ]

    if ub.argflag('--extra-large'):
        xdata = [
            1000,
            2000,
            10000,
            20000,
            40000,
            100000,
            200000,
        ]

    title_parts = []

    SMALL_BOXES = ub.argflag('--small-boxes')
    if SMALL_BOXES:
        title_parts.append('small boxes')
    else:
        title_parts.append('large boxes')

    # NOTE: for large images we may have up to 21,850,753 detections!

    thresh = float(ub.argval('--thresh', default=0.4))
    title_parts.append('thresh={:.2f}'.format(thresh))

    from kwimage.algo.algo_nms import available_nms_impls
    valid_impls = available_nms_impls()
    print('valid_impls = {!r}'.format(valid_impls))

    basis = {
        'type': ['ndarray', 'tensor', 'tensor0'],
        # 'daq': [True, False],
        # 'daq': [False],
        # 'device': [None],
        # 'impl': valid_impls,
        'impl': valid_impls + ['auto'],
    }

    if ub.argflag('--daq'):
        basis['daq'] = [True, False]

    # if torch.cuda.is_available():
    #     basis['device'].append(0)

    combos = [
        ub.dzip(basis.keys(), vals) for vals in it.product(*basis.values())
    ]

    def is_valid_combo(combo):
        # if combo['impl'] in {'py', 'cython_cpu'} and combo['device'] is not None:
        #     return False
        # if combo['type'] == 'ndarray' and combo['impl'] == 'cython_gpu':
        #     if combo['device'] is None:
        #         return False
        # if combo['type'] == 'ndarray' and combo['impl'] != 'cython_gpu':
        #     if combo['device'] is not None:
        #         return False

        # if combo['type'].endswith('0'):
        #     if combo['impl'] in {'numpy', 'cython_gpu', 'cython_cpu'}:
        #         return False

        # if combo['type'] == 'ndarray':
        #     if combo['impl'] in {'torch'}:
        #         return False

        REMOVE_SLOW = True
        if REMOVE_SLOW:
            known_bad = [
                {
                    'impl': 'torch',
                    'type': 'tensor'
                },
                {
                    'impl': 'numpy',
                    'type': 'tensor'
                },
                # {'impl': 'cython_gpu', 'type': 'tensor'},
                {
                    'impl': 'cython_cpu',
                    'type': 'tensor'
                },

                # {'impl': 'torch', 'type': 'tensor0'},
                {
                    'impl': 'numpy',
                    'type': 'tensor0'
                },
                # {'impl': 'cython_gpu', 'type': 'tensor0'},
                # {'impl': 'cython_cpu', 'type': 'tensor0'},
                {
                    'impl': 'torchvision',
                    'type': 'ndarray'
                },
            ]
            for known in known_bad:
                if all(combo[key] == val for key, val in known.items()):
                    return False

        return True

    combos = list(filter(is_valid_combo, combos))

    times = ub.ddict(list)
    for num in xdata:

        if num > 10000:
            N = 1
            bestof = 1
        elif num > 1000:
            N = 3
            bestof = 1
        elif num > 100:
            N = 10
            bestof = 3
        elif num > 10:
            N = 100
            bestof = 10
        else:
            N = 1000
            bestof = 10
        print('\n\n---- number of boxes = {} ----\n'.format(num))

        outputs = {}

        ti = ub.Timerit(N, bestof=bestof, verbose=1)

        # Build random test boxes and scores
        np_dets1 = kwimage.Detections.random(num // 2, scale=1000.0, rng=0)
        np_dets1.data['boxes'] = np_dets1.boxes.to_xywh()

        if SMALL_BOXES:
            max_dim = 100
            np_dets1.boxes.data[..., 2] = np.minimum(np_dets1.boxes.width,
                                                     max_dim).ravel()
            np_dets1.boxes.data[..., 3] = np.minimum(np_dets1.boxes.height,
                                                     max_dim).ravel()

        np_dets2 = copy.deepcopy(np_dets1)
        np_dets2.boxes.translate(10, inplace=True)
        # add boxes that will definitely be removed
        np_dets = kwimage.Detections.concatenate([np_dets1, np_dets2])

        # make all scores unique to ensure comparability
        np_dets.scores[:] = np.linspace(0, 1, np_dets.num_boxes())

        np_dets.data['scores'] = np_dets.scores.astype(np.float32)
        np_dets.boxes.data = np_dets.boxes.data.astype(np.float32)

        typed_data = {}
        # ----------------------------------

        import netharn as nh
        for combo in combos:
            print('combo = {}'.format(ub.repr2(combo, nl=0)))

            label = nh.util.make_idstr(combo)
            mode = combo.copy()

            # if mode['impl'] == 'cython_gpu':
            #     mode['device_id'] = mode['device']

            mode_type = mode.pop('type')

            if mode_type in typed_data:
                dets = typed_data[mode_type]
            else:
                if mode_type == 'ndarray':
                    dets = np_dets.numpy()
                elif mode_type == 'tensor':
                    dets = np_dets.tensor(None)
                elif mode_type == 'tensor0':
                    dets = np_dets.tensor(0)
                else:
                    raise KeyError
                typed_data[mode_type] = dets

            for timer in ti.reset(label):
                with timer:
                    keep = dets.non_max_supression(thresh=thresh, **mode)
                    torch.cuda.synchronize()
            times[ti.label].append(ti.min())
            outputs[ti.label] = ensure_numpy_indices(keep)

        # ----------------------------------

        # Check that no pair of kept boxes overlaps by more than `thresh`
        if 0:
            for key, keep_idxs in outputs.items():
                kept = np_dets.take(keep_idxs).boxes
                ious = kept.ious(kept)
                max_iou = (np.tril(ious) - np.eye(len(ious))).max()
                if max_iou > thresh:
                    print('{} produced a bad result with max_iou={}'.format(
                        key, max_iou))

        # Check result consistency:
        print('\nResult stats:')
        for key in sorted(outputs.keys()):
            print('    * {:<20}: num={}'.format(key, len(outputs[key])))

        print('\nResult overlaps (method1, method2: jaccard):')
        datas = []
        for k1, k2 in it.combinations(sorted(outputs.keys()), 2):
            idxs1 = set(outputs[k1])
            idxs2 = set(outputs[k2])
            jaccard = len(idxs1 & idxs2) / max(len(idxs1 | idxs2), 1)
            datas.append((k1, k2, jaccard))

        datas = sorted(datas, key=lambda x: -x[2])
        for k1, k2, jaccard in datas:
            print('    * {:<20}, {:<20}: {:0.4f}'.format(k1, k2, jaccard))

    if True:
        ydata = {key: 1.0 / np.array(vals) for key, vals in times.items()}
        ylabel = 'Hz'
        reverse = True
        yscale = 'symlog'
    else:
        ydata = {key: np.array(vals) for key, vals in times.items()}
        ylabel = 'seconds'
        reverse = False
        yscale = 'linear'
    scores = {key: vals[-1] for key, vals in ydata.items()}
    ydata = ub.dict_subset(ydata, ub.argsort(scores, reverse=reverse))

    ###
    # times_of_interest = [0, 10, 100, 200, 1000]  # hand-picked alternative
    times_of_interest = xdata

    lines = []
    record = lines.append
    record('### times_of_interest = {!r}'.format(times_of_interest))
    for x in times_of_interest:

        if times_of_interest[-1] == x:
            record('else:')
        elif times_of_interest[0] == x:
            record('if num <= {}:'.format(x))
        else:
            record('elif num <= {}:'.format(x))

        if x in xdata:
            pos = xdata.index(x)
            score_wrt_x = {}
            for key, vals in ydata.items():
                score_wrt_x[key] = vals[pos]

            typekeys = ['tensor0', 'tensor', 'ndarray']
            type_groups = {
                b: ub.group_items(score_wrt_x, lambda y: y.endswith(b))[True]
                for b in typekeys
            }
            # print('\n=========')
            # print('x = {!r}'.format(x))
            record('    if code not in {!r}:'.format(set(typekeys)))
            record('        raise KeyError(code)')
            for typekey, group in type_groups.items():
                # print('-------')
                record('    if code == {!r}:'.format(typekey))
                # print('typekey = {!r}'.format(typekey))
                # print('group = {!r}'.format(group))
                group_x = ub.dict_isect(score_wrt_x, group)
                valid_keys = ub.argsort(group_x, reverse=True)
                valid_x = ub.dict_subset(group_x, valid_keys)
                # parts = [','.split(k) for k in valid_keys]
                ordered_impls = []
                ordered_impls2 = ub.odict()
                for k in valid_keys:
                    vals = valid_x[k]
                    p = k.split(',')
                    d = dict(i.split('=') for i in p)
                    ordered_impls2[d['impl']] = vals
                    ordered_impls.append(d['impl'])

                ordered_impls = list(ub.oset(ordered_impls) - {'auto'})
                ordered_impls2.pop('auto', None)
                record('        # {}'.format(
                    ub.repr2(ordered_impls2, precision=1, nl=0,
                             explicit=True)))
                record('        preference = {}'.format(
                    ub.repr2(ordered_impls, nl=0)))
    record('### end times_of_interest')
    print(ub.indent('\n'.join(lines), ' ' * 8))
    ###

    markers = {
        key: 'o' if 'auto' in key else ''
        for key, score in scores.items()
    }

    if ub.argflag('--daq'):
        markers = {
            key: '+' if 'daq=True' in key else ''
            for key, score in scores.items()
        }

    labels = {
        key: '{:.2f} {} - {}'.format(score, ylabel[0:3], key)
        for key, score in scores.items()
    }

    title = 'NSM-impl speed: ' + ', '.join(title_parts)

    import kwplot
    kwplot.autompl()
    kwplot.multi_plot(
        xdata,
        ydata,
        xlabel='num boxes',
        ylabel=ylabel,
        label=labels,
        yscale=yscale,
        title=title,
        marker=markers,
        # xscale='symlog',
    )

    kwplot.show_if_requested()
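
# The benchmark above leans on ub.Timerit for its measurements. Below is a
# minimal, self-contained sketch of that timing pattern (it assumes only
# that ubelt is installed; the squaring loop is a stand-in workload).
import ubelt as ub

# Run the body 100 times, keeping the best-of-10 measurement, using the
# same reset / with-timer pattern as the NMS loop above.
ti = ub.Timerit(100, bestof=10, verbose=1)
for timer in ti.reset('squares'):
    with timer:
        [i ** 2 for i in range(1000)]
print('min seconds per loop = {:.6f}'.format(ti.min()))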
Exemplo n.º 24
def make_default_module_maintest(modpath, test_code=None, argv=None,
                                 force_full=False):
    """
    Args:
        modpath (str): path to the module
        test_code (str, optional): body of the generated main block;
            defaults to an xdoctest invocation
        argv (list, optional): command line arguments appended to the
            generated command
        force_full (bool): if True, always invoke the module by file path
            instead of with ``python -m``

    Returns:
        str: text source code

    CommandLine:
        python -m utool.util_autogen --test-make_default_module_maintest

    References:
        http://legacy.python.org/dev/peps/pep-0338/

    Example:
        >>> import sys, ubelt as ub
        >>> sys.path.append(ub.truepath('~/local/vim/rc/'))
        >>> from pyvim_funcs import *
        >>> import pyvim_funcs
        >>> modpath = pyvim_funcs.__file__
        >>> argv = None
        >>> text = make_default_module_maintest(modpath)
        >>> print(text)
    """
    # if not use_modrun:
    #     if ub.WIN32:
    #         augpath = 'set PYTHONPATH=%PYTHONPATH%' + os.pathsep + moddir
    #     else:
    #         augpath = 'export PYTHONPATH=$PYTHONPATH' + os.pathsep + moddir
    #     cmdline = augpath + '\n' + cmdline
    import ubelt as ub
    from xdoctest import static_analysis as static

    modname = static.modpath_to_modname(modpath)
    moddir, rel_modpath = static.split_modpath(modpath)
    if not force_full:
        info = ub.cmd('python -c "import sys; print(sys.path)"')
        # parse the printed list safely instead of using eval
        import ast
        default_path = ast.literal_eval(info['out'])
        is_importable = static.is_modname_importable(modname, exclude=['.'],
                                                     sys_path=default_path)
    if not force_full and is_importable:
        cmdline = 'python -m ' + modname
    else:
        if ub.WIN32:
            modpath = ub.compressuser(modpath, home='%HOME%')
            cmdline = 'python -B ' + modpath.replace('\\', '/')
        else:
            modpath = ub.compressuser(modpath, home='~')
            cmdline = 'python ' + modpath

    if test_code is None:
        test_code = ub.codeblock(
            r'''
            import xdoctest
            xdoctest.doctest_module(__file__)
            ''')
        if argv is None:
            argv = ['all']

    if argv is None:
        argv = []

    cmdline_ = ub.indent(cmdline + ' ' + ' '.join(argv), ' ' * 8).lstrip(' ')
    test_code = ub.indent(test_code, ' ' * 4).lstrip(' ')
    text = ub.codeblock(
        r'''
        if __name__ == '__main__':
            {rr}"""
            CommandLine:
                {cmdline_}
            """
            {test_code}
        '''
    ).format(cmdline_=cmdline_, test_code=test_code, rr='{r}')
    text = text.format(r='r' if '\\' in text else '')
    return text
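
# Usage sketch (hypothetical module name): for an importable module the
# generated text looks roughly like the block below; the exact command
# line depends on sys.path and the platform.
#
#   if __name__ == '__main__':
#       """
#       CommandLine:
#           python -m mymod all
#       """
#       import xdoctest
#       xdoctest.doctest_module(__file__)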
Exemplo n.º 25
def __nice__(self):
    # return self.format + ', shape=' + str(list(self.data.shape))
    data_repr = repr(self.data)
    if '\n' in data_repr:
        data_repr = ub.indent('\n' + data_repr.lstrip('\n'), '    ')
    return '{}, {}'.format(self.format, data_repr)
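
# __nice__ is the hook used by ubelt's NiceRepr mixin, which derives both
# __repr__ and __str__ from it. A minimal sketch (assuming ubelt):
import ubelt as ub

class Data(ub.NiceRepr):
    def __init__(self, data):
        self.data = data

    def __nice__(self):
        return 'n={}'.format(len(self.data))

print(repr(Data([1, 2, 3])))   # -> <Data(n=3) at 0x...>
print(str(Data([1, 2, 3])))    # -> <Data(n=3)>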
Exemplo n.º 26
def make_prototext(image_list_fpath, arch, mode='fit', batch_size=1,
                   n_classes=None, class_weights=None, ignore_label=None,
                   shuffle=None, params=None):

    assert mode in {'fit', 'predict'}
    mod = model_modules[arch]
    if shuffle is None:
        shuffle = (mode == 'fit')

    if n_classes is None:
        n_classes = len(class_weights)
    elif ignore_label is not None:
        # This is a bit subtle: with 12 classes the number of outputs must
        # be 11 because we are ignoring the last label. However, when
        # class_weights is passed in it only contains the used weights, so
        # that count is already correct. Not sure what happens when
        # ignore_label=0 instead of 11.
        n_classes -= 1

    fmtdict = {
        'shuffle': str(shuffle).lower(),
        'batch_size': batch_size,
        'image_list_fpath': image_list_fpath,
        'n_classes': n_classes,
        'arch_name': arch,
    }

    if image_list_fpath is None:
        # Input layer when we use blobs
        # maybe use this def instead?
        # layer {
        #   name: "input"
        #   type: "Input"
        #   top: "data"
        #   input_param {
        #     shape {
        #       dim: 1
        #       dim: 3
        #       dim: 360
        #       dim: 480
        #     }
        #   }
        # }
        input_layer_fmt = ub.codeblock(
            '''
            input: "data"
            input_dim: {batch_size}
            input_dim: 3
            input_dim: 360
            input_dim: 480
            ''')
    else:
        # Layer when input is specified in a txt
        input_layer_fmt = ub.codeblock(
            '''
            name: "{arch_name}"
            layer {{
              name: "data"
              type: "DenseImageData"
              top: "data"
              top: "label"
              dense_image_data_param {{
                source: "{image_list_fpath}"
                batch_size: {batch_size}
                shuffle: {shuffle}
              }}
            }}
            '''
        )

    input_layer = input_layer_fmt.format(**fmtdict)

    if hasattr(mod, 'make_core_layers'):
        if params is not None:
            freeze_before = params['freeze_before']
            finetune_decay = params['finetune_decay']
        else:
            freeze_before = 0
            finetune_decay = 1
        core = mod.make_core_layers(n_classes, freeze_before, finetune_decay)
    else:
        core = mod.CORE_LAYERS.format(**fmtdict)

    if mode == 'fit':
        # remove batch-norm inference when fitting
        core = re.sub(r'^\s*bn_mode:\s*INFERENCE$', '', core, flags=re.M)
        class_weight_lines = ['class_weighting: {}'.format(w)
                              for w in class_weights]
        class_weight_lines += ['ignore_label: {}'.format(ignore_label)]
        class_weights_text = ub.indent(
            '\n'.join(class_weight_lines), ' ' * 4).lstrip()
        fmtdict['class_weights_text'] = class_weights_text
        footer_fmt = mod.FIT_FOOTER
    else:
        footer_fmt = mod.PREDICT_FOOTER

    footer = footer_fmt.format(**fmtdict)

    text = '\n'.join([input_layer, core, footer])
    return text
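
# Standalone sketch of the batch-norm stripping used in 'fit' mode above:
# with re.M the ^/$ anchors match at line boundaries, so every
# 'bn_mode: INFERENCE' line in the prototext is blanked out in place.
import re

core = 'layer {\n  name: "bn1"\n  bn_mode: INFERENCE\n}'
print(re.sub(r'^\s*bn_mode:\s*INFERENCE$', '', core, flags=re.M))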