Example #1
    def _build_index(self):
        """ build reverse indexes """
        # create index
        anns, cats, imgs = {}, {}, {}
        gid_to_aids = ub.ddict(list)
        cid_to_gids = ub.ddict(list)
        cid_to_aids = ub.ddict(list)

        for ann in self.dataset.get('annotations', []):
            gid_to_aids[ann['image_id']].append(ann['id'])
            anns[ann['id']] = ann

        for img in self.dataset.get('images', []):
            imgs[img['id']] = img

        for cat in self.dataset.get('categories', []):
            cats[cat['id']] = cat

        if anns and cats:
            for ann in self.dataset['annotations']:
                cid_to_gids[ann['category_id']].append(ann['image_id'])

        for cid, gids in cid_to_gids.items():
            aids = [aid for gid in gids for aid in gid_to_aids[gid]]
            cid_to_aids[cid] = aids

        # create class members
        self.gid_to_aids = gid_to_aids
        self.cid_to_gids = cid_to_gids
        self.cid_to_aids = cid_to_aids
        self.anns = anns
        self.imgs = imgs
        self.cats = cats
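
The pattern above is the simplest use of ub.ddict: ub.ddict(list) (an alias for collections.defaultdict(list)) lets the reverse indexes be built in a single pass with no key-existence checks. A minimal standalone sketch of the same idea, using toy annotation records rather than the real dataset schema:

import ubelt as ub

# Toy COCO-style annotations; the real records carry many more fields.
annotations = [
    {'id': 1, 'image_id': 10, 'category_id': 7},
    {'id': 2, 'image_id': 10, 'category_id': 8},
    {'id': 3, 'image_id': 11, 'category_id': 7},
]

gid_to_aids = ub.ddict(list)
for ann in annotations:
    gid_to_aids[ann['image_id']].append(ann['id'])

print(dict(gid_to_aids))  # {10: [1, 2], 11: [3]}
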
Example #2
def count_ubelt_usage():
    import ubelt as ub
    import glob
    from os.path import join
    names = [
        'xdoctest',
        'netharn',
        'xdev',
        'xinspect',
        'ndsampler',
        'kwil',
        'kwarray',
        'kwimage',
        'kwplot',
        'scriptconfig',
    ]

    all_fpaths = []
    for name in names:
        repo_fpath = ub.expandpath(join('~/code', name))
        fpaths = glob.glob(join(repo_fpath, '**', '*.py'), recursive=True)
        for fpath in fpaths:
            all_fpaths.append((name, fpath))

    import re
    pat = re.compile(r'\bub\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')

    pkg_to_hist = ub.ddict(lambda: ub.ddict(int))
    for name, fpath in ub.ProgIter(all_fpaths):
        with open(fpath, 'r') as file:
            text = file.read()
        for match in pat.finditer(text):
            attr = match.groupdict()['attr']
            if attr in ub.__all__:
                pkg_to_hist[name][attr] += 1

    hist_iter = iter(pkg_to_hist.values())
    usage = next(hist_iter).copy()
    for other in hist_iter:
        for k, v in other.items():
            usage[k] += v
    for attr in ub.__all__:
        usage[attr] += 0

    for name in pkg_to_hist.keys():
        pkg_to_hist[name] = ub.odict(
            sorted(pkg_to_hist[name].items(), key=lambda t: t[1])[::-1])

    usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1])

    print(ub.repr2(pkg_to_hist, nl=2))
    print(ub.repr2(usage, nl=1))
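
The nested counter pkg_to_hist = ub.ddict(lambda: ub.ddict(int)) is worth calling out: both levels auto-initialize, so a package/attribute pair can be incremented with no setup. A minimal sketch with made-up package and attribute names:

import ubelt as ub

# Outer key = package, inner key = ubelt attribute; counts default to 0.
pkg_to_hist = ub.ddict(lambda: ub.ddict(int))
pkg_to_hist['pkg_a']['ddict'] += 2
pkg_to_hist['pkg_a']['repr2'] += 1
pkg_to_hist['pkg_b']['ddict'] += 1
print({pkg: dict(hist) for pkg, hist in pkg_to_hist.items()})
# {'pkg_a': {'ddict': 2, 'repr2': 1}, 'pkg_b': {'ddict': 1}}
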
Example #3
    def score_voc(dmet, ovthresh=0.5, bias=1, method='voc2012', gids=None):
        recs = {}
        cx_to_lines = ub.ddict(list)
        # confusions = []
        if gids is None:
            gids = dmet.pred.imgs.keys()
        for gid in gids:
            pred_annots = dmet.pred.annots(gid=gid)
            true_annots = dmet.true.annots(gid=gid)

            true_boxes = true_annots.boxes
            true_cxs = true_annots.cids
            true_weights = true_annots._lookup('weight')

            pred_boxes = pred_annots.boxes
            pred_cxs = pred_annots.cids
            pred_scores = pred_annots._lookup('score')

            recs[gid] = []
            for bbox, cx, weight in zip(true_boxes.to_tlbr().data, true_cxs,
                                        true_weights):
                recs[gid].append({
                    'bbox': bbox,
                    'difficult': weight < .5,
                    'name': cx
                })

            for bbox, cx, score in zip(pred_boxes.to_tlbr().data, pred_cxs,
                                       pred_scores):
                cx_to_lines[cx].append([gid, score] + list(bbox))

        perclass = ub.ddict(dict)
        for cx in cx_to_lines.keys():
            lines = cx_to_lines[cx]
            classname = cx
            rec, prec, ap = voc_eval(lines,
                                     recs,
                                     classname,
                                     ovthresh=ovthresh,
                                     bias=bias,
                                     method=method)
            perclass[cx]['pr'] = (rec, prec)
            perclass[cx]['ap'] = ap

        mAP = np.nanmean([d['ap'] for d in perclass.values()])
        voc_scores = {
            'mAP': mAP,
            'perclass': perclass,
        }
        return voc_scores
Example #4
    def _get_cm_edge_data(infr, edges, cm_list=None):
        symmetric = True

        if cm_list is None:
            cm_list = infr.cm_list
        # Find scores for the edges that exist in the graph
        edge_to_data = ub.ddict(dict)
        aid_to_cm = {cm.qaid: cm for cm in cm_list}
        for u, v in edges:
            if symmetric:
                u, v = e_(u, v)
            cm1 = aid_to_cm.get(u, None)
            cm2 = aid_to_cm.get(v, None)
            scores = []
            ranks = []
            for cm in util.filter_Nones([cm1, cm2]):
                for aid in [u, v]:
                    idx = cm.daid2_idx.get(aid, None)
                    if idx is None:
                        continue
                    score = cm.annot_score_list[idx]
                    rank = cm.get_annot_ranks([aid])[0]
                    scores.append(score)
                    ranks.append(rank)
            if len(scores) == 0:
                score = None
                rank = None
            else:
                # Choose whichever one gave the best score
                idx = util.safe_argmax(scores, nans=False)
                score = scores[idx]
                rank = ranks[idx]
            edge_to_data[(u, v)]['score'] = score
            edge_to_data[(u, v)]['rank'] = rank
        return edge_to_data
Example #5
    def _BROKEN_rank_epochs(monitor):
        """
        FIXME:
            broken - implement better rank aggregation with custom weights

        Example:
            >>> monitor = demodata_monitor()
            >>> monitor._BROKEN_rank_epochs()
        """
        rankings = {}
        for key, value in monitor.best_epochs(smooth=False).items():
            rankings[key + '_raw'] = value

        for key, value in monitor.best_epochs(smooth=True).items():
            rankings[key + '_smooth'] = value

        # borda-like weighted rank aggregation.
        # probably could do something better.
        epoch_to_weight = ub.ddict(lambda: 0)
        for key, ranking in rankings.items():
            # weights = np.linspace(0, 1, num=len(ranking))[::-1]
            weights = np.logspace(0, 2, num=len(ranking))[::-1] / 100
            for epoch, w in zip(ranking, weights):
                epoch_to_weight[epoch] += w

        agg_ranking = ub.argsort(epoch_to_weight)[::-1]
        return agg_ranking
Example #6
    def _choose_indices(harn):
        """
        Hack to pick several images from the validation set to monitor each
        epoch.
        """
        tag = harn.current_tag
        dset = harn.loaders[tag].dataset

        cid_to_gids = ub.ddict(set)
        empty_gids = []
        for gid in range(len(dset)):
            annots = dset._load_annotation(gid)
            if len(annots['gt_classes']) == 0:
                empty_gids.append(gid)
            for cid, ishard in zip(annots['gt_classes'], annots['gt_ishard']):
                if not ishard:
                    cid_to_gids[cid].add(gid)

        # Choose an image with each category
        chosen_gids = set()
        for cid, gids in cid_to_gids.items():
            for gid in gids:
                if gid not in chosen_gids:
                    chosen_gids.add(gid)
                    break

        # Choose an image with nothing in it (if it exists)
        if empty_gids:
            chosen_gids.add(empty_gids[0])

        chosen_indices = chosen_gids
        harn.chosen_indices[tag] = sorted(chosen_indices)
Example #7
def bench_sort_dictionary():
    """
    CommandLine:
        xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_sort_dictionary

    Results:
        Timed best=25.484 µs, mean=25.701 ± 0.1 µs for itemgetter
        Timed best=28.810 µs, mean=29.138 ± 0.3 µs for lambda
    """
    import operator as op
    import ubelt as ub

    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1

    ti = ub.Timerit(1000, bestof=10, verbose=1)
    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]

    for timer in ti.reset('lambda'):
        with timer:
            key_order = [key for (key, value) in sorted(hist_.items(), key=lambda x: x[1])]

    del key_order
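
As a side note, the histogram built here with ub.ddict(lambda: 0) is functionally equivalent to collections.Counter; the benchmark only compares the two styles of sort key. A quick equivalence check under the same setup:

import collections
import random
import string
import ubelt as ub

rng = random.Random(0)
items = [rng.choice(string.printable) for _ in range(5000)]

hist_ = ub.ddict(lambda: 0)
for item in items:
    hist_[item] += 1

# same counts, just a different container type
assert dict(hist_) == dict(collections.Counter(items))
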
Example #8
    def group_pfiles(cls, pfiles, step_idx=None):
        """
        Creates groups of pfiles that *might* be the same.

        Example:
            >>> fpaths = _demodata_files()
            >>> pfiles = [ProgressiveFile(f) for f in fpaths]
            >>> groups1 = ProgressiveFile.group_pfiles(pfiles)
            >>> for pfile in pfiles:
            >>>     pfile.refine()
            >>> groups2 = ProgressiveFile.group_pfiles(pfiles)
            >>> for pfile in pfiles[0::2]:
            >>>     pfile.refine()
            >>> groups3 = ProgressiveFile.group_pfiles(pfiles)
            >>> for pfile in pfiles[1::2]:
            >>>     pfile.refine()
            >>> groups4 = ProgressiveFile.group_pfiles(pfiles)
        """
        if step_idx is not None:
            # We are given the step idx to use, so do that
            final_groups = ub.group_items(pfiles,
                                          key=lambda x: x.step_id(step_idx))
        else:
            # Otherwise do something reasonable
            size_groups = ub.group_items(pfiles, key=lambda x: x.size)
            final_groups = ub.ddict(list)
            for group in size_groups.values():
                # we have to use the minimum refine step available
                # for any unfinished pfile to ensure consistency
                step_idx = ProgressiveFile.compatible_step_idx(group)
                step_groups = ub.group_items(group,
                                             key=lambda x: x.step_id(step_idx))
                for key, val in step_groups.items():
                    final_groups[key].extend(val)
        return final_groups
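
The else branch shows a common ddict(list) merge idiom: sub-groupings computed per size bucket are folded into one final mapping with extend. A stripped-down sketch of just that merge, using placeholder group dicts instead of ProgressiveFile objects:

import ubelt as ub

# Per-bucket groupings to be merged (toy data).
partial_groups = [
    {'step:a': [1], 'step:b': [2]},
    {'step:a': [3]},
]

final_groups = ub.ddict(list)
for groups in partial_groups:
    for key, val in groups.items():
        final_groups[key].extend(val)

print(dict(final_groups))  # {'step:a': [1, 3], 'step:b': [2]}
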
Example #9
    def score_netharn(dmet, ovthresh=0.5, bias=0, method='voc2012', gids=None):
        y_accum = ub.ddict(list)
        # confusions = []
        if gids is None:
            gids = dmet.pred.imgs.keys()
        for gid in gids:
            pred_annots = dmet.pred.annots(gid=gid)
            true_annots = dmet.true.annots(gid=gid)

            true_boxes = true_annots.boxes
            true_cxs = true_annots.cids
            true_weights = true_annots._lookup('weight')

            pred_boxes = pred_annots.boxes
            pred_cxs = pred_annots.cids
            pred_scores = pred_annots._lookup('score')

            y = detection_confusions(true_boxes,
                                     true_cxs,
                                     true_weights,
                                     pred_boxes,
                                     pred_scores,
                                     pred_cxs,
                                     bg_weight=1.0,
                                     ovthresh=ovthresh,
                                     bg_cls=-1,
                                     bias=bias)
            y['gid'] = [gid] * len(y['pred'])
            for k, v in y.items():
                y_accum[k].extend(v)

        y_df = pd.DataFrame(y_accum)

        # class agnostic score
        ap, prec, rec = pr_curves(y_df)
        peritem = {
            'ap': ap,
            'pr': (prec, rec),
        }

        # perclass scores
        perclass = {}
        cx_to_group = dict(iter(y_df.groupby('cx')))
        for cx in cx_to_group:
            # for cx, group in cx_to_group.items():
            group = cx_to_group.get(cx, None)
            ap, prec, rec = pr_curves(group, method=method)
            perclass[cx] = {
                'ap': ap,
                'pr': (prec, rec),
            }

        mAP = np.nanmean([d['ap'] for d in perclass.values()])
        nh_scores = {'mAP': mAP, 'perclass': perclass, 'peritem': peritem}
        return nh_scores
Example #10
def bench_closures():
    """
    Is it faster to use a closure or pass in the variables explicitly?
    """
    import ubelt as ub
    import timerit
    import numpy as np

    # Test a nested func with vs without a closure
    def rand_complex(*shape):
        real = np.random.rand(*shape).astype(complex)
        imag = np.random.rand(*shape).astype(complex) * 1j
        mat = real + imag
        return mat

    s = int(ub.argval('--s', default='1'))
    mat1 = rand_complex(s, s)
    mat2 = rand_complex(s, s)
    N = 1000
    offset = 100

    def nested_closure():
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    def nested_explicit(mat1, mat2, N, offset):
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    ti = timerit.Timerit(int(2**11),
                         bestof=int(2**8),
                         verbose=int(ub.argval('--verbose', default='1')))

    for timer in ti.reset('nested_explicit'):
        with timer:
            nested_explicit(mat1, mat2, N, offset)

    for timer in ti.reset('nested_closure'):
        with timer:
            nested_closure()

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9,
                                             nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
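
The final block that computes average_position is a small rank-aggregation idiom: for each metric in ti.rankings, every label gets its position from ub.argsort, and the positions are averaged per label. A self-contained sketch with toy timing numbers (the real keys come from Timerit):

import ubelt as ub

rankings = {
    'mean': {'nested_explicit': 1.0, 'nested_closure': 2.0},
    'min':  {'nested_explicit': 3.0, 'nested_closure': 1.5},
}
positions = ub.ddict(list)
for metric, times in rankings.items():
    # ub.argsort on a dict returns its keys ordered by value
    for pos, label in enumerate(ub.argsort(times), start=0):
        positions[label].append(pos)
average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
print(average_position)  # {'nested_explicit': 0.5, 'nested_closure': 0.5}
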
Example #11
    def _get_cm_agg_aid_ranking(infr, cc):
        aid_to_cm = {cm.qaid: cm for cm in infr.cm_list}
        all_scores = ub.ddict(list)
        for qaid in cc:
            cm = aid_to_cm[qaid]
            # should we be doing nids?
            for daid, score in zip(cm.get_top_aids(), cm.get_top_scores()):
                all_scores[daid].append(score)

        max_scores = sorted(
            (max(scores), aid) for aid, scores in all_scores.items())[::-1]
        ranked_aids = util.take_column(max_scores, 1)
        return ranked_aids
Example #12
    def image_from_each_dataset():
        groups = ub.ddict(list)
        for gid, img in merged.imgs.items():
            groups[os.path.dirname(img['file_name'])].append(gid)

        gid_groups = []
        for gids in groups.values():
            gids = sort_gids_by_nannots(gids)
            gid_groups.append(gids)

        # round robin sample
        datas = [gid for x in zip(*gid_groups) for gid in x]
        return datas
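
One subtlety in the round-robin line: zip(*gid_groups) truncates to the shortest group, so trailing ids from larger directories are dropped. A toy illustration of the interleave:

# Hypothetical per-directory image-id groups.
gid_groups = [[1, 2, 3], [10, 20], [100, 200, 300]]
datas = [gid for x in zip(*gid_groups) for gid in x]
print(datas)  # [1, 10, 100, 2, 20, 200] -- ids 3 and 300 are dropped
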
Example #13
def check_relationships(branches):

    ancestors = {b: set() for b in branches}
    length = len(branches) * (len(branches) - 1)
    for b1, b2 in ub.ProgIter(it.combinations(branches, 2), length=length):
        ret = ub.cmd('git merge-base --is-ancestor {} {}'.format(b1, b2))['ret']
        if ret == 0:
            ancestors[b1].add(b2)
        ret = ub.cmd('git merge-base --is-ancestor {} {}'.format(b2, b1))['ret']
        if ret == 0:
            ancestors[b2].add(b1)
    print('<key> is an ancestor of <value>')
    print(ub.repr2(ancestors))

    descendants = {b: set() for b in branches}
    for key, others in ancestors.items():
        for o in others:
            descendants[o].add(key)
    print('<key> descends from <value>')
    print(ub.repr2(descendants))

    import plottool as pt
    import networkx as nx
    G = nx.DiGraph()
    G.add_nodes_from(branches)
    for key, others in ancestors.items():
        for o in others:
            # G.add_edge(key, o)
            G.add_edge(o, key)

    from networkx.algorithms.connectivity.edge_augmentation import collapse
    flag = True
    G2 = G
    while flag:
        flag = False
        for u, v in list(G2.edges()):
            if G2.has_edge(v, u):
                G2 = collapse(G2, [[u, v]])

                node_relabel = ub.ddict(list)
                for old, new in G2.graph['mapping'].items():
                    node_relabel[new].append(old)
                G2 = nx.relabel_nodes(G2, {k: '\n'.join(v) for k, v in node_relabel.items()})
                flag = True
                break

    G3 = nx.transitive_reduction(G2)
    pt.show_nx(G3, arrow_width=1.5, prog='dot', layoutkw=dict(prog='dot'))
    pt.zoom_factory()
    pt.pan_factory()
    pt.plt.show()
Example #14
def varied_values(dict_list, min_variations=1):
    """
    Given a list of dictionaries, find the values that differ between them

    Args:
        dict_list (List[Dict]):
            The values of the dictionary must be hashable. Lists will be
            converted into tuples.

        min_variations (int, default=1): minimum number of variations to return

    TODO:
        - [ ] Is this a ubelt function?

    Example:
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/misc/notes'))
        >>> from hardwareinfo.backend_linux import *  # NOQA
        >>> num_keys = 10
        >>> num_dicts = 10
        >>> all_keys = {ub.hash_data(i)[0:16] for i in range(num_keys)}
        >>> dict_list = [
        >>>     {key: ub.hash_data(key)[0:16] for key in all_keys}
        >>>     for _ in range(num_dicts)
        >>> ]
        >>> import random
        >>> rng = random.Random(0)
        >>> for data in dict_list:
        >>>     if rng.random() > 0.5:
        >>>         for key in list(data):
        >>>             if rng.random() > 0.9:
        >>>                 data[key] = rng.randint(1, 32)
        >>> varied = varied_values(dict_list)
        >>> print('varied = {}'.format(ub.repr2(varied, nl=1)))
    """
    all_keys = set()
    for data in dict_list:
        all_keys.update(data.keys())

    varied = ub.ddict(set)
    for data in dict_list:
        for key in all_keys:
            value = data.get(key, ub.NoParam)
            if isinstance(value, list):
                value = tuple(value)
            varied[key].add(value)

    for key, values in list(varied.items()):
        if len(values) <= min_variations:
            del varied[key]
    return varied
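
A smaller, deterministic illustration of varied_values (hypothetical config dictionaries; the doctest above uses randomized data):

dict_list = [
    {'lr': 0.1,  'bs': 32, 'arch': 'resnet'},
    {'lr': 0.01, 'bs': 32, 'arch': 'resnet'},
    {'lr': 0.1,  'bs': 64, 'arch': 'resnet'},
]
varied = varied_values(dict_list)
print(dict(varied))
# 'arch' is dropped because it never varies; only 'lr' and 'bs' remain,
# each mapped to the set of values observed for that key.
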
Example #15
def rank_inventory(inventory):
    candidates = list(ub.flatten(list(pkmn.family(ancestors=False, node=True))
                                 for pkmn in inventory))

    groups = ub.group_items(candidates, key=lambda p: p.name)

    leages = {
        'master': {'max_cp': float('inf')},
        'ultra': {'max_cp': 2500},
        'great': {'max_cp': 1500},
        'little': {'max_cp': 500},
    }

    max_level = 45  # for XL candy
    # max_level = 40  # normal

    all_dfs = []

    for name, group in groups.items():
        print('\n\n------------\n\n')
        print('name = {!r}'.format(name))
        for leage_name, leage_filters in leages.items():
            max_cp = leage_filters['max_cp']
            print('')
            print(' ========== ')
            print(' --- {} in {} --- '.format(name, leage_name))
            not_eligible = [p for p in group if p.cp is not None and p.cp > max_cp]
            eligible = [p for p in group if p.cp is None or p.cp <= max_cp]
            print('not_eligible = {!r}'.format(not_eligible))
            if len(eligible) > 0:
                first = ub.peek(eligible)
                have_ivs = eligible
                df = first.leage_rankings_for(have_ivs, max_cp=max_cp,
                                              max_level=max_level)
                all_dfs.append(df)
            else:
                print('none eligible')

    # Print out the best ranks for each set of IVS over all possible forms
    # (lets you know which ones can be transferred safely)

    iv_to_rank = ub.ddict(list)
    for df in all_dfs:
        if df is not None:
            df = df.set_index(['iva', 'ivd', 'ivs'])
            for iv, rank in zip(df.index, df['rank']):
                iv_to_rank[iv].append(rank)

    iv_to_best_rank = ub.map_vals(sorted, iv_to_rank)
    iv_to_best_rank = ub.sorted_vals(iv_to_best_rank)
    print('iv_to_best_rank = {}'.format(ub.repr2(iv_to_best_rank, nl=1, align=':')))
Example #16
    def __init__(evaluator, classes):
        evaluator.classes = classes

        # Remember metrics for each image individually
        evaluator.gid_to_metrics = {}

        # accum is a dictionary that will hold different metrics we accumulate
        evaluator.accum = ub.ddict(lambda : 0)

        # Estimate contains our current averaged metrics
        evaluator.estimate = {}

        # We don't care how we predict for the void class
        evaluator.void_idx = classes.index('background')
Example #17
def bench_dict_hist():

    import operator as op
    import ubelt as ub

    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1

    OrderedDict = ub.odict

    ti = ub.Timerit(1000, bestof=10, verbose=2)

    for timer in ti.reset('dict_subset_iter'):
        with timer:
            getval = op.itemgetter(1)
            key_order = (key for (key, value) in sorted(hist_.items(), key=getval))
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('dict_subset_list'):
        with timer:
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            hist = OrderedDict([
                (key, value)
                for (key, value) in sorted(hist_.items(), key=getval)
            ])

    # -----------------

    for timer in ti.reset('itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]

    for timer in ti.reset('lambda'):
        with timer:
            key_order = [key for (key, value) in sorted(hist_.items(), key=lambda x: x[1])]
Example #18
def bench_dict_hist():
    """
    CommandLine:
        xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_dict_hist

    Results:
        Timed best=48.330 µs, mean=49.437 ± 1.0 µs for dict_subset_iter
        Timed best=59.392 µs, mean=63.395 ± 11.9 µs for dict_subset_list
        Timed best=47.203 µs, mean=47.632 ± 0.2 µs for direct_itemgetter
    """

    import operator as op
    import ubelt as ub

    import random
    import string
    rng = random.Random(0)
    items = [rng.choice(string.printable) for _ in range(5000)]
    hist_ = ub.ddict(lambda: 0)
    for item in items:
        hist_[item] += 1

    OrderedDict = ub.odict

    ti = ub.Timerit(1000, bestof=10, verbose=1)

    for timer in ti.reset('dict_subset_iter'):
        with timer:
            getval = op.itemgetter(1)
            key_order = (key for (key, value) in sorted(hist_.items(), key=getval))
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('dict_subset_list'):
        with timer:
            getval = op.itemgetter(1)
            key_order = [key for (key, value) in sorted(hist_.items(), key=getval)]
            hist = ub.dict_subset(hist_, key_order)

    for timer in ti.reset('direct_itemgetter'):
        with timer:
            # WINNER
            getval = op.itemgetter(1)
            hist = OrderedDict([
                (key, value)
                for (key, value) in sorted(hist_.items(), key=getval)
            ])

    del hist
Example #19
    def find_neg_nid_freq_to(infr, cc):
        """
        Find the number of edges leaving `cc` and directed towards specific
        names.
        """
        pos_graph = infr.pos_graph
        neg_graph = infr.neg_graph
        neg_nid_freq = ub.ddict(lambda: 0)
        for u in cc:
            nid1 = pos_graph.node_label(u)
            for v in neg_graph.neighbors(u):
                nid2 = pos_graph.node_label(v)
                if nid1 == nid2 and v not in cc:
                    continue
                neg_nid_freq[nid2] += 1
        return neg_nid_freq
Example #20
    def parse_timemap_from_blocks(self, profile_block_list):
        """
        Build a map from times to line_profile blocks
        """
        prefix_list = []
        timemap = ub.ddict(list)
        for ix in range(len(profile_block_list)):
            block = profile_block_list[ix]
            total_time = self.get_block_totaltime(block)
            # Blocks without time go at the front of sorted output
            if total_time is None:
                prefix_list.append(block)
            # Blocks that are not run are not appended to output
            elif total_time != 0:
                timemap[total_time].append(block)
        return prefix_list, timemap
Example #21
def group_pairs(pair_list):
    """
    Groups a list of items using the first element in each pair as the item and
    the second element as the groupid.

    Args:
        pair_list (list): list of 2-tuples (item, groupid)

    Returns:
        dict: groupid_to_items: maps a groupid to a list of items
    """
    # Initialize dict of lists
    groupid_to_items = ub.ddict(list)
    # Insert each item into the correct group
    for item, groupid in pair_list:
        groupid_to_items[groupid].append(item)
    return groupid_to_items
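
A brief usage example for group_pairs (hypothetical pairs); note that ub.group_items covers the same need when you have items plus a key function or a parallel list of group ids:

pairs = [('a', 1), ('b', 2), ('c', 1)]
print(dict(group_pairs(pairs)))  # {1: ['a', 'c'], 2: ['b']}
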
Example #22
    def _check_datas(task):
        scene_im_paths, scene_gt_paths = task._load_all_scene_paths()
        keys = task._preprocessing_keys()
        key_to_num = ub.ddict(list)
        for scene, key in it.product(task.scene_ids, keys):
            im_paths = scene_im_paths[scene][key]
            gt_paths = scene_gt_paths[scene][key]
            assert len(im_paths) == len(gt_paths)
            assert len(im_paths) > 0
            key_to_num[key] += [len(im_paths)]

        for key, ns in key_to_num.items():
            ns_set = set(ns)
            if len(ns_set) != 1:
                print('key    = {!r}'.format(key))
                print('ns_set = {!r}'.format(ns_set))
                print('--')
Example #23
def main():
    import ubelt as ub
    from ubelt import util_list
    from ubelt.util_list import take
    import random
    from math import e

    # # Data
    N = 100
    array = [random.random() for _ in range(N)]
    indices = [random.randint(0, N - 1) for _ in range(int(N // e))]

    ti = ub.Timerit(2 ** 11, bestof=2 ** 8, verbose=1)

    for timer in ti.reset('take'):
        with timer:
            list(take(array, indices))

    for timer in ti.reset('util_list.take'):
        with timer:
            list(util_list.take(array, indices))

    for timer in ti.reset('ub.take'):
        with timer:
            list(ub.take(array, indices))

    print('---')

    # import pandas as pd
    # df = pd.DataFrame(rankings)
    # print('df =\n{}'.format(df))

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
Example #24
    def best_epochs(monitor):
        rankings = {}

        def _rank(key, metrics, type='min'):
            values = [m[key] for m in metrics]
            sortx = np.argsort(values)
            if type == 'max':
                sortx = np.argsort(values)[::-1]
            elif type == 'min':
                sortx = np.argsort(values)
            else:
                raise KeyError(type)
            ranked_epochs = np.array(monitor.epochs)[sortx]
            return ranked_epochs

        for key in monitor.min_keys:
            rankings[key + '_raw'] = _rank(key, monitor.raw_metrics, 'min')
            rankings[key + '_smooth'] = _rank(key, monitor.smooth_metrics, 'min')

        for key in monitor.max_keys:
            rankings[key + '_raw'] = _rank(key, monitor.raw_metrics, 'max')
            rankings[key + '_smooth'] = _rank(key, monitor.smooth_metrics, 'max')

        for key in monitor.max_keys:
            values = [m[key] for m in monitor.raw_metrics]
            sortx = np.argsort(values)[::-1]
            ranked_epochs = np.array(monitor.epochs)[sortx]
            rankings[key] = ranked_epochs

        # borda-like weighted rank aggregation.
        # probably could do something better.
        epoch_to_weight = ub.ddict(lambda: 0)
        for key, ranking in rankings.items():
            # weights = np.linspace(0, 1, num=len(ranking))[::-1]
            weights = np.logspace(0, 2, num=len(ranking))[::-1] / 100
            for epoch, w in zip(ranking, weights):
                epoch_to_weight[epoch] += w

        agg_ranking = ub.argsort(epoch_to_weight)[::-1]
        return agg_ranking
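
The aggregation step at the bottom is a borda-like vote: each per-metric ranking contributes geometrically decaying weights to the epochs it ranks, and ub.argsort over the accumulated weights (reversed) gives the final order. A toy sketch of just that step, with made-up rankings:

import numpy as np
import ubelt as ub

rankings = {'loss_raw': [7, 3, 5], 'acc_raw': [3, 7, 5]}
epoch_to_weight = ub.ddict(lambda: 0)
for key, ranking in rankings.items():
    # first-ranked epoch gets weight 1.0, then 0.1, then 0.01, ...
    weights = np.logspace(0, 2, num=len(ranking))[::-1] / 100
    for epoch, w in zip(ranking, weights):
        epoch_to_weight[epoch] += w

# epochs ordered from highest to lowest accumulated weight
agg_ranking = ub.argsort(epoch_to_weight)[::-1]
print(agg_ranking)
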
Example #25
def benchmark_ondisk_crop():
    import kwplot
    plt = kwplot.autoplt()

    region = 'small_random'

    dim = 3
    # xdata = [64, 128, 256, 512]
    # xdata = [64, 128, 256, 320, 512, 640, 768, 896, 1024]
    # xdata = np.linspace(64, 4096, num=8).astype(np.int)
    # xdata = np.linspace(64, 2048, num=8).astype(np.int)
    # xdata = np.linspace(64, 1024, num=8).astype(np.int)

    xdata = [256, 1024, 4096, 8192, 16384]
    # xdata = [256, 1024, 4096, 8192]
    xdata = [256, 1024, 2048]
    # xdata = [256]

    ydata = ub.ddict(list)
    # for size in [64, 128, 256, 512, 1024, 2048, 4096]:
    for size in xdata:
        result = time_ondisk_crop(size, dim=dim, region=region, num=5)
        for key, val in result.items():
            min, mean, std = val
            ydata[key].append(mean * 1e6)

    # Sort legend by descending time taken on the largest image
    ydata = ub.odict(sorted(ydata.items(), key=lambda i: -i[1][-1]))

    kwplot.multi_plot(
        xdata,
        ydata,
        ylabel='micro-seconds (us)',
        xlabel='image size',
        title='Chip region={} benchmark for {}D image data'.format(
            region, dim),
        # yscale='log',
        ymin=1,
    )
    plt.show()
Example #26
    def after_initialize(harn):
        harn.xdata = []
        harn.ydata = ub.ddict(list)
Example #27
def _dump_monitor_tensorboard(harn,
                              mode='epoch',
                              special_groupers=['loss'],
                              serial=False):
    """
    Dumps PNGs to disk visualizing tensorboard scalars.
    Also dumps pickles to disk containing the same information.

    Args:
        mode (str | Tuple[str], default='epoch'):
            Can be either `epoch` or `iter`, or a tuple containing both.

        special_groupers (List[str], default=['loss']):
            list of strings indicating groups.  For each item, a logged value
            is contained in that group if it contains that item as a substring.

        serial (bool, default=False):
            If True executes the drawing process in the main process, otherwise
            it forks a new process and runs in the background.

    CommandLine:
        xdoctest -m netharn.mixins _dump_monitor_tensorboard --profile

    Example:
        >>> import netharn as nh
        >>> from netharn.mixins import _dump_monitor_tensorboard
        >>> harn = nh.FitHarn.demo()
        >>> harn.run()
        >>> try:
        >>>     _dump_monitor_tensorboard(harn)
        >>> except ImportError:
        >>>     pass
    """
    import ubelt as ub
    import netharn as nh
    from os.path import join
    import json
    import six
    from six.moves import cPickle as pickle

    # harn.debug('Plotting tensorboard data. serial={}, mode={}'.format(serial, mode))

    train_dpath = harn.train_dpath

    tb_data = nh.util.read_tensorboard_scalars(train_dpath, cache=0, verbose=0)

    tb_data['meta'] = {
        'nice': harn.hyper.nice,
        'special_groupers': special_groupers,
    }

    out_dpath = ub.ensuredir((train_dpath, 'monitor', 'tensorboard'))

    # Write a script that the user can run to re-visualize the dumped results
    if not ub.WIN32:
        reviz_fpath = join(out_dpath, 'revisualize.sh')
        reviz_text = ub.codeblock('''
            #!/bin/bash
            __heredoc__ = """
            Helper script to visualize all of the results in the pkl / json files
            in this directory.
            """
            REVIZ_DPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
            xdoctest -m netharn.mixins _dump_measures --out_dpath=$REVIZ_DPATH
            ''')
        with open(reviz_fpath, 'w') as file:
            file.write(reviz_text)
        try:
            import os
            import stat
            orig_mode = os.stat(reviz_fpath).st_mode
            new_flags = stat.S_IXGRP | stat.S_IEXEC
            if (new_flags & orig_mode) != new_flags:
                new_mode = orig_mode | new_flags
                os.chmod(reviz_fpath, new_mode)
        except Exception as ex:
            print('ex = {!r}'.format(ex))

    tb_data_pickle_fpath = join(out_dpath, 'tb_data.pkl')
    with open(tb_data_pickle_fpath, 'wb') as file:
        pickle.dump(tb_data, file)

    tb_data_json_fpath = join(out_dpath, 'tb_data.json')
    with open(tb_data_json_fpath, 'w') as file:
        if six.PY2:
            jsonkw = dict(indent=1)
        else:
            jsonkw = dict(indent=' ')
        try:
            json.dump(tb_data, file, **jsonkw)
        except Exception as ex:
            print('ex = {!r}'.format(ex))
            json.dump(
                {
                    'error': 'Unable to write to json.',
                    'info': 'See pickle file: {}'.format(tb_data_json_fpath)
                }, file, **jsonkw)

    # The following function draws the tensorboard result
    # This might take some non-trivial amount of time, so we attempt to run it
    # in a separate process.
    func = _dump_measures
    args = (tb_data, out_dpath, mode)

    if not serial:

        if False:
            # Maybe thread-safer way of doing this? Maybe not, there is a
            # management thread used by futures.
            from concurrent import futures
            if not hasattr(harn, '_internal_executor'):
                harn._internal_executor = futures.ProcessPoolExecutor(
                    max_workers=1)
                harn._prev_job = None
            if harn._prev_job is None or harn._prev_job.done():
                # Wait before submitting another job
                # Unsure if it's ok that this job might not be a daemon
                harn.info('DO MPL DRAW')
                job = harn._internal_executor.submit(func, *args)
                harn._prev_job = job
            else:
                if harn._prev_job is not None:
                    harn.info('NOT DOING MPL DRAW')
                    harn.warn('NOT DOING MPL DRAW')
        else:
            # This causes thread-unsafe warning messages in the inner loop
            # Likely because we are forking while a thread is alive
            if not hasattr(harn, '_internal_procs'):
                harn._internal_procs = ub.ddict(dict)

            # Clear finished processes from the pool
            for pid in list(harn._internal_procs[mode].keys()):
                proc = harn._internal_procs[mode][pid]
                if not proc.is_alive():
                    harn._internal_procs[mode].pop(pid)

            # only start a new process if there is room in the pool
            if len(harn._internal_procs[mode]) < 1:
                import multiprocessing
                proc = multiprocessing.Process(target=func, args=args)
                proc.daemon = True
                proc.start()
                harn._internal_procs[mode][proc.pid] = proc
            else:
                if 0:
                    harn.warn('NOT DOING MPL DRAW')
    else:
        func(*args)
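
Aside from the main logic, harn._internal_procs = ub.ddict(dict) above is a handy two-level registry: the outer key ('epoch' or 'iter') auto-creates an inner pid-to-process dict on first use. A tiny sketch with placeholder strings standing in for Process objects:

import ubelt as ub

procs = ub.ddict(dict)
procs['epoch'][1234] = 'proc-1234'   # hypothetical pid / process handle
procs['iter'][5678] = 'proc-5678'
print({mode: list(pids) for mode, pids in procs.items()})
# {'epoch': [1234], 'iter': [5678]}
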
Example #28
    def __init__(self, classes=None):
        self.recs = {}
        self.cx_to_lines = ub.ddict(list)
        self.classes = classes
Example #29
def _best_prefix_transform(set1, target_set2):
    """
    Find a way to transform prefixes of items in set1 to match target_set2

    Example:
        >>> set1 = {'mod.f.0.w',
        >>>         'mod.f.1.b',
        >>>         'mod.f.1.n',
        >>>         'mod.f.1.rm',
        >>>         'mod.f.1.rv',}
        >>> #
        >>> target_set2 = {
        >>>      'bar.foo.extra.f.1.b',
        >>>      'bar.foo.extra.f.1.n',
        >>>      'bar.foo.extra.f.1.w',
        >>>      'bar.foo.extra.f.3.w',
        >>> }
        >>> _best_prefix_transform(set1, target_set2)
        >>> target_set2.add('JUNK')
        >>> _best_prefix_transform(set1, target_set2)
    """

    # probably an efficient way to do this with a trie

    # NOTE: In general this is a graph-isomorphism problem or a maximum common
    # subgraph problem. However, we can look only at the special case of
    # "maximum common subtrees". Given two directory structures (as trees)
    # we find the common bits.
    # https://perso.ensta-paris.fr/~diam/ro/online/viggo_wwwcompendium/node168.html
    # We can approximate to O(log log n / log^2 n)
    # Can get algorithm from maximum independent set
    # https://arxiv.org/abs/1602.07210

    # The most efficient algorithm here would be for solving
    # "Maximum common labeled subtrees"
    # APX-hard for unordered trees, but polytime solvable for ordered trees
    # For directory structures we can induce an order, and hence obtain a
    # polytime solution
    # #
    # On the Maximum Common Embedded Subtree Problem for Ordered Trees
    # https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf

    from os.path import commonprefix
    prefixes1 = commonprefix(list(set1)).split('.')
    prefixes2 = commonprefix(list(target_set2)).split('.')

    # Remove the trailing prefixes that are the same
    num_same = 0
    for i in range(1, min(len(prefixes1), len(prefixes2))):
        if prefixes1[-i] == prefixes2[-i]:
            num_same = i
        else:
            break
    if num_same:
        # guard: slicing with [:-0] would wipe the lists entirely
        prefixes1 = prefixes1[:-num_same]
        prefixes2 = prefixes2[:-num_same]

    ALLOW_FUZZY = 1
    if ALLOW_FUZZY and len(prefixes2) == 0:
        # SUPER HACK FOR CASE WHERE THERE IS JUST ONE SPOILER ELEMENT IN THE
        # TARGET SET. THE ALGORITHM NEEDS TO BE RETHOUGHT FOR THAT CASE
        possible_prefixes = [k.split('.') for k in target_set2]
        prefix_hist = ub.ddict(lambda: 0)
        for item in possible_prefixes:
            for i in range(1, len(item)):
                prefix_hist[tuple(item[0:i])] += 1
        prefixes2 = ['.'.join(ub.argmax(prefix_hist))]

    def add_prefix(items, prefix):
        return {prefix + k for k in items}

    def remove_prefix(items, prefix):
        return {k[len(prefix):] if k.startswith(prefix) else k for k in items}

    import itertools as it
    found_cand = []
    for i1, i2 in it.product(range(len(prefixes1) + 1),
                             range(len(prefixes2) + 1)):
        if i1 == 0 and i2 == 0:
            continue
        # Very inefficient, we should be able to do better
        prefix1 = '.'.join(prefixes1[:i1])
        prefix2 = '.'.join(prefixes2[:i2])
        if prefix1:
            prefix1 = prefix1 + '.'
        if prefix2:
            prefix2 = prefix2 + '.'

        # We are allowed to remove a prefix from a set, add the other
        # prefix to the set, or remove and then add.
        set1_cand1 = remove_prefix(set1, prefix1)
        set1_cand2 = add_prefix(set1, prefix2)
        set1_cand3 = add_prefix(set1_cand1, prefix2)

        common1 = set1_cand1 & target_set2
        common2 = set1_cand2 & target_set2
        common3 = set1_cand3 & target_set2
        if common1:
            found_cand.append({
                'transform': [('remove', prefix1)],
                'value': len(common1),
            })
        if common2:
            found_cand.append({
                'transform': [('add', prefix2)],
                'value': len(common2),
            })
        if common3:
            found_cand.append({
                'transform': [('remove', prefix1), ('add', prefix2)],
                'value':
                len(common3),
            })
    if len(found_cand):
        found = max(found_cand, key=lambda x: x['value'])
    else:
        found = None
    return found
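
The ALLOW_FUZZY fallback builds a histogram of every dotted prefix in the target set and picks the most common one with ub.argmax. In isolation the idea looks like this (toy keys, not real state-dict names):

import ubelt as ub

keys = ['bar.foo.extra.f.1.b', 'bar.foo.extra.f.1.n', 'bar.JUNK']
prefix_hist = ub.ddict(lambda: 0)
for key in keys:
    parts = key.split('.')
    for i in range(1, len(parts)):
        prefix_hist[tuple(parts[:i])] += 1

# ub.argmax returns the key with the largest count
print('.'.join(ub.argmax(prefix_hist)))  # -> bar
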
Example #30
    def _fix_keys(model_state_dict):
        """
        Hack around DataParallel wrapper. If there is nothing in common between
        the two models, check to see if prepending 'module.' to other keys
        fixes it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)

        if 0:
            # Automatic way to reduce nodes in the trees?
            # If node b always follows node a, can we contract it?
            nodes1 = [n for p in other_keys for n in p.split('.')]
            nodes2 = [n for p in self_keys for n in p.split('.')]
            tups1 = list(tup for key in other_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            tups2 = list(tup for key in self_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            x = ub.ddict(list)
            for a, b in tups1:
                x[a].append(b)
            for a, b in tups2:
                x[a].append(b)

            nodehist = ub.dict_hist(nodes1 + nodes2)

            for k, v in x.items():
                print('----')
                print(k)
                print(nodehist[k])
                follow_hist = ub.dict_hist(v)
                print(follow_hist)
                total = sum(follow_hist.values())
                if ub.allsame(follow_hist.values()) and total == nodehist[k]:
                    print('CONTRACT')

            # pair_freq = ub.dict_hist(ub.flatten([tups1, tups2]))
            # print(forest_str(paths_to_otree(other_keys, '.')))

        # common_keys = other_keys.intersection(self_keys)
        # if not common_keys:
        if not other_keys.issubset(self_keys):
            if association == 'strict':
                pass
            elif association == 'module-hack':
                # If there are no common keys try a hack
                prefix = 'module.'

                def smap(f, ss):
                    return set(map(f, ss))

                def fix1(k):
                    return prefix + k

                def fix2(k):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                if smap(fix1, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix1, model_state_dict)
                elif smap(fix2, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix2, model_state_dict)
            elif association == 'prefix-hack':
                import functools

                def add_prefix(k, prefix):
                    return prefix + k

                def remove_prefix(k, prefix):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                # set1 = other_keys
                # target_set2 = self_keys
                found = _best_prefix_transform(other_keys, self_keys)
                if found is not None:
                    for action, prefix in found['transform']:
                        if action == 'add':
                            func = functools.partial(add_prefix, prefix=prefix)
                        elif action == 'remove':
                            func = functools.partial(remove_prefix,
                                                     prefix=prefix)
                        else:
                            raise AssertionError
                        model_state_dict = ub.map_keys(func, model_state_dict)
            elif association in {'embedding', 'isomorphism'}:
                if verbose > 1:
                    print('Using subpath {} association, may take some time'.
                          format(association))
                # I believe this is the correct way to solve the problem
                paths1 = sorted(other_keys)
                paths2 = sorted(self_state)

                if 1:
                    # hack to filter to reduce tree size in embedding problem
                    def shrink_paths(paths):
                        new_paths = []
                        for p in paths:
                            p = p.replace('.0', ':0')
                            p = p.replace('.1', ':1')
                            p = p.replace('.2', ':2')
                            p = p.replace('.3', ':3')
                            p = p.replace('.4', ':4')
                            p = p.replace('.5', ':5')
                            p = p.replace('.6', ':6')
                            p = p.replace('.7', ':7')
                            p = p.replace('.8', ':8')
                            p = p.replace('.9', ':9')
                            p = p.replace('.weight', ':weight')
                            p = p.replace('.bias', ':bias')
                            p = p.replace('.num_batches_tracked',
                                          ':num_batches_tracked')
                            p = p.replace('.running_mean', ':running_mean')
                            p = p.replace('.running_var', ':running_var')
                            # p = p.replace('.conv1', ':conv1')
                            # p = p.replace('.conv2', ':conv2')
                            # p = p.replace('.conv3', ':conv3')
                            # p = p.replace('.bn1', ':bn1')
                            # p = p.replace('.bn2', ':bn2')
                            # p = p.replace('.bn3', ':bn3')
                            new_paths.append(p)
                        return new_paths

                    # Reducing the depth saves a lot of time
                    paths1_ = shrink_paths(paths1)
                    paths2_ = shrink_paths(paths2)

                subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                    paths1_, paths2_, sep='.', mode=association)
                subpaths1 = [p.replace(':', '.') for p in subpaths1]
                subpaths2 = [p.replace(':', '.') for p in subpaths2]
                mapping = ub.dzip(subpaths1, subpaths2)
                if verbose > 1:
                    other_unmapped = sorted(other_keys - set(mapping.keys()))
                    self_unmapped = sorted(self_keys - set(mapping.values()))
                    print('-- embed association (other -> self) --')
                    print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                    print('self_unmapped = {}'.format(
                        ub.repr2(self_unmapped, nl=1)))
                    print('other_unmapped = {}'.format(
                        ub.repr2(other_unmapped, nl=1)))
                    print('len(mapping) = {}'.format(
                        ub.repr2(len(mapping), nl=1)))
                    print('len(self_unmapped) = {}'.format(
                        ub.repr2(len(self_unmapped), nl=1)))
                    print('len(other_unmapped) = {}'.format(
                        ub.repr2(len(other_unmapped), nl=1)))
                    print('-- end embed association --')

                # HACK: something might be wrong, there was an instance with
                # HRNet_w32 where multiple keys mapped to the same key
                # bad keys were incre_modules.3.0.conv1.weight and conv1.weight
                #
                # This will not error, but may produce bad output
                try:
                    model_state_dict = ub.map_keys(lambda k: mapping.get(k, k),
                                                   model_state_dict)
                except Exception as ex:
                    HACK = 1
                    if HACK:
                        new_state_dict_ = {}
                        for k, v in model_state_dict.items():
                            new_state_dict_[mapping.get(k, k)] = v
                        model_state_dict = new_state_dict_
                        warnings.warn('ex = {!r}'.format(ex))
                    else:
                        raise
            else:
                raise KeyError(association)
        return model_state_dict