Example No. 1
def test_group_items_sorted():
    pairs = [
        ('ham', 'protein'),
        ('jam', 'fruit'),
        ('spam', 'protein'),
        ('eggs', 'protein'),
        ('cheese', 'dairy'),
        ('banana', 'fruit'),
    ]
    item_list, groupid_list = zip(*pairs)
    result1 = ub.group_items(item_list, groupid_list, sorted_=False)
    result2 = ub.group_items(item_list, groupid_list, sorted_=True)
    result1 = ub.map_vals(set, result1)
    result2 = ub.map_vals(set, result2)
    assert result1 == result2
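
As a point of reference, here is a minimal standalone sketch (assuming only that ubelt is installed) of what the grouping above produces, and why both results are passed through ub.map_vals(set, ...): the grouped values are lists whose internal order can differ between the sorted_=True and sorted_=False code paths, so casting the values to sets makes the equality check order-insensitive.

import ubelt as ub

items = ['ham', 'jam', 'spam', 'eggs', 'cheese', 'banana']
groupids = ['protein', 'fruit', 'protein', 'protein', 'dairy', 'fruit']

# group_items maps each group id to the list of items that carry it
groups = ub.group_items(items, groupids)
# -> {'protein': ['ham', 'spam', 'eggs'], 'fruit': ['jam', 'banana'], 'dairy': ['cheese']}

# map_vals applies a function to every value while leaving the keys untouched
assert ub.map_vals(set, groups)['fruit'] == {'jam', 'banana'}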
Example No. 2
def setup_coco_datasets():
    """
    TODO:
        - [ ] Read arbitrary coco datasets here
        - [ ] Do proper train / validation split
        - [ ] Allow custom train / validation split
    """
    from netharn.data.grab_camvid import grab_coco_camvid, grab_camvid_train_test_val_splits
    coco_dset = grab_coco_camvid()

    # Use the same train/test/vali splits used in segnet
    gid_subsets = grab_camvid_train_test_val_splits(coco_dset, mode='segnet')
    print(ub.map_vals(len, gid_subsets))
    # gid_subsets.pop('test')

    # all_gids = list(coco_dset.imgs.keys())
    # gid_subsets = {
    #     'train': all_gids[0:-100],
    #     'vali': all_gids[-100:],
    # }
    coco_datasets = {
        tag: coco_dset.subset(gids)
        for tag, gids in gid_subsets.items()
    }
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)
    return coco_datasets
Example No. 3
    def fix_conference_places(bibman):

        pubman = constants_tex_fixes.PubManager()

        needed = set()

        for entry in bibman.cleaned.values():
            if entry['pub_type'] == 'conference':
                accro, year = (entry['pub_accro'], entry['year'])
                pub = pubman.find(accro)
                if pub.places is None or int(year) not in pub.places:
                    needed.add((accro, year))
                else:
                    place = pub.places[int(year)]
                    print('place = {!r}'.format(place))
                    entry['address'] = place

        if needed:
            needed = list(needed)
            used_years = ub.group_items(needed, ut.take_column(needed, 0))
            for k, v in list(used_years.items()):
                used_years[k] = sorted(v)

            sortby = ub.map_vals(lambda vs: (len(vs), max(e[1] for e in vs)),
                                 used_years)
            used_years = ut.order_dict_by(used_years, ub.argsort(sortby))
            print('NEED CONFERENCE LOCATIONS')
            print(ub.repr2(used_years, nl=2))
Example No. 4
    def __init__(self,
                 index_to_label,
                 batch_size=1,
                 num_batches='auto',
                 quantile=0.5,
                 shuffle=False,
                 rng=None):
        import kwarray

        rng = kwarray.ensure_rng(rng, api='python')
        label_to_indices = kwarray.group_items(np.arange(len(index_to_label)),
                                               index_to_label)

        label_to_freq = ub.map_vals(len, label_to_indices)

        label_to_subsampler = {
            label: RingSampler(indices, shuffle=shuffle, rng=rng)
            for label, indices in label_to_indices.items()
        }

        self.label_to_freq = label_to_freq
        self.index_to_label = index_to_label
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.rng = rng
        self.label_to_indices = label_to_indices
        self.label_to_subsampler = label_to_subsampler

        if num_batches == 'auto':
            self.num_batches = self._auto_num_batches(quantile)
        else:
            self.num_batches = num_batches

        self.labels = list(self.label_to_indices.keys())
Example No. 5
def test_group_items_callable():
    pairs = [
        ('ham', 'protein'),
        ('jam', 'fruit'),
        ('spam', 'protein'),
        ('eggs', 'protein'),
        ('cheese', 'dairy'),
        ('banana', 'fruit'),
    ]
    items, groupids = zip(*pairs)
    lut = dict(zip(items, groupids))

    result1 = ub.group_items(items, groupids)
    result2 = ub.group_items(items, lut.__getitem__)

    result1 = ub.map_vals(set, result1)
    result2 = ub.map_vals(set, result2)
    assert result1 == result2
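
The same idea in a tiny sketch outside the test harness (hypothetical data, ubelt only): the second argument to ub.group_items may be either a parallel iterable of group ids or a callable that derives the id from each item, and both forms produce equivalent groups.

import ubelt as ub

words = ['ham', 'jam', 'spam', 'eggs']
# callable key: group by the last letter of each word
by_callable = ub.group_items(words, lambda w: w[-1])
# equivalent iterable form
by_iterable = ub.group_items(words, [w[-1] for w in words])
assert ub.map_vals(set, by_callable) == ub.map_vals(set, by_iterable)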
Example No. 6
def demo_mwe_issue_dict_parallel():
    config = DemoConfig()
    parser = config.argparse()
    config.update(parser.parse_known_args()[0].__dict__)

    self = DemoModel()
    inputs = {
        'rgb': torch.rand(2, 3, 3, 5),
        'aux': torch.rand(2, 1, 3, 5),
    }
    xpu = nh.XPU.coerce(config['xpu'])
    inputs = xpu.move(inputs)
    model = xpu.mount(self)
    print('xpu = {!r}'.format(xpu))
    print('model = {!r}'.format(model))
    print('inputs.Tshape = ' + ub.repr2(ub.map_vals(lambda x: x.shape, inputs), nl=1))
    outputs = model(inputs)
    print('outputs.Tshape = ' + ub.repr2(ub.map_vals(lambda x: x.shape, outputs), nl=1))
Example No. 7
def test_group_items_sorted_mixed_types():
    import random
    groupid_list = [
        1,
        2,
        3,
        1,
        2,
        3,
        1,
        2,
        3,
        1,
        2,
        3,
        '1',
        '2',
        '3',
        '1',
        '2',
        '3',
        '1',
        '2',
        '3',
        '1',
        '2',
        '3',
    ]
    item_list = list(range(len(groupid_list)))

    # Randomize the order
    random.Random(947043).shuffle(groupid_list)
    random.Random(947043).shuffle(item_list)

    result1 = ub.group_items(item_list, groupid_list, sorted_=True)
    result2 = ub.group_items(item_list, groupid_list, sorted_=False)

    result1 = ub.map_vals(set, result1)
    result2 = ub.map_vals(set, result2)
    assert result1 == result2

    assert '1' in result1
    assert 1 in result1
Example No. 8
def bench_closures():
    """
    Is it faster to use a closure or pass in the variables explicitly?
    """
    import ubelt as ub
    import timerit
    import numpy as np

    # Test a nested func with vs without a closure
    def rand_complex(*shape):
        real = np.random.rand(*shape).astype(complex)
        imag = np.random.rand(*shape).astype(complex) * 1j
        mat = real + imag
        return mat

    s = int(ub.argval('--s', default='1'))
    mat1 = rand_complex(s, s)
    mat2 = rand_complex(s, s)
    N = 1000
    offset = 100

    def nested_closure():
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    def nested_explicit(mat1, mat2, N, offset):
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    ti = timerit.Timerit(int(2**11),
                         bestof=int(2**8),
                         verbose=int(ub.argval('--verbose', default='1')))

    for timer in ti.reset('nested_explicit'):
        with timer:
            nested_explicit(mat1, mat2, N, offset)

    for timer in ti.reset('nested_closure'):
        with timer:
            nested_closure()

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9,
                                             nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
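
The average-position summary at the end (the same pattern appears in Example No. 13) only needs plain ubelt calls; a minimal sketch with made-up timing data shows how ub.argsort over each ranking dict yields per-label positions that ub.map_vals then averages:

import ubelt as ub

# hypothetical rankings: metric -> {label: seconds}
rankings = {
    'mean': {'nested_closure': 1.2e-5, 'nested_explicit': 1.1e-5},
    'min':  {'nested_closure': 1.0e-5, 'nested_explicit': 0.9e-5},
}
positions = ub.ddict(list)
for metric, label_to_time in rankings.items():
    # argsort on a dict returns its keys ordered by value (fastest first)
    for pos, label in enumerate(ub.argsort(label_to_time)):
        positions[label].append(pos)
average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
assert average_position == {'nested_explicit': 0.0, 'nested_closure': 1.0}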
Example No. 9
 def __init__(verif, infr):
     verif.rng = np.random.RandomState(4033913)
     verif.dummy_params = {
         NEGTV: {'mean': .2, 'std': .25},
         POSTV: {'mean': .85, 'std': .2},
         INCMP: {'mean': .15, 'std': .1},
     }
     verif.infr = infr
     verif.orig_nodes = set(infr.aids)
     verif.orig_labels = infr.get_node_attrs('orig_name_label')
     verif.orig_groups = ub.invert_dict(verif.orig_labels, False)
     verif.orig_groups = ub.map_vals(set, verif.orig_groups)
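
A self-contained sketch of the inversion pattern used in the last two lines (independent of the verif/infr objects, with a hypothetical node-to-label mapping): ub.invert_dict with its second argument set to False maps each label back to the collection of keys that carried it, and the follow-up ub.map_vals(set, ...) normalizes those collections to plain sets.

import ubelt as ub

# hypothetical node -> label mapping standing in for infr.get_node_attrs(...)
node_to_label = {1: 'a', 2: 'a', 3: 'b', 4: 'b', 5: 'c'}

label_to_nodes = ub.invert_dict(node_to_label, False)
label_to_nodes = ub.map_vals(set, label_to_nodes)
assert label_to_nodes == {'a': {1, 2}, 'b': {3, 4}, 'c': {5}}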
Example No. 10
def color_nodes(graph,
                labelattr='label',
                brightness=.878,
                outof=None,
                sat_adjust=None):
    """ Colors edges and nodes by nid """
    node_to_lbl = nx.get_node_attributes(graph, labelattr)
    unique_lbls = sorted(set(node_to_lbl.values()))
    ncolors = len(unique_lbls)
    if outof is None:
        if (ncolors) == 1:
            unique_colors = [util.Color('lightblue').as01()]
        elif (ncolors) == 2:
            # https://matplotlib.org/examples/color/named_colors.html
            unique_colors = ['royalblue', 'orange']
            unique_colors = [util.Color(c).as01('bgr') for c in unique_colors]
        else:
            unique_colors = util.distinct_colors(ncolors,
                                                 brightness=brightness)
    else:
        unique_colors = util.distinct_colors(outof, brightness=brightness)

    if sat_adjust:
        unique_colors = [
            util.Color(c).adjust_hsv(0.0, sat_adjust, 0.0)
            for c in unique_colors
        ]
    # Find edges and aids strictly between two nids
    if outof is None:
        lbl_to_color = ub.dzip(unique_lbls, unique_colors)
    else:
        gray = util.Color('lightgray').as01('bgr')
        unique_colors = [gray] + unique_colors
        offset = max(1, min(unique_lbls)) - 1
        node_to_lbl = ub.map_vals(lambda nid: max(0, nid - offset),
                                  node_to_lbl)
        lbl_to_color = ub.dzip(range(outof + 1), unique_colors)
    node_to_color = ub.map_vals(lbl_to_color, node_to_lbl)
    nx.set_node_attributes(graph, name='color', values=node_to_color)
    nx_ensure_agraph_color(graph)
Example No. 11
def rank_inventory(inventory):
    candidates = list(ub.flatten(list(pkmn.family(ancestors=False, node=True))
                                 for pkmn in inventory))

    groups = ub.group_items(candidates, key=lambda p: p.name)

    leages = {
        'master': {'max_cp': float('inf')},
        'ultra': {'max_cp': 2500},
        'great': {'max_cp': 1500},
        'little': {'max_cp': 500},
    }

    max_level = 45  # for XL candy
    # max_level = 40  # normal

    all_dfs = []

    for name, group in groups.items():
        print('\n\n------------\n\n')
        print('name = {!r}'.format(name))
        for leage_name, leage_filters in leages.items():
            max_cp = leage_filters['max_cp']
            print('')
            print(' ========== ')
            print(' --- {} in {} --- '.format(name, leage_name))
            not_eligible = [p for p in group if p.cp is not None and p.cp > max_cp]
            eligible = [p for p in group if p.cp is None or p.cp <= max_cp]
            print('not_eligible = {!r}'.format(not_eligible))
            if len(eligible) > 0:
                first = ub.peek(eligible)
                have_ivs = eligible
                df = first.leage_rankings_for(have_ivs, max_cp=max_cp,
                                              max_level=max_level)
                all_dfs.append(df)
            else:
                print('none eligible')

    # Print out the best ranks for each set of IVs over all possible forms
    # (lets you know which ones can be transferred safely)

    iv_to_rank = ub.ddict(list)
    for df in all_dfs:
        if df is not None:
            df = df.set_index(['iva', 'ivd', 'ivs'])
            for iv, rank in zip(df.index, df['rank']):
                iv_to_rank[iv].append(rank)

    iv_to_best_rank = ub.map_vals(sorted, iv_to_rank)
    iv_to_best_rank = ub.sorted_vals(iv_to_best_rank)
    print('iv_to_best_rank = {}'.format(ub.repr2(iv_to_best_rank, nl=1, align=':')))
Example No. 12
    def images_with_keypoints():
        keypoint_gids = set()
        for aid, ann in merged.anns.items():
            if ann['roi_shape'] == 'keypoints':
                keypoint_gids.add(ann['image_id'])

        relevant = ub.dict_subset(merged.gid_to_aids, keypoint_gids)
        relevant = {
            gid:
            [a for a in aids if merged.anns[a]['roi_shape'] == 'keypoints']
            for gid, aids in relevant.items()
        }

        gid_list = ub.argsort(ub.map_vals(len, relevant))[::-1]
        return gid_list
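
The final line leans on the fact that ub.argsort accepts a dictionary, in which case it returns the keys ordered by their values; a minimal sketch of that sort-images-by-annotation-count idiom with made-up ids:

import ubelt as ub

# hypothetical image-id -> annotation-id list
gid_to_aids = {1: [10], 2: [11, 12, 13], 3: [14, 15]}

counts = ub.map_vals(len, gid_to_aids)    # {1: 1, 2: 3, 3: 2}
gid_list = ub.argsort(counts)[::-1]       # keys sorted by value, descending
assert gid_list == [2, 3, 1]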
Example No. 13
def main():
    import ubelt as ub
    from ubelt import util_list
    from ubelt.util_list import take
    import random
    from math import e

    # # Data
    N = 100
    array = [random.random() for _ in range(N)]
    indices = [random.randint(0, N - 1) for _ in range(int(N // e))]

    ti = ub.Timerit(2 ** 11, bestof=2 ** 8, verbose=1)

    for timer in ti.reset('take'):
        with timer:
            list(take(array, indices))

    for timer in ti.reset('util_list.take'):
        with timer:
            list(util_list.take(array, indices))

    for timer in ti.reset('ub.take'):
        with timer:
            list(ub.take(array, indices))

    print('---')

    # import pandas as pd
    # df = pd.DataFrame(rankings)
    # print('df =\n{}'.format(df))

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2)))

    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
Example No. 14
    def category_annotation_frequency(self):
        """
        Reports the number of annotations of each category

        Example:
            >>> dataset = demo_coco_data()
            >>> self = CocoDataset(dataset, tag='demo')
            >>> hist = self.category_annotation_frequency()
            >>> print(ub.repr2(hist))
            {
                'astroturf': 0,
                'astronaut': 1,
                'astronomer': 1,
                'helmet': 1,
                'rocket': 1,
                'mouth': 2,
                'star': 5,
            }
        """
        catname_to_nannots = ub.map_keys(lambda x: self.cats[x]['name'],
                                         ub.map_vals(len, self.cid_to_aids))
        catname_to_nannots = ub.odict(
            sorted(catname_to_nannots.items(), key=lambda kv: (kv[1], kv[0])))
        return catname_to_nannots
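
Detached from the CocoDataset internals, the return value is built from a small composition worth seeing in isolation (hypothetical ids and names): ub.map_vals(len, ...) turns an id-to-annotations mapping into counts, and ub.map_keys then renames the integer ids to human-readable category names.

import ubelt as ub

cid_to_aids = {1: [], 2: [7], 3: [8, 9]}                 # hypothetical category-id -> annotation ids
cid_to_name = {1: 'astroturf', 2: 'helmet', 3: 'mouth'}  # hypothetical category-id -> name

counts = ub.map_vals(len, cid_to_aids)
catname_to_nannots = ub.map_keys(lambda cid: cid_to_name[cid], counts)
assert catname_to_nannots == {'astroturf': 0, 'helmet': 1, 'mouth': 2}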
Example No. 15
    def postprocess(self, output, inp_size, orig_sizes, conf_thresh=0.24,
                    nms_thresh=0.5, max_per_image=300):
        """
        Postprocess the raw network output into usable bounding boxes

        Args:
            output (tuple): (aoff_pred, iou_pred, prob_pred), where:

            aoff_pred (ndarray): [B, HxW, A, 4]
                anchor offsets in the format (sig(x), sig(y), exp(w), exp(h))
                note: in the aoff format x and y are centers of the box
                and wh represents multiples of the anchor w/h

            iou_pred (ndarray): [B, HxW, A, 1]
                predicted iou (is this the objectness score?)

            prob_pred (ndarray): [B, HxW, A, C]
                predicted class probability

            inp_size (tuple): size (W, H) of input to network

            orig_sizes (list): [B, 2]
                size (W, H) of each input image before rescaling

            conf_thresh (float): threshold for filtering bboxes. Keep only the
                detections above this confidence value.

            nms_thresh (float): non-max suppression IoU threshold

        Notes:
            Let B = batch_size
            Let A = num_anchors
            Let C = num_classes
            Let (H, W) = shape of the output grid

            Original params for nms_thresh (iou_thresh) and conf_thresh
            (thresh) are here:
                https://github.com/pjreddie/darknet/blob/master/examples/yolo.c#L213

            On parameter settings:
                Remove the bounding boxes which have no object. Remove the
                bounding boxes that predict a confidence score less than a
                threshold of 0.24

                https://towardsdatascience.com/training-object-detection-yolov2-from-scratch-using-cyclic-learning-rates-b3364f7e4755

            Network Visualization:
                http://ethereon.github.io/netscope/#/gist/d08a41711e48cf111e330827b1279c31

        CommandLine:
            python -m clab.models.yolo2.darknet Darknet19.postprocess --show

        Example:
            >>> from clab.models.yolo2.darknet import *
            >>> inp_size = (288, 288)
            >>> self = Darknet19(num_classes=20)
            >>> state_dict = torch.load(demo_weights())['model_state_dict']
            >>> self.load_state_dict(state_dict)
            >>> im_data, rgb255 = demo_image(inp_size)
            >>> im_data = torch.cat([im_data, im_data])  # make a batch size of 2
            >>> output = self(im_data)
            >>> # Define remaining params
            >>> orig_sizes = torch.LongTensor([rgb255.shape[0:2][::-1]] * len(im_data))
            >>> conf_thresh = 0.01
            >>> nms_thresh = 0.5
            >>> postout = self.postprocess(output, inp_size, orig_sizes, conf_thresh, nms_thresh)
            >>> out_boxes, out_scores, out_cxs = postout
            >>> # xdoc: +REQUIRES(--show)
            >>> from clab.util import mplutil
            >>> mplutil.qtensure()  # xdoc: +SKIP
            >>> label_names = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            >>>  'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
            >>>  'dog', 'horse', 'motorbike', 'person',
            >>>  'pottedplant', 'sheep', 'sofa', 'train',
            >>>  'tvmonitor')
            >>> import pandas as pd
            >>> cls_names = list(ub.take(label_names, out_cxs[0]))
            >>> print(pd.DataFrame({'name': cls_names, 'score': out_scores[0]}))
            >>> mplutil.figure(fnum=1, doclf=True)
            >>> mplutil.imshow(rgb255, colorspace='rgb')
            >>> mplutil.draw_boxes(out_boxes[0])
            >>> mplutil.show_if_requested()
        """
        aoff_pred_, iou_pred_, prob_pred_ = output

        # convert to numpy
        aoff_pred = aoff_pred_.data.cpu().numpy()
        iou_pred  = iou_pred_.data.cpu().numpy()
        prob_pred = prob_pred_.data.cpu().numpy()

        orig_sizes = orig_sizes.data.cpu().numpy()

        # num_classes, num_anchors = cfg.num_classes, cfg.num_anchors
        num_classes = self.num_classes
        anchors = self.anchors
        out_size = np.array(inp_size) // 32  # hacked we know the factor is 32
        W, H = out_size

        out_boxes = []
        out_scores = []
        out_cxs = []

        # For each image in the batch, postprocess the predicted boxes
        for bx in range(aoff_pred.shape[0]):
            aoffs = aoff_pred[bx][None, :]
            ious  = iou_pred[bx]
            probs = prob_pred[bx]
            orig_w, orig_h = orig_sizes[bx]

            # Convert anchored predictions to absolute tlbr bounding boxes in
            # normalized space
            aoffs = np.ascontiguousarray(aoffs, dtype=float)
            norm_boxes = yolo_utils.yolo_to_bbox(aoffs, anchors, H, W)[0]

            # Scale the bounding boxes to the size of the original image.
            # and convert to integer representation.
            boxes = norm_boxes.copy()
            boxes[..., 0::2] *= float(orig_w)
            boxes[..., 1::2] *= float(orig_h)
            boxes = boxes.astype(int)

            # converts [1, W * H, A, 4] -> [W * H * A, 4]
            boxes = np.reshape(boxes, [-1, 4])
            ious = np.reshape(ious, [-1])
            probs = np.reshape(probs, [-1, num_classes])

            # Predict the class with maximum probability
            cls_inds = np.argmax(probs, axis=1)
            cls_probs = probs[(np.arange(probs.shape[0]), cls_inds)]

            """
            Reference: arXiv:1506.02640 [cs.CV] (Yolo 1):
                Formally we define confidence as $Pr(Object) ∗ IOU^truth_pred$.
                If no object exists in that cell, the confidence scores should
                be zero. Otherwise we want the confidence score to equal the
                intersection over union (IOU) between the predicted box and the
                ground truth
            """
            # Compute the final probabilities for the predicted class
            scores = ious * cls_probs

            # filter boxes based on confidence threshold
            keep_conf = np.where(scores >= conf_thresh)
            boxes = boxes[keep_conf]
            scores = scores[keep_conf]
            cls_inds = cls_inds[keep_conf]

            # non-max suppression (per-class)
            keep_flags = np.zeros(len(boxes), dtype=np.uint8)
            cx_to_inds = ub.group_items(range(len(cls_inds)), cls_inds)
            cx_to_inds = ub.map_vals(np.array, cx_to_inds)
            for cx, inds in cx_to_inds.items():
                # get predictions for each class
                c_bboxes = boxes[inds]
                c_scores = scores[inds]
                c_keep = yolo_utils.nms_detections(c_bboxes, c_scores,
                                                   nms_thresh)
                keep_flags[inds[c_keep]] = 1

            keep_nms = np.where(keep_flags > 0)
            boxes = boxes[keep_nms]
            scores = scores[keep_nms]
            cls_inds = cls_inds[keep_nms]

            # clip
            boxes = yolo_utils.clip_boxes(boxes, im_shape=(orig_h, orig_w))

            # sort boxes by descending score
            sortx = scores.argsort()[::-1]
            boxes = boxes[sortx]
            scores = scores[sortx]
            cls_inds = cls_inds[sortx]

            if max_per_image > 0 and len(boxes) > max_per_image:
                boxes = boxes[:max_per_image]
                scores = scores[:max_per_image]
                cls_inds = cls_inds[:max_per_image]

            out_boxes.append(boxes)
            out_scores.append(scores)
            out_cxs.append(cls_inds)

        postout = (out_boxes, out_scores, out_cxs)
        return postout
Example No. 16
def 字典_根据值重建(func, dict_):
    data = ub.map_vals(func, dict_)
    return data
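
The function name roughly translates to "rebuild dict from its values"; it is a thin wrapper around ub.map_vals. A small usage sketch, assuming the definition above is in scope:

prices = {'apple': 1.0, 'banana': 0.5}
doubled = 字典_根据值重建(lambda v: v * 2, prices)
assert doubled == {'apple': 2.0, 'banana': 1.0}
# equivalent direct call: ub.map_vals(lambda v: v * 2, prices)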
Example No. 17
def benchmark_hash_data():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json is faster, or at least as fast, in most cases
    # xxhash is also significantly faster than sha512
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2**s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata,
                          ydata,
                          xlabel='N',
                          ylabel='seconds',
                          title='convert = {}'.format(convert))
        kwplot.show_if_requested()
Example No. 18
def naive_password_strategy(required_len=14,
                            required_caps=1,
                            required_special=1,
                            required_digits=1):
    """
    Simulate a "bad" password that meets typical password requirements

    Get a naive version of the "N char minimum with a special char" password.
    One common strategy for getting a 14-char pass is using 2 words, or a word
    and a date, with misspellings, shuffled case, and a special char, which is
    probably _, -, ., !, or @.

    Example:
        scheme = naive_password_strategy()
        print(f'scheme={scheme}')
    """
    # When people are forced to include a special character, this is the
    # likelihood they choose one of the following:
    # https://www.reddit.com/r/dataisbeautiful/comments/2vfgvh/most_frequentlyused_special_characters_in_10/
    special_char_freq = {
        '_': 0.332,
        '.': 0.304,
        '-': 0.086,
        '!': 0.065,
        '@': 0.052,
        '*': 0.032,
        '$': 0.019,
        '&': 0.009,
        '%': 0.007,
    }
    _total = sum(special_char_freq.values())
    special_char_prob = ub.map_vals(lambda x: x / _total, special_char_freq)

    # Only search the most likely special chars
    naive_special_chars = {
        k: v
        for k, v in special_char_prob.items() if v > 0.05
    }

    if 0:
        import diceware
        wlpath = diceware.wordlist.get_wordlist_path('en')
        wlpath = diceware.wordlist.get_wordlist_path('en_securedrop')
        wordlist = list(diceware.wordlist.WordList(wlpath))
        word_lengths = sorted(map(len, wordlist))
        word_length_hist = ub.dict_hist(word_lengths)
    else:
        # Number of common password words with a specific length
        word_length_hist = {
            1: 10,
            2: 90,
            3: 582,
            4: 2279,
            5: 3350,
            6: 1313,
            7: 539,
            8: 22,
            9: 5,
            10: 2
        }

    # Also needs a number and special char
    required_word_len = required_len - 2

    # How many permutations of N words are there that get over the char limit?
    total_passwords = 0
    import itertools as it
    import functools
    import operator as op
    possible_num_word = [1, 2, 3]
    for num_words in possible_num_word:
        for ts in it.product(*[word_length_hist.items()] * num_words):
            ks = [k for k, v in ts]
            vs = [v for k, v in ts]
            # If the lengths are above, we can take any of these permutations
            # (with replacement)
            if sum(ks) > required_word_len:
                # Compute the number of phrases, then augment this with the
                # special properties.
                num_phrases = functools.reduce(op.mul, vs)

                # People might insert a special character at the start, middle,
                # or end, or predictably replace a letter.
                predictability_factor = 2
                num_special_locs = (num_words + 1) * predictability_factor
                special_factor = required_special * len(
                    naive_special_chars) * num_special_locs

                # People might insert a digit at start, middle, or end, or maybe
                # inside of a word replacing a common letter.
                num_digit_locs = num_words + 1
                num_digits = 10 + 100  # usually a 1 or 2 digit number
                digit_factor = required_digits * num_digits * num_digit_locs

                # People might only shuffle the case of 1 or 2 letters.
                # usually at the beginning of words
                caps_factor = required_caps * num_words

                total = (num_phrases * (1 + special_factor) *
                         (1 + caps_factor) * (1 + digit_factor))
                total_passwords += total

    name_parts = ['naive', str(required_len)]
    if required_caps:
        name_parts.append('caps')

    if required_digits:
        name_parts.append('digit')

    if required_special:
        name_parts.append('special')

    name = '-'.join(name_parts)

    scheme = {
        'name': name,
        'num': 1,
        'base': total_passwords,
    }
    return scheme
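
A standalone check of the normalization step near the top of this function (same frequency table, ubelt only): dividing each frequency by the running total turns the table into a probability distribution, from which the v > 0.05 filter then keeps only the most likely special characters.

import ubelt as ub

special_char_freq = {'_': 0.332, '.': 0.304, '-': 0.086, '!': 0.065,
                     '@': 0.052, '*': 0.032, '$': 0.019, '&': 0.009, '%': 0.007}
_total = sum(special_char_freq.values())   # 0.906
special_char_prob = ub.map_vals(lambda x: x / _total, special_char_freq)
assert abs(sum(special_char_prob.values()) - 1.0) < 1e-9   # a proper distribution now
naive_special_chars = {k: v for k, v in special_char_prob.items() if v > 0.05}
assert set(naive_special_chars) == {'_', '.', '-', '!', '@'}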
Example No. 19
def load_partial_state(model,
                       model_state_dict,
                       leftover=None,
                       ignore_unset=False,
                       verbose=2,
                       mangle=True,
                       association=None,
                       initializer=None):
    """
    CommandLine:
        python -m netharn.initializers.nninit_base load_partial_state

    Args:
        model (torch.nn.Module): module to initialize

        model_state_dict (dict): state dict we wish to transfer

        leftover (callable): fallback method for initializing incompatible
             areas; if None, those areas are left as-is.

        association (str): controls how we search for the association between
            the two model states. Can be strict, module-hack, prefix-hack, or
            embedding.  Default is: prefix-hack.

        mangle (bool, default=True): If True, mangles tensors that have the
            same key but different shapes, forcing them to fit. This might
            destroy information when forcing a larger tensor into a smaller
            tensor, or leave extra uninitialized room when a small tensor is
            placed in a larger one. Note be careful when mangling a
            classification layer if class indexes are not aligned.

        verbose (int): verbosity level

    Returns:
        Dict: info - summary of actions taken

    TODO:
        - [ ] Allow user to specify how incompatible layers are handled.

    Notes:

        Have you ever had a torch model with a state dict whose keys looked
        like: `mymodel.detector.layer1.conv.weight`, but a pretrained weight
        file with keys that looked like: `module.layer1.conv.weight`?

        The latest version of
        `netharn.initializers.functional.load_partial_state` can handle this by
        solving a maximum-common-subtree-isomorphism problem. This computes the
        largest possible mapping between the two state dictionaries that share
        consistent suffixes.

        >>> # This means you can load an off-the-shelf unmodified pretrained resnet50
        >>> # where the keys might look something like this:
        >>> resnet_keys = {
        >>>     'conv1.weight',
        >>>     'layer1.0.conv1.weight',
        >>>     'layer1.0.conv2.weight',
        >>>     'layer1.0.conv3.weight',
        >>>     'layer1.0.downsample.0.weight',
        >>>     'layer2.0.conv1.weight',
        >>>     'layer2.0.conv2.weight',
        >>>     'layer2.0.conv3.weight',
        >>>     'layer3.0.conv1.weight',
        >>>     'layer4.0.conv1.weight',
        >>>     'fc.weight',
        >>>     'fc.bias',
        >>> }
        >>> #
        >>> # And perhaps you have a model that has a state dict where keys
        >>> # look like this:
        >>> model_keys = {
        >>>     'preproc.conv1.weight'
        >>>     'backbone.layer1.0.conv1.weight',
        >>>     'backbone.layer1.0.conv2.weight',
        >>>     'backbone.layer1.0.conv3.weight',
        >>>     'backbone.layer1.0.downsample.0.weight',
        >>>     'backbone.layer2.0.conv1.weight',
        >>>     'backbone.layer2.0.conv2.weight',
        >>>     'backbone.layer2.0.conv3.weight',
        >>>     'backbone.layer3.0.conv1.weight',
        >>>     'backbone.layer4.0.conv1.weight',
        >>>     'head.conv1'
        >>>     'head.conv2'
        >>>     'head.fc.weight'
        >>>     'head.fc.bias'
        >>> }
        >>> #
        >>> # We can compute a partial mapping between them
        >>> subpaths1, subpaths2 = maximum_common_ordered_subpaths(resnet_keys, model_keys)
        >>> print(ub.repr2(ub.dzip(subpaths1, subpaths2)))
        {
            'layer1.0.conv2.weight':        'backbone.layer1.0.conv2.weight',
            'layer1.0.conv3.weight':        'backbone.layer1.0.conv3.weight',
            'layer1.0.downsample.0.weight': 'backbone.layer1.0.downsample.0.weight',
            'layer2.0.conv1.weight':        'backbone.layer2.0.conv1.weight',
            'layer2.0.conv2.weight':        'backbone.layer2.0.conv2.weight',
            'layer2.0.conv3.weight':        'backbone.layer2.0.conv3.weight',
            'layer3.0.conv1.weight':        'backbone.layer3.0.conv1.weight',
            'layer4.0.conv1.weight':        'backbone.layer4.0.conv1.weight',
        }

        Also, if the sizes of the tensors don't quite fit, they will be
        mangled, i.e. "shoved-in" as best as possible.


    Example:
        >>> import netharn as nh
        >>> # ---
        >>> model_other = nh.models.ToyNet2d(input_channels=1, num_classes=10)
        >>> model_other.hack_param1 = torch.nn.Parameter(torch.rand(1))
        >>> model_other.hack_param3 = torch.nn.Parameter(torch.rand(3))
        >>> model_other.hack_param5 = torch.nn.Parameter(torch.rand(3))
        >>> # ---
        >>> model_self = nh.models.ToyNet2d(input_channels=3, num_classes=2)
        >>> model_self.hack_param1 = torch.nn.Parameter(torch.rand(3))
        >>> model_self.hack_param2 = torch.nn.Parameter(torch.rand(3))
        >>> model_self.hack_param4 = torch.nn.Parameter(torch.rand(3))
        >>> # ---
        >>> model_state_dict = model_other.state_dict()
        >>> load_partial_state(model_self, model_state_dict)
        >>> load_partial_state(model_self, model_state_dict, leftover=torch.nn.init.kaiming_normal_)
        >>> _ = load_partial_state(model_self, model_state_dict, leftover=torch.nn.init.kaiming_normal_, association='embedding')

    Example:
        >>> from netharn.initializers.functional import *  # NOQA
        >>> import netharn as nh
        >>> xpu = nh.XPU(None)
        >>> self1 = nh.models.ToyNet2d()
        >>> self2 = xpu.mount(self1)
        >>> load_partial_state(self2, self1.state_dict())
        >>> load_partial_state(self1, self2.state_dict())
        >>> # Add extra nonsense to state-dict
        >>> extra_state_dict = {'extra.' + k: v for k, v in self1.state_dict().items()}
        >>> extra_state_dict['stats'] = ub.peek(extra_state_dict.values()).clone()
        >>> model = self2
        >>> model_state_dict = extra_state_dict
        >>> load_partial_state(self2, extra_state_dict, association='embedding')

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> from netharn.initializers.functional import *  # NOQA
        >>> import torchvision
        >>> import torch
        >>> resnet50 = torchvision.models.resnet50()
        >>> class CustomModel(torch.nn.Module):
        >>>     def __init__(self):
        >>>         super().__init__()
        >>>         self.module = resnet50
        >>>         self.extra = torch.nn.Linear(1, 1)
        >>> model = CustomModel()
        >>> model_state_dict = resnet50.state_dict()
        >>> model_state_dict2 = {'prefix.' + k: v for k, v in model_state_dict.items()}
        >>> import ubelt as ub
        >>> with ub.Timer(verbose=2, label='strict'):
        >>>     load_partial_state(model, model_state_dict, association='strict', verbose=0)
        >>> with ub.Timer(verbose=2, label='prefix-hack'):
        >>>     load_partial_state(model, model_state_dict, association='prefix-hack', verbose=0)
        >>> with ub.Timer(verbose=2, label='module-hack'):
        >>>     load_partial_state(model, model_state_dict, association='module-hack', verbose=0)
        >>> with ub.Timer(verbose=2, label='embedding'):
        >>>     load_partial_state(model, model_state_dict, association='embedding', verbose=0)

        >>> load_partial_state(model, model_state_dict, association='prefix-hack', verbose=1)
        >>> load_partial_state(model, model_state_dict, association='module-hack', verbose=1)

    Ignore:
        >>> from bioharn.models.new_models_v1 import *  # NOQA
        >>> channels = ChannelSpec.coerce('rgb')
        >>> input_stats = None
        >>> self = MM_HRNetV2_w18_MaskRCNN(classes=3, channels=channels)
        >>> filename = self.pretrained_url
        >>> self._init_backbone_from_pretrained(self.pretrained_url)
        >>> from bioharn.models.mm_models import _load_mmcv_weights
        >>> model_state = _load_mmcv_weights(filename)
        >>> self.detector.backbone.chan_backbones.rgb
        >>> model = self
        >>> model_state_dict = model_state

        from netharn.initializers.functional import *  # NOQA
        import xdev
        globals().update(**xdev.get_func_kwargs(load_partial_state))

    CommandLine:
        xdoctest -m /home/joncrall/code/netharn/netharn/initializers/functional.py load_partial_state:2 --slow

    """
    if association is None:
        association = 'module-hack'  # old default
        # association = 'prefix-hack'  # new default

    if initializer is not None:
        warnings.warn('initializer is deprecated, use leftover instead')
        leftover = initializer

    self_state = model.state_dict()

    def _fix_keys(model_state_dict):
        """
        Hack around DataParallel wrapper. If there is nothing in common between
        the two models check to see if prepending 'module.' to other keys fixes
        it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)

        if 0:
            # Automatic way to reduce nodes in the trees?
            # If node b always follows node a, can we contract it?
            nodes1 = [n for p in other_keys for n in p.split('.')]
            nodes2 = [n for p in self_keys for n in p.split('.')]
            tups1 = list(tup for key in other_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            tups2 = list(tup for key in self_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            x = ub.ddict(list)
            for a, b in tups1:
                x[a].append(b)
            for a, b in tups2:
                x[a].append(b)

            nodehist = ub.dict_hist(nodes1 + nodes2)

            for k, v in x.items():
                print('----')
                print(k)
                print(nodehist[k])
                follow_hist = ub.dict_hist(v)
                print(follow_hist)
                total = sum(follow_hist.values())
                if ub.allsame(follow_hist.values()) and total == nodehist[k]:
                    print('CONTRACT')

            # pair_freq = ub.dict_hist(ub.flatten([tups1, tups2]))
            # print(forest_str(paths_to_otree(other_keys, '.')))

        # common_keys = other_keys.intersection(self_keys)
        # if not common_keys:
        if not other_keys.issubset(self_keys):
            if association == 'strict':
                pass
            elif association == 'module-hack':
                # If there are no common keys try a hack
                prefix = 'module.'

                def smap(f, ss):
                    return set(map(f, ss))

                def fix1(k):
                    return prefix + k

                def fix2(k):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                if smap(fix1, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix1, model_state_dict)
                elif smap(fix2, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix2, model_state_dict)
            elif association == 'prefix-hack':
                import functools

                def add_prefix(k, prefix):
                    return prefix + k

                def remove_prefix(k, prefix):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                # set1 = other_keys
                # target_set2 = self_keys
                found = _best_prefix_transform(other_keys, self_keys)
                if found is not None:
                    for action, prefix in found['transform']:
                        if action == 'add':
                            func = functools.partial(add_prefix, prefix=prefix)
                        elif action == 'remove':
                            func = functools.partial(remove_prefix,
                                                     prefix=prefix)
                        else:
                            raise AssertionError
                        model_state_dict = ub.map_keys(func, model_state_dict)
            elif association in {'embedding', 'isomorphism'}:
                if verbose > 1:
                    print('Using subpath {} association, may take some time'.
                          format(association))
                # I believe this is the correct way to solve the problem
                paths1 = sorted(other_keys)
                paths2 = sorted(self_state)

                if 1:
                    # hack to filter to reduce tree size in embedding problem
                    def shrink_paths(paths):
                        new_paths = []
                        for p in paths:
                            p = p.replace('.0', ':0')
                            p = p.replace('.1', ':1')
                            p = p.replace('.2', ':2')
                            p = p.replace('.3', ':3')
                            p = p.replace('.4', ':4')
                            p = p.replace('.5', ':5')
                            p = p.replace('.6', ':6')
                            p = p.replace('.7', ':7')
                            p = p.replace('.8', ':8')
                            p = p.replace('.9', ':9')
                            p = p.replace('.weight', ':weight')
                            p = p.replace('.bias', ':bias')
                            p = p.replace('.num_batches_tracked',
                                          ':num_batches_tracked')
                            p = p.replace('.running_mean', ':running_mean')
                            p = p.replace('.running_var', ':running_var')
                            # p = p.replace('.conv1', ':conv1')
                            # p = p.replace('.conv2', ':conv2')
                            # p = p.replace('.conv3', ':conv3')
                            # p = p.replace('.bn1', ':bn1')
                            # p = p.replace('.bn2', ':bn2')
                            # p = p.replace('.bn3', ':bn3')
                            new_paths.append(p)
                        return new_paths

                    # Reducing the depth saves a lot of time
                    paths1_ = shrink_paths(paths1)
                    paths2_ = shrink_paths(paths2)

                subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                    paths1_, paths2_, sep='.', mode=association)
                subpaths1 = [p.replace(':', '.') for p in subpaths1]
                subpaths2 = [p.replace(':', '.') for p in subpaths2]
                mapping = ub.dzip(subpaths1, subpaths2)
                if verbose > 1:
                    other_unmapped = sorted(other_keys - set(mapping.keys()))
                    self_unmapped = sorted(self_keys - set(mapping.values()))
                    print('-- embed association (other -> self) --')
                    print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                    print('self_unmapped = {}'.format(
                        ub.repr2(self_unmapped, nl=1)))
                    print('other_unmapped = {}'.format(
                        ub.repr2(other_unmapped, nl=1)))
                    print('len(mapping) = {}'.format(
                        ub.repr2(len(mapping), nl=1)))
                    print('len(self_unmapped) = {}'.format(
                        ub.repr2(len(self_unmapped), nl=1)))
                    print('len(other_unmapped) = {}'.format(
                        ub.repr2(len(other_unmapped), nl=1)))
                    print('-- end embed association --')

                # HACK: something might be wrong, there was an instance with
                # HRNet_w32 where multiple keys mapped to the same key
                # bad keys were incre_modules.3.0.conv1.weight and conv1.weight
                #
                # This will not error, but may produce bad output
                try:
                    model_state_dict = ub.map_keys(lambda k: mapping.get(k, k),
                                                   model_state_dict)
                except Exception as ex:
                    HACK = 1
                    if HACK:
                        new_state_dict_ = {}
                        for k, v in model_state_dict.items():
                            new_state_dict_[mapping.get(k, k)] = v
                        model_state_dict = new_state_dict_
                        warnings.warn('ex = {!r}'.format(ex))
                    else:
                        raise
            else:
                raise KeyError(association)
        return model_state_dict

    other_state = _fix_keys(model_state_dict)

    # will end up as the keys in our model that were not set
    self_unset_keys = set(self_state.keys())
    # will end up as the keys in the other model that were not used
    other_unused_keys = set(other_state.keys())

    seen_keys = ub.ddict(set)

    for key, other_value in other_state.items():
        if key not in self_state:
            if verbose > 0:
                print('Skipping {} because it does not exist'.format(key))
            seen_keys['skipped'].add(key)
        else:
            self_value = self_state[key]
            if other_value.size() == self_value.size():
                self_state[key] = other_value
                self_unset_keys.remove(key)
                other_unused_keys.remove(key)
                seen_keys['full_add'].add(key)
            elif len(other_value.size()) == len(self_value.size()):
                if not mangle:
                    if verbose > 0:
                        print(
                            'Skipping {} due to incompatible size and mangle=False'
                            .format(key))
                        print(' * self  = {!r}'.format(self_value.size()))
                        print(' * other = {!r}'.format(other_value.size()))
                    seen_keys['skipped'].add(key)
                elif key.endswith('bias'):
                    if verbose > 0:
                        print(
                            'Skipping {} due to incompatible size'.format(key))
                        print(' * self  = {!r}'.format(self_value.size()))
                        print(' * other = {!r}'.format(other_value.size()))
                    seen_keys['skipped'].add(key)
                else:
                    if leftover is None:
                        if verbose > 0:
                            print(
                                'Skipping {} due to incompatible size and no default initializer'
                                .format(key))
                            print(' * self  = {!r}'.format(self_value.size()))
                            print(' * other = {!r}'.format(other_value.size()))
                        seen_keys['skipped'].add(key)
                    else:
                        if verbose > 0:
                            print('Partially add {} with incompatible size'.
                                  format(key))
                            print(' * self  = {!r}'.format(self_value.size()))
                            print(' * other = {!r}'.format(other_value.size()))
                        # Initialize all weights in case any are unspecified
                        if leftover is not None:
                            try:
                                leftover(self_state[key])
                            except Exception:
                                if verbose > 0:
                                    print('Unable to init {} with {}'.format(
                                        key, leftover))

                        # Transfer as much as possible
                        min_size = np.minimum(self_state[key].shape,
                                              other_value.shape)
                        sl = tuple([slice(0, s) for s in min_size])
                        self_state[key][sl] = other_value[sl]

                        # if shock_partial:
                        #     # Shock weights because we are doing something weird
                        #     # might help the network recover in case this is
                        #     # not a good idea
                        #     shock(self_state[key], func=leftover)
                        self_unset_keys.remove(key)
                        other_unused_keys.remove(key)

                        if self_state[key].numel() < other_value.numel():
                            seen_keys['partial_add_some'].add(key)
                        else:
                            seen_keys['partial_add_all'].add(key)
            else:
                if verbose > 0:
                    print('Skipping {} due to incompatible size'.format(key))
                    print(' * self  = {!r}'.format(self_value.size()))
                    print(' * other = {!r}'.format(other_value.size()))
                seen_keys['skipped'].add(key)

    if ignore_unset is True:
        self_unset_keys = []
    elif ignore_unset:
        self_unset_keys = list(ub.oset(self_unset_keys) - set(ignore_unset))

    if (self_unset_keys or other_unused_keys or seen_keys['partial_add_some']
            or seen_keys['partial_add_all']):
        if verbose > 0:
            if seen_keys:
                print('Pretrained weights are a partial fit')
            else:
                print('Pretrained weights do not fit!')
        if verbose > 1:
            print('Seen Keys: {}'.format(ub.repr2(seen_keys, nl=2)))
            print('Self Unset Keys: {}'.format(ub.repr2(self_unset_keys,
                                                        nl=1)))
            print('Other Unused keys: {}'.format(
                ub.repr2(other_unused_keys, nl=1)))
            print('summary:')
            seen_sum = ub.map_vals(len, seen_keys)
            print('Seen Num: {}'.format(ub.repr2(seen_sum, nl=2)))
            print('Self Unset Num: {}'.format(
                ub.repr2(len(self_unset_keys), nl=1)))
            print('Other Unused Num: {}'.format(
                ub.repr2(len(other_unused_keys), nl=1)))
        if leftover:
            if verbose > 0:
                print('Initializing unused keys using {}'.format(leftover))
            for key in self_unset_keys:
                if key.endswith('.num_batches_tracked'):
                    pass  # ignore num_batches_tracked
                elif key.endswith('.bias'):
                    self_state[key].fill_(0)
                else:
                    try:
                        leftover(self_state[key])
                    except Exception:
                        if verbose > 0:
                            print('Unable to init {} with {}'.format(
                                key, leftover))

    else:
        if verbose > 0:
            print('Pretrained weights are a perfect fit')
    model.load_state_dict(self_state)

    info = {
        'seen': seen_keys,
        'self_unset': self_unset_keys,
        'other_unused': other_unused_keys
    }
    return info
Example No. 20
def benchmark_hash_file():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --show
    """
    import ubelt as ub
    import random

    # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
    dpath = ub.ensuredir(ub.expandpath('$HOME/tmp'))

    rng = random.Random(0)
    # Create a pool of random chunks of data
    chunksize = int(2 ** 20)
    pool_size = 8
    part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)]

    #ITEM = 'JUST A STRING' * 100
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']

    scales = list(range(5, 10))
    import os

    results = ub.AutoDict()
    # Using json is faster, or at least as fast, in most cases
    # xxhash is also significantly faster than sha512
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2 ** s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        # Write a big file
        size_pool = [N]
        fpath = _write_random_file(dpath, part_pool, size_pool, rng)

        megabytes = os.stat(fpath).st_size / (2 ** 20)
        print('megabytes = {!r}'.format(megabytes))

        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_file(fpath, hasher=hasher)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh64'), ('sha1', 'xxh64'), ('xxh32', 'xxh64'), ('blake3', 'xxh64')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds')
        kwplot.show_if_requested()
Example No. 21
def randomized_ibeis_dset(dbname, dim=416):
    """
    CommandLine:
        xdoctest ~/code/netharn/netharn/examples/siam_ibeis.py randomized_ibeis_dset --show

    Example:
        >>> dbname = 'PZ_MTEST'
        >>> datasets = randomized_ibeis_dset(dbname)
        >>> # xdoctest: +REQUIRES(--show)
        >>> nh.util.qtensure()
        >>> self = datasets['train']
        >>> self.show_sample()
        >>> nh.util.show_if_requested()
    """
    from ibeis.algo.verif import vsone
    pblm = vsone.OneVsOneProblem.from_empty(dbname)

    # Simpler very randomized sample strategy
    pcc_sets = {
        'train': set(),
        'vali': set(),
        'test': set(),
    }

    vali_frac = .0
    test_frac = .1
    train_frac = 1 - (vali_frac + test_frac)

    category_probs = ub.odict([
        ('train', train_frac),
        ('test', test_frac),
        ('vali', vali_frac),
    ])

    rng = nh.util.ensure_rng(989540621)

    # Gather all PCCs
    pccs = sorted(map(frozenset, pblm.infr.positive_components()))

    # Each PCC in this group has a probability of going into the
    # either test / train / or vali split
    choices = rng.choice(list(category_probs.keys()),
                         p=list(category_probs.values()),
                         size=len(pccs))
    for key, pcc in zip(choices, pccs):
        pcc_sets[key].add(pcc)

    if __debug__:
        # Ensure sets of PCCs are disjoint!
        intersections = {}
        for key1, key2 in it.combinations(pcc_sets.keys(), 2):
            isect = pcc_sets[key1].intersection(pcc_sets[key2])
            intersections[(key1, key2)] = isect
        num_isects = ub.map_vals(len, intersections)
        if any(num_isects.values()):
            msg = 'Splits are not disjoint: {}'.format(
                ub.repr2(num_isects, sk=1))
            print(msg)
            raise AssertionError(msg)

    if True:
        num_pccs = ub.map_vals(len, pcc_sets)
        total = sum(num_pccs.values())
        fracs = {k: v / total for k, v in num_pccs.items()}
        print('Splits use the following fractions of data: {}'.format(
            ub.repr2(fracs, precision=4)))

        for key, want in category_probs.items():
            got = fracs[key]
            absdiff = abs(want - got)
            if absdiff > 0.1:
                raise AssertionError(
                    'Sampled fraction of {} for {!r} is significantly '
                    'different than what was requested: {}'.format(
                        got, key, want))

    test_dataset = RandomBalancedIBEISSample(pblm, pcc_sets['test'], dim=dim)
    train_dataset = RandomBalancedIBEISSample(pblm,
                                              pcc_sets['train'],
                                              dim=dim,
                                              augment=False)
    vali_dataset = RandomBalancedIBEISSample(pblm,
                                             pcc_sets['vali'],
                                             dim=dim,
                                             augment=False)

    datasets = {
        'train': train_dataset,
        'vali': vali_dataset,
        'test': test_dataset,
    }
    # datasets.pop('test', None)  # dont test for now (speed consideration)
    return datasets
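# Hedged sketch (illustrative only, not from the original source): the split
# above assigns each positive connected component (PCC) to train/test/vali
# independently with fixed probabilities, so the realized fractions are
# random. This standalone toy reproduces the idea with fake PCC placeholders.
def _demo_probabilistic_split(num_pccs=100, seed=0):
    import numpy as np
    import ubelt as ub
    rng = np.random.RandomState(seed)
    probs = ub.odict([('train', 0.8), ('test', 0.1), ('vali', 0.1)])
    pccs = [frozenset({i}) for i in range(num_pccs)]  # fake PCCs
    choices = rng.choice(list(probs.keys()), p=list(probs.values()),
                         size=len(pccs))
    splits = {key: set() for key in probs}
    for key, pcc in zip(choices, pccs):
        splits[key].add(pcc)
    fracs = ub.map_vals(lambda s: len(s) / num_pccs, splits)
    print('realized fractions: ' + ub.repr2(fracs, precision=3, nl=0))
    return splits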
Exemplo n.º 22
0
def freq_group(items, groupids):
    groups = ub.group_items(items, groupids)
    hist = ub.map_vals(len, groups)
    for k in ub.argsort(hist):
        yield groups[k]
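# Hedged usage sketch (not from the original source): freq_group yields the
# groups in order of increasing size, because ub.argsort on the length
# histogram orders the group keys by their counts. Assumes ubelt is imported
# as ub in this module.
def _demo_freq_group():
    items = ['a', 'b', 'c', 'd', 'e', 'f']
    groupids = [1, 1, 1, 2, 2, 3]
    grouped = list(freq_group(items, groupids))
    assert grouped == [['f'], ['d', 'e'], ['a', 'b', 'c']]
    return grouped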
Exemplo n.º 23
0
def time_ondisk_crop(size=512, dim=3, region='small_random', num=24):
    """
    Ignore:
        >>> from bench_subregion_imread import *  # NOQA
        >>> import xdev
        >>> globals().update(xdev.get_func_kwargs(time_ondisk_crop))
    """
    enabled = {
        'in_memory': False,
        'memmap': True,
        'PIL': False,
        'OpenCV': True,
        'VIPS': True,
        'GDAL': True,
        'HDF5': True,
    }

    print('\n---')

    # DATA = 'custom'
    DATA = 'random'

    if DATA == 'random':
        # data = (np.random.rand(500, 500, 3) * 255).astype(np.uint8)
        print('Generate random data, size={}, dim={}, mode={}'.format(
            size, dim, region))
        x, y = np.meshgrid(np.arange(0, size), np.arange(0, size))
        data = np.ascontiguousarray(
            (np.dstack([x] * dim) % 256).astype(np.uint8))
        data = data.squeeze()
    elif DATA == 'custom':
        print('USE CUSTOM data, size={}, dim={}, mode={}'.format(
            size, dim, region))
        assert False, 'custom_fpath must point to a real image for this mode'
        custom_fpath = '.ptif'
        import kwimage
        data = kwimage.imread(custom_fpath)
    else:
        raise KeyError(DATA)

    print('Make temp directory to prepare data')
    dpath = tempfile.mkdtemp()

    lossy_ext = ('.jpg', '.ptif', '.cog')

    img_fpaths = {
        # 'png': join(dpath, 'foo.png'),
        # 'jpg': join(dpath, 'foo.jpg'),
        # 'tif': join(dpath, 'foo.tif'),
    }
    pil_img = Image.fromarray(data)
    for k, v in img_fpaths.items():
        print('DUMP v = {!r}'.format(v))
        pil_img.save(v)

    img_fpaths['cog'] = join(dpath, 'foo.cog')
    import kwimage  # needed here; the import above only runs in the 'custom' branch
    # kwimage.imwrite(img_fpaths['cog'], data, backend='gdal', compress='LZW')
    kwimage.imwrite(img_fpaths['cog'], data, backend='gdal', compress='ZSTD')
    # imwrite_cloud_optimized_geotiff(img_fpaths['cog'], data)

    if DATA == 'custom':
        from os.path import splitext
        import shutil
        ext = splitext(custom_fpath)[1][1:]
        tmp_fpath = join(dpath, 'foo' + ext)
        shutil.copy2(custom_fpath, tmp_fpath)
        img_fpaths.update({
            'custom_' + ext: tmp_fpath,
        })

    mem_fpaths = {}
    if enabled['memmap']:
        mem_fpaths = {
            'npy': join(dpath, 'foo.npy'),
        }
        for key, fpath in mem_fpaths.items():
            print('DUMP fpath = {!r}'.format(fpath))
            np.save(fpath, data)

    h5_fpaths = {}
    if enabled['HDF5']:
        import h5py
        h5_params = {
            # 'basic': {},
            # 'chunks': {'chunks': (32, 32, 1)},
            # 'lzf': {'compression': 'lzf'},
            # 'lzf_chunk32': {'compression': 'lzf', 'chunks': (32, 32, 1)},
            'lzf_chunk128': {
                'compression': 'lzf',
                'chunks': (128, 128, 1)
            },
        }
        for key, kw in h5_params.items():
            print('Dump h5 ' + key)
            fpath = h5_fpaths[key] = join(dpath, key + '.h5')
            with h5py.File(fpath, 'w') as h5_file:
                dset = h5_file.create_dataset('DATA', data.shape, data.dtype,
                                              **kw)
                dset[...] = data

    import netharn as nh
    bytes_on_disk = {}
    for k, v in mem_fpaths.items():
        bytes_on_disk['mem_' + k] = nh.util.get_file_info(v)['filesize']
    for k, v in img_fpaths.items():
        bytes_on_disk['img_' + k] = nh.util.get_file_info(v)['filesize']
    for k, v in h5_fpaths.items():
        bytes_on_disk['hdf5_' + k] = nh.util.get_file_info(v)['filesize']

    mb_on_disk = ub.map_vals(lambda x: str(round(x * 1e-6, 2)) + ' MB',
                             bytes_on_disk)
    print('on-disk memory usage: ' + ub.repr2(mb_on_disk, nl=1))

    result = {}

    def record_result(timer):
        ti = timer.parent
        val = ti.min(), ti.mean(), ti.std()
        result[ti.label] = val

    rng = np.random.RandomState()

    def get_index():
        """ Get a subregion to load """
        if region == 'small_random':
            # Small window size, but random location
            size = (172, 172)
            h, w = size
            a = rng.randint(0, data.shape[0] - h)
            b = rng.randint(0, data.shape[1] - w)
            index = tuple([slice(a, a + h), slice(b, b + w)])
        elif region == 'random':
            a, b = sorted(rng.randint(0, data.shape[0], size=2))
            c, d = sorted(rng.randint(0, data.shape[1], size=2))
            index = tuple([slice(a, b + 1), slice(c, d + 1)])
        elif region == 'corner':
            index = tuple([slice(0, 8), slice(0, 8)])
        else:
            raise KeyError(region)
            # index = region
        if len(data.shape) > 2:
            index = index + tuple([slice(0, 3)])
        area = (index[1].start, index[0].start, index[1].stop, index[0].stop)
        shape = tuple([s.stop - s.start for s in index])
        return index, area, shape

    def TIMERIT(label):
        # Ensure each timer run uses the same random numbers
        rng.seed(0)
        return timerit.Timerit(
            num=num,
            bestof=1,
            label=label,
            # unit='us',
            unit='ms',
        )

    print('Begin benchmarks\n')

    if enabled['in_memory']:
        for timer in TIMERIT('in-memory slice'):
            index, area, shape = get_index()
            want = data[index]
            with timer:
                got = data[index]
        record_result(timer)

    if enabled['memmap']:
        for key, fpath in mem_fpaths.items():
            for timer in TIMERIT('np.memmap load+slice ' + key):
                index, area, shape = get_index()
                want = data[index]
                with timer:
                    file1 = np.memmap(fpath,
                                      dtype=data.dtype.name,
                                      shape=data.shape,
                                      offset=128,
                                      mode='r')
                    got = file1[index]
                assert np.all(got == want)
            record_result(timer)

    if enabled['memmap']:
        for key, fpath in mem_fpaths.items():
            for timer in TIMERIT('np.load load+slice ' + key):
                index, area, shape = get_index()
                want = data[index]
                with timer:
                    file2 = np.load(fpath, mmap_mode='r')
                    got = file2[index]
                assert np.all(got == want)
            record_result(timer)

    if enabled['PIL']:
        for key, fpath in img_fpaths.items():
            for timer in TIMERIT('PIL open+crop (minimal) ' + key):
                index, area, shape = get_index()
                want = data[index]
                with timer:
                    core = Image.open(fpath).crop(area)
            record_result(timer)

    if enabled['PIL']:
        for key, fpath in img_fpaths.items():
            for timer in TIMERIT('PIL open+crop+getdata+asarray ' + key):
                index, area, shape = get_index()
                want = data[index]
                with timer:
                    core = Image.open(fpath).crop(area)
                    got = np.asarray(core.getdata(), dtype=np.uint8)
                    got.shape = shape
            assert fpath.endswith(lossy_ext) or np.all(got == want)
            record_result(timer)

    if enabled['PIL']:
        for key, fpath in img_fpaths.items():
            for timer in TIMERIT('PIL open+asarray+slice ' + key):
                index, area, shape = get_index()
                want = data[index]
                with timer:
                    got = np.asarray(Image.open(fpath))[index]
            assert fpath.endswith(lossy_ext) or np.all(got == want)
            record_result(timer)

    if enabled['OpenCV']:
        for key, fpath in img_fpaths.items():
            for timer in TIMERIT('OpenCV imread+slice ' + key):
                index, area, shape = get_index()
                want = data[index]
                with timer:
                    got = cv2.imread(fpath, flags=cv2.IMREAD_UNCHANGED)[index]
                if len(index) > 2:
                    got = got[:, :, ::-1]
            assert fpath.endswith(lossy_ext) or np.all(got == want)
            record_result(timer)

    if enabled['GDAL']:
        for key, fpath in img_fpaths.items():
            for timer in TIMERIT('GDAL subregion ' + key):
                index, area, shape = get_index()
                want = data[index]
                with timer:
                    got = gdal_subregion_imread(fpath, index)
            assert fpath.endswith(lossy_ext) or np.all(got == want)
            record_result(timer)

    # pip install pyvips
    if enabled['VIPS']:
        import pyvips
        for key, fpath in img_fpaths.items():
            for timer in TIMERIT('VIPS ' + key):
                index, area, shape = get_index()
                want = data[index]
                left, top = area[0:2]
                width, height = shape[0:2][::-1]
                vips_img = pyvips.Image.new_from_file(
                    fpath,
                    # access='sequential',
                    access='random',
                    # memory=False,
                    # fail=True
                )
                with timer:
                    vips_sub = vips_img.crop(left, top, width, height)
                    got = np.ndarray(buffer=vips_sub.write_to_memory(),
                                     dtype=np.uint8,
                                     shape=[
                                         vips_sub.height, vips_sub.width,
                                         vips_sub.bands
                                     ])
            assert fpath.endswith(lossy_ext) or np.all(got == want)
            record_result(timer)

    if enabled['HDF5']:
        for key, fpath in h5_fpaths.items():
            for timer in TIMERIT('HDF5 ' + key):
                with h5py.File(fpath, 'r') as file:
                    dset = file['DATA']
                    index, area, shape = get_index()
                    want = data[index]
                    with timer:
                        got = dset[index]
                assert fpath.endswith(lossy_ext) or np.all(got == want)
            record_result(timer)

    return result
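# Hedged usage sketch (not from the original source): run the crop benchmark
# once and rank the enabled backends by their minimum measured time. Each
# value in the result dict is a (min, mean, std) tuple of seconds, as
# produced by record_result above.
def _summarize_crop_benchmark():
    import ubelt as ub
    result = time_ondisk_crop(size=512, dim=3, region='small_random', num=8)
    by_min = ub.odict(sorted(result.items(), key=lambda kv: kv[1][0]))
    for label, (tmin, tmean, tstd) in by_min.items():
        print('{:<40s} min={:.6f}s mean={:.6f}s'.format(label, tmin, tmean))
    return by_min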
Exemplo n.º 24
0
def mapping_stats(xid_to_yids):
    n_yids = list(ub.map_vals(len, xid_to_yids).values())
    return util.stats_dict(n_yids, n_extreme=True)
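# Hedged sketch (not from the original source): the same idea with plain
# numpy on a toy xid -> yids mapping; util.stats_dict above is assumed to
# produce a similar summary (mean / min / max plus extreme values).
def _demo_mapping_stats():
    import numpy as np
    import ubelt as ub
    xid_to_yids = {1: {10, 11}, 2: {12}, 3: set()}
    n_yids = np.array(list(ub.map_vals(len, xid_to_yids).values()))
    return {'mean': n_yids.mean(), 'min': n_yids.min(), 'max': n_yids.max()}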
Exemplo n.º 25
0
    def _build_index(self):
        """
        build reverse indexes

        Notation:
            aid - Annotation ID
            gid - imaGe ID
            cid - Category ID
        """
        # create index
        anns, cats, imgs = {}, {}, {}
        gid_to_aids = ub.ddict(set)
        cid_to_gids = ub.ddict(set)
        cid_to_aids = ub.ddict(set)

        # Build one-to-one self-lookup maps
        for cat in self.dataset.get('categories', []):
            cid = cat['id']
            if cid in cats:
                warnings.warn(
                    'Categories have the same id in {}:\n{} and\n{}'.format(
                        self, cats[cid], cat))
            cats[cid] = cat

        for img in self.dataset.get('images', []):
            gid = img['id']
            if gid in imgs:
                warnings.warn(
                    'Images have the same id in {}:\n{} and\n{}'.format(
                        self, imgs[gid], img))
            imgs[gid] = img

        for ann in self.dataset.get('annotations', []):
            aid = ann['id']
            if aid in anns:
                warnings.warn(
                    'Annotations have the same id in {}:\n{} and\n{}'.format(
                        self, anns[aid], ann))
            anns[aid] = ann

        # Build one-to-many lookup maps
        for ann in anns.values():
            try:
                aid = ann['id']
                gid = ann['image_id']
                cid = ann['category_id']
            except KeyError:
                raise KeyError('Annotation does not have ids {}'.format(ann))

            if not isinstance(aid, int):
                raise TypeError('bad aid={} type={}'.format(aid, type(aid)))
            if not isinstance(gid, int):
                raise TypeError('bad gid={} type={}'.format(gid, type(gid)))
            if not isinstance(cid, int):
                raise TypeError('bad cid={} type={}'.format(cid, type(cid)))

            gid_to_aids[gid].add(aid)
            cid_to_gids[cid].add(gid)
            cid_to_aids[cid].add(aid)
            if gid not in imgs:
                warnings.warn('Annotation {} in {} references '
                              'unknown image_id'.format(ann, self))
            if cid not in cats:
                warnings.warn('Annotation {} in {} references '
                              'unknown category_id'.format(ann, self))

        # Fix one-to-zero cases
        for cid in cats.keys():
            if cid not in cid_to_aids:
                cid_to_aids[cid] = set()
            if cid not in cid_to_gids:
                cid_to_gids[cid] = set()

        for gid in imgs.keys():
            if gid not in gid_to_aids:
                gid_to_aids[gid] = set()

        # create class members
        self.anns = anns
        self.imgs = imgs
        self.cats = cats
        self.gid_to_aids = ub.map_vals(sorted, gid_to_aids)
        self.cid_to_gids = ub.map_vals(sorted, cid_to_gids)
        self.cid_to_aids = ub.map_vals(sorted, cid_to_aids)
        self.name_to_cat = {cat['name']: cat for cat in self.cats.values()}
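# Hedged illustration (not from the original source): the reverse indexes
# built above take roughly this shape for a tiny COCO-style dataset; the toy
# ids below are made up.
def _demo_reverse_index_shape():
    import ubelt as ub
    anns = {
        1: {'id': 1, 'image_id': 10, 'category_id': 100},
        2: {'id': 2, 'image_id': 10, 'category_id': 101},
        3: {'id': 3, 'image_id': 11, 'category_id': 100},
    }
    gid_to_aids = ub.ddict(set)
    cid_to_aids = ub.ddict(set)
    for ann in anns.values():
        gid_to_aids[ann['image_id']].add(ann['id'])
        cid_to_aids[ann['category_id']].add(ann['id'])
    assert ub.map_vals(sorted, gid_to_aids) == {10: [1, 2], 11: [3]}
    assert ub.map_vals(sorted, cid_to_aids) == {100: [1, 3], 101: [2]}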
Exemplo n.º 26
0
def make_baseline_truthfiles():
    work_dir = ub.truepath('~/work')
    data_dir = ub.truepath('~/data')

    challenge_data_dir = join(data_dir, 'viame-challenge-2018')
    challenge_work_dir = join(work_dir, 'viame-challenge-2018')

    ub.ensuredir(challenge_work_dir)

    img_root = join(challenge_data_dir, 'phase0-imagery')
    annot_dir = join(challenge_data_dir, 'phase0-annotations')
    fpaths = list(glob.glob(join(annot_dir, '*.json')))
    # ignore the non-bounding box nwfsc and afsc datasets for now

    # exclude = ('nwfsc', 'afsc', 'mouss', 'habcam')
    # exclude = ('nwfsc', 'afsc', 'mouss',)
    # fpaths = [p for p in fpaths if not basename(p).startswith(exclude)]

    import json
    dsets = ub.odict()
    for fpath in fpaths:
        key = basename(fpath).split('.')[0]
        dsets[key] = json.load(open(fpath, 'r'))

    print('Merging')
    merged = coco_union(dsets)

    merged_fpath = join(challenge_work_dir, 'phase0-merged.mscoco.json')
    with open(merged_fpath, 'w') as fp:
        json.dump(merged, fp, indent=4)

    import copy
    self = CocoDataset(copy.deepcopy(merged), img_root=img_root, autobuild=False)
    self._build_index()
    self.run_fixes()

    if True:
        # remove all point annotations
        print('Remove point annotations')
        to_remove = []
        for ann in self.dataset['annotations']:
            if ann['roi_shape'] == 'point':
                to_remove.append(ann)
        for ann in to_remove:
            self.dataset['annotations'].remove(ann)
        self._build_index()

        # remove empty images
        print('Remove empty images')
        to_remove = []
        for gid in self.imgs.keys():
            aids = self.gid_to_aids.get(gid, [])
            if not aids:
                to_remove.append(self.imgs[gid])
        for img in to_remove:
            self.dataset['images'].remove(img)
        self._build_index()

    print('# self.anns = {!r}'.format(len(self.anns)))
    print('# self.imgs = {!r}'.format(len(self.imgs)))
    print('# self.cats = {!r}'.format(len(self.cats)))

    catname_to_nannots = ub.map_keys(lambda x: self.cats[x]['name'],
                                     ub.map_vals(len, self.cid_to_aids))
    catname_to_nannots = ub.odict(sorted(catname_to_nannots.items(),
                                         key=lambda kv: kv[1]))
    print(ub.repr2(catname_to_nannots))

    if False:
        # aid = list(self.anns.values())[0]['id']
        # self.show_annotation(aid)
        gids = sorted([gid for gid, aids in self.gid_to_aids.items() if aids])
        # import utool as ut
        # for gid in ut.InteractiveIter(gids):
        for gid in gids:
            from matplotlib import pyplot as plt
            fig = plt.figure(1)
            fig.clf()
            self.show_annotation(gid=gid)
            fig.canvas.draw()

        for ann in self.anns.values():
            primary_aid = ann['id']
            print('primary_aid = {!r}'.format(primary_aid))
            print(len(self.gid_to_aids[ann['image_id']]))

            if 'roi_shape' not in ann:
                ann['roi_shape'] = 'bounding_box'

            if ann['roi_shape'] == 'boundingBox':
                pass

            if ann['roi_shape'] == 'point':
                primary_aid = ann['id']
                print('primary_aid = {!r}'.format(primary_aid))
                print(len(self.gid_to_aids[ann['image_id']]))
                break

    # Split into train / test  set
    print('Splitting')
    skf = StratifiedGroupKFold(n_splits=2)
    groups = [ann['image_id'] for ann in self.anns.values()]
    y = [ann['category_id'] for ann in self.anns.values()]
    X = [ann['id'] for ann in self.anns.values()]
    split = list(skf.split(X=X, y=y, groups=groups))[0]
    train_idx, test_idx = split

    print('Taking subsets')
    aid_to_gid = {aid: ann['image_id'] for aid, ann in self.anns.items()}
    train_aids = list(ub.take(X, train_idx))
    test_aids = list(ub.take(X, test_idx))
    train_gids = sorted(set(ub.take(aid_to_gid, train_aids)))
    test_gids = sorted(set(ub.take(aid_to_gid, test_aids)))

    train_dset = self.subset(train_gids)
    test_dset = self.subset(test_gids)

    print('---------')
    print('# train_dset.anns = {!r}'.format(len(train_dset.anns)))
    print('# train_dset.imgs = {!r}'.format(len(train_dset.imgs)))
    print('# train_dset.cats = {!r}'.format(len(train_dset.cats)))
    print('---------')
    print('# test_dset.anns = {!r}'.format(len(test_dset.anns)))
    print('# test_dset.imgs = {!r}'.format(len(test_dset.imgs)))
    print('# test_dset.cats = {!r}'.format(len(test_dset.cats)))

    train_dset._ensure_imgsize()
    test_dset._ensure_imgsize()

    print('Writing')
    with open(join(challenge_work_dir, 'phase0-merged-train.mscoco.json'), 'w') as fp:
        json.dump(train_dset.dataset, fp, indent=4)

    with open(join(challenge_work_dir, 'phase0-merged-test.mscoco.json'), 'w') as fp:
        json.dump(test_dset.dataset, fp, indent=4)

    # Make a detectron yaml file
    config_text = ub.codeblock(
        """
        MODEL:
          TYPE: generalized_rcnn
          CONV_BODY: ResNet.add_ResNet50_conv4_body
          NUM_CLASSES: {num_classes}
          FASTER_RCNN: True
        NUM_GPUS: 1
        SOLVER:
          WEIGHT_DECAY: 0.0001
          LR_POLICY: steps_with_decay
          BASE_LR: 0.01
          GAMMA: 0.1
          # 1x schedule (note TRAIN.IMS_PER_BATCH: 1)
          MAX_ITER: 180000
          STEPS: [0, 120000, 160000]
        RPN:
          SIZES: (32, 64, 128, 256, 512)
        FAST_RCNN:
          ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head
          ROI_XFORM_METHOD: RoIAlign
        TRAIN:
          WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl
          DATASETS: ('/work/viame-challenge-2018/phase0-merged-train.mscoco.json',)
          IM_DIR: '/data/viame-challenge-2018/phase0-imagery'
          SCALES: (800,)
          MAX_SIZE: 1333
          IMS_PER_BATCH: 1
          BATCH_SIZE_PER_IM: 512
        TEST:
          DATASETS: ('/work/viame-challenge-2018/phase0-merged-test.mscoco.json',)
          IM_DIR: '/data/viame-challenge-2018/phase0-imagery'
          SCALES: (800,)
          MAX_SIZE: 1333
          NMS: 0.5
          RPN_PRE_NMS_TOP_N: 6000
          RPN_POST_NMS_TOP_N: 1000
        OUTPUT_DIR: /work/viame-challenge-2018/output
        """)
    config_text = config_text.format(
        num_classes=len(self.cats),
    )
    ub.writeto(join(challenge_work_dir, 'phase0-faster-rcnn.yaml'), config_text)

    docker_cmd = ('nvidia-docker run '
                  '-v {work_dir}:/work -v {data_dir}:/data '
                  '-it detectron:c2-cuda9-cudnn7 bash').format(
                      work_dir=work_dir, data_dir=data_dir)

    train_cmd = ('python2 tools/train_net.py '
                 '--cfg /work/viame-challenge-2018/phase0-faster-rcnn.yaml '
                 'OUTPUT_DIR /work/viame-challenge-2018/output')

    hacks = ub.codeblock(
        """
        git remote add Erotemic https://github.com/Erotemic/Detectron.git
        git fetch --all
        git checkout general_dataset

        # curl https://github.com/Erotemic/Detectron/blob/42d44b2d155c775dc509b6a44518d0c582f8cdf5/tools/train_net.py
        # wget https://github.com/Erotemic/Detectron/blob/42d44b2d155c775dc509b6a44518d0c582f8cdf5/lib/core/config.py
        """)

    print(docker_cmd)
    print(train_cmd)
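# Hedged sketch (not from the original source): the train/test split above
# keeps all annotations of an image in the same fold while roughly
# stratifying on category. scikit-learn >= 1.0 ships a StratifiedGroupKFold
# with the same interface, shown here on toy labels and groups.
def _demo_group_stratified_split():
    import numpy as np
    from sklearn.model_selection import StratifiedGroupKFold
    X = np.arange(12)                      # annotation ids
    y = np.array([0, 0, 1, 1, 0, 1] * 2)   # category ids
    groups = np.repeat(np.arange(6), 2)    # image ids (2 annots per image)
    skf = StratifiedGroupKFold(n_splits=2)
    train_idx, test_idx = next(iter(skf.split(X=X, y=y, groups=groups)))
    # no image id appears in both splits
    assert not set(groups[train_idx]) & set(groups[test_idx])
    return train_idx, test_idx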
Exemplo n.º 27
0
def fix_annotmatch_pzmaster1():
    """
    PZ_Master1 had annotmatch rowids that did not agree with the current name
    labeling. Looking at the inconsistencies in the graph interface was too
    cumbersome, because over 3000 annots were incorrectly grouped together.

    This function deletes any annotmatch rowid that is not consistent with the
    current labeling so we can go forward with using the new AnnotInference
    object
    """
    import wbia

    ibs = wbia.opendb('PZ_Master1')
    infr = wbia.AnnotInference(ibs=ibs, aids=ibs.get_valid_aids(), verbose=5)
    infr.initialize_graph()
    annots = ibs.annots()
    aid_to_nid = ut.dzip(annots.aids, annots.nids)

    if False:
        infr.reset_feedback()
        infr.ensure_mst()
        infr.apply_feedback_edges()
        infr.relabel_using_reviews()
        infr.start_qt_interface()

    # Get annotmatch rowids that agree with current labeling
    if False:
        annotmatch = ibs.db.get_table_as_pandas('annotmatch')
        import pandas as pd

        flags1 = pd.isnull(annotmatch['annotmatch_evidence_decision'])
        flags2 = annotmatch['annotmatch_tag_text'] == ''
        bad_part = annotmatch[flags1 & flags2]
        rowids = bad_part.index.tolist()
        ibs.delete_annotmatch(rowids)

    if False:
        # Delete bidirectional annotmatches
        annotmatch = ibs.db.get_table_as_pandas('annotmatch')
        df = annotmatch.set_index(['annot_rowid1', 'annot_rowid2'])

        # Find entries that have both directions
        pairs1 = annotmatch[['annot_rowid1', 'annot_rowid2']].values
        f_edges = {tuple(p) for p in pairs1}
        b_edges = {tuple(p[::-1]) for p in pairs1}
        isect_edges = {tuple(sorted(p)) for p in b_edges.intersection(f_edges)}
        isect_edges1 = list(isect_edges)
        isect_edges2 = [p[::-1] for p in isect_edges]

        # cols = ['annotmatch_evidence_decision', 'annotmatch_tag_text']
        import pandas as pd

        custom_ = {
            (559, 4909): (False, ['photobomb']),
            (7918, 8041): (False, ['photobomb']),
            (6634, 6754): (False, ['photobomb']),
            (3707, 3727): (False, ['photobomb']),
            (86, 103): (False, ['photobomb']),
        }
        extra_ = {}

        fixme_edges = []

        d1 = df.loc[isect_edges1].reset_index(drop=False)
        d2 = df.loc[isect_edges2].reset_index(drop=False)
        flags = d1['annotmatch_evidence_decision'] != d2[
            'annotmatch_evidence_decision']
        from wbia.tag_funcs import _parse_tags

        for f, r1, r2 in zip(flags, d1.iterrows(), d2.iterrows()):
            v1, v2 = r1[1], r2[1]
            aid1 = v1['annot_rowid1']
            aid2 = v1['annot_rowid2']
            truth_real = (ibs.const.EVIDENCE_DECISION.POSITIVE
                          if aid_to_nid[aid1] == aid_to_nid[aid2] else
                          ibs.const.EVIDENCE_DECISION.NEGATIVE)
            truth1 = v1['annotmatch_evidence_decision']
            truth2 = v2['annotmatch_evidence_decision']
            t1 = _parse_tags(v1['annotmatch_tag_text'])
            t2 = _parse_tags(v2['annotmatch_tag_text'])
            newtag = ut.union_ordered(t1, t2)
            if (aid1, aid2) in custom_:
                continue
            fixme_flag = False
            if not pd.isnull(truth1):
                if truth_real != truth1:
                    fixme_flag = True
            if not pd.isnull(truth2):
                if truth_real != truth2:
                    fixme_flag = True
            if fixme_flag:
                logger.info('newtag = %r' % (newtag, ))
                logger.info('truth_real = %r' % (truth_real, ))
                logger.info('truth1 = %r' % (truth1, ))
                logger.info('truth2 = %r' % (truth2, ))
                logger.info('aid1 = %r' % (aid1, ))
                logger.info('aid2 = %r' % (aid2, ))
                fixme_edges.append((aid1, aid2))
            else:
                extra_[(aid1, aid2)] = (truth_real, newtag)

        extra_.update(custom_)
        new_pairs = extra_.keys()
        new_truths = ut.take_column(ut.dict_take(extra_, new_pairs), 0)
        new_tags = ut.take_column(ut.dict_take(extra_, new_pairs), 1)
        new_tag_texts = [';'.join(t) for t in new_tags]
        aids1, aids2 = ut.listT(new_pairs)

        # Delete the old
        ibs.delete_annotmatch((d1['annotmatch_rowid'].values.tolist() +
                               d2['annotmatch_rowid'].values.tolist()))

        # Add the new
        ams = ibs.add_annotmatch_undirected(aids1, aids2)
        ibs.set_annotmatch_evidence_decision(ams, new_truths)
        ibs.set_annotmatch_tag_text(ams, new_tag_texts)

        if False:
            import wbia.guitool as gt

            gt.ensure_qapp()
            ut.qtensure()
            from wbia.gui import inspect_gui

            inspect_gui.show_vsone_tuner(ibs, aid1, aid2)

        # pairs2 = pairs1.T[::-1].T
        # idx1, idx2 = ut.isect_indices(list(map(tuple, pairs1)),
        #                               list(map(tuple, pairs2)))
        # r_edges = list(set(map(tuple, map(sorted, pairs1[idx1]))))
        # unique_pairs = list(set(map(tuple, map(sorted, pairs1[idx1]))))
        # df = annotmatch.set_index(['annot_rowid1', 'annot_rowid2'])

    x = ut.ddict(list)
    annotmatch = ibs.db.get_table_as_pandas('annotmatch')
    import ubelt as ub

    _iter = annotmatch.iterrows()
    prog = ub.ProgIter(_iter, length=len(annotmatch))
    for k, m in prog:
        aid1 = m['annot_rowid1']
        aid2 = m['annot_rowid2']
        if m['annotmatch_evidence_decision'] == ibs.const.EVIDENCE_DECISION.POSITIVE:
            if aid_to_nid[aid1] == aid_to_nid[aid2]:
                x['agree1'].append(k)
            else:
                x['disagree1'].append(k)
        elif m['annotmatch_evidence_decision'] == ibs.const.EVIDENCE_DECISION.NEGATIVE:
            if aid_to_nid[aid1] == aid_to_nid[aid2]:
                x['disagree2'].append(k)
            else:
                x['agree2'].append(k)

    ub.map_vals(len, x)
    ut.dict_hist(annotmatch.loc[x['disagree1']]['annotmatch_tag_text'])

    disagree1 = annotmatch.loc[x['disagree1']]
    pb_disagree1 = disagree1[disagree1['annotmatch_tag_text'] == 'photobomb']
    aids1 = pb_disagree1['annot_rowid1'].values.tolist()
    aids2 = pb_disagree1['annot_rowid2'].values.tolist()
    aid_pairs = list(zip(aids1, aids2))
    infr = wbia.AnnotInference.from_pairs(aid_pairs, ibs=ibs, verbose=5)
    if False:
        feedback = infr.read_wbia_annotmatch_feedback(edges=infr.edges())
        infr.external_feedback = feedback
        infr.apply_feedback_edges()
        infr.start_qt_interface(loop=False)

    # Delete these values
    if False:
        nonpb_disagree1 = disagree1[
            disagree1['annotmatch_tag_text'] != 'photobomb']
        disagree2 = annotmatch.loc[x['disagree2']]
        ibs.delete_annotmatch(nonpb_disagree1['annotmatch_rowid'])
        ibs.delete_annotmatch(disagree2['annotmatch_rowid'])

    # ut.dict_hist(disagree1['annotmatch_tag_text'])
    import networkx as nx

    graph = nx.Graph()
    graph.add_edges_from(
        zip(pb_disagree1['annot_rowid1'], pb_disagree1['annot_rowid2']))
    list(nx.connected_components(graph))

    set(annotmatch.loc[x['disagree2']]['annotmatch_tag_text'])
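# Hedged sketch (not from the original source): the agree/disagree tally above
# just groups annotmatch rows by whether the stored decision matches the
# current name labeling; the toy rows below imitate that bookkeeping.
def _demo_agreement_tally():
    import ubelt as ub
    aid_to_nid = {1: 'a', 2: 'a', 3: 'b'}
    rows = [
        {'key': 0, 'aid1': 1, 'aid2': 2, 'decision': 'positive'},
        {'key': 1, 'aid1': 1, 'aid2': 3, 'decision': 'positive'},
        {'key': 2, 'aid1': 2, 'aid2': 3, 'decision': 'negative'},
    ]
    tally = ub.ddict(list)
    for row in rows:
        same_name = aid_to_nid[row['aid1']] == aid_to_nid[row['aid2']]
        agrees = (row['decision'] == 'positive') == same_name
        tally['agree' if agrees else 'disagree'].append(row['key'])
    return ub.map_vals(len, tally)  # -> {'agree': 2, 'disagree': 1}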
Exemplo n.º 28
0
def coerce_datasets(config, build_hashid=False, verbose=1):
    """
    Coerce train / val / test datasets from standard netharn config keys

    TODO:
        * Does this belong in netharn?

    This only looks at the following keys in config:
        * datasets
        * train_dataset
        * vali_dataset
        * test_dataset

    Example:
        >>> import kwcoco
        >>> import ndsampler.coerce_data
        >>> config = {'datasets': 'special:shapes'}
        >>> print('config = {!r}'.format(config))
        >>> dsets = ndsampler.coerce_data.coerce_datasets(config)
        >>> print('dsets = {!r}'.format(dsets))

        >>> config = {'datasets': 'special:shapes256'}
        >>> ndsampler.coerce_data.coerce_datasets(config)

        >>> config = {
        >>>     'datasets': kwcoco.CocoDataset.demo('shapes'),
        >>> }
        >>> coerce_datasets(config)
        >>> coerce_datasets({
        >>>     'datasets': kwcoco.CocoDataset.demo('shapes'),
        >>>     'test_dataset': kwcoco.CocoDataset.demo('photos'),
        >>> })
    """

    # Ideally the user specifies a standard train/vali/test split
    def _rectify_fpath(key):
        fpath = key
        # str.lstrip strips a set of characters, not a prefix, so remove the
        # optional 'path:' prefix explicitly
        if fpath.lower().startswith('path:'):
            fpath = fpath[len('path:'):]
        fpath = ub.expandpath(fpath)
        return fpath

    def _ensure_coco(coco):
        # Map a file path or an in-memory dataset to a CocoDataset
        import kwcoco
        import six
        from os.path import exists
        if coco is None:
            return None
        elif isinstance(coco, six.string_types):
            fpath = _rectify_fpath(coco)
            if exists(fpath):
                with ub.Timer(
                        'read kwcoco dataset: fpath = {!r}'.format(fpath)):
                    coco = kwcoco.CocoDataset(fpath, autobuild=False)
                print('building kwcoco index')
                coco._build_index()
            else:
                if not coco.lower().startswith('special:'):
                    import warnings
                    warnings.warn("please prefix dataset codes with 'special:'")
                    code = coco
                else:
                    code = coco.lower()[len('special:'):]
                coco = kwcoco.CocoDataset.demo(code)
        else:
            # print('live dataset')
            assert isinstance(coco, kwcoco.CocoDataset)
        return coco

    config = config.copy()

    subsets = {
        'train': config.get('train_dataset', None),
        'vali': config.get('vali_dataset', None),
        'test': config.get('test_dataset', None),
    }

    # specifying any train / vali / test disables datasets
    if any(d is not None for d in subsets.values()):
        config['datasets'] = None

    if verbose:
        print('[ndsampler.coerce_data] Checking for explicit subsets')
    subsets = ub.map_vals(_ensure_coco, subsets)

    # However, sometimes they just specify a single dataset, and we need to
    # make a split for it.
    # print('config = {!r}'.format(config))
    base = _ensure_coco(config.get('datasets', None))
    print('[ndsampler.coerce_data] base = {!r}'.format(base))
    if base is not None:
        if verbose:
            print('Splitting base into train/vali')
        # TODO: the actual split may need to be cached.
        factor = config.get('split_factor', 3)
        split_gids = _split_train_vali_test(base, factor=factor)
        if config.get('no_test', False):
            split_gids['train'] += split_gids.pop('test')
        for tag in split_gids.keys():
            gids = split_gids[tag]
            subset = base.subset(sorted(gids), copy=True)
            subset.tag = base.tag + '-' + tag
            subsets[tag] = subset

    subsets = {k: v for k, v in subsets.items() if v is not None}
    if build_hashid:
        print('Building subset hashids')
        for tag, subset in subsets.items():
            print('Build index for {}'.format(subset.tag))
            subset._build_index()
            print('Build hashid for {}'.format(subset.tag))
            subset._build_hashid(hash_pixels=False, verbose=10)

    # if verbose:
    #     print(_catfreq_columns_str(subsets))
    return subsets
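# Hedged usage sketch (not from the original source): coerce_datasets accepts
# either explicit train/vali/test keys or a single 'datasets' entry that it
# splits itself via the module's _split_train_vali_test helper. The
# 'special:shapes' code resolves to a small kwcoco demo dataset.
def _demo_coerce_datasets():
    config = {'datasets': 'special:shapes'}
    subsets = coerce_datasets(config, verbose=0)
    return {tag: len(dset.imgs) for tag, dset in subsets.items()}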
Exemplo n.º 29
0
    def __init__(api):
        api.base = 'https://pogoapi.net/api/v1/'
        api.routes = {
            'pokemon_stats': api.base + 'pokemon_stats.json',
            'current_pokemon_moves': api.base + 'current_pokemon_moves.json',
            'pokemon_evolutions': api.base + 'pokemon_evolutions.json',
            'cp_multiplier': api.base + 'cp_multiplier.json',
            'pokemon_types': api.base + 'pokemon_types.json',
            'charged_moves': api.base + 'charged_moves.json',
            'fast_moves': api.base + 'fast_moves.json',
            'type_effectiveness': api.base + 'type_effectiveness.json',
            'pokemon_powerup_requirements':
            api.base + 'pokemon_powerup_requirements.json',
            'pokemon_candy_to_evolve':
            api.base + 'pokemon_candy_to_evolve.json',
            'pokemon_buddy_distances':
            api.base + 'pokemon_buddy_distances.json',
            'shadow_pokemon': api.base + 'shadow_pokemon.json',
            'pokemon_forms': api.base + 'pokemon_forms.json',
            'pvp_exclusive_pokemon': api.base + 'pvp_exclusive_pokemon.json',
            'galarian_pokemon': api.base + 'galarian_pokemon.json',
            'alolan_pokemon': api.base + 'alolan_pokemon.json',
            'shiny_pokemon': api.base + 'shiny_pokemon.json',
            'mega_pokemon': api.base + 'mega_pokemon.json',
            'baby_pokemon': api.base + 'baby_pokemon.json',
            'nesting_pokemon': api.base + 'nesting_pokemon.json',
            'released_pokemon': api.base + 'released_pokemon.json',
            'pokemon_names': api.base + 'pokemon_names.json',
            'api_hashes': api.base + 'api_hashes.json',
            'pvp_fast_moves': api.base + 'pvp_fast_moves.json',
            'pvp_charged_moves': api.base + 'pvp_charged_moves.json',
        }

        # TODO: determine when to redownload

        api.data = {}
        for key, url in api.routes.items():

            redo = 0
            data_fpath = ub.grabdata(url,
                                     verbose=1,
                                     redo=redo,
                                     expires=24 * 60 * 60)

            with open(data_fpath, 'r') as file:
                data = json.load(file)
            api.data[key] = data

        # Make the API global for now
        pokemon_stats = api.data['pokemon_stats']
        _name_to_stats = ub.group_items(
            pokemon_stats, lambda item: item['pokemon_name'].lower())
        _name_to_stats = dict(_name_to_stats)
        api.name_to_stats = _name_to_stats

        _name_to_moves = ub.group_items(
            api.data['current_pokemon_moves'],
            lambda item: item['pokemon_name'].lower())
        _name_to_moves.default_factory = None
        _name_to_moves = dict(_name_to_moves)

        # base = 'http://pokeapi.co/api/v2/pokemon/'
        api.name_to_moves = _name_to_moves

        evolutions = api.data['pokemon_evolutions']
        _name_to_evolutions = ub.group_items(
            evolutions, lambda item: item['pokemon_name'].lower())
        _name_to_evolutions = dict(_name_to_evolutions)

        for key, form_stats in api.name_to_stats.items():
            if key not in _name_to_evolutions:
                noevos = []
                for s in form_stats:
                    empty = ub.dict_isect(
                        s, {'form', 'pokemon_name', 'pokemon_id'})
                    empty['evolutions'] = []
                    noevos.append(empty)
                _name_to_evolutions[key] = noevos

        _name_to_types = ub.group_items(
            api.data['pokemon_types'],
            lambda item: item['pokemon_name'].lower())
        _name_to_types = dict(_name_to_types)
        api.name_to_type = _name_to_types

        evo_graph = nx.DiGraph()
        for name, form_evo_list in _name_to_evolutions.items():
            for form_evo in form_evo_list:
                u = form_evo['pokemon_name'].lower()
                evo_graph.add_node(u)
                for evo in form_evo['evolutions']:
                    v = evo['pokemon_name'].lower()
                    evo_graph.add_edge(u, v)

        api.name_to_family = {}
        api.name_to_base = {}

        evo_graph.remove_edges_from(nx.selfloop_edges(evo_graph))
        api.evo_graph = evo_graph
        for cc in list(nx.connected_components(api.evo_graph.to_undirected())):
            bases = [n for n in cc if len(evo_graph.pred[n]) == 0]
            base = bases[0]
            for n in cc:
                api.name_to_family[n] = cc
                api.name_to_base[n] = base

        api.name_to_evolutions = _name_to_evolutions

        api.pve_fast_moves = ub.group_items(
            api.data['fast_moves'],
            lambda item: normalize(item['name'].lower()))
        api.pve_fast_moves.default_factory = None

        api.pve_charged_moves = ub.group_items(
            api.data['charged_moves'],
            lambda item: normalize(item['name'].lower()))
        api.pve_charged_moves.default_factory = None

        api.pvp_fast_moves = ub.group_items(
            api.data['pvp_fast_moves'],
            lambda item: normalize(item['name'].lower()))
        api.pvp_fast_moves.default_factory = None

        api.pvp_charged_moves = ub.group_items(
            api.data['pvp_charged_moves'],
            lambda item: normalize(item['name'].lower()))
        api.pvp_charged_moves.default_factory = None

        if 0:
            ub.map_vals(len, api.pve_fast_moves)
            ub.map_vals(len, api.pve_charged_moves)

        api.learnable = {
            # TODO: remove
            'stunfisk_galarian': {
                'fast': [
                    'MUD_SHOT',
                    'METAL_CLAW',
                ],
                'charge': [
                    'EARTHQUAKE',
                    'FLASH_CANNON',
                    'MUDDY_WATER',
                    'ROCK_SLIDE',
                ]
            }
        }
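# Hedged sketch (not from the original source): the name_to_base computation
# above finds, for each evolution family (a weakly connected component of the
# DiGraph), the node with no predecessors. A toy graph illustrates the idea.
def _demo_base_forms():
    import networkx as nx
    evo_graph = nx.DiGraph()
    evo_graph.add_edges_from([
        ('charmander', 'charmeleon'),
        ('charmeleon', 'charizard'),
        ('eevee', 'vaporeon'),
        ('eevee', 'jolteon'),
    ])
    name_to_base = {}
    for cc in nx.connected_components(evo_graph.to_undirected()):
        bases = [n for n in cc if len(evo_graph.pred[n]) == 0]
        for n in cc:
            name_to_base[n] = bases[0]
    assert name_to_base['charizard'] == 'charmander'
    assert name_to_base['jolteon'] == 'eevee'
    return name_to_base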
Exemplo n.º 30
0
def instance_fscore(gti, uncertain, dsm, pred, info=False):
    """
    path = '/home/local/KHQ/jon.crall/data/work/urban_mapper/eval/input_4224-rwyxarza/solver_4214-yxalqwdk_unet_vgg_nttxoagf_a=1,n_ch=5,n_cl=3/_epoch_00000236/restiched/pred'

    path = ub.truepath(
        '~/remote/aretha/data/work/urban_mapper2/test/input_4224-exkudlzu/'
        'solver_4214-guwsobde_unet_mmavmuou_eqnoygqy_a=1,c=RGB,n_ch=5,n_cl=4/'
        '_epoch_00000154/restiched/pred')
    mode_paths = sorted(glob.glob(path + '/*.png'))

    def instance_label(pred, k=15, n_iters=1, dist_thresh=5, watershed=False):
        mask = pred

        # noise removal
        if k > 1 and n_iters > 0:
            kernel = np.ones((k, k), np.uint8)
            mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel,
                                    iterations=n_iters)

        if watershed:
            from clab.torch import filters
            mask = filters.watershed_filter(mask, dist_thresh=dist_thresh)

        mask = mask.astype(np.uint8)
        n_ccs, cc_labels = cv2.connectedComponents(mask, connectivity=4)
        return cc_labels

    from clab.tasks.urban_mapper_3d import UrbanMapper3D
    task = UrbanMapper3D('', '')

    fscores = []
    for pred_fpath in ub.ProgIter(mode_paths):
        pass
        gtl_fname = basename(pred_fpath).replace('.png', '_GTL.tif')
        gti_fname = basename(pred_fpath).replace('.png', '_GTI.tif')
        dsm_fname = basename(pred_fpath).replace('.png', '_DSM.tif')
        bgr_fname = basename(pred_fpath).replace('.png', '_RGB.tif')
        gtl_fpath = join(ub.truepath('~/remote/aretha/data/UrbanMapper3D/training/'), gtl_fname)
        gti_fpath = join(ub.truepath('~/remote/aretha/data/UrbanMapper3D/training/'), gti_fname)
        dsm_fpath = join(ub.truepath('~/remote/aretha/data/UrbanMapper3D/training/'), dsm_fname)
        bgr_fpath = join(ub.truepath('~/remote/aretha/data/UrbanMapper3D/training/'), bgr_fname)

        pred_seg = util.imread(pred_fpath)

        pred = instance_label2(pred_seg, dist_thresh=d, k=k, watershed=True)
        gti = util.imread(gti_fpath)
        gtl = util.imread(gtl_fpath)
        dsm = util.imread(dsm_fpath)
        bgr = util.imread(bgr_fpath)

        uncertain = (gtl == 65)

        fscore = instance_fscore(gti, uncertain, dsm, pred)
        fscores.append(fscore)
    print('k = {!r}'.format(k))
    print('d = {!r}'.format(d))
    print(np.mean(fscores))


    from clab import profiler
    instance_fscore_ = dynamic_profile(instance_fscore)
    fscore = instance_fscore_(gti, uncertain, dsm, pred)
    instance_fscore_.profile.profile.print_stats()
    """
    def _bbox(arr):
        # r1, c1, r2, c2
        return np.hstack([arr.min(axis=0), arr.max(axis=0)])

    def cc_locs(ccs):
        rc_locs = np.where(ccs > 0)
        rc_ids = ccs[rc_locs]
        rc_arr = np.ascontiguousarray(np.vstack(rc_locs).T)
        unique_labels, groupxs = util.group_indices(rc_ids)
        grouped_arrs = util.apply_grouping(rc_arr, groupxs, axis=0)
        id_to_rc = ub.odict(zip(unique_labels, grouped_arrs))
        return id_to_rc, unique_labels, groupxs, rc_arr

    (true_rcs_arr, group_true_labels, true_groupxs, true_rc_arr) = cc_locs(gti)

    (pred_rcs_arr, group_pred_labels, pred_groupxs,
     pred_rc_arr) = cc_locs(pred)

    DSM_NAN = -32767
    MIN_SIZE = 100
    MIN_IOU = 0.45
    # H, W = pred.shape[0:2]

    # --- Find uncertain truth ---
    # any gt-building explicitly labeled in the GTL is uncertain
    uncertain_labels = set(np.unique(gti[uncertain.astype(bool)]))
    # Any gt-building less than 100px or at the boundary is uncertain.
    for label, rc_arr in true_rcs_arr.items():
        if len(rc_arr) < MIN_SIZE:
            rc_arr = np.array(list(rc_arr))
            if (np.any(rc_arr == 0) or np.any(rc_arr == 2047)):
                uncertain_labels.add(label)
            else:
                rc_loc = tuple(rc_arr.T)
                is_invisible = (dsm[rc_loc] == DSM_NAN)
                if np.any(is_invisible):
                    invisible_rc = rc_arr.compress(is_invisible, axis=0)
                    invisible_rc_set = set(map(tuple, invisible_rc))
                    # Remove invisible pixels
                    remain_rc_set = list(
                        set(map(tuple, rc_arr)).difference(invisible_rc_set))
                    true_rcs_arr[label] = np.array(remain_rc_set)
                    uncertain_labels.add(label)

    def make_int_coords(rc_arr, unique_labels, groupxs):
        # using flat indices instead of tuples gives the intersection a modest
        # speedup; multiplying the row by the image width makes each (r, c)
        # map to a unique integer
        rc_int = rc_arr.T[0] * pred.shape[1] + rc_arr.T[1]
        id_to_rc_int = ub.odict(
            zip(unique_labels, map(set, util.apply_grouping(rc_int, groupxs))))
        return id_to_rc_int

    # Make intersection a bit faster by filtering via bbox first
    true_rcs_bbox = ub.map_vals(_bbox, true_rcs_arr)
    pred_rcs_bbox = ub.map_vals(_bbox, pred_rcs_arr)

    true_bboxes = np.array(list(true_rcs_bbox.values()))
    pred_bboxes = np.array(list(pred_rcs_bbox.values()))

    candidate_matches = {}
    for plabel, pb in zip(group_pred_labels, pred_bboxes):
        irc1 = np.maximum(pb[0:2], true_bboxes[:, 0:2])
        irc2 = np.minimum(pb[2:4], true_bboxes[:, 2:4])
        irc1 = np.minimum(irc1, irc2, out=irc1)
        isect_area = np.prod(np.abs(irc2 - irc1), axis=1)
        tlabels = list(ub.take(group_true_labels, np.where(isect_area)[0]))
        candidate_matches[plabel] = set(tlabels)

    # using nums instead of tuples gives the intersection a modest speedup
    pred_rcs_ = make_int_coords(pred_rc_arr, group_pred_labels, pred_groupxs)
    true_rcs_ = make_int_coords(true_rc_arr, group_true_labels, true_groupxs)

    # Greedy matching
    unused_true_rcs = true_rcs_.copy()
    FP = TP = FN = 0
    unused_true_keys = set(unused_true_rcs.keys())

    assignment = []
    fp_labels = []
    fn_labels = []
    tp_labels = []

    for pred_label, pred_rc_set in pred_rcs_.items():

        best_score = (-np.inf, -np.inf)
        best_label = None

        # Only check unused true labels that intersect with the predicted bbox
        true_cand = candidate_matches[pred_label] & unused_true_keys
        for true_label in true_cand:
            true_rc_set = unused_true_rcs[true_label]
            n_isect = len(pred_rc_set.intersection(true_rc_set))
            iou = n_isect / (len(true_rc_set) + len(pred_rc_set) - n_isect)
            if iou > MIN_IOU:
                score = (iou, -true_label)
                if score > best_score:
                    best_score = score
                    best_label = true_label

        if best_label is not None:
            assignment.append((pred_label, best_label, best_score[0]))
            unused_true_keys.remove(best_label)
            if best_label not in uncertain_labels:
                TP += 1
                tp_labels.append((pred_label, best_label, best_score[0]))
        else:
            FP += 1
            fp_labels.append(pred_label)

    # Had two bugs:
    # * used wrong variable to count false negs (all true were labeled as FN)
    #   (massively increasing FN)
    # * Certain true buildings are marked as uncertain, but I was checking
    #   against the pred labels instead (possibly decreasing/increasing TP)

    fn_labels = unused_true_keys - uncertain_labels  # NOQA
    FN = len(fn_labels)

    precision = TP / (TP + FP) if TP > 0 else 0
    recall = TP / (TP + FN) if TP > 0 else 0
    if precision > 0 and recall > 0:
        f_score = 2 * precision * recall / (precision + recall)
    else:
        f_score = 0

    # They multiply by 1e6, but lets not do that.
    if info:
        infod = {
            'assign': assignment,
            'tp': tp_labels,
            'fp': fp_labels,
            'fn': fn_labels,
            'uncertain': uncertain_labels,
        }
        return (f_score, precision, recall), infod

    return (f_score, precision, recall)
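# Hedged sketch (not from the original source): the core of the scoring above
# is a greedy one-to-one assignment of predicted to true instances by pixel
# IoU. This toy version applies the same MIN_IOU rule to plain pixel sets and
# returns raw TP / FP / FN counts (uncertain handling omitted).
def _demo_greedy_iou_match(true_sets, pred_sets, min_iou=0.45):
    unused = set(true_sets.keys())
    tp = fp = 0
    for pred_label, pred_pix in pred_sets.items():
        best_iou, best_label = -1.0, None
        for true_label in unused:
            true_pix = true_sets[true_label]
            isect = len(pred_pix & true_pix)
            iou = isect / (len(pred_pix) + len(true_pix) - isect)
            if iou > min_iou and iou > best_iou:
                best_iou, best_label = iou, true_label
        if best_label is not None:
            unused.remove(best_label)
            tp += 1
        else:
            fp += 1
    fn = len(unused)
    return tp, fp, fn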