Exemplo n.º 1
0
    def update_neighbors(self):
        """
        Refresh the neighbor lookup and the per-boid speed / heading.

        Every unordered pair of positions in ``self.pos`` is compared against
        ``self.config['perception_thresh']``; pairs strictly closer than the
        threshold are neighbors of each other.

        Sets:
            self.rx_to_neighb_cxs (dict): maps a boid index to the array of
                indices of its neighbors. Keys come from grouping the pairs
                that passed the threshold, so only boids that have at least
                one neighbor appear.
            self.speeds (ndarray): norm of each row of ``self.vel``.
            self.dirs (ndarray): ``self.vel`` divided by its speed.
        """
        # TODO: this should be done with a fast spatial index, but
        # unfortunately I don't see any existing implementations that make it
        # easy to support moving points.
        num_boids = len(self.pos)

        # Condensed distances: one entry per (row < col) pair, in the same
        # order as the upper-triangle indices generated below.
        condensed_dists = pdist(self.pos)
        is_close = condensed_dists < self.config['perception_thresh']
        upper_rows, upper_cols = np.triu_indices(num_boids, k=1)

        close_rows = upper_rows[is_close]
        close_cols = upper_cols[is_close]

        # Each pair is only listed once above; mirror it so the relation is
        # recorded in both directions.
        sources = np.r_[close_rows, close_cols]
        targets = np.r_[close_cols, close_rows]

        unique_sources, groupxs = kwarray.group_indices(sources)
        grouped_targets = kwarray.apply_grouping(targets, groupxs)
        self.rx_to_neighb_cxs = ub.dzip(unique_sources, grouped_targets)

        # Compute speed and direction of every boid.
        # NOTE: a boid with zero velocity divides by zero here.
        speeds = np.linalg.norm(self.vel, axis=1)
        self.speeds = speeds
        self.dirs = self.vel / speeds[:, None]
Exemplo n.º 2
0
def setup_datasets(workdir=None):
    """
    Build the MNIST train / validation (and optionally test) datasets.

    The MNIST learning set is shuffled with a fixed seed and the first 15% of
    the shuffled indices become the validation split; the rest is used for
    training. The ``--reduction`` command line value (default 1) is used as a
    stride to subsample both splits. The test split is only returned when the
    ``--test`` command line flag is given.

    Each returned dataset gets a ``pccs`` attribute: a list of lists of item
    indices, one inner list per distinct label. The training dataset also
    gets an ``input_id`` derived from a hash of its indices.

    Args:
        workdir (str, default=None): directory for the MNIST data. Defaults
            to the expansion of ``~/data/mnist/``.

    Returns:
        Tuple[dict, str]: ``(datasets, workdir)`` where ``datasets`` maps a
            split tag ('train', 'vali', and possibly 'test') to a dataset.
    """
    if workdir is None:
        workdir = ub.expandpath('~/data/mnist/')

    # Only tensor conversion is applied; normalization with
    # Normalize((0.1307,), (0.3081,)) is intentionally left disabled.
    to_tensor = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ])

    learn_dset = nh.data.MNIST(workdir, transform=to_tensor, train=True,
                               download=True)
    test_dset = nh.data.MNIST(workdir, transform=to_tensor, train=False,
                              download=True)

    # Split the learning dataset into training and validation
    vali_fraction = .15
    n_vali = int(len(learn_dset) * vali_fraction)
    shuffled_idx = np.arange(len(learn_dset))

    # Fixed seed so the split is reproducible between runs
    rng = np.random.RandomState(0)
    rng.shuffle(shuffled_idx)

    # Optionally take every k-th item of each split
    stride = int(ub.argval('--reduction', default=1))
    vali_idx = torch.LongTensor(shuffled_idx[:n_vali][::stride])
    train_idx = torch.LongTensor(shuffled_idx[n_vali:][::stride])

    datasets = {
        'train': torch.utils.data.Subset(learn_dset, train_idx),
        'vali': torch.utils.data.Subset(learn_dset, vali_idx),
    }
    if ub.argflag('--test'):
        datasets['test'] = test_dset

    for dset in datasets.values():
        # Construct the PCCs (positive connected components)
        # These are groups of item indices which are positive matches
        is_subset = isinstance(dset, torch.utils.data.Subset)
        if is_subset:
            labels = dset.dataset.train_labels[dset.indices]
        else:
            labels = dset.labels
        _, groupxs = kwarray.group_indices(labels.numpy())
        dset.pccs = [groupx.tolist() for groupx in groupxs]

    # Give the training dataset an input_id
    train_hashid = ub.hash_data(train_idx.numpy())[0:8]
    datasets['train'].input_id = 'mnist_' + train_hashid
    return datasets, workdir
Exemplo n.º 3
0
def select_positive_regions(targets, window_dims=(300, 300), thresh=0.0,
                            rng=None, verbose=0):
    """
    Reduce positive example redundancy by selecting disparate positive samples

    A window of size ``window_dims`` is centered on every target. Within each
    image the windows get random scores and are filtered with non-maximum
    suppression, so that the kept windows overlap by at most ``thresh``.

    Args:
        targets: table with 'gid', 'cx' and 'cy' columns
        window_dims (Tuple[int, int]): (height, width) of the window
        thresh (float): threshold passed to non-max suppression
        rng: seed or random number generator (passed to kwarray.ensure_rng)
        verbose (int): if truthy, show a progress bar

    Returns:
        ndarray: sorted indices (into ``targets``) of the selected items

    Example:
        >>> from ndsampler.coco_regions import *
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('shapes8')
        >>> targets = tabular_coco_targets(dset)
        >>> window_dims = (300, 300)
        >>> selected = select_positive_regions(targets, window_dims)
        >>> print(len(selected))
        >>> print(len(dset.anns))
    """
    unique_gids, groupxs = kwarray.group_indices(targets['gid'])
    gid_to_groupx = dict(zip(unique_gids, groupxs))
    win_h, win_w = window_dims
    rng = kwarray.ensure_rng(rng)

    # Build one fixed-size window around the center of every target
    cxs, cys = ub.take(targets, ['cx', 'cy'])
    num_targets = len(targets)
    columns = [
        cxs.astype(np.float32),
        cys.astype(np.float32),
        np.full(num_targets, win_w, dtype=np.float32),
        np.full(num_targets, win_h, dtype=np.float32),
    ]
    cxywh = np.hstack([col[:, None] for col in columns])
    boxes = kwimage.Boxes(cxywh, 'cxywh').to_tlbr()

    prog = ub.ProgIter(gid_to_groupx.items(),
                       enabled=verbose,
                       label='select positive regions',
                       total=len(gid_to_groupx), adjust=0, freq=32)

    selection = []
    for gid, groupx in prog:
        # All candidate windows that belong to this image
        windows = boxes.take(groupx, axis=0)
        # Random scores make the winner of each overlap vary, so the
        # selection can differ each epoch.
        scores = rng.rand(len(windows))
        dets = kwimage.Detections(boxes=windows, scores=scores)
        # Non-max suppression is really similar to set-cover
        keep = dets.non_max_supression(thresh=thresh)
        selection.extend(groupx[keep])

    return np.array(sorted(selection))
Exemplo n.º 4
0
        def _descend(depth, nodes, jdxs):
            """
            Recursively descend the class tree starting at the coarsest level.
            At each level we decide if the items will take a category at this
            level of granularity or try to take a more fine-grained label.

            Args:
                depth (int): current depth in the tree
                nodes (list) : set of sibling nodes at a this level
                jdxs (ArrayLike): item indices that made it to this level (note
                    idxs are used for class indices)

            Returns:
                Tuple: (pred_idxs, pred_conf), the chosen class index and its
                    confidence for each item in `jdxs` (aligned with `jdxs`).
            """
            # Look at the probabilities of each node at this level
            idxs = sorted(self.node_to_idx[node] for node in nodes)
            probs = flat_class_probs[jdxs][:, idxs]

            pred_conf, pred_cx = impl.max_argmax(probs, axis=1)
            pred_idxs = np.array(idxs)[pred_cx]

            # Keep descending on items above the threshold
            # TODO: is there a more intelligent way to do this?
            check_children = pred_conf > thresh

            if impl.any(check_children):
                # Positions, within this call's arrays, of the items that are
                # confident enough to be examined at a finer level.
                check_pos = np.flatnonzero(check_children)
                # Check the children of these nodes
                check_jdxs = jdxs[check_children]
                check_idxs = pred_idxs[check_children]
                group_idxs, groupxs = kwarray.group_indices(check_idxs)
                for idx, groupx in zip(group_idxs, groupxs):
                    node = self.idx_to_node[idx]
                    children = list(self.graph.successors(node))
                    if children:
                        sub_jdxs = check_jdxs[groupx]
                        # See if any fine-grained categories also have high
                        # thresholds.
                        sub_idxs, sub_conf = _descend(depth + 1, children,
                                                      sub_jdxs)
                        sub_flags = sub_conf > thresh
                        # `groupx` indexes the filtered `check_*` arrays, so
                        # translate it back to positions in the full-length
                        # `pred_*` arrays before writing. Using `groupx`
                        # directly would overwrite the wrong items whenever
                        # some item at this level is below the threshold.
                        fine_pos = check_pos[groupx][sub_flags]
                        fine_idxs = sub_idxs[sub_flags]
                        fine_conf = sub_conf[sub_flags]
                        # Overwrite coarse decisions with confident
                        # fine-grained ones.
                        pred_conf[fine_pos] = fine_conf
                        pred_idxs[fine_pos] = fine_idxs
            return pred_idxs, pred_conf
Exemplo n.º 5
0
def labels_to_adjacency_matrix(labels, symmetric=True, diagonal=True):
    """
    Construct an adjacency matrix of matching instances where `labels[i]` is
    the "name" or "identity" of the i-th item. The resulting matrix will have
    values adjm[i, j] == 1 if the i-th and j-th item have the same label and 0
    otherwise.

    Args:
        labels (ndarray): 1-dimensional array (or sequence) of labels
        symmetric (bool, default=True): if False only the upper triangle of the
            matrix is populated.
        diagonal (bool, default=True): if False the diagonal is set to zero.

    Returns:
        ndarray: adjm : adjacency matrix with dtype uint8

    Example:
        >>> labels = np.array([0, 0, 1, 1])
        >>> labels_to_adjacency_matrix(labels)
        array([[1, 1, 0, 0],
               [1, 1, 0, 0],
               [0, 0, 1, 1],
               [0, 0, 1, 1]], dtype=uint8)
        >>> labels_to_adjacency_matrix(labels, symmetric=False, diagonal=False)
        array([[0, 1, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 1],
               [0, 0, 0, 0]], dtype=uint8)
        >>> # Labels without any match only populate the diagonal
        >>> labels_to_adjacency_matrix(np.array([3, 1, 2]))
        array([[1, 0, 0],
               [0, 1, 0],
               [0, 0, 1]], dtype=uint8)
    """
    labels = np.asarray(labels)
    # Broadcasted equality marks every (i, j) pair that shares a label. This
    # also behaves correctly when no two labels match; indexing with an empty
    # tuple of pair indices would instead address the whole matrix.
    adjm = (labels[:, None] == labels[None, :]).astype(np.uint8)
    if not symmetric:
        # Keep only the i <= j half; the diagonal is settled below.
        adjm = np.triu(adjm)
    np.fill_diagonal(adjm, 1 if diagonal else 0)
    return adjm
Exemplo n.º 6
0
    def groupby(self, by=None, *args, **kwargs):
        """
        Group rows by the value of a column. Unlike pandas this simply
        returns a zip object. To ensure compatibility call list on the
        result of groupby.

        Args:
            by (str): column name to group by
            *args: if specified, the dataframe is coerced to pandas and these
                are forwarded to the pandas groupby after ``by``
            **kwargs: if specified, the dataframe is coerced to pandas and
                these are forwarded to the pandas groupby

        Returns:
            zip | object: in the fast path, a zip of (unique value, rows
                taken for that value) pairs; otherwise whatever the pandas
                groupby returns.

        Example:
            >>> df_light = DataFrameLight._demodata(num=7)
            >>> res1 = list(df_light.groupby('bar'))
            >>> # xdoctest: +REQUIRES(module:pandas)
            >>> df_heavy = df_light.pandas()
            >>> res2 = list(df_heavy.groupby('bar'))
            >>> assert len(res1) == len(res2)
            >>> assert all([np.all(a[1] == b[1]) for a, b in zip(res1, res2)])

        Ignore:
            >>> self = DataFrameLight._demodata(num=1000)
            >>> args = ['cx']
            >>> self['cx'] = (np.random.rand(len(self)) * 10).astype(int)
            >>> # As expected, our custom restricted implementation is faster
            >>> # than pandas
            >>> ub.Timerit(100).call(lambda: dict(list(self.pandas().groupby('cx')))).print()
            >>> ub.Timerit(100).call(lambda: dict(self.groupby('cx'))).print()
        """
        if args or kwargs:
            # Extra options are only understood by pandas, so use the slow
            # method and forward them. `by` is passed positionally so that
            # any extra positional arguments line up after it.
            return self.pandas().groupby(by, *args, **kwargs)

        # In this special case we can be fast
        import kwarray
        unique, groupxs = kwarray.group_indices(self[by])
        groups = [self.take(idxs) for idxs in groupxs]
        return zip(unique, groups)
Exemplo n.º 7
0
def _assign_confusion_vectors(true_dets,
                              pred_dets,
                              bg_weight=1.0,
                              iou_thresh=0.5,
                              bg_cidx=-1,
                              bias=0.0,
                              classes=None,
                              compat='all',
                              prioritize='iou',
                              ignore_classes='ignore',
                              max_dets=None):
    """
    Create confusion vectors for detections by assigning to ground truth boxes

    Given predictions and truth for an image return (y_pred, y_true,
    y_score), which is suitable for sklearn classification metrics

    Args:
        true_dets (Detections):
            groundtruth with boxes, classes, and weights

        pred_dets (Detections):
            predictions with boxes, classes, and scores

        bg_weight (float, default=1.0):
            forwarded unchanged to `_critical_loop`. NOTE(review): presumably
            the weight given to rows whose truth is background -- confirm
            against `_critical_loop`.

        iou_thresh (float | Iterable[float], default=0.5):
            bounding box overlap iou threshold required for assignment. An
            overlap must be strictly greater than the threshold to count as
            valid. If an iterable is given the assignment is run once for
            each threshold (see Returns).

        bias (float, default=0.0):
            for computing bounding box overlap, either 1 or 0

        compat (str, default='all'):
            can be ('ancestors' | 'mutex' | 'all').  determines which pred
            boxes are allowed to match which true boxes. If 'mutex', then
            pred boxes can only match true boxes of the same class. If
            'ancestors', then pred boxes can match true boxes that match or
            have a coarser label. If 'all', then any pred can match any
            true, regardless of its category label. If `classes` is None,
            'ancestors' falls back to 'mutex'.

        prioritize (str, default='iou'):
            can be ('iou' | 'class' | 'correct') determines which box to
            assign to if multiple true boxes overlap a predicted box.  if
            prioritize is iou, then the true box with maximum iou (above
            iou_thresh) will be chosen.  If prioritize is class, then it will
            prefer matching a compatible class above a higher iou. If
            prioritize is correct, then ancestors of the true class are
            preferred over descendants of the true class, over unrelated
            classes. This is forced to 'iou' when compat is 'mutex'.

        bg_cidx (int, default=-1):
            The index of the background class.  The index used in the truth
            column when a predicted bounding box does not match any true
            bounding box.

        classes (List[str] | kwcoco.CategoryTree):
            mapping from class indices to class names. Can also contain class
            hierarchy information. If None, a mutually exclusive category
            tree is built from the class indices present in the detections.

        ignore_classes (str | List[str]):
            class name(s) indicating ignore regions

        max_dets (int): maximum number of detections to consider

    TODO:
        - [ ] This is a bottleneck function. An implementation in C / C++ /
        Cython would likely improve the overall system.

        - [ ] Implement crowd truth. Allow multiple predictions to match any
              truth object marked as "iscrowd".

    Returns:
        dict: with relevant confusion vectors. This keys of this dict can be
            interpreted as columns of a data frame. The `txs` / `pxs` columns
            represent the indexes of the true / predicted annotations that were
            assigned as matching. Additionally each row also contains the true
            and predicted class index, the predicted score, the true weight and
            the iou of the true and predicted boxes. A `txs` value of -1 means
            that the predicted box was not assigned to a true annotation and a
            `pxs` value of -1 means that the true annotation was not assigned to
            any predicted annotation.

            If `iou_thresh` is iterable, a dict mapping each threshold to
            such a result is returned instead.

    Raises:
        KeyError: if `compat` is not one of the valid keys

    Example:
        >>> # xdoctest: +REQUIRES(module:pandas)
        >>> import pandas as pd
        >>> import kwimage
        >>> # Given a raw numpy representation construct Detection wrappers
        >>> true_dets = kwimage.Detections(
        >>>     boxes=kwimage.Boxes(np.array([
        >>>         [ 0,  0, 10, 10], [10,  0, 20, 10],
        >>>         [10,  0, 20, 10], [20,  0, 30, 10]]), 'tlbr'),
        >>>     weights=np.array([1, 0, .9, 1]),
        >>>     class_idxs=np.array([0, 0, 1, 2]))
        >>> pred_dets = kwimage.Detections(
        >>>     boxes=kwimage.Boxes(np.array([
        >>>         [6, 2, 20, 10], [3,  2, 9, 7],
        >>>         [3,  9, 9, 7],  [3,  2, 9, 7],
        >>>         [2,  6, 7, 7],  [20,  0, 30, 10]]), 'tlbr'),
        >>>     scores=np.array([.5, .5, .5, .5, .5, .5]),
        >>>     class_idxs=np.array([0, 0, 1, 2, 0, 1]))
        >>> bg_weight = 1.0
        >>> compat = 'all'
        >>> iou_thresh = 0.5
        >>> bias = 0.0
        >>> import kwcoco
        >>> classes = kwcoco.CategoryTree.from_mutex(list(range(3)))
        >>> bg_cidx = -1
        >>> y = _assign_confusion_vectors(true_dets, pred_dets, bias=bias,
        >>>                               bg_weight=bg_weight, iou_thresh=iou_thresh,
        >>>                               compat=compat)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
           pred  true  score  weight     iou  txs  pxs
        0     1     2 0.5000  1.0000  1.0000    3    5
        1     0    -1 0.5000  1.0000 -1.0000   -1    4
        2     2    -1 0.5000  1.0000 -1.0000   -1    3
        3     1    -1 0.5000  1.0000 -1.0000   -1    2
        4     0    -1 0.5000  1.0000 -1.0000   -1    1
        5     0     0 0.5000  0.0000  0.6061    1    0
        6    -1     0 0.0000  1.0000 -1.0000    0   -1
        7    -1     1 0.0000  0.9000 -1.0000    2   -1

    Ignore:
        from xinspect.dynamic_kwargs import get_func_kwargs
        globals().update(get_func_kwargs(_assign_confusion_vectors))

    Example:
        >>> # xdoctest: +REQUIRES(module:pandas)
        >>> import pandas as pd
        >>> from kwcoco.metrics import DetectionMetrics
        >>> dmet = DetectionMetrics.demo(nimgs=1, nclasses=8,
        >>>                              nboxes=(0, 20), n_fp=20,
        >>>                              box_noise=.2, cls_noise=.3)
        >>> classes = dmet.classes
        >>> gid = 0
        >>> true_dets = dmet.true_detections(gid)
        >>> pred_dets = dmet.pred_detections(gid)
        >>> y = _assign_confusion_vectors(true_dets, pred_dets,
        >>>                               classes=dmet.classes,
        >>>                               compat='all', prioritize='class')
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
        >>> y = _assign_confusion_vectors(true_dets, pred_dets,
        >>>                               classes=dmet.classes,
        >>>                               compat='ancestors', iou_thresh=.5)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
    """
    import kwarray
    valid_compat_keys = {'ancestors', 'mutex', 'all'}
    if compat not in valid_compat_keys:
        raise KeyError(compat)
    # Without class information there is no hierarchy to consult, so
    # ancestor matching falls back to exact class matching.
    if classes is None and compat == 'ancestors':
        compat = 'mutex'

    # The prioritize option is overridden for mutually exclusive matching.
    if compat == 'mutex':
        prioritize = 'iou'

    # Group true boxes by class
    # Keep track which true boxes are unused / not assigned
    unique_tcxs, tgroupxs = kwarray.group_indices(true_dets.class_idxs)
    cx_to_txs = dict(zip(unique_tcxs, tgroupxs))

    unique_pcxs = np.array(sorted(set(pred_dets.class_idxs)))

    if classes is None:
        import kwcoco
        # Build mutually exclusive category tree
        all_cxs = sorted(
            set(map(int, unique_pcxs)) | set(map(int, unique_tcxs)))
        # Use the dense range up to the largest observed index.
        # NOTE(review): `max` raises ValueError here when neither the truth
        # nor the predictions contain any class index.
        all_cxs = list(range(max(all_cxs) + 1))
        classes = kwcoco.CategoryTree.from_mutex(all_cxs)

    cx_to_ancestors = classes.idx_to_ancestor_idxs()

    if prioritize == 'iou':
        pdist_priority = None  # TODO: cleanup
    else:
        pdist_priority = _fast_pdist_priority(classes, prioritize)

    # Map each predicted class index to the true class indices it may match
    if compat == 'mutex':
        # assume classes are mutually exclusive if hierarchy is not given
        cx_to_matchable_cxs = {cx: [cx] for cx in unique_pcxs}
    elif compat == 'ancestors':
        cx_to_matchable_cxs = {
            cx: sorted([cx] + sorted(
                ub.take(classes.node_to_idx,
                        nx.ancestors(classes.graph, classes.idx_to_node[cx]))))
            for cx in unique_pcxs
        }
    elif compat == 'all':
        cx_to_matchable_cxs = {cx: unique_tcxs for cx in unique_pcxs}
    else:
        # Not reachable: `compat` was validated at the top of the function.
        raise KeyError(compat)

    if compat == 'all':
        # In this case simply run the full pairwise iou
        common_true_idxs = np.arange(len(true_dets))
        cx_to_matchable_txs = {cx: common_true_idxs for cx in unique_pcxs}
        common_ious = pred_dets.boxes.ious(true_dets.boxes, bias=bias)
        # common_ious = pred_dets.boxes.ious(true_dets.boxes, impl='c', bias=bias)
        # Keyed by position in `common_ious` (presumably one row per
        # predicted box -- TODO confirm the layout returned by `ious`).
        iou_lookup = dict(enumerate(common_ious))
    else:
        # For each pred-category find matchable true-indices
        cx_to_matchable_txs = {}
        # NOTE(review): `compat_cx` is unused; it is the same value that is
        # looked up again as `matchable_cxs` below.
        for cx, compat_cx in cx_to_matchable_cxs.items():
            matchable_cxs = cx_to_matchable_cxs[cx]
            compat_txs = ub.dict_take(cx_to_txs, matchable_cxs, default=[])
            compat_txs = np.array(sorted(ub.flatten(compat_txs)), dtype=int)
            cx_to_matchable_txs[cx] = compat_txs

        # Batch up the IOU pre-computation between compatible truths / preds
        iou_lookup = {}
        unique_pred_cxs, pgroupxs = kwarray.group_indices(pred_dets.class_idxs)
        for cx, pred_idxs in zip(unique_pred_cxs, pgroupxs):
            true_idxs = cx_to_matchable_txs[cx]
            ious = pred_dets.boxes[pred_idxs].ious(true_dets.boxes[true_idxs],
                                                   bias=bias)
            _px_to_iou = dict(zip(pred_idxs, ious))
            iou_lookup.update(_px_to_iou)

    # Normalize to a list so scalar and iterable thresholds share one loop
    iou_thresh_list = ([iou_thresh]
                       if not ub.iterable(iou_thresh) else iou_thresh)

    iou_thresh_to_y = {}
    for iou_thresh_ in iou_thresh_list:
        # Flag which of the precomputed overlaps exceed this threshold
        isvalid_lookup = {
            px: ious > iou_thresh_
            for px, ious in iou_lookup.items()
        }

        y = _critical_loop(true_dets,
                           pred_dets,
                           iou_lookup,
                           isvalid_lookup,
                           cx_to_matchable_txs,
                           bg_weight,
                           prioritize,
                           iou_thresh_,
                           pdist_priority,
                           cx_to_ancestors,
                           bg_cidx,
                           ignore_classes=ignore_classes,
                           max_dets=max_dets)
        iou_thresh_to_y[iou_thresh_] = y

    # An iterable threshold returns every result keyed by threshold; a
    # scalar threshold returns the single result directly.
    if ub.iterable(iou_thresh):
        return iou_thresh_to_y
    else:
        return y
Exemplo n.º 8
0
    def _make_test_folds(self, X, y=None, groups=None):
        """
        Greedily assign each group to a test fold so that class frequencies
        stay close to the overall class frequencies and folds stay balanced
        in size.

        Args:
            X (ndarray): data
            y (ndarray): labels. These are counted with ``np.bincount``, so
                they must be non-negative integers.
            groups (ndarray): groupids for items. Items with the same groupid
                must be placed in the same group.

        Returns:
            ndarray: test_folds, the index of the test fold assigned to each
                sample (integer array with one entry per item in ``y``).

        Example:
            >>> import kwarray
            >>> rng = kwarray.ensure_rng(0)
            >>> groups = [1, 1, 3, 4, 2, 2, 7, 8, 8]
            >>> y      = [1, 1, 1, 1, 2, 2, 2, 3, 3]
            >>> X = np.empty((len(y), 0))
            >>> self = StratifiedGroupKFold(random_state=rng, shuffle=True)
            >>> skf_list = list(self.split(X=X, y=y, groups=groups))
            ...
            >>> import ubelt as ub
            >>> print(ub.repr2(skf_list, nl=1, with_dtype=False))
            [
                (np.array([2, 3, 4, 5, 6]), np.array([0, 1, 7, 8])),
                (np.array([0, 1, 2, 7, 8]), np.array([3, 4, 5, 6])),
                (np.array([0, 1, 3, 4, 5, 6, 7, 8]), np.array([2])),
            ]
        """
        import kwarray
        with warnings.catch_warnings():
            # Empty folds produce 0 / 0 in the ratios below; the resulting
            # "invalid value" warnings are expected and silenced.
            warnings.filterwarnings('ignore', 'invalid value')
            n_splits = self.n_splits
            y = np.asarray(y)
            n_samples = y.shape[0]

            unique_y, y_inversed = np.unique(y, return_inverse=True)
            n_classes = max(unique_y) + 1
            unique_groups, group_idxs = kwarray.group_indices(groups)
            grouped_y = kwarray.apply_grouping(y, group_idxs)
            # Row per group, column per class: how often each class occurs
            grouped_y_counts = np.array([
                np.bincount(y_, minlength=n_classes) for y_ in grouped_y])

            # NOTE: the builtin ``float`` replaces the ``np.float`` alias,
            # which no longer exists in current NumPy releases.
            target_freq = grouped_y_counts.sum(axis=0)
            target_freq = target_freq.astype(float)
            target_ratio = target_freq / float(target_freq.sum())

            # Greedilly choose the split assignment that minimizes the local
            # * squared differences in target from actual frequencies
            # * and best equalizes the number of items per fold
            # Distribute groups with most members first
            split_freq = np.zeros((n_splits, n_classes))
            # split_ratios = split_freq / split_freq.sum(axis=1)
            split_ratios = np.ones(split_freq.shape) / split_freq.shape[1]
            split_diffs = ((split_freq - target_ratio) ** 2).sum(axis=1)
            sortx = np.argsort(grouped_y_counts.sum(axis=1))[::-1]
            grouped_splitx = []

            for group_idx in sortx:
                # Class frequencies of every fold if it received this group
                group_freq = grouped_y_counts[group_idx]
                cand_freq = (split_freq + group_freq)
                cand_freq = cand_freq.astype(float)
                cand_ratio = cand_freq / cand_freq.sum(axis=1)[:, None]
                cand_diffs = ((cand_ratio - target_ratio) ** 2).sum(axis=1)
                # Compute loss
                # For each candidate fold, the summed error of all the
                # other folds (which stay unchanged).
                other_diffs = np.array([
                    sum(split_diffs[x + 1:]) + sum(split_diffs[:x])
                    for x in range(n_splits)
                ])
                # penalize unbalanced splits
                ratio_loss = other_diffs + cand_diffs
                # penalize heavy splits
                freq_loss = split_freq.sum(axis=1)
                freq_loss = freq_loss.astype(float)
                freq_loss = freq_loss / freq_loss.sum()
                losses = ratio_loss + freq_loss
                #-------
                splitx = np.argmin(losses)
                split_freq[splitx] = cand_freq[splitx]
                split_ratios[splitx] = cand_ratio[splitx]
                split_diffs[splitx] = cand_diffs[splitx]
                grouped_splitx.append(splitx)

            # Broadcast the fold chosen for each group to all of its items
            test_folds = np.empty(n_samples, dtype=int)
            for group_idx, splitx in zip(sortx, grouped_splitx):
                idxs = group_idxs[group_idx]
                test_folds[idxs] = splitx

        return test_folds
Exemplo n.º 9
0
def draw_points(xy,
                color='blue',
                class_idxs=None,
                classes=None,
                ax=None,
                alpha=None,
                radius=1,
                **kwargs):
    """
    Draw points as filled circles on a matplotlib axes.

    Args:
        xy (ndarray): of points. Reshaped to (-1, 2) before drawing.
        color (str): a color for all points, or the special values
            'distinct' (one distinct color per point) or 'classes' (one
            distinct color per class, chosen via `class_idxs`).
        class_idxs (ArrayLike): class index of each point. Required when
            color is 'classes'.
        classes (Sequence): the classes; only its length is used here.
            Required when color is 'classes'.
        ax (Axes): axes to draw on. Defaults to the current axes.
        alpha (float | List[float]): a single alpha, or one per point.
            Defaults to 1.0 for every point.
        radius (float): radius of each circle
        **kwargs: extra options for the circle patches. Passing `fc`
            overrides the color and emits a warning.

    Returns:
        List: the patch collections that were added to the axes, one per
            distinct rgba color.

    Raises:
        Exception: if color is 'classes' but `class_idxs` or `classes` is
            not given.

    Example:
        >>> from kwplot.mpl_draw import *  # NOQA
        >>> import kwimage
        >>> xy = kwimage.Points.random(10).xy
        >>> draw_points(xy, radius=0.01)
        >>> draw_points(xy, class_idxs=np.random.randint(0, 3, 10),
        >>>         radius=0.01, classes=['a', 'b', 'c'], color='classes')

    Ignore:
        >>> import kwplot
        >>> kwplot.autompl()
    """
    import kwimage
    import matplotlib as mpl
    from matplotlib import pyplot as plt
    if ax is None:
        ax = plt.gca()

    xy = xy.reshape(-1, 2)

    # More grouped patches == more efficient runtime
    if alpha is None:
        alpha = [1.0] * len(xy)
    elif not ub.iterable(alpha):
        alpha = [alpha] * len(xy)

    if color == 'distinct':
        colors = kwimage.Color.distinct(len(alpha))
    elif color == 'classes':
        # TODO: read colors from categories if they exist
        if class_idxs is None or classes is None:
            raise Exception(
                'cannot draw class colors without class_idxs and classes')
        try:
            cls_colors = kwimage.Color.distinct(len(classes))
        except KeyError:
            raise Exception(
                'cannot draw class colors without class_idxs and classes')
        colors = list(ub.take(cls_colors, class_idxs))
    else:
        colors = [color] * len(alpha)

    ptcolors = [
        kwimage.Color(c, alpha=a).as01('rgba') for c, a in zip(colors, alpha)
    ]
    # Points that share an rgba color are drawn as a single collection
    color_groups = ub.group_items(range(len(ptcolors)), ptcolors)

    circlekw = {
        'radius': radius,
        'fill': True,
        'ec': None,
    }
    if 'fc' in kwargs:
        import warnings
        # `warnings.warn` is the function that issues a warning; the
        # `warnings` module has no attribute named `warning`.
        warnings.warn('Warning: specifying fc to Points.draw overrides '
                      'the color argument. Use color instead')
    circlekw.update(kwargs)
    fc = circlekw.pop('fc', None)  # hack

    collections = []
    for pcolor, idxs in color_groups.items():

        # hack for fc
        if fc is not None:
            pcolor = fc

        patches = [
            mpl.patches.Circle((x, y), fc=pcolor, **circlekw)
            for x, y in xy[idxs]
        ]
        col = mpl.collections.PatchCollection(patches, match_original=True)
        collections.append(col)
        ax.add_collection(col)
    return collections
Exemplo n.º 10
0
    def from_coco(KW18, coco_dset):
        """
        Construct a KW18 table from the annotations of a coco dataset.

        Each annotation becomes one row. Annotations without a 'track_id'
        are given fresh ids that continue after the largest existing id (or
        start at 1 when none exist). Missing 'score' and 'category_id'
        values default to -1. Columns that cannot be derived from the
        annotations are filled with constants (0 for the velocities, -1 for
        world locations and timestamp).

        Args:
            coco_dset: object whose ``dataset['annotations']`` is a list of
                annotation dicts with at least 'bbox' (xywh) and 'image_id'.

        Returns:
            KW18: the constructed table
        """
        import kwimage
        raw = {col: None for col in KW18.DEFAULT_COLUMNS}
        anns = coco_dset.dataset['annotations']
        num = len(anns)

        # Box geometry in corner and center form
        xywh = np.array([ann['bbox'] for ann in anns])
        boxes = kwimage.Boxes(xywh, 'xywh')
        tlbr = boxes.to_tlbr()
        centers = tlbr.to_cxywh().data
        tl_x, tl_y, br_x, br_y = tlbr.data.T
        cx = centers[:, 0]
        cy = centers[:, 1]

        # Create track ids if not given. NaN marks the missing entries.
        track_ids = np.array([ann.get('track_id', np.nan) for ann in anns])
        missing = np.isnan(track_ids)
        known_track_ids = track_ids[~missing]
        if len(known_track_ids) == 0:
            next_track_id = 1
        else:
            next_track_id = known_track_ids.max() + 1
        num_need = np.sum(missing)
        track_ids[missing] = np.arange(next_track_id,
                                       next_track_id + num_need)
        track_ids = track_ids.astype(int)

        # The length of a track is the number of rows sharing its id
        track_length = np.full(num, fill_value=-1)
        _, groupxs = kwarray.group_indices(track_ids)
        for groupx in groupxs:
            track_length[groupx] = len(groupx)

        scores = np.array([ann.get('score', -1) for ann in anns])
        image_ids = np.array([ann['image_id'] for ann in anns])
        cids = np.array([ann.get('category_id', -1) for ann in anns])

        raw.update({
            'track_id': track_ids,
            'track_length': track_length,
            'frame_number': image_ids,
            'tracking_plane_loc_x': cx,
            'tracking_plane_loc_y': cy,
            'velocity_x': np.full(num, fill_value=0),
            'velocity_y': np.full(num, fill_value=0),
            'image_loc_x': cx,
            'image_loc_y': cy,
            'img_bbox_tl_x': tl_x,
            'img_bbox_tl_y': tl_y,
            'img_bbox_br_x': br_x,
            'img_bbox_br_y': br_y,
            'area': boxes.area.ravel(),
            'world_loc_x': np.full(num, fill_value=-1),
            'world_loc_y': np.full(num, fill_value=-1),
            'world_loc_z': np.full(num, fill_value=-1),
            'timestamp': np.full(num, fill_value=-1),
            'confidence': scores,
            'object_type_id': cids,
        })

        # Drop any default column that was never populated
        raw = {key: val for key, val in raw.items() if val is not None}

        self = KW18(raw)
        return self
Exemplo n.º 11
0
        def _entropy_refine(depth, nodes, jdxs):
            """
            Recursively descend the class tree starting at the coarsest level.
            At each level we decide if the items will take a category at this
            level of granularity or try to take a more fine-grained label.

            Args:
                depth (int): current depth in the tree
                nodes (list) : set of sibling nodes at this level
                jdxs (ArrayLike): item indices that made it to this level (note
                    idxs are used for class indices)

            Returns:
                Tuple[ndarray, ArrayLike]: ``(pred_idxs, pred_conf)``. For each
                    item in ``jdxs`` the class index predicted for it and the
                    corresponding value returned by ``impl.max_argmax``. Items
                    that were refined carry the values returned by the
                    recursive call instead.

            Raises:
                ValueError: if removing ``ignore_class_idxs`` leaves no
                    candidate class at this level.
            """
            if DEBUG:
                print(ub.color_text('* REFINE nodes={}'.format(nodes), 'blue'))
            # Look at the probabilities of each node at this level
            idxs = sorted(self.node_to_idx[node] for node in nodes)
            if ignore_class_idxs:
                ignore_nodes = set(ub.take(self.idx_to_node, ignore_class_idxs))
                idxs = sorted(set(idxs) - set(ignore_class_idxs))
                if len(idxs) == 0:
                    raise ValueError('Cannot ignore all top-level classes')
            probs = flat_class_probs[jdxs][:, idxs]

            # Choose a highest probability category to predict at this level
            pred_conf, pred_cx = impl.max_argmax(probs, axis=1)
            pred_idxs = np.array(idxs)[impl.numpy(pred_cx)]

            # Group each example which predicted the same class at this level
            group_idxs, groupxs = kwarray.group_indices(pred_idxs)
            if DEBUG:
                # Visit groups in class-index order so debug output is stable
                groupxs = list(ub.take(groupxs, group_idxs.argsort()))
                group_idxs = group_idxs[group_idxs.argsort()]
                # print('groupxs = {!r}'.format(groupxs))
                # print('group_idxs = {!r}'.format(group_idxs))

            for idx, groupx in zip(group_idxs, groupxs):
                # Get the children of this node (idx)
                node = self.idx_to_node[idx]
                children = sorted(self.graph.successors(node))
                if ignore_class_idxs:
                    children = sorted(set(children) - ignore_nodes)

                if children:
                    # Check if it would be simple to refine the coarse category
                    # current prediction into one of its finer-grained child
                    # categories. Do this by considering the entropy at this
                    # level if we replace this coarse-node with the child
                    # fine-nodes. Then compare that entropy to what we would
                    # get if we were perfectly uncertain about the child node
                    # prediction (i.e. the worst case). If the entropy we get
                    # is much lower than the worst case, then it is simple to
                    # descend the tree and predict a finer-grained label.

                    # Expand this node into all of its children
                    child_idxs = set(self.node_to_idx[child] for child in children)

                    # Get example indices (jdxs) assigned to category idx
                    groupx.sort()
                    group_jdxs = jdxs[groupx]

                    # Index the rows for this group once and reuse them below
                    group_probs = flat_class_probs[group_jdxs]

                    # Expand this parent node, but keep the parent's siblings
                    ommer_idxs = sorted(set(idxs) - {idx})  # Note: ommer = Aunt/Uncle
                    expanded_idxs = ommer_idxs + sorted(child_idxs)
                    expanded_probs = group_probs[:, expanded_idxs]

                    # Compute the entropy of the expanded distribution
                    h_expanded = _criterion(expanded_probs)

                    # Probability assigned to the parent (sliced to stay 2D)
                    p_parent = group_probs[:, idx:idx + 1]
                    # Get the absolute probabilities assigned the parents siblings
                    ommer_probs = group_probs[:, ommer_idxs]

                    # Compute the worst-case entropy after expanding the node
                    # In the worst case the parent probability is distributed
                    # uniformly among all of its children
                    c = len(children)
                    child_probs_worst = impl.tile(p_parent / c, reps=[1, c])
                    expanded_probs_worst = impl.hstack([ommer_probs, child_probs_worst])
                    h_expanded_worst = _criterion(expanded_probs_worst)

                    # Normalize the entropy we got by the worst case.
                    # eps guards against division by zero.
                    # eps = float(np.finfo(np.float32).min)
                    eps = 1e-30
                    complexity_ratio = h_expanded / (h_expanded_worst + eps)
                    simplicity_ratio = 1 - complexity_ratio

                    # If simplicity ratio is over a threshold refine the parent
                    refine_flags = simplicity_ratio > thresh

                    if always_refine_idxs is not None:
                        if idx in always_refine_idxs:
                            refine_flags[:] = 1

                    if len(child_idxs) == 1:
                        # hack: always refine when there is one child, in this
                        # case the simplicity measure will always be zero,
                        # which is likely a problem with this criterion.
                        refine_flags[:] = 1

                    # Use the builtin ``bool``: the ``np.bool`` alias was
                    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
                    refine_flags = kwarray.ArrayAPI.numpy(refine_flags).astype(bool)

                    if DEBUG:
                        print('-----------')
                        print('idx = {!r}'.format(idx))
                        print('node = {!r}'.format(self.idx_to_node[idx]))
                        print('ommer_idxs = {!r}'.format(ommer_idxs))
                        print('ommer_nodes = {!r}'.format(
                            list(ub.take(self.idx_to_node, ommer_idxs))))
                        print('depth = {!r}'.format(depth))
                        import pandas as pd
                        print('expanded_probs =\n{}'.format(
                            ub.repr2(expanded_probs, precision=2,
                                     with_dtype=0, supress_small=True)))
                        df = pd.DataFrame({
                            'h': h_expanded,
                            'h_worst': h_expanded_worst,
                            'c_ratio': complexity_ratio,
                            's_ratio': simplicity_ratio,
                            'flags': refine_flags.astype(np.uint8)
                        })
                        print(df)
                        print('-----------')

                    if np.any(refine_flags):
                        refine_jdxs = group_jdxs[refine_flags]
                        refine_idxs, refine_conf = _entropy_refine(depth + 1, children, refine_jdxs)
                        # Overwrite coarse decisions with refined decisions.
                        refine_groupx = groupx[refine_flags]
                        pred_idxs[refine_groupx] = refine_idxs
                        pred_conf[refine_groupx] = refine_conf
            return pred_idxs, pred_conf
Exemplo n.º 12
0
def main():
    """
    Demo: look up a per-detection attribute for every row of a flat
    confusion table, comparing a list comprehension against grouping.

    Say we have a process that produces an assignment between the true
    detections within images and some set of predictions. The table of
    assignments is flat, so the same image id appears in many rows. This
    demo fetches the true bounding box for each row in two ways, checks that
    both give the same result, and times them with ``timerit``.
    """
    import numpy as np
    import ubelt as ub

    # Create demo detection metrics
    from kwcoco.metrics import DetectionMetrics
    dmet = DetectionMetrics.demo(nimgs=1000,
                                 nboxes=(0, 10),
                                 n_fp=(0, 10),
                                 n_fn=(0, 10))

    # We might have some sort of mapping between images and the predicted and
    # true boxes (note gid means imaGe id).
    gid_to_true = dmet.gid_to_true_dets
    gid_to_pred = dmet.gid_to_pred_dets
    print('gid_to_true = {}'.format(str(gid_to_true)[0:100] + ' ...'))
    print('gid_to_pred = {}'.format(str(gid_to_pred)[0:100] + ' ...'))
    """
    gid_to_true = {0: <Detections(5) at 0x7fe08c335a10>, 1: <Detections(5) at 0x7fe08c3359d0>, 2: <Detections(8) at 0x ...
    gid_to_pred = {0: <Detections(2) at 0x7fe08c335990>, 1: <Detections(6) at 0x7fe08c335dd0>, 2: <Detections(13) at 0 ...
    """

    # Each detection might have data like this
    print('gid_to_true[0].data = {}'.format(ub.repr2(gid_to_true[0].data,
                                                     nl=1)))
    """
    gid_to_true[0].data = {
        'boxes': <Boxes(cxywh,
                     array([[74.07547  , 61.581673 , 24.438194 , 47.287003 ],
                            [28.509544 , 26.718906 ,  3.487833 , 43.095215 ],
                            [60.247677 , 65.802795 , 42.938393 , 36.610165 ],
                            [35.281883 , 80.26636  ,  4.0845375, 31.898323 ],
                            [30.69794  , 83.549904 , 34.32573  ,  7.9176483]], dtype=float32))>,
        'class_idxs': np.array([1, 1, 1, 1, 1], dtype=np.int64),
        'weights': np.array([1, 1, 1, 1, 1], dtype=np.int32),
    }
    """

    # we can compute an association between each box and get a flat table
    table = dmet.confusion_vectors().data

    # The table of values might look something like this.
    # Again, note the gids correspond to imaGe ids
    # txs correspond to indexes of true detections in that image
    # pxs correspond to indexes of predicted detections in that image
    # A -1 in an index value means the row is unassociated
    print(table.pandas()[['gid', 'txs', 'pxs']])
    """
          gid  txs  pxs
    0       0    3    0
    1       0    4    1
    2       0    0   -1
    3       0    1   -1
    4       0    2   -1
    ...   ...  ...  ...
    9881  999   -1    1
    9882  999   -1    3
    9883  999   -1    2
    9884  999    0   -1
    9885  999    1   -1

    """

    # Say we need to know some attribute (e.g. the bounding boxes) for all of
    # the true associations, but the table is already flattened (the same gid
    # is repeated across many rows). How do we access that data?

    # We could use a list comprehension and lookup the Detections object for
    # that image and then look up the index within the image. Rows without a
    # true detection (tx == -1) get a placeholder box of all -1.
    data_attr_v1 = np.array(
        [[-1] * 4 if tx == -1 else gid_to_true[gid].data['boxes'].data[tx]
         for gid, tx in zip(table['gid'], table['txs'])])

    # But that means we are accessing the __getitem__ of gid_to_true a lot
    # Is there a better way?

    # Yes, we can group the table by image id.
    import kwarray
    data_attr_v2 = np.full((len(table), 4), fill_value=-1.0)
    unique_gids, groupxs = kwarray.group_indices(table['gid'])
    # Use a distinct loop name (groupx) so the list being zipped (groupxs)
    # is not rebound while it is iterated.
    for gid, groupx in zip(unique_gids, groupxs):
        true_det = gid_to_true[gid]
        image_txs = table['txs'][groupx]
        valid_flags = image_txs != -1
        valid_txs = image_txs[valid_flags]
        valid_groupx = groupx[valid_flags]
        valid_attr = true_det.data['boxes'].data[valid_txs]
        data_attr_v2[valid_groupx] = valid_attr

    # We can see both codeblocks are the same, but which is faster
    assert np.all(data_attr_v2 == data_attr_v1)

    import timerit
    ti = timerit.Timerit(50, bestof=10, verbose=2)
    for timer in ti.reset('list-comprehension'):
        with timer:
            data_attr_v1 = np.array(
                [[-1] *
                 4 if tx == -1 else gid_to_true[gid].data['boxes'].data[tx]
                 for gid, tx in zip(table['gid'], table['txs'])])

    for timer in ti.reset('grouping'):
        with timer:
            data_attr_v2 = np.full((len(table), 4), fill_value=-1.0)
            unique_gids, groupxs = kwarray.group_indices(table['gid'])
            for gid, groupx in zip(unique_gids, groupxs):
                true_det = gid_to_true[gid]
                image_txs = table['txs'][groupx]
                valid_flags = image_txs != -1
                valid_txs = image_txs[valid_flags]
                valid_groupx = groupx[valid_flags]
                valid_attr = true_det.data['boxes'].data[valid_txs]
                data_attr_v2[valid_groupx] = valid_attr
Exemplo n.º 13
0
    def draw(self, color='blue', ax=None, alpha=None, radius=1, **kwargs):
        """
        Draw the points as filled circles on a matplotlib axes.

        TODO: can use kwplot.draw_points

        Args:
            color (str | Any): ``'distinct'`` assigns one distinct color per
                point, ``'classes'`` assigns one distinct color per class
                (looked up through ``self.data['class_idxs']``), and any
                other value is passed to ``kwimage.Color`` and used for
                every point.
            ax (matplotlib.axes.Axes | None): axes to draw on. Defaults to
                ``plt.gca()``.
            alpha (float | Iterable[float] | None): a single alpha for all
                points or one alpha per point. Defaults to 1.0.
            radius (float): radius of each circle patch.
            **kwargs: extra keyword arguments forwarded to
                ``mpl.patches.Circle``. If ``fc`` is given it overrides
                ``color`` (a warning is emitted).

        Returns:
            List[mpl.collections.PatchCollection]: the collections added to
                ``ax``, one per unique rgba color.

        Raises:
            Exception: if ``color='classes'`` but ``class_idxs`` or
                ``classes`` is not available.

        Example:
            >>> # xdoc: +REQUIRES(module:kwplot)
            >>> from kwimage.structs.points import *  # NOQA
            >>> pts = Points.random(10)
            >>> # xdoc: +REQUIRES(--show)
            >>> pts.draw(radius=0.01)

            >>> from kwimage.structs.points import *  # NOQA
            >>> self = Points.random(10, classes=['a', 'b', 'c'])
            >>> self.draw(radius=0.01, color='classes')
        """
        import warnings
        import kwimage
        import matplotlib as mpl
        from matplotlib import pyplot as plt
        if ax is None:
            ax = plt.gca()
        xy = self.data['xy'].data.reshape(-1, 2)

        # Normalize alpha to one value per point
        if alpha is None:
            alpha = [1.0] * len(xy)
        elif not ub.iterable(alpha):
            alpha = [alpha] * len(xy)

        if color == 'distinct':
            colors = kwimage.Color.distinct(len(alpha))
        elif color == 'classes':
            # TODO: read colors from categories if they exist
            try:
                class_idxs = self.data['class_idxs']
                cls_colors = kwimage.Color.distinct(len(self.meta['classes']))
            except KeyError:
                raise Exception('cannot draw class colors without class_idxs and classes')
            colors = list(ub.take(cls_colors, class_idxs))
        else:
            colors = [color] * len(alpha)

        ptcolors = [kwimage.Color(c, alpha=a).as01('rgba')
                    for c, a in zip(colors, alpha)]
        # More grouped patches == more efficient runtime, so points sharing
        # the same rgba color are drawn in a single PatchCollection.
        color_groups = ub.group_items(range(len(ptcolors)), ptcolors)

        circlekw = {
            'radius': radius,
            'fill': True,
            'ec': None,
        }
        if 'fc' in kwargs:
            # ``warnings.warn`` is the stdlib API; ``warnings.warning`` does
            # not exist and would raise AttributeError.
            warnings.warn(
                'Warning: specifying fc to Points.draw overrides '
                'the color argument. Use color instead')
        circlekw.update(kwargs)
        # hack: pull fc back out so it is not passed to Circle twice
        fc = circlekw.pop('fc', None)

        collections = []
        for pcolor, idxs in color_groups.items():

            # hack for fc: a user-specified face color wins over the group color
            if fc is not None:
                pcolor = fc

            patches = [
                mpl.patches.Circle((x, y), fc=pcolor, **circlekw)
                for x, y in xy[idxs]
            ]
            col = mpl.collections.PatchCollection(patches, match_original=True)
            collections.append(col)
            ax.add_collection(col)
        return collections
Exemplo n.º 14
0
    def draw_on(self, image, color='white', radius=None, copy=False):
        """
        Rasterize the points onto an image and return the result.

        Args:
            image (ndarray): image to draw on.
            color (str | Any): ``'distinct'`` (one color per point; only
                supported when ``radius`` is given), ``'classes'`` (one color
                per class via ``self.data['class_idxs']``; handled in the
                ``radius`` branch), or any value accepted by
                ``kwimage.Color``.
            radius (float | None): if None the points are written with
                ``self.data['xy'].fill`` using bilinear interpolation,
                otherwise a filled ellipse is drawn per point with OpenCV.
            copy (bool): forwarded to the kwimage conversion helpers.

        Returns:
            ndarray: the drawn image, passed through the dtype fixer built
                from the input image.

        Raises:
            NotImplementedError: if ``color='distinct'`` and ``radius`` is
                None.

        CommandLine:
            xdoctest -m ~/code/kwimage/kwimage/structs/points.py Points.draw_on --show

        Example:
            >>> # xdoc: +REQUIRES(module:kwplot)
            >>> from kwimage.structs.points import *  # NOQA
            >>> s = 128
            >>> image = np.zeros((s, s))
            >>> self = Points.random(10).scale(s)
            >>> image = self.draw_on(image)
            >>> # xdoc: +REQUIRES(--show)
            >>> import kwplot
            >>> kwplot.figure(fnum=1, doclf=True)
            >>> kwplot.autompl()
            >>> kwplot.imshow(image)
            >>> self.draw(radius=3, alpha=.5)
            >>> kwplot.show_if_requested()

        Example:
            >>> # xdoc: +REQUIRES(module:kwplot)
            >>> from kwimage.structs.points import *  # NOQA
            >>> s = 128
            >>> image = np.zeros((s, s))
            >>> self = Points.random(10).scale(s)
            >>> image = self.draw_on(image, radius=3, color='distinct')
            >>> # xdoc: +REQUIRES(--show)
            >>> import kwplot
            >>> kwplot.figure(fnum=1, doclf=True)
            >>> kwplot.autompl()
            >>> kwplot.imshow(image)
            >>> self.draw(radius=3, alpha=.5, color='classes')
            >>> kwplot.show_if_requested()

        Example:
            >>> import kwimage
            >>> s = 32
            >>> self = kwimage.Points.random(10).scale(s)
            >>> color = 'blue'
            >>> # Test drawing on all channel + dtype combinations
            >>> im3 = np.zeros((s, s, 3), dtype=np.float32)
            >>> im_chans = {
            >>>     'im3': im3,
            >>>     'im1': kwimage.convert_colorspace(im3, 'rgb', 'gray'),
            >>>     'im4': kwimage.convert_colorspace(im3, 'rgb', 'rgba'),
            >>> }
            >>> inputs = {}
            >>> for k, im in im_chans.items():
            >>>     inputs[k + '_01'] = (kwimage.ensure_float01(im.copy()), {'radius': None})
            >>>     inputs[k + '_255'] = (kwimage.ensure_uint255(im.copy()), {'radius': None})
            >>> outputs = {}
            >>> for k, v in inputs.items():
            >>>     im, kw = v
            >>>     outputs[k] = self.draw_on(im, color=color, **kw)
            >>> # xdoc: +REQUIRES(--show)
            >>> import kwplot
            >>> kwplot.figure(fnum=2, doclf=True)
            >>> kwplot.autompl()
            >>> pnum_ = kwplot.PlotNums(nCols=2, nRows=len(inputs))
            >>> for k in inputs.keys():
            >>>     kwplot.imshow(inputs[k][0], fnum=2, pnum=pnum_(), title=k)
            >>>     kwplot.imshow(outputs[k], fnum=2, pnum=pnum_(), title=k)
            >>> kwplot.show_if_requested()
        """
        import kwimage

        # Built from the *input* image, applied to the result before returning
        dtype_fixer = _generic._consistent_dtype_fixer(image)

        if radius is None:
            if color == 'distinct':
                raise NotImplementedError
            image = kwimage.atleast_3channels(image)
            image = kwimage.ensure_float01(image, copy=copy)
            # value = kwimage.Color(color).as01()
            value = kwimage.Color(color)._forimage(image)
            image = self.data['xy'].fill(
                image, value, coord_axes=[1, 0], interp='bilinear')
        else:
            import cv2
            image = kwimage.atleast_3channels(image, copy=copy)
            # note: ellipse has a different return type (UMat) and does not
            # work inplace if the input is not contiguous.
            image = np.ascontiguousarray(image)

            xy_pts = self.data['xy'].data.reshape(-1, 2)

            if color == 'distinct':
                colors = kwimage.Color.distinct(len(xy_pts))
                # Route through ``_forimage`` like the other color modes so
                # every branch hands OpenCV colors in the same form.
                colors = [kwimage.Color(c)._forimage(image) for c in colors]
            elif color == 'classes':
                # TODO: read colors from categories if they exist
                class_idxs = self.data['class_idxs']
                cls_colors = kwimage.Color.distinct(len(self.meta['classes']))
                colors = list(ub.take(cls_colors, class_idxs))
                colors = [kwimage.Color(c)._forimage(image) for c in colors]
            else:
                value = kwimage.Color(color)._forimage(image)
                colors = [value] * len(xy_pts)

            # The ellipse axes do not depend on the point, compute them once.
            # NOTE(review): the semi-axes are radius / 2, so ``radius``
            # effectively behaves like a diameter here - confirm intended.
            half = int(radius / 2)
            axes = (half, half)

            for xy, color_ in zip(xy_pts, colors):
                # OpenCV requires integer pixel coordinates
                center = tuple(map(int, xy.tolist()))
                cv2.ellipse(image, center, axes, angle=0.0, startAngle=0.0,
                            endAngle=360.0, color=color_, thickness=-1)

        image = dtype_fixer(image, copy=False)
        return image