Example #1
 def _set_pos_redun_flag(infr, nid, flag):
     """
     Flags or unflags an nid as positive redundant.
     """
     was_pos_redun = nid in infr.pos_redun_nids
     if flag:
         if not was_pos_redun:
             infr.print('pos_redun flag=T nid=%r' % (nid,), 5)
         else:
             infr.print('pos_redun flag=T nid=%r (already done)' % (nid,), 6)
         infr.pos_redun_nids.add(nid)
         cc = infr.pos_graph.component(nid)
         infr.remove_internal_priority(cc)
         if infr.params['inference.update_attrs']:
             infr.set_edge_attrs(
                 'inferred_state',
                 ub.dzip(nxu.edges_inside(infr.graph, cc), ['same'])
             )
     else:
         if was_pos_redun:
             infr.print('pos_redun flag=F nid=%r' % (nid,), 5)
         else:
             infr.print('pos_redun flag=F nid=%r (already done)' % (nid,), 6)
         cc = infr.pos_graph.component(nid)
         infr.pos_redun_nids -= {nid}
         infr.reinstate_internal_priority(cc)
         if infr.params['inference.update_attrs']:
             infr.set_edge_attrs(
                 'inferred_state',
                 ub.dzip(nxu.edges_inside(infr.graph, cc), [None])
             )
Example #2
def test_dzip_errors():
    with pytest.raises(TypeError):
        ub.dzip([1], 2)
    with pytest.raises(TypeError):
        ub.dzip(1, [2])
    with pytest.raises(ValueError):
        ub.dzip([1, 2, 3], [])
    with pytest.raises(ValueError):
        ub.dzip([], [4, 5, 6])
    with pytest.raises(ValueError):
        ub.dzip([1, 2, 3], [4, 5])
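
For context, a minimal sketch of the behavior these error tests complement, based on ubelt's documented dzip semantics: two equal-length sequences are zipped elementwise into a dict, and a length-one value list is broadcast across every key (the broadcast form is the pattern most of the examples on this page rely on, e.g. ub.dzip(edges, [True])).

import ubelt as ub

# Elementwise pairing of keys and values
assert ub.dzip([1, 2, 3], [4, 5, 6]) == {1: 4, 2: 5, 3: 6}

# A single value is broadcast across all keys
assert ub.dzip([1, 2, 3], [9]) == {1: 9, 2: 9, 3: 9}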
Example #3
    def on_between(infr,
                   edge,
                   decision,
                   prev_decision,
                   nid1,
                   nid2,
                   merge_nid=None):
        """
        Callback when a review is made between two PCCs
        """
        action = ['between']

        infr._update_neg_metagraph(decision,
                                   prev_decision,
                                   nid1,
                                   nid2,
                                   merge_nid=merge_nid)

        if merge_nid is not None:
            # A merge occurred
            if infr.params['inference.update_attrs']:
                cc = infr.pos_graph.component(merge_nid)
                infr.set_node_attrs('name_label', ub.dzip(cc, [merge_nid]))
            # FIXME: this state is ugly
            action += ['merge']
        else:
            if decision == NEGTV:
                action += ['neg-evidence']
            elif decision == INCMP:
                action += ['incomp-evidence']
            else:
                action += ['other-evidence']
        return action
Example #4
File: boids.py  Project: Kitware/kwcoco
    def update_neighbors(self):
        # TODO: this should be done with a fast spatial index, but
        # unfortunately I don't see any existing implementations that make it
        # easy to support moving points.
        utriu_dists = pdist(self.pos)
        utriu_flags = utriu_dists < self.config['perception_thresh']
        utriu_rx, utriu_cx = np.triu_indices(len(self.pos), k=1)

        utriu_neighb_rxs = utriu_rx[utriu_flags]
        utriu_neighb_cxs = utriu_cx[utriu_flags]

        neighb_rxs = np.r_[utriu_neighb_rxs, utriu_neighb_cxs]
        neighb_cxs = np.r_[utriu_neighb_cxs, utriu_neighb_rxs]

        group_rxs, groupxs = kwarray.group_indices(neighb_rxs)
        group_cxs = kwarray.apply_grouping(neighb_cxs, groupxs)

        rx_to_neighb_cxs = ub.dzip(group_rxs, group_cxs)

        # n = len(self.pos)
        # rx_to_neighb_utriu_idxs = {}
        # for rx, cxs in rx_to_neighb_cxs.items():
        #     rxs = np.full_like(cxs, fill_value=rx)
        #     multi_index = (rxs, cxs)
        #     utriu_idxs = triu_condense_multi_index(
        #         multi_index, dims=(n, n), symetric=True)
        #     rx_to_neighb_utriu_idxs[rx] = utriu_idxs

        # self.utriu_dists = utriu_dists
        self.rx_to_neighb_cxs = rx_to_neighb_cxs
        # self.rx_to_neighb_utriu_idxs = rx_to_neighb_utriu_idxs

        # Compute speed and direction of every boid
        self.speeds = np.linalg.norm(self.vel, axis=1)
        self.dirs = self.vel / self.speeds[:, None]
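
A small self-contained sketch of the grouping idiom used above, with hand-written neighbor pairs standing in for real pdist output (the index values are illustrative):

import numpy as np
import kwarray
import ubelt as ub

# Symmetric neighbor pairs written as parallel row / column index arrays
neighb_rxs = np.array([0, 0, 1, 1, 2, 2])
neighb_cxs = np.array([1, 2, 0, 2, 0, 1])

# Group the column indices by their row index
group_rxs, groupxs = kwarray.group_indices(neighb_rxs)
group_cxs = kwarray.apply_grouping(neighb_cxs, groupxs)

# Pair each row index with its array of neighbor indices
rx_to_neighb_cxs = ub.dzip(group_rxs, group_cxs)
print(rx_to_neighb_cxs)  # maps 0 -> [1, 2], 1 -> [0, 2], 2 -> [0, 1]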
Example #5
    def init_test_mode(infr):
        from graphid.core import nx_dynamic_graph
        infr.print('init_test_mode')
        infr.test_mode = True
        # infr.edge_truth = {}
        infr.metrics_list = []
        infr.test_state = {
            'n_decision': 0,
            'n_algo': 0,
            'n_manual': 0,
            'n_true_merges': 0,
            'n_error_edges': 0,
            'confusion': None,
        }
        infr.test_gt_pos_graph = nx_dynamic_graph.DynConnGraph()
        infr.test_gt_pos_graph.add_nodes_from(infr.aids)
        infr.nid_to_gt_cc = ub.group_items(infr.aids, infr.orig_name_labels)
        infr.node_truth = ub.dzip(infr.aids, infr.orig_name_labels)

        # infr.real_n_pcc_mst_edges = sum(
        #     len(cc) - 1 for cc in infr.nid_to_gt_cc.values())
        # util.cprint('real_n_pcc_mst_edges = %r' % (
        #     infr.real_n_pcc_mst_edges,), 'red')

        infr.metrics_list = []
        infr.nid_to_gt_cc = ub.group_items(infr.aids, infr.orig_name_labels)
        infr.real_n_pcc_mst_edges = sum(
            len(cc) - 1 for cc in infr.nid_to_gt_cc.values())
        infr.print('real_n_pcc_mst_edges = %r' % (infr.real_n_pcc_mst_edges, ),
                   color='red')
Example #6
def _query_nvidia_smi(mode, fields):
    """
    Runs nvidia-smi in query mode

    Args:
        mode (str): the query cli flag to pass to nvidia-smi
        fields (List[str]): csv header fields to query

    Returns:
        List[Dict[str, str]]: parsed csv output
    """
    header = ','.join(fields)
    command = [
        'nvidia-smi', '--{}={}'.format(mode, header), '--format=csv,noheader'
    ]
    info = ub.cmd(command)
    if info['ret'] != 0:
        print(info['out'])
        print(info['err'])
        raise Exception('unable to call nvidia-smi: ret={}'.format(
            info['ret']))
    rows = []
    for line in info['out'].split('\n'):
        line = line.strip()
        if line:
            parts = [p.strip() for p in line.split(',')]
            row = ub.dzip(fields, parts)
            rows.append(row)
    return rows
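
To illustrate the row-parsing idiom at the end of that function, a tiny standalone sketch that substitutes a hard-coded line for real nvidia-smi output:

import ubelt as ub

fields = ['index', 'memory.total', 'memory.used']
line = '0, 11170 MiB, 1157 MiB'  # stand-in for one csv line from nvidia-smi
parts = [p.strip() for p in line.split(',')]
row = ub.dzip(fields, parts)
print(row)
# {'index': '0', 'memory.total': '11170 MiB', 'memory.used': '1157 MiB'}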
Example #7
 def _set_error_edges(infr, nid, new_error_edges):
     # flag error edges
     infr.nid_to_errors[nid] = new_error_edges
     # choose one and give it insanely high priority
     if infr.params['inference.update_attrs']:
         infr.set_edge_attrs('maybe_error', ub.dzip(new_error_edges,
                                                    [True]))
     infr._increase_priority(new_error_edges, 10)
Example #8
    def apply_match_scores(infr):
        """

        Applies precomputed matching scores to edges that already exist in the
        graph. Typically you should run infr.apply_match_edges() before running
        this.

        Example:
            >>> # ENABLE_DOCTEST
            >>> infr = testdata_infr('PZ_MTEST')
            >>> infr.exec_matching()
            >>> infr.apply_match_edges()
            >>> infr.apply_match_scores()
            >>> infr.get_edge_attrs('score')
        """
        if infr.cm_list is None:
            infr.print('apply_match_scores - no scores to apply!')
            return
        infr.print('apply_match_scores', 1)
        edges = list(infr.graph.edges())
        edge_to_data = infr._get_cm_edge_data(edges)

        # Remove existing attrs
        util.nx_delete_edge_attr(infr.graph, 'score')
        util.nx_delete_edge_attr(infr.graph, 'rank')
        util.nx_delete_edge_attr(infr.graph, 'normscore')

        edges = list(edge_to_data.keys())
        edge_scores = list(util.take_column(edge_to_data.values(), 'score'))
        edge_scores = util.replace_nones(edge_scores, np.nan)
        edge_scores = np.array(edge_scores)
        edge_ranks = np.array(util.take_column(edge_to_data.values(), 'rank'))
        # take the inf-norm
        normscores = edge_scores / util.safe_max(edge_scores, nans=False)

        # Add new attrs
        infr.set_edge_attrs('score', ub.dzip(edges, edge_scores))
        infr.set_edge_attrs('rank', ub.dzip(edges, edge_ranks))

        # Hack away zero probabilities
        # probs = np.vstack([p_nomatch, p_match, p_notcomp]).T + 1e-9
        # probs = util.normalize(probs, axis=1, ord=1, out=probs)
        # entropy = -(np.log2(probs) * probs).sum(axis=1)
        infr.set_edge_attrs('normscore', dict(zip(edges, normscores)))
Example #9
    def hardcase_review_gen(infr):
        """
        Subiterator for hardcase review

        Re-review non-confident edges that vsone did not classify correctly
        """
        infr.print('==============================', color='white')
        infr.print('--- HARDCASE PRIORITY LOOP ---', color='white')

        verifiers = infr.learn_evaluation_verifiers()
        verif = verifiers['match_state']

        edges_ = list(infr.edges())
        real_ = list(infr.edge_decision_from(edges_))
        flags_ = [r in {POSTV, NEGTV, INCMP} for r in real_]
        real = list(ub.compress(real_, flags_))
        edges = list(ub.compress(edges_, flags_))

        hardness = 1 - verif.easiness(edges, real)

        if True:
            df = pd.DataFrame({'edges': edges, 'real': real})
            df['hardness'] = hardness

            pred = verif.predict(edges)
            df['pred'] = pred.values

            df = df.sort_values('hardness', ascending=False)
            infr.print('hardness analysis')
            infr.print(str(df))

            infr.print('infr status: ' + ub.repr2(infr.status()))

        # Don't re-review anything that was confidently reviewed
        # CONFIDENCE = const.CONFIDENCE
        # CODE_TO_INT = CONFIDENCE.CODE_TO_INT.copy()
        # CODE_TO_INT[CONFIDENCE.CODE.UNKNOWN] = 0
        # conf = ub.take(CODE_TO_INT, infr.gen_edge_values(
        #     'confidence', edges, on_missing='default',
        #     default=CONFIDENCE.CODE.UNKNOWN))

        # This should only be run with certain params
        assert not infr.params['autoreview.enabled']
        assert not infr.params['redun.enabled']
        assert not infr.params['ranking.enabled']
        assert infr.params['inference.enabled']
        # const.CONFIDENCE.CODE.PRETTY_SURE
        if infr.params['queue.conf.thresh'] is None:
            # != 'pretty_sure':
            infr.print('WARNING: should queue.conf.thresh = "pretty_sure"?')

        # work around add_candidate_edges
        infr.prioritize(metric='hardness', edges=edges, scores=hardness)
        infr.set_edge_attrs('hardness', ub.dzip(edges, hardness))
        yield from infr._inner_priority_gen(use_refresh=False)
Example #10
def _precompute_class_weights(dset, workers=0, mode='median-idf'):
    """
    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> harn = setup_harn(0, workers=0, xpu='cpu').initialize()
        >>> dset = harn.datasets['train']
    """

    assert mode in ['median-idf', 'log-median-idf']

    total_freq = _cached_class_frequency(dset, workers=workers)

    def logb(arr, base):
        if base == 'e':
            return np.log(arr)
        elif base == 2:
            return np.log2(arr)
        elif base == 10:
            return np.log10(arr)
        else:
            out = np.log(arr)
            out /= np.log(base)
            return out

    _min, _max = np.percentile(total_freq, [5, 95])
    is_valid = (_min <= total_freq) & (total_freq <= _max)
    if np.any(is_valid):
        middle_value = np.median(total_freq[is_valid])
    else:
        middle_value = np.median(total_freq)

    # variant of median-inverse-frequency
    nonzero_freq = total_freq[total_freq != 0]
    if len(nonzero_freq):
        total_freq[total_freq == 0] = nonzero_freq.min() / 2

    if mode == 'median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
    elif mode == 'log-median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
        base = 2
        base = np.exp(1)
        weights = logb(weights + (base - 1), base)
        weights = np.maximum(weights, .1)
        weights = np.minimum(weights, 10)
    else:
        raise KeyError('mode = {!r}'.format(mode))

    weights = np.round(weights, 2)
    cname_to_weight = ub.dzip(dset.classes, weights)
    print('weights: ' + ub.repr2(cname_to_weight))
    return weights
Example #11
def color_nodes(graph,
                labelattr='label',
                brightness=.878,
                outof=None,
                sat_adjust=None):
    """ Colors edges and nodes by nid """
    node_to_lbl = nx.get_node_attributes(graph, labelattr)
    unique_lbls = sorted(set(node_to_lbl.values()))
    ncolors = len(unique_lbls)
    if outof is None:
        if (ncolors) == 1:
            unique_colors = [util.Color('lightblue').as01()]
        elif (ncolors) == 2:
            # https://matplotlib.org/examples/color/named_colors.html
            unique_colors = ['royalblue', 'orange']
            unique_colors = [util.Color(c).as01('bgr') for c in unique_colors]
        else:
            unique_colors = util.distinct_colors(ncolors,
                                                 brightness=brightness)
    else:
        unique_colors = util.distinct_colors(outof, brightness=brightness)

    if sat_adjust:
        unique_colors = [
            util.Color(c).adjust_hsv(0.0, sat_adjust, 0.0)
            for c in unique_colors
        ]
    # Find edges and aids strictly between two nids
    if outof is None:
        lbl_to_color = ub.dzip(unique_lbls, unique_colors)
    else:
        gray = util.Color('lightgray').as01('bgr')
        unique_colors = [gray] + unique_colors
        offset = max(1, min(unique_lbls)) - 1
        node_to_lbl = ub.map_vals(lambda nid: max(0, nid - offset),
                                  node_to_lbl)
        lbl_to_color = ub.dzip(range(outof + 1), unique_colors)
    node_to_color = ub.map_vals(lbl_to_color, node_to_lbl)
    nx.set_node_attributes(graph, name='color', values=node_to_color)
    nx_ensure_agraph_color(graph)
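
The tail of that function builds a label-to-color table with dzip and then looks it up per node via ub.map_vals, which accepts an indexable in place of a callable. A compact sketch with made-up labels and colors:

import ubelt as ub

unique_lbls = [1, 2]
unique_colors = [(0.0, 0.0, 1.0), (1.0, 0.5, 0.0)]  # made-up RGB tuples

lbl_to_color = ub.dzip(unique_lbls, unique_colors)
node_to_lbl = {'a': 1, 'b': 2, 'c': 1}

# The dzip table acts as the lookup used to color each node
node_to_color = ub.map_vals(lbl_to_color, node_to_lbl)
print(node_to_color)
# {'a': (0.0, 0.0, 1.0), 'b': (1.0, 0.5, 0.0), 'c': (0.0, 0.0, 1.0)}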
Example #12
 def _purge_error_edges(infr, nid):
     """
     Removes all error edges associated with a PCC so they can be recomputed
     or resolved.
     """
     old_error_edges = infr.nid_to_errors.pop(nid, [])
     # Remove priority from old error edges
     if infr.params['inference.update_attrs']:
         infr.set_edge_attrs('maybe_error', ub.dzip(old_error_edges,
                                                    [None]))
     infr._remove_edge_priority(old_error_edges)
     was_clean = len(old_error_edges) > 0
     return was_clean
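
Examples #7 and #12 form a flag/unflag pair: broadcasting True marks edges as possible errors and broadcasting None clears the mark again. A minimal sketch of that idiom against plain networkx (the graph and edges here are illustrative, not from graphid):

import networkx as nx
import ubelt as ub

graph = nx.Graph([(1, 2), (2, 3)])
error_edges = [(1, 2)]

# Flag: broadcast True over the chosen edges
nx.set_edge_attributes(graph, name='maybe_error',
                       values=ub.dzip(error_edges, [True]))
# Unflag: broadcast None over the same edges to clear the marker
nx.set_edge_attributes(graph, name='maybe_error',
                       values=ub.dzip(error_edges, [None]))

print(nx.get_edge_attributes(graph, 'maybe_error'))  # {(1, 2): None}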
Example #13
    def on_within(infr, edge, decision, prev_decision, nid, split_nids=None):
        """
        Callback when a review is made inside a PCC

        Args:
            edge: the edge reviewed
            decision: the new decision
            prev_decision: the old decision
            nid: the old nid the edge is inside of
            split_nids: the tuple of new nids created if this decision splits a PCC
        """
        action = ['within']

        infr._update_neg_metagraph(decision,
                                   prev_decision,
                                   nid,
                                   nid,
                                   split_nids=split_nids)

        if split_nids is not None:
            # A split occurred
            if infr.params['inference.update_attrs']:
                new_nid1, new_nid2 = split_nids
                cc1 = infr.pos_graph.component(new_nid1)
                cc2 = infr.pos_graph.component(new_nid2)
                infr.set_node_attrs('name_label', ub.dzip(cc1, [new_nid1]))
                infr.set_node_attrs('name_label', ub.dzip(cc2, [new_nid2]))
            action += ['split']
        else:
            if decision == POSTV:
                action += ['pos-evidence']
            elif decision == INCMP:
                action += ['incomp-evidence']
            elif decision == NEGTV:
                action += ['neg-evidence']
            else:
                action += ['other-evidence']
        return action
Example #14
    def predict_proba_df(verif, edges):
        """
        CommandLine:
            python -m graphid.demo DummyVerif.predict_edges

        Example:
            >>> from graphid import demo
            >>> kwargs = dict(num_pccs=40, size=2)
            >>> infr = demo.demodata_infr(**kwargs)
            >>> verif = infr.dummy_verif
            >>> edges = list(infr.graph.edges())
            >>> probs = verif.predict_proba_df(edges)
        """
        infr = verif.infr
        edges = list(it.starmap(verif.infr.e_, edges))
        prob_cache = infr.task_probs['match_state']
        is_miss = np.array([e not in prob_cache for e in edges])
        # is_hit = ~is_miss
        if np.any(is_miss):
            miss_edges = list(ub.compress(edges, is_miss))
            miss_truths = [verif._get_truth(edge) for edge in miss_edges]
            grouped_edges = ub.group_items(miss_edges, miss_truths)
            # Need to make this deterministic too
            states = [POSTV, NEGTV, INCMP]
            for key in sorted(grouped_edges.keys()):
                group = grouped_edges[key]
                probs0 = util.randn(shape=[len(group)], rng=verif.rng, a_max=1,
                                    a_min=0, **verif.dummy_params[key])
                # Just randomly assign other probs
                probs1 = verif.rng.rand(len(group)) * (1 - probs0)
                probs2 = 1 - (probs0 + probs1)
                for edge, probs in zip(group, zip(probs0, probs1, probs2)):
                    prob_cache[edge] = ub.dzip(states, probs)

        probs = pd.DataFrame(
            list(ub.take(prob_cache, edges)),
            index=util.ensure_multi_index(edges, ('aid1', 'aid2'))
        )
        return probs
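
Each cache entry above maps a state label to its probability for one edge. A tiny sketch of that per-edge dictionary (the literal state strings are placeholders for whatever POSTV, NEGTV and INCMP resolve to in graphid):

import ubelt as ub

states = ['match', 'nomatch', 'notcomp']  # placeholders for POSTV, NEGTV, INCMP
probs = (0.7, 0.2, 0.1)
print(ub.dzip(states, probs))
# {'match': 0.7, 'nomatch': 0.2, 'notcomp': 0.1}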
Example #15
    def hypothesis_errors(infr, pos_subgraph, neg_edges):
        if not nx.is_connected(pos_subgraph):
            raise AssertionError('Not connected' + repr(pos_subgraph))
        infr.print(
            'Find hypothesis errors in {} nodes with {} neg edges'.format(
                len(pos_subgraph), len(neg_edges)), 3)

        pos_edges = list(pos_subgraph.edges())

        neg_weight = infr._mincut_edge_weights(neg_edges)
        pos_weight = infr._mincut_edge_weights(pos_edges)

        capacity = 'weight'
        nx.set_edge_attributes(pos_subgraph,
                               name=capacity,
                               values=ub.dzip(pos_edges, pos_weight))

        # Solve a multicut problem for multiple pairs of terminal nodes.
        # Running multiple min-cuts produces a k-factor approximation
        maybe_error_edges = set([])
        for (s, t), join_weight in zip(neg_edges, neg_weight):
            cut_weight, parts = nx.minimum_cut(pos_subgraph,
                                               s,
                                               t,
                                               capacity=capacity)
            cut_edgeset = nxu.edges_cross(pos_subgraph, *parts)
            if join_weight < cut_weight:
                join_edgeset = {(s, t)}
                chosen = join_edgeset
                hypothesis = POSTV
            else:
                chosen = cut_edgeset
                hypothesis = NEGTV
            for edge in chosen:
                if edge not in maybe_error_edges:
                    maybe_error_edges.add(edge)
                    yield (edge, hypothesis)
Example #16
    def _set_neg_redun_flags(infr, nid1, other_nids, flags):
        """
        Flags or unflags an nid1 as negative redundant with other nids.
        (TODO: NEG REDUN CAN BE CONSOLIDATED VIA NEG-META-GRAPH)
        """
        needs_unflag = []
        needs_flag = []
        already_flagged = []
        already_unflagged = []
        cc1 = infr.pos_graph.component(nid1)
        other_nids = list(other_nids)

        # Determine what needs what
        for nid2, flag in zip(other_nids, flags):
            was_neg_redun = infr.neg_redun_metagraph.has_edge(nid1, nid2)
            if flag:
                if not was_neg_redun:
                    needs_flag.append(nid2)
                else:
                    already_flagged.append(nid2)
            else:
                if was_neg_redun:
                    needs_unflag.append(nid2)
                else:
                    already_unflagged.append(nid2)

        # Print summary of what will be done
        def _print_helper(what, others, already=False):
            if len(others) == 0:
                return
            n_other_thresh = 4
            if len(others) > n_other_thresh:
                omsg = '#others={}'.format(len(others))
            else:
                omsg = 'others={}'.format(others)
            amsg = '(already done)' if already else ''
            msg = '{} nid={}, {} {}'.format(what, nid1, omsg, amsg)
            infr.print(msg, 5 + already)

        _print_helper('neg_redun flag=T', needs_flag)
        _print_helper('neg_redun flag=T', already_flagged, already=True)
        _print_helper('neg_redun flag=F', needs_unflag)
        _print_helper('neg_redun flag=F', already_unflagged, already=True)

        # Do the flagging/unflagging
        for nid2 in needs_flag:
            infr.neg_redun_metagraph.add_edge(nid1, nid2)
        for nid2 in needs_unflag:
            infr.neg_redun_metagraph.remove_edge(nid1, nid2)

        # Update priorities and attributes
        if infr.params['inference.update_attrs'] or infr.queue is not None:
            all_flagged_edges = []
            # Unprioritize all edges between flagged nids
            for nid2 in it.chain(needs_flag, already_flagged):
                cc2 = infr.pos_graph.component(nid2)
                all_flagged_edges.extend(nxu.edges_cross(infr.graph, cc1, cc2))

        if infr.queue is not None or infr.params['inference.update_attrs']:
            all_unflagged_edges = []
            unrev_unflagged_edges = []
            unrev_graph = infr.unreviewed_graph
            # Reprioritize unreviewed edges between unflagged nids
            # Marked inferred state of all edges
            for nid2 in it.chain(needs_unflag, already_unflagged):
                cc2 = infr.pos_graph.component(nid2)
                if infr.queue is not None:
                    _edges = nxu.edges_cross(unrev_graph, cc1, cc2)
                    unrev_unflagged_edges.extend(_edges)
                if infr.params['inference.update_attrs']:
                    _edges = nxu.edges_cross(infr.graph, cc1, cc2)
                    all_unflagged_edges.extend(_edges)

            # Batch set prioritize
            infr._remove_edge_priority(all_flagged_edges)
            infr._reinstate_edge_priority(unrev_unflagged_edges)

            if infr.params['inference.update_attrs']:
                infr.set_edge_attrs(
                    'inferred_state', ub.dzip(all_flagged_edges, ['diff'])
                )
                infr.set_edge_attrs(
                    'inferred_state', ub.dzip(all_unflagged_edges, [None])
                )
Example #17
def draw_perclass_prcurve(cx_to_info, classes=None, prefix='', fnum=1, **kw):
    """
    Args:
        cx_to_info (PerClass_Measures | Dict):

    Example:
        >>> # xdoctest: +REQUIRES(module:kwplot)
        >>> from kwcoco.metrics.drawing import *  # NOQA
        >>> from kwcoco.metrics import DetectionMetrics
        >>> dmet = DetectionMetrics.demo(
        >>>     nimgs=3, nboxes=(0, 10), n_fp=(0, 3), n_fn=(0, 2), classes=3, score_noise=0.1, box_noise=0.1, with_probs=False)
        >>> cfsn_vecs = dmet.confusion_vectors()
        >>> print(cfsn_vecs.data.pandas())
        >>> classes = cfsn_vecs.classes
        >>> cx_to_info = cfsn_vecs.binarize_ovr().measures()['perclass']
        >>> print('cx_to_info = {}'.format(ub.repr2(cx_to_info, nl=1)))
        >>> import kwplot
        >>> kwplot.autompl()
        >>> draw_perclass_prcurve(cx_to_info, classes)
        >>> # xdoctest: +REQUIRES(--show)
        >>> kwplot.show_if_requested()

    Ignore:
        from kwcoco.metrics.drawing import *  # NOQA
        import xdev
        globals().update(xdev.get_func_kwargs(draw_perclass_prcurve))

    """
    import kwplot
    # Sort by descending AP
    cxs = list(cx_to_info.keys())
    priority = np.array([item['ap'] for item in cx_to_info.values()])
    priority[np.isnan(priority)] = -np.inf
    cxs = list(ub.take(cxs, np.argsort(priority)))[::-1]
    aps = []
    xydata = ub.odict()
    for cx in cxs:
        info = cx_to_info[cx]
        catname = classes[cx] if isinstance(cx, int) else cx
        ap = info['ap']
        if 'pr' in info:
            pr = info['pr']
        elif 'ppv' in info:
            pr = (info['ppv'], info['tpr'])
        elif 'prec' in info:
            pr = (info['prec'], info['rec'])
        else:
            raise KeyError('pr, prec, or ppv not in info')

        if np.isfinite(ap):
            aps.append(ap)
            (precision, recall) = pr
        else:
            aps.append(np.nan)
            precision, recall = [0], [0]

        if precision is None and recall is None:
            # I thought AP=nan in this case, but I missed something
            precision, recall = [0], [0]

        label_suffix = _realpos_label_suffix(info)
        label = 'ap={:0.2f}: {} ({})'.format(ap, catname, label_suffix)

        xydata[label] = (recall, precision)

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'Mean of empty slice', RuntimeWarning)
        mAP = np.nanmean(aps)

    if 0:
        import seaborn as sns
        import pandas as pd
        # sns.set()
        # TODO: deprecate multi_plot for seaborn?
        data_groups = {
            key: {'recall': r, 'precision': p}
            for key, (r, p) in xydata.items()
        }
        print('data_groups = {}'.format(ub.repr2(data_groups, nl=3)))

        longform = []
        for key, subdata in data_groups.items():
            subdata = pd.DataFrame.from_dict(subdata)
            subdata['label'] = key
            longform.append(subdata)
        data = pd.concat(longform)

        fig = kwplot.figure(fnum=fnum)
        ax = fig.gca()
        longform = []
        for key, (r, p) in xydata.items():
            subdata = pd.DataFrame.from_dict({'recall': r, 'precision': p, 'label': key})
            longform.append(subdata)
        data = pd.concat(longform)

        palette = ub.dzip(xydata.keys(), kwplot.distinct_colors(len(xydata)))
        # markers = ub.dzip(xydata.keys(), kwplot.distinct_markers(len(xydata)))

        sns.lineplot(
            data=data, x='recall', y='precision',
            hue='label', style='label', ax=ax,
            # markers=markers,
            estimator=None,
            ci=0,
            hue_order=list(xydata.keys()),
            palette=palette,
        )
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)

    else:
        ax = kwplot.multi_plot(
            xydata=xydata, fnum=fnum,
            xlim=(0, 1), ylim=(0, 1), xpad=0.01, ypad=0.01,
            xlabel='recall', ylabel='precision',
            err_style='bars',
            title=prefix + 'OVR mAP={:.4f}'.format(mAP),
            legend_loc='lower right',
            color='distinct', linestyle='cycle', marker='cycle', **kw
        )
    return ax
Example #18
    def ensure_priority_scores(infr, priority_edges):
        """
        Ensures that priority attributes are assigned to the edges.
        This does not change the state of the queue.

        Doctest:
            >>> from graphid import demo
            >>> infr = demo.demodata_infr(num_pccs=6, p_incon=.5, size_std=2)
            >>> edges = list(infr.edges())
            >>> infr.ensure_priority_scores(edges)
        """
        if infr.verifiers:
            infr.print(
                'Prioritizing {} edges with one-vs-one probs'.format(
                    len(priority_edges)), 1)

            infr.ensure_task_probs(priority_edges)

            primary_task = 'match_state'
            match_probs = infr.task_probs[primary_task]
            primary_thresh = infr.task_thresh[primary_task]

            # Read match_probs into a DataFrame
            primary_probs = pd.DataFrame(
                list(ub.take(match_probs, priority_edges)),
                index=util.ensure_multi_index(priority_edges,
                                              ('aid1', 'aid2')))

            # Convert match-state probabilities into priorities
            prob_match = primary_probs[POSTV]

            # Initialize priorities to probability of matching
            default_priority = prob_match.copy()

            # If the edges are currently between the same individual, then
            # prioritize by non-positive probability (because those edges might
            # expose an inconsistency)
            already_pos = [
                infr.pos_graph.node_label(u) == infr.pos_graph.node_label(v)
                for u, v in priority_edges
            ]
            default_priority[already_pos] = 1 - default_priority[already_pos]

            if infr.params['autoreview.enabled']:
                if infr.params['autoreview.prioritize_nonpos']:
                    # Give positives that pass automatic thresholds high priority
                    _probs = primary_probs[POSTV]
                    flags = _probs > primary_thresh[POSTV]
                    default_priority[flags] = np.maximum(
                        default_priority[flags], _probs[flags]) + 1

                    # Give negatives that pass automatic thresholds high priority
                    _probs = primary_probs[NEGTV]
                    flags = _probs > primary_thresh[NEGTV]
                    default_priority[flags] = np.maximum(
                        default_priority[flags], _probs[flags]) + 1

                    # Give not-comps that pass automatic thresholds high priority
                    _probs = primary_probs[INCMP]
                    flags = _probs > primary_thresh[INCMP]
                    default_priority[flags] = np.maximum(
                        default_priority[flags], _probs[flags]) + 1

            infr.set_edge_attrs('prob_match', prob_match.to_dict())
            infr.set_edge_attrs('default_priority', default_priority.to_dict())

            metric = 'default_priority'
            priority = default_priority
        elif infr.cm_list is not None:
            infr.print(
                'Prioritizing {} edges with one-vs-vsmany scores'.format(
                    len(priority_edges)), 1)
            # Not given any deploy classifier, this is the best we can do
            scores = infr._make_lnbnn_scores(priority_edges)
            metric = 'normscore'
            priority = scores
        else:
            infr.print('WARNING: No verifiers to prioritize {} edge(s)'.format(
                len(priority_edges)))
            metric = 'random'
            priority = np.zeros(len(priority_edges)) + 1e-6

        infr.set_edge_attrs(metric, ub.dzip(priority_edges, priority))
        return metric, priority
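
Unlike the broadcast cases, the final set_edge_attrs call pairs each edge with its own score; a minimal sketch of that elementwise form (edges and scores are made up):

import numpy as np
import ubelt as ub

priority_edges = [(1, 2), (3, 4)]
priority = np.array([0.9, 0.1])
print(ub.dzip(priority_edges, priority))  # maps (1, 2) -> 0.9 and (3, 4) -> 0.1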
Example #19
    def _fix_keys(model_state_dict):
        """
        Hack around DataParallel wrapper. If there is nothing in common between
        the two models check to see if prepending 'module.' to other keys fixes
        it.
        """
        other_keys = set(model_state_dict)
        self_keys = set(self_state)

        if 0:
            # Automatic way to reduce nodes in the trees?
            # If node b always follows node a, can we contract it?
            nodes1 = [n for p in other_keys for n in p.split('.')]
            nodes2 = [n for p in self_keys for n in p.split('.')]
            tups1 = list(tup for key in other_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            tups2 = list(tup for key in self_keys
                         for tup in ub.iter_window(key.split('.'), 2))
            x = ub.ddict(list)
            for a, b in tups1:
                x[a].append(b)
            for a, b in tups2:
                x[a].append(b)

            nodehist = ub.dict_hist(nodes1 + nodes2)

            for k, v in x.items():
                print('----')
                print(k)
                print(nodehist[k])
                follow_hist = ub.dict_hist(v)
                print(follow_hist)
                total = sum(follow_hist.values())
                if ub.allsame(follow_hist.values()) and total == nodehist[k]:
                    print('CONTRACT')

            # pair_freq = ub.dict_hist(ub.flatten([tups1, tups2]))
            # print(forest_str(paths_to_otree(other_keys, '.')))

        # common_keys = other_keys.intersection(self_keys)
        # if not common_keys:
        if not other_keys.issubset(self_keys):
            if association == 'strict':
                pass
            elif association == 'module-hack':
                # If there are no common keys try a hack
                prefix = 'module.'

                def smap(f, ss):
                    return set(map(f, ss))

                def fix1(k):
                    return prefix + k

                def fix2(k):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                if smap(fix1, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix1, model_state_dict)
                elif smap(fix2, other_keys).intersection(self_keys):
                    model_state_dict = ub.map_keys(fix2, model_state_dict)
            elif association == 'prefix-hack':
                import functools

                def add_prefix(k, prefix):
                    return prefix + k

                def remove_prefix(k, prefix):
                    if k.startswith(prefix):
                        return k[len(prefix):]

                # set1 = other_keys
                # target_set2 = self_keys
                found = _best_prefix_transform(other_keys, self_keys)
                if found is not None:
                    for action, prefix in found['transform']:
                        if action == 'add':
                            func = functools.partial(add_prefix, prefix=prefix)
                        elif action == 'remove':
                            func = functools.partial(remove_prefix,
                                                     prefix=prefix)
                        else:
                            raise AssertionError
                        model_state_dict = ub.map_keys(func, model_state_dict)
            elif association in {'embedding', 'isomorphism'}:
                if verbose > 1:
                    print('Using subpath {} association, may take some time'.
                          format(association))
                # I believe this is the correct way to solve the problem
                paths1 = sorted(other_keys)
                paths2 = sorted(self_state)

                if 1:
                    # hack to filter to reduce tree size in embedding problem
                    def shrink_paths(paths):
                        new_paths = []
                        for p in paths:
                            p = p.replace('.0', ':0')
                            p = p.replace('.1', ':1')
                            p = p.replace('.2', ':2')
                            p = p.replace('.3', ':3')
                            p = p.replace('.4', ':4')
                            p = p.replace('.5', ':5')
                            p = p.replace('.6', ':6')
                            p = p.replace('.7', ':7')
                            p = p.replace('.8', ':8')
                            p = p.replace('.9', ':9')
                            p = p.replace('.weight', ':weight')
                            p = p.replace('.bias', ':bias')
                            p = p.replace('.num_batches_tracked',
                                          ':num_batches_tracked')
                            p = p.replace('.running_mean', ':running_mean')
                            p = p.replace('.running_var', ':running_var')
                            # p = p.replace('.conv1', ':conv1')
                            # p = p.replace('.conv2', ':conv2')
                            # p = p.replace('.conv3', ':conv3')
                            # p = p.replace('.bn1', ':bn1')
                            # p = p.replace('.bn2', ':bn2')
                            # p = p.replace('.bn3', ':bn3')
                            new_paths.append(p)
                        return new_paths

                    # Reducing the depth saves a lot of time
                    paths1_ = shrink_paths(paths1)
                    paths2_ = shrink_paths(paths2)

                subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                    paths1_, paths2_, sep='.', mode=association)
                subpaths1 = [p.replace(':', '.') for p in subpaths1]
                subpaths2 = [p.replace(':', '.') for p in subpaths2]
                mapping = ub.dzip(subpaths1, subpaths2)
                if verbose > 1:
                    other_unmapped = sorted(other_keys - set(mapping.keys()))
                    self_unmapped = sorted(self_keys - set(mapping.values()))
                    print('-- embed association (other -> self) --')
                    print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                    print('self_unmapped = {}'.format(
                        ub.repr2(self_unmapped, nl=1)))
                    print('other_unmapped = {}'.format(
                        ub.repr2(other_unmapped, nl=1)))
                    print('len(mapping) = {}'.format(
                        ub.repr2(len(mapping), nl=1)))
                    print('len(self_unmapped) = {}'.format(
                        ub.repr2(len(self_unmapped), nl=1)))
                    print('len(other_unmapped) = {}'.format(
                        ub.repr2(len(other_unmapped), nl=1)))
                    print('-- end embed association --')

                # HACK: something might be wrong, there was an instance with
                # HRNet_w32 where multiple keys mapped to the same key
                # bad keys were incre_modules.3.0.conv1.weight and conv1.weight
                #
                # This will not error, but may produce bad output
                try:
                    model_state_dict = ub.map_keys(lambda k: mapping.get(k, k),
                                                   model_state_dict)
                except Exception as ex:
                    HACK = 1
                    if HACK:
                        new_state_dict_ = {}
                        for k, v in model_state_dict.items():
                            new_state_dict_[mapping.get(k, k)] = v
                        model_state_dict = new_state_dict_
                        warnings.warn('ex = {!r}'.format(ex))
                    else:
                        raise
            else:
                raise KeyError(association)
        return model_state_dict
Example #20
def _dz(a, b):
    a = a.tolist() if isinstance(a, np.ndarray) else list(a)
    b = b.tolist() if isinstance(b, np.ndarray) else list(b)
    return ub.dzip(a, b)
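
A short usage sketch of the helper above: numpy arrays are converted to lists so ub.dzip can pair them elementwise (the helper is repeated so the sketch runs standalone; the inputs are made up):

import numpy as np
import ubelt as ub

def _dz(a, b):
    a = a.tolist() if isinstance(a, np.ndarray) else list(a)
    b = b.tolist() if isinstance(b, np.ndarray) else list(b)
    return ub.dzip(a, b)

print(_dz(np.array([1, 2, 3]), np.array([0.25, 0.5, 0.75])))
# {1: 0.25, 2: 0.5, 3: 0.75}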
Example #21
def gpu_info(new_mode=True, respect_visible_devices=True):
    """
    Run nvidia-smi and parse output

    Args:
        new_mode: internal argument that changes the underlying implementation

        respect_visible_devices (bool, default=True): if True respects
            CUDA_VISIBLE_DEVICES environment variable, otherwise returns
            data corresponding to physical GPU indexes.

    Returns:
        OrderedDict: info about each GPU indexed by gpu number

    Note:
        Not guaranteed to work if CUDA is not installed.

    Warnings:
        if nvidia-smi is not installed

    CommandLine:
        xdoctest -m netharn.device gpu_info --cuda

    Example:
        >>> # xdoctest: +REQUIRES(--cuda)
        >>> from netharn.device import gpu_info
        >>> gpus = gpu_info()
        >>> # xdoctest: +IGNORE_WANT
        >>> print('gpus = {}'.format(ub.repr2(gpus, nl=4)))
        >>> assert len(gpus) == torch.cuda.device_count()
        gpus = {
            0: {
                'gpu_uuid': 'GPU-348ebe36-252b-46fa-8a97-477ae331f6f4',
                'index': '0',
                'mem_avail': 10013.0,
                'mem_total': 11170.0,
                'mem_used': 1157.0,
                'memory.free': '10013 MiB',
                'memory.total': '11170 MiB',
                'memory.used': '1157 MiB',
                'name': 'GeForce GTX 1080 Ti',
                'num': 0,
                'num_compute_procs': 1,
                'procs': [
                    {
                        'gpu_num': 0,
                        'gpu_uuid': 'GPU-348ebe36-252b-46fa-8a97-477ae331f6f4',
                        'name': '/usr/bin/python',
                        'pid': '19912',
                        'type': 'C',
                        'used_memory': '567 MiB',
                    },
                ],
            },
        }


    """
    pass
    """
    Ignore:

        # official nvidia-smi python bindings
        pip install nvidia-ml-py

        import pynvml

        # TODO: make more efficient calls to nvidia-smi

        utilization.gpu
        utilization.memory
        compute_mode
        memory.total
        memory.used
        memory.free
        index
        name
        count

        nvidia-smi pmon --count 1

        nvidia-smi  -h
        nvidia-smi  --help-query-compute-apps
        nvidia-smi  --help-query-gpu

        nvidia-smi --help-query-accounted-apps
        nvidia-smi --help-query-supported-clocks
        nvidia-smi --help-query-retired-pages
        nvidia-smi --query-accounted-apps="pid" --format=csv

        nvidia-smi  --query-gpu="index,memory.total,memory.used,memory.free,count,name,gpu_uuid" --format=csv
        nvidia-smi  --query-compute-apps="pid,name,gpu_uuid,used_memory" --format=csv
        nvidia-smi  --query-accounted-apps="gpu_name,pid" --format=csv

        import timerit
        ti = timerit.Timerit(40, bestof=5, verbose=2)
        for timer in ti.reset('new1'):
            with timer:
                gpu_info(True)
        for timer in ti.reset('old'):
            with timer:
                gpu_info(False)
        for timer in ti.reset('xml'):
            with timer:
                gpu_info('xml')

        xdev.profile_now(gpu_info)('xml')

        for timer in ti.reset('cmd'):
            with timer:
                ub.cmd(['nvidia-smi', '--query', '--xml-format'])

        for timer in ti.reset('check_output'):
            with timer:
                import subprocess
                subprocess.check_output(['nvidia-smi', '--query', '--xml-format'])
    """
    if new_mode == 'xml':
        # Parse info out of the nvidia xml query
        # note, that even though this has less calls to nvidia-smi, there
        # is a lot more output, which makes it the slowest method especially
        # for multi-gpu systems
        import xml.etree.ElementTree as ET

        info = ub.cmd(['nvidia-smi', '--query', '--xml-format'])
        if info['ret'] != 0:
            print(info['out'])
            print(info['err'])
            warnings.warn('Problem running nvidia-smi: ret={}'.format(
                info['ret']))
            raise NvidiaSMIError
        xml_string = info['out']
        root = ET.fromstring(xml_string)

        gpus = {}
        for gpu_elem in root.findall('gpu'):
            gpu = {}
            gpu['uuid'] = gpu_elem.find('uuid').text
            gpu['name'] = gpu_elem.find('product_name').text
            gpu['num'] = int(gpu_elem.find('minor_number').text)
            gpu['procs'] = [{item.tag: item.text
                             for item in proc_elem}
                            for proc_elem in gpu_elem.find('processes')]

            for item in gpu_elem.find('fb_memory_usage'):
                gpu['memory.' + item.tag] = item.text

            gpu['mem_used'] = float(gpu['memory.used'].strip().replace(
                'MiB', ''))
            gpu['mem_total'] = float(gpu['memory.total'].strip().replace(
                'MiB', ''))
            gpu['mem_avail'] = gpu['mem_total'] - gpu['mem_used']
            gpus[gpu['num']] = gpu

            # Let each GPU know how many processes are currently using it
            num_compute_procs = 0
            num_graphics_procs = 0
            for proc in gpu['procs']:
                if proc['type'] == 'C':
                    num_compute_procs += 1
                elif proc['type'] == 'G':
                    num_graphics_procs += 1
                else:
                    raise NotImplementedError(proc['type'])
            gpu['num_compute_procs'] = num_compute_procs
            gpu['num_graphics_procs'] = num_graphics_procs

    elif new_mode:
        # This is slightly more robust than the old mode, but it also makes
        # more than one call to nvidia-smi and cannot return information about
        # graphics processes.
        fields = [
            'index', 'memory.total', 'memory.used', 'memory.free', 'name',
            'gpu_uuid'
        ]
        mode = 'query-gpu'
        try:
            gpu_rows = _query_nvidia_smi(mode, fields)
        except Exception as ex:
            warnings.warn('Problem running nvidia-smi: {!r}'.format(ex))
            raise NvidiaSMIError

        fields = ['pid', 'name', 'gpu_uuid', 'used_memory']
        mode = 'query-compute-apps'
        proc_rows = _query_nvidia_smi(mode, fields)

        # Coerce into the old-style format for backwards compatibility
        gpus = {}
        for row in gpu_rows:
            gpu = row.copy()
            num = int(gpu['index'])
            gpu['num'] = num
            gpu['mem_used'] = float(gpu['memory.used'].strip().replace(
                'MiB', ''))
            gpu['mem_total'] = float(gpu['memory.total'].strip().replace(
                'MiB', ''))
            gpu['mem_avail'] = gpu['mem_total'] - gpu['mem_used']
            gpu['procs'] = []
            gpus[num] = gpu

        gpu_uuid_to_num = {
            gpu['gpu_uuid']: gpu['num']
            for gpu in gpus.values()
        }

        for row in proc_rows:
            # Give each GPU info on which processes are using it
            proc = row.copy()
            proc['type'] = 'C'
            proc['gpu_num'] = gpu_uuid_to_num[proc['gpu_uuid']]
            num = proc['gpu_num']
            gpus[num]['procs'].append(proc)

        WITH_GPU_PROCS = False
        if WITH_GPU_PROCS:
            # Hacks in gpu-procs if enabled
            import re
            info = ub.cmd('nvidia-smi pmon -c 1')
            for line in info['out'].split('\n'):
                line = line.strip()
                if line and not line.startswith("#"):
                    parts = re.split(r'\s+', line, maxsplit=7)
                    if parts[1] != '-':
                        header = [
                            'gpu_num', 'pid', 'type', 'sm', 'mem', 'enc',
                            'dec', 'name'
                        ]
                        proc = ub.dzip(header, parts)
                        proc['gpu_num'] = int(proc['gpu_num'])
                        if proc['type'] == 'G':
                            gpu = gpus[proc['gpu_num']]
                            gpu['procs'].append(proc)
                            proc['gpu_uuid'] = gpu['gpu_uuid']

        for gpu in gpus.values():
            # Let each GPU know how many processes are currently using it
            num_compute_procs = 0
            num_graphics_procs = 0
            for proc in gpu['procs']:
                if proc['type'] == 'C':
                    num_compute_procs += 1
                elif proc['type'] == 'G':
                    num_graphics_procs += 1
                else:
                    raise NotImplementedError(proc['type'])

            # NOTE calling nvidia-smi in query mode does not seem to have
            # support for getting info about graphics procs.
            gpu['num_compute_procs'] = num_compute_procs
            if WITH_GPU_PROCS:
                gpu['num_graphics_procs'] = num_graphics_procs

    else:
        # This is the original implementation of this function. It parses the
        # direct output of nvidia smi, it is prone to failure if the format of
        # this program's output ever changes.
        try:
            result = ub.cmd('nvidia-smi')
            if result['ret'] != 0:
                warnings.warn('Problem running nvidia-smi.')
                raise NvidiaSMIError
        except Exception:
            warnings.warn('Could not run nvidia-smi.')
            raise NvidiaSMIError

        lines = result['out'].splitlines()

        gpu_lines = []
        proc_lines = []
        current = None

        state = '0_gpu_read'

        for line in lines:
            if current is None:
                # Signals the start of GPU info
                if line.startswith('|====='):
                    current = []
            else:
                if state == '0_gpu_read':
                    if len(line.strip()) == 0:
                        # End of GPU info
                        state = '1_proc_read'
                        current = None
                    elif line.startswith('+----'):
                        # Move to the next GPU
                        gpu_lines.append(current)
                        current = []
                    else:
                        current.append(line)
                elif state == '1_proc_read':
                    if line.startswith('+----'):
                        # Move to the next GPU
                        # End of proc info
                        state = 'terminate'
                        break
                    else:
                        proc_lines.append(line)
                else:
                    raise AssertionError(state)

        def parse_gpu_lines(lines):
            line1 = lines[0]
            line2 = lines[1]
            gpu = {}
            gpu['name'] = ' '.join(line1.split('|')[1].split()[1:-1])
            gpu['num'] = int(line1.split('|')[1].split()[0])

            mempart = line2.split('|')[2].strip()
            part1, part2 = mempart.split('/')
            gpu['mem_used'] = float(part1.strip().replace('MiB', ''))
            gpu['mem_total'] = float(part2.strip().replace('MiB', ''))
            gpu['mem_avail'] = gpu['mem_total'] - gpu['mem_used']
            return gpu

        def parse_proc_line(line):
            inner = '|'.join(line.split('|')[1:-1])
            if 'no running processes found' in inner.lower():
                # Handle "No running processes found" case in issue #2
                return None

            parts = [p.strip() for p in inner.split(' ')]
            parts = [p for p in parts if p]

            index = int(parts[0])
            pid = int(parts[1])
            proc_type = str(parts[2])
            proc_name = str(parts[3])
            used_mem = float(parts[4].replace('MiB', ''))

            proc = {
                'gpu_num': index,
                'pid': pid,
                'type': proc_type,
                'name': proc_name,
                'used_mem': used_mem,
            }
            return proc

        gpus = {}
        for num, lines in enumerate(gpu_lines):
            gpu = parse_gpu_lines(lines)
            assert num == gpu['num'], (
                'nums ({}, {}) do not agree. probably a parsing error'.format(
                    num, gpu['num']))
            assert num not in gpus, (
                'Multiple GPUs labeled as num {}. Probably a parsing error'.
                format(num))
            gpus[num] = gpu
            gpus[num]['procs'] = []

        for line in proc_lines:
            # Give each GPU info on which processes are using it
            proc = parse_proc_line(line)
            if proc is not None:
                num = proc['gpu_num']
                gpus[num]['procs'].append(proc)

        for gpu in gpus.values():
            # Let each GPU know how many processes are currently using it
            num_compute_procs = 0
            num_graphics_procs = 0
            for proc in gpu['procs']:
                if proc['type'] == 'C':
                    num_compute_procs += 1
                elif proc['type'] == 'G':
                    num_graphics_procs += 1
                else:
                    raise NotImplementedError(proc['type'])
            gpu['num_compute_procs'] = num_compute_procs
            gpu['num_graphics_procs'] = num_graphics_procs

    if respect_visible_devices:
        # Respect CUDA_VISIBLE_DEVICES, nvidia-smi does not respect this by
        # default so remap to gain the appropriate effect.
        val = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        parts = (p.strip() for p in val.split(','))
        visible_devices = [int(p) for p in parts if p]

        if visible_devices:
            remapped = {}
            for visible_idx, real_idx in enumerate(visible_devices):
                gpu = remapped[visible_idx] = gpus[real_idx]
                gpu['index'] = str(visible_idx)
                gpu['num'] = visible_idx
                gpu['real_num'] = real_idx
            gpus = remapped

    return gpus
Example #22
    def find_mst_edges(infr, label='name_label'):
        """
        Returns edges to augment existing PCCs (by label) in order to ensure
        they are connected with positive edges.

        Example:
            >>> # DISABLE_DOCTEST
            >>> from graphid.core.mixin_helpers import *  # NOQA
            >>> import ibeis
            >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
            >>> infr = ibeis.AnnotInference(ibs, 'all', autoinit=True)
            >>> label = 'orig_name_label'
            >>> label = 'name_label'
            >>> infr.find_mst_edges()
            >>> infr.ensure_mst()

        Ignore:
            old_mst_edges = [
                e for e, d in infr.edges(data=True)
                if d.get('user_id', None) == 'algo:mst'
            ]
            infr.graph.remove_edges_from(old_mst_edges)
            infr.pos_graph.remove_edges_from(old_mst_edges)
            infr.neg_graph.remove_edges_from(old_mst_edges)
            infr.incomp_graph.remove_edges_from(old_mst_edges)

        """
        # Find clusters by labels
        node_to_label = infr.get_node_attrs(label)
        label_to_nodes = ub.group_items(node_to_label.keys(),
                                        node_to_label.values())

        weight_heuristic = False
        # infr.ibs is not None
        if weight_heuristic:
            annots = infr.ibs.annots(infr.aids)
            node_to_time = ub.dzip(annots, annots.time)
            node_to_view = ub.dzip(annots, annots.viewpoint_code)
            enabled_heuristics = {
                'view_weight',
                'time_weight',
            }

        def _heuristic_weighting(nodes, avail_uv):
            avail_uv = np.array(avail_uv)
            weights = np.ones(len(avail_uv))

            if 'view_weight' in enabled_heuristics:
                from graphid.core import _rhomb_dist
                view_edge = [(node_to_view[u], node_to_view[v])
                             for (u, v) in avail_uv]
                view_weight = np.array([
                    _rhomb_dist.VIEW_CODE_DIST[(v1, v2)]
                    for (v1, v2) in view_edge
                ])
                # Assume comparable by default and prefer undefined
                # more than probably not, but less than definitely so.
                view_weight[np.isnan(view_weight)] = 1.5
                # Prefer viewpoint 10x more than time
                weights += 10 * view_weight

            if 'time_weight' in enabled_heuristics:
                # Prefer linking annotations closer in time
                times = list(ub.take(node_to_time, nodes))
                maxtime = util.safe_max(times, fill=1, nans=False)
                mintime = util.safe_min(times, fill=0, nans=False)
                time_denom = maxtime - mintime
                # Try linking by time for lynx data
                time_delta = np.array([
                    abs(node_to_time[u] - node_to_time[v]) for u, v in avail_uv
                ])
                time_weight = time_delta / time_denom
                weights += time_weight

            weights = np.array(weights)
            weights[np.isnan(weights)] = 1.0

            avail = [(u, v, {
                'weight': w
            }) for (u, v), w in zip(avail_uv, weights)]
            return avail

        new_edges = []
        prog = ub.ProgIter(list(label_to_nodes.keys()),
                           desc='finding mst edges',
                           enabled=infr.verbose > 0)
        for nid in prog:
            nodes = set(label_to_nodes[nid])
            if len(nodes) == 1:
                continue
            # We want to make this CC connected
            pos_sub = infr.pos_graph.subgraph(nodes, dynamic=False)
            impossible = set(
                it.starmap(
                    e_,
                    it.chain(
                        nxu.edges_inside(infr.neg_graph, nodes),
                        nxu.edges_inside(infr.incomp_graph, nodes),
                        # nxu.edges_inside(infr.unknown_graph, nodes),
                    )))
            if len(impossible) == 0 and not weight_heuristic:
                # Simple mst augmentation
                aug_edges = list(nxu.k_edge_augmentation(pos_sub, k=1))
            else:
                complement = it.starmap(e_, nxu.complement_edges(pos_sub))
                avail_uv = [(u, v) for u, v in complement
                            if (u, v) not in impossible]
                if weight_heuristic:
                    # Can do heuristic weighting to improve the MST
                    avail = _heuristic_weighting(nodes, avail_uv)
                else:
                    avail = avail_uv
                # print(len(pos_sub))
                try:
                    aug_edges = list(
                        nxu.k_edge_augmentation(pos_sub, k=1, avail=avail))
                except nx.NetworkXUnfeasible:
                    print('Warning: MST augmentation is not feasible')
                    print('explicit negative edges might disconnect a PCC')
                    aug_edges = list(
                        nxu.k_edge_augmentation(pos_sub,
                                                k=1,
                                                avail=avail,
                                                partial=True))
            new_edges.extend(aug_edges)
        prog.ensure_newline()

        for edge in new_edges:
            assert not infr.graph.has_edge(*edge), (
                'already have edge={}'.format(edge))
        return new_edges
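
The k_edge_augmentation call above does the real work: given the existing positive edges of one PCC and a pool of weighted candidate edges, it returns the cheapest edges needed to make that component connected. Below is a minimal standalone sketch of the same call using networkx directly, on a toy graph with made-up weights (not taken from the repository):

import networkx as nx
from networkx.algorithms.connectivity import k_edge_augmentation

pos_sub = nx.Graph()
pos_sub.add_nodes_from([1, 2, 3, 4])
pos_sub.add_edge(1, 2)  # an existing positive edge inside the PCC

# candidate edges with heuristic weights (lower weight is preferred)
avail = [(2, 3, {'weight': 1.0}), (1, 3, {'weight': 5.0}), (3, 4, {'weight': 1.0})]
aug_edges = list(k_edge_augmentation(pos_sub, k=1, avail=avail))
# aug_edges holds the cheap edges needed to connect the graph, e.g. [(2, 3), (3, 4)]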
示例#23
0
def benchmark_template():
    import ubelt as ub
    import pandas as pd
    import timerit

    def method1(x, y, z):
        ret = []
        for i in range((x + y) * z):
            ret.append(i)
        return ret

    def method2(x, y, z):
        ret = [i for i in range((x + y) * z)]
        return ret

    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(100, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': ['method1', 'method2'],
        'x': list(range(7)),
        'y': [0, 100],
        'z': [2, 3]
        # 'param_name': [param values],
    }
    xlabel = 'x'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = ['x', 'y', 'z']
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['y'],
        'size': ['z'],
    }
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            chunk_iter = ub.chunks(ti.times, ti.bestof)
            times = list(map(min, chunk_iter))  # TODO: timerit method for this
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename(
            {'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    other_keys = sorted(
        set(stats_data.columns) -
        {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not account for the fact that some "games"
            # (i.e. parameter settings) are more important than others, but it
            # should be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(),
                                      win_prob)).sort_values(ascending=False)
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data,
                     x=xlabel,
                     y=time_key,
                     marker='o',
                     ax=ax,
                     **plotkw)
        ax.set_title('Benchmark Name')
        ax.set_xlabel('Size (todo: A better x-variable description)')
        ax.set_ylabel('Time (todo: A better y-variable description)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')

        try:
            __IPYTHON__
        except NameError:
            plt.show()
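
For reference, ub.named_product is what turns the basis dictionary above into the grid of parameter combinations. A tiny illustration with a toy basis (not the one benchmarked above):

import ubelt as ub

toy_basis = {'method': ['method1', 'method2'], 'x': [0, 1]}
for params in ub.named_product(toy_basis):
    print(params)
# {'method': 'method1', 'x': 0}
# {'method': 'method1', 'x': 1}
# {'method': 'method2', 'x': 0}
# {'method': 'method2', 'x': 1}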
示例#24
0
def _precompute_class_weights(dset, mode='median-idf'):
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> harn = setup_harn(0, workers=0, xpu='cpu').initialize()
        >>> dset = harn.datasets['train']
    """

    assert mode in ['median-idf', 'log-median-idf']

    total_freq = _cached_class_frequency(dset)

    def logb(arr, base):
        if base == 'e':
            return np.log(arr)
        elif base == 2:
            return np.log2(arr)
        elif base == 10:
            return np.log10(arr)
        else:
            out = np.log(arr)
            out /= np.log(base)
            return out

    _min, _max = np.percentile(total_freq, [5, 95])
    is_valid = (_min <= total_freq) & (total_freq <= _max)
    if np.any(is_valid):
        middle_value = np.median(total_freq[is_valid])
    else:
        middle_value = np.median(total_freq)

    # variant of median-inverse-frequency
    nonzero_freq = total_freq[total_freq != 0]
    if len(nonzero_freq):
        total_freq[total_freq == 0] = nonzero_freq.min() / 2

    if mode == 'median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
    elif mode == 'log-median-idf':
        weights = (middle_value / total_freq)
        weights[~np.isfinite(weights)] = 1.0
        base = 2
        base = np.exp(1)
        weights = logb(weights + (base - 1), base)
        weights = np.maximum(weights, .1)
        weights = np.minimum(weights, 10)
    else:
        raise KeyError('mode = {!r}'.format(mode))

    weights = np.round(weights, 2)
    cname_to_weight = ub.dzip(dset.classes, weights)
    print('weights: ' + ub.repr2(cname_to_weight))

    if False:
        # Inspect the weights
        import kwplot
        kwplot.autoplt()

        cname_to_weight = ub.dzip(dset.classes, weights)
        cname_to_weight = ub.dict_subset(cname_to_weight, ub.argsort(cname_to_weight))
        kwplot.multi_plot(
            ydata=list(cname_to_weight.values()),
            kind='bar',
            xticklabels=list(cname_to_weight.keys()),
            xtick_rotation=90,
            fnum=2, doclf=True)

    return weights
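
A toy numeric illustration of the median-idf idea used above (made-up frequencies, not CamVid statistics): classes whose frequency is near the median get a weight around 1, rare classes are boosted, and very common classes are down-weighted.

import numpy as np

total_freq = np.array([10., 100., 1000.])   # per-class pixel counts
middle_value = np.median(total_freq)        # 100.0
weights = middle_value / total_freq         # array([10. ,  1. ,  0.1])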
示例#25
0
    def update_visual_attrs(infr,
                            graph=None,
                            show_reviewed_edges=True,
                            show_unreviewed_edges=False,
                            show_inferred_diff=True,
                            show_inferred_same=True,
                            show_recent_review=False,
                            highlight_reviews=True,
                            show_inconsistency=True,
                            wavy=False,
                            simple_labels=False,
                            show_labels=True,
                            reposition=True,
                            use_image=False,
                            edge_overrides=None,
                            node_overrides=None,
                            colorby='name_label',
                            **kwargs
                            # hide_unreviewed_inferred=True
                            ):
        infr.print('update_visual_attrs', 3)
        if graph is None:
            graph = infr.graph
        # if hide_cuts is not None:
        #     # show_unreviewed_cuts = not hide_cuts
        #     show_reviewed_cuts = not hide_cuts

        if not getattr(infr, '_viz_init_nodes', False):
            infr._viz_init_nodes = True
            nx.set_node_attributes(graph, name='shape', values='circle')
            # infr.set_node_attrs('shape', 'circle')

        if getattr(infr, '_viz_image_config_dirty', True):
            infr.update_node_image_attribute(graph=graph, use_image=use_image)

        def get_any(dict_, keys, default=None):
            for key in keys:
                if key in dict_:
                    return dict_[key]
            return default

        show_cand = get_any(
            kwargs, ['show_candidate_edges', 'show_candidates', 'show_cand'])
        if show_cand is not None:
            show_cand = True
            show_reviewed_edges = True
            show_unreviewed_edges = True
            show_inferred_diff = True
            show_inferred_same = True

        if kwargs.get('show_all'):
            show_cand = True

        # alpha_low = .5
        alpha_med = .9
        alpha_high = 1.0

        dark_background = graph.graph.get('dark_background', None)

        # Ensure we are starting from a clean slate
        # if reposition:
        util.nx_delete_edge_attr(graph, infr.visual_edge_attrs_appearance)

        # Set annotation node labels
        node_to_nid = None
        if not show_labels:
            nx.set_node_attributes(graph,
                                   name='label',
                                   values=ub.dzip(graph.nodes(), ['']))
        else:
            if simple_labels:
                nx.set_node_attributes(
                    graph,
                    name='label',
                    values={n: str(n)
                            for n in graph.nodes()})
            else:
                if node_to_nid is None:
                    node_to_nid = nx.get_node_attributes(graph, 'name_label')
                node_to_view = nx.get_node_attributes(graph, 'viewpoint')
                if node_to_view:
                    annotnode_to_label = {
                        aid: 'aid=%r%s\nnid=%r' %
                        (aid, node_to_view[aid], node_to_nid[aid])
                        for aid in graph.nodes()
                    }
                else:
                    annotnode_to_label = {
                        aid: 'aid=%r\nnid=%r' % (aid, node_to_nid[aid])
                        for aid in graph.nodes()
                    }
                nx.set_node_attributes(graph,
                                       name='label',
                                       values=annotnode_to_label)

        # NODE_COLOR: based on name_label
        color_nodes(graph,
                    labelattr=colorby,
                    outof=kwargs.get('outof', None),
                    sat_adjust=-.4)

        # EDGES:
        # Grab different types of edges
        edges, edge_colors = infr.get_colored_edge_weights(
            graph, highlight_reviews)

        # reviewed_states = nx.get_edge_attributes(graph, 'evidence_decision')
        reviewed_states = {
            e: infr.edge_decision(e)
            for e in infr.graph.edges()
        }
        edge_to_inferred_state = nx.get_edge_attributes(
            graph, 'inferred_state')
        # dummy_edges = [edge for edge, flag in
        #                nx.get_edge_attributes(graph, '_dummy_edge').items()
        #                if flag]
        edge_to_reviewid = nx.get_edge_attributes(graph, 'review_id')
        recheck_edges = [
            edge for edge, split in nx.get_edge_attributes(
                graph, 'maybe_error').items() if split
        ]
        decision_to_edge = util.group_pairs(reviewed_states.items())
        neg_edges = decision_to_edge[NEGTV]
        pos_edges = decision_to_edge[POSTV]
        incomp_edges = decision_to_edge[INCMP]
        unreviewed_edges = decision_to_edge[UNREV]

        inferred_same = [
            edge for edge, state in edge_to_inferred_state.items()
            if state == 'same'
        ]
        inferred_diff = [
            edge for edge, state in edge_to_inferred_state.items()
            if state == 'diff'
        ]
        inconsistent_external = [
            edge for edge, state in edge_to_inferred_state.items()
            if state == 'inconsistent_external'
        ]
        inferred_notcomp = [
            edge for edge, state in edge_to_inferred_state.items()
            if state == 'notcomp'
        ]

        reviewed_edges = incomp_edges + pos_edges + neg_edges
        compared_edges = pos_edges + neg_edges
        uncompared_edges = util.setdiff(edges, compared_edges)
        nontrivial_inferred_same = util.setdiff(
            inferred_same, pos_edges + neg_edges + incomp_edges)
        nontrivial_inferred_diff = util.setdiff(
            inferred_diff, pos_edges + neg_edges + incomp_edges)
        nontrivial_inferred_edges = (nontrivial_inferred_same +
                                     nontrivial_inferred_diff)

        # EDGE_COLOR: based on edge_weight
        nx.set_edge_attributes(graph,
                               name='color',
                               values=ub.dzip(edges, edge_colors))

        # LINE_WIDTH: based on review_state
        # unreviewed_width = 2.0
        # reviewed_width = 5.0
        unreviewed_width = 1.0
        reviewed_width = 2.0
        if highlight_reviews:
            nx.set_edge_attributes(graph,
                                   name='linewidth',
                                   values=ub.dzip(reviewed_edges,
                                                  [reviewed_width]))
            nx.set_edge_attributes(graph,
                                   name='linewidth',
                                   values=ub.dzip(unreviewed_edges,
                                                  [unreviewed_width]))
        else:
            nx.set_edge_attributes(graph,
                                   name='linewidth',
                                   values=ub.dzip(edges, [unreviewed_width]))

        # EDGE_STROKE: based on decision and maybe_error
        # fg = util.WHITE if dark_background else util.BLACK
        # nx.set_edge_attributes(graph, name='stroke', values=ub.dzip(reviewed_edges, [{'linewidth': 3, 'foreground': fg}]))
        if show_inconsistency:
            nx.set_edge_attributes(
                graph,
                name='stroke',
                values=ub.dzip(recheck_edges, [{
                    'linewidth': 5,
                    'foreground': infr._error_color
                }]))

        # Set linestyles to emphasize PCCs
        # Dash lines between PCCs inferred to be different
        nx.set_edge_attributes(graph,
                               name='linestyle',
                               values=ub.dzip(inferred_diff, ['dashed']))

        # Treat incomparable/incon-external inference as different
        nx.set_edge_attributes(graph,
                               name='linestyle',
                               values=ub.dzip(inferred_notcomp, ['dashed']))
        nx.set_edge_attributes(graph,
                               name='linestyle',
                               values=ub.dzip(inconsistent_external,
                                              ['dashed']))

        # Dot lines that we are unsure of
        nx.set_edge_attributes(graph,
                               name='linestyle',
                               values=ub.dzip(unreviewed_edges, ['dotted']))

        # Cut edges are implicit and dashed
        # nx.set_edge_attributes(graph, name='implicit', values=ub.dzip(cut_edges, [True]))
        # nx.set_edge_attributes(graph, name='linestyle', values=ub.dzip(cut_edges, ['dashed']))
        # nx.set_edge_attributes(graph, name='alpha', values=ub.dzip(cut_edges, [alpha_med]))

        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(uncompared_edges, [True]))

        # Only matching edges should impose constraints on the graph layout
        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(neg_edges, [True]))
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(neg_edges, [alpha_med]))
        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(incomp_edges, [True]))
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(incomp_edges, [alpha_med]))

        # Ensure reviewed edges are visible
        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(reviewed_edges, [False]))
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(reviewed_edges, [alpha_high]))

        if True:
            # Inferred-same edges can be allowed to constrain the layout in
            # order to make things look nice sometimes
            nx.set_edge_attributes(graph,
                                   name='implicit',
                                   values=ub.dzip(inferred_same, [False]))
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(inferred_same, [alpha_high]))

        if not kwargs.get('show_same', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(inferred_same, [0]))

        if not kwargs.get('show_diff', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(inferred_diff, [0]))

        if not kwargs.get('show_positive_edges', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(pos_edges, [0]))

        if not kwargs.get('show_negative_edges', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(neg_edges, [0]))

        if not kwargs.get('show_incomparable_edges', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(incomp_edges, [0]))

        if not kwargs.get('show_between', True):
            if node_to_nid is None:
                node_to_nid = nx.get_node_attributes(graph, 'name_label')
            between_edges = [(u, v) for u, v in edges
                             if node_to_nid[u] != node_to_nid[v]]
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(between_edges, [0]))

        # SKETCH: based on inferred_edges
        # Make inferred edges wavy
        if wavy:
            # dict(scale=3.0, length=18.0, randomness=None)]
            nx.set_edge_attributes(
                graph,
                name='sketch',
                values=ub.dzip(
                    nontrivial_inferred_edges,
                    [dict(scale=10.0, length=64.0, randomness=None)]))

        # Make dummy edges more transparent
        # nx.set_edge_attributes(graph, name='alpha', values=ub.dzip(dummy_edges, [alpha_low]))
        selected_edges = kwargs.pop('selected_edges', None)

        # SHADOW: based on most recent
        # Increase visibility of edges with the most recently changed timestamp
        if show_recent_review and edge_to_reviewid and selected_edges is None:
            review_ids = list(edge_to_reviewid.values())
            recent_idxs = ub.argmax(review_ids, multi=True)
            recent_edges = list(
                ub.take(list(edge_to_reviewid.keys()), recent_idxs))
            selected_edges = recent_edges

        if selected_edges is not None:
            # TODO: add photoshop-like parameters like
            # spread and size. offset is the same as angle and distance.
            nx.set_edge_attributes(
                graph,
                name='shadow',
                values=ub.dzip(
                    selected_edges,
                    [{
                        'rho': .3,
                        'alpha': .6,
                        'shadow_color': 'w' if dark_background else 'k',
                        'offset': (0, 0),
                        'scale': 3.0,
                    }]))

        # Z_ORDER: make sure nodes are on top
        nodes = list(graph.nodes())
        nx.set_node_attributes(graph,
                               name='zorder',
                               values=ub.dzip(nodes, [10]))
        nx.set_edge_attributes(graph,
                               name='zorder',
                               values=ub.dzip(edges, [0]))
        nx.set_edge_attributes(graph,
                               name='picker',
                               values=ub.dzip(edges, [10]))

        # VISIBILITY: Set visibility of edges based on arguments
        if not show_reviewed_edges:
            infr.print('Making reviewed edges invisible', 10)
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(reviewed_edges, ['invis']))

        if not show_unreviewed_edges:
            infr.print('Making un-reviewed edges invisible', 10)
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(unreviewed_edges, ['invis']))

        if not show_inferred_same:
            infr.print('Making nontrivial_same edges invisible', 10)
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(nontrivial_inferred_same,
                                                  ['invis']))

        if not show_inferred_diff:
            infr.print('Making nontrivial_diff edges invisible', 10)
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(nontrivial_inferred_diff,
                                                  ['invis']))

        if selected_edges is not None:
            # Always show the most recent review (remove setting of invis)
            # infr.print('recent_edges = %r' % (recent_edges,))
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(selected_edges, ['']))

        if reposition:
            # LAYOUT: update the positioning layout
            def get_layoutkw(key, default):
                return kwargs.get(key, graph.graph.get(key, default))

            layoutkw = dict(prog='neato',
                            splines=get_layoutkw('splines', 'line'),
                            fontsize=get_layoutkw('fontsize', None),
                            fontname=get_layoutkw('fontname', None),
                            sep=10 / 72,
                            esep=1 / 72,
                            nodesep=.1)
            layoutkw.update(kwargs)
            # print(ub.repr2(graph.edges))
            try:
                util.nx_agraph_layout(graph, inplace=True, **layoutkw)
            except AttributeError:
                print('WARNING: errors may occur')

        if edge_overrides:
            for key, edge_to_attr in edge_overrides.items():
                nx.set_edge_attributes(graph, name=key, values=edge_to_attr)
        if node_overrides:
            for key, node_to_attr in node_overrides.items():
                nx.set_node_attributes(graph, name=key, values=node_to_attr)
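
Almost every attribute assignment above follows the same pattern: ub.dzip broadcasts a single value across a list of edges (or nodes) to build the mapping that nx.set_edge_attributes expects. A minimal standalone sketch of that pattern on a toy graph (not infr.graph):

import networkx as nx
import ubelt as ub

g = nx.Graph([(1, 2), (2, 3)])
# one value replicated over every edge key
nx.set_edge_attributes(g, name='alpha', values=ub.dzip(g.edges(), [0.9]))
print(g.edges[1, 2]['alpha'])  # 0.9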
示例#26
0
        'orig_name_label': 5977
    },
    5430: {
        'aid': 5430,
        'name_label': 5977,
        'orig_name_label': 5977
    }
}

graph = nx.Graph(edges)
graph.add_nodes_from(nodes.keys())

df = pd.DataFrame.from_dict(nodes, orient='index')
nx.set_node_attributes(graph,
                       name='orig_name_label',
                       values=ub.dzip(df['aid'], df['orig_name_label']))
nx.set_node_attributes(graph,
                       name='name_label',
                       values=ub.dzip(df['aid'], df['name_label']))

aug_graph = graph
node_to_label = nx.get_node_attributes(graph, 'name_label')

aid1, aid2 = 2265, 2280

label_to_nodes = ub.group_items(node_to_label.keys(), node_to_label.values())

aug_graph = graph.copy()

# remove cut edges from augmented graph
edge_to_iscut = nx.get_edge_attributes(aug_graph, 'is_cut')
示例#27
0
def coco_from_viame_csv(csv_fpaths, images=None):
    @ub.memoize
    def lazy_image_list():
        if images is None:
            raise Exception('must specify where the image root is')
        if isdir(images):
            image_dpath = images
            all_gpaths = []
            import os
            for root, ds, fs in os.walk(image_dpath):
                IMG_EXT = {'png', 'jpg', 'jpeg', 'tif', 'tiff'}
                gpaths = [join(root, f) for f in fs if f.split('.')[-1].lower() in IMG_EXT]
                if len(gpaths) > 1 and len(ds) != 0:
                    raise Exception('Images must be in a leaf directory')
                if len(all_gpaths) > 0:
                    raise Exception('Images cannot be nested ATM')
                all_gpaths += gpaths
            all_gpaths = sorted(all_gpaths)
        else:
            raise NotImplementedError

        return all_gpaths

    indexed_images = None

    import kwcoco
    dset = kwcoco.CocoDataset()
    for csv_fpath in csv_fpaths:
        with open(csv_fpath, 'r') as file:
            text = file.read()
        lines = [line.strip() for line in text.split('\n')]
        lines = [line for line in lines if line and not line.startswith('#')]
        for line in lines:
            parts = line.split(',')
            tid = int(parts[0])
            gname = parts[1]
            frame_index = int(parts[2])

            if gname == '':
                if len(dset.imgs) == 0 or indexed_images:
                    # I GUESS WE ARE SUPPOSED TO GUESS WHAT IMAGE IS WHICH
                    if not indexed_images:
                        indexed_images = lazy_image_list()
                    try:
                        gname = indexed_images[frame_index]
                    except IndexError:
                        continue
                else:
                    # Also, VIAME-CSV lets the annotations run longer than the
                    # image sequence, so account for that.
                    # Skip this annotation
                    continue

            tl_x, tl_y, br_x, br_y = map(float, parts[3:7])
            w = br_x - tl_x
            h = br_y - tl_y
            bbox = [tl_x, tl_y, w, h]
            score = float(parts[7])
            target_len = float(parts[8])

            rest = parts[9:]
            catparts = []
            rest_iter = iter(rest)
            for p in rest_iter:
                if p.startswith('('):
                    catparts.append(p)

            final_parts = list(rest_iter)
            if final_parts:
                raise NotImplementedError

            catnames = rest[0::2]
            catscores = list(map(float, rest[1::2]))

            cat_to_score = ub.dzip(catnames, catscores)
            if cat_to_score:
                catname = ub.argmax(cat_to_score)
                cid = dset.ensure_category(name=catname)
            else:
                cid = None

            gid = dset.ensure_image(file_name=gname, frame_index=frame_index)
            kw = {}
            if target_len >= 0:
                kw['target_len'] = target_len
            if score >= 0:
                kw['score'] = score

            dset.add_annotation(
                image_id=gid, category_id=cid, track_id=tid, bbox=bbox, **kw
            )
    return dset
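
The trailing CSV columns parsed above alternate between category name and confidence; ub.dzip pairs them up and ub.argmax picks the best-scoring category. A small illustration of just that step (the column layout is inferred from the parsing code, not from a VIAME specification, and the values are hypothetical):

import ubelt as ub

rest = ['seal', '0.9', 'rock', '0.1']   # hypothetical trailing columns
cat_to_score = ub.dzip(rest[0::2], list(map(float, rest[1::2])))
print(cat_to_score)             # {'seal': 0.9, 'rock': 0.1}
print(ub.argmax(cat_to_score))  # seal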
示例#28
0
    def __init__(self, blocks_args=None, global_params=None):
        super(EfficientNet, self).__init__()
        assert isinstance(blocks_args, list), 'blocks_args should be a list'
        assert len(blocks_args) > 0, 'blocks_args must not be empty'
        self._global_params = global_params
        self._blocks_args = blocks_args

        # Handle class specification
        import ndsampler
        import ubelt as ub
        classes = self._global_params.classes
        if classes is None:
            classes = self._global_params.num_classes
        self.classes = ndsampler.CategoryTree.coerce(classes)

        keys = self._global_params._fields
        vals = list(self._global_params)
        tmp = ub.dzip(keys, vals, cls=ub.odict)
        tmp['num_classes'] = len(self.classes)
        tmp['classes'] = self.classes.__json__()
        self._global_params = type(global_params)(**tmp)

        self.image_size = self._global_params._asdict()['image_size']

        # import ubelt as ub
        # print(ub.repr2(self._global_params._asdict(), nl=-4))
        # print(ub.repr2(self._global_params._asdict()))

        self._initkw = {
            'blocks_args': self._blocks_args,
            'global_params': self._global_params,
        }

        self.model_name = None

        # Get static or dynamic convolution depending on image size
        Conv2d = Conv2dDynamicSamePadding.forsize(
            image_size=global_params.image_size)

        # Batch norm parameters
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        # Stem
        in_channels = 3  # rgb
        out_channels = self.round_filters(32)  # number of output channels
        self._conv_stem = Conv2d(in_channels,
                                 out_channels,
                                 kernel_size=3,
                                 stride=2,
                                 bias=False)
        self._bn0 = nn.BatchNorm2d(num_features=out_channels,
                                   momentum=bn_mom,
                                   eps=bn_eps)

        multiplier = global_params.depth_coefficient

        def round_repeats(repeats, multiplier):
            """ Round number of filters based on depth multiplier. """
            if not multiplier:
                return repeats
            return int(math.ceil(multiplier * repeats))

        # Build blocks
        self._blocks = nn.ModuleList([])
        for block_args in self._blocks_args:

            # Update block input and output filters based on depth multiplier.
            block_args = block_args._replace(
                input_filters=self.round_filters(block_args.input_filters),
                output_filters=self.round_filters(block_args.output_filters),
                num_repeat=round_repeats(block_args.num_repeat, multiplier))

            # The first block needs to take care of stride and filter size increase.
            self._blocks.append(MBConvBlock(block_args, self._global_params))
            if block_args.num_repeat > 1:
                block_args = block_args._replace(
                    input_filters=block_args.output_filters, stride=1)
            for _ in range(block_args.num_repeat - 1):
                self._blocks.append(
                    MBConvBlock(block_args, self._global_params))

        # Head
        in_channels = block_args.output_filters  # output of final block
        out_channels = self.round_filters(1280)
        self._conv_head = Conv2d(in_channels,
                                 out_channels,
                                 kernel_size=1,
                                 bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=out_channels,
                                   momentum=bn_mom,
                                   eps=bn_eps)

        # Final linear layer
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self._dropout = nn.Dropout(self._global_params.dropout_rate)
        self._fc = nn.Linear(out_channels, self._global_params.num_classes)
        noli = global_params.noli
        self._noli = layers.rectify_nonlinearity(noli, dim=2)
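
The ub.dzip call in the constructor above round-trips a namedtuple: dump its fields into an ordered dict, patch a few entries, and rebuild the tuple from the result. A hedged standalone sketch of that round-trip with a toy namedtuple (not the real global_params type):

from collections import namedtuple
import ubelt as ub

ToyParams = namedtuple('ToyParams', ['num_classes', 'classes', 'image_size'])
gp = ToyParams(num_classes=1000, classes=None, image_size=224)

tmp = ub.dzip(gp._fields, list(gp), cls=ub.odict)  # field name -> value
tmp['num_classes'] = 3                             # patch some entries
gp = type(gp)(**tmp)                               # rebuild the namedtuple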
示例#29
0
def benchmark_nested_break():
    """
    There are several ways to do a nested break, but which one is best?

    https://twitter.com/nedbat/status/1515345787563220996
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import itertools as it

    def method1_itertools(iter1, iter2):
        for i, j in it.product(iter1, iter2):
            if i == 20 and j == 20:
                break

    def method2_except(iter1, iter2):
        class Found(Exception):
            pass
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise Found
        except Found:
            pass

    class FoundPredef(Exception):
        pass

    def method2_5_except_predef(iter1, iter2):
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise FoundPredef
        except FoundPredef:
            pass

    def method3_gendef(iter1, iter2):
        def genfunc():
            for i in iter1:
                for j in iter2:
                    yield i, j

        for i, j in genfunc():
            if i == 20 and j == 20:
                break

    def method4_genexp(iter1, iter2):
        genexpr = ((i, j) for i in iter1 for j in iter2)
        for i, j in genexpr:
            if i == 20 and j == 20:
                break

    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(1000, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    import numpy as np
    basis = {
        'method': ['method1_itertools', 'method2_except', 'method2_5_except_predef', 'method3_gendef', 'method4_genexp'],
        # 'n1': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        # 'n2': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        'size': np.logspace(1, np.log2(10000), 30, base=2).astype(int),
        'input_style': ['range', 'list', 'customized_iter'],
        # 'param_name': [param values],
    }
    xlabel = 'size'
    xinput_labels = ['n1', 'n2', 'size']

    # Set these to param labels that directly transfer to method kwargs
    kw_labels = []
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['input_style'],
        'size': [],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel} - xinput_labels) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    def make_input(params):
        # Given the parameterization make the benchmark function input
        # n1 = params['n1']
        # n2 = params['n2']
        size = params['size']
        n1 = int(np.sqrt(size))
        n2 = int(np.sqrt(size))
        if params['input_style'] == 'list':
            iter1 = list(range(n1))
            iter2 = list(range(n2))
        elif params['input_style'] == 'range':
            iter1 = range(n1)
            iter2 = range(n2)
        elif params['input_style'] == 'customized_iter':
            import random
            def rando1():
                rng1 = random.Random(0)
                for _ in range(n1):
                    yield rng1.randint(0, n2)

            def rando2():
                rng2 = random.Random(1)
                for _ in range(n1):
                    yield rng2.randint(0, n2)

            iter1 = rando1()
            iter2 = rando2()
        else:
            raise KeyError
        return {'iter1': iter1, 'iter2': iter2}

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        # size = params['n1'] * params['n2']
        # params['size'] = size
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(),  kw_labels)

        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            kwargs.update(make_input(params))
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # chunk_iter = ub.chunks(ti.times, ti.bestof)
            # times = list(map(min, chunk_iter))  # TODO: timerit method for this
            times = ti.robust_times()
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    other_keys = sorted(set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not account for the fact that some "games"
            # (i.e. parameter settings) are more important than others, but it
            # should be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('method_ratings = {}'.format(ub.repr2(method_ratings, nl=1)))
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title(f'Benchmark Nested Breaks: #Trials {ti.num}, bestof {ti.bestof}')
        ax.set_xlabel(f'{xlabel}')
        ax.set_ylabel('Time')
        ax.set_xscale('log')
        ax.set_yscale('log')

        try:
            __IPYTHON__
        except NameError:
            plt.show()
示例#30
0
def 数组_合并为字典(items1, items2, cls=dict):
    """Zip two sequences into a dict (the name translates to "merge arrays into a dict"); thin wrapper around ub.dzip."""
    return ub.dzip(items1, items2, cls)
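
A short usage sketch for the wrapper above (it assumes ubelt is installed and imported as ub, as the snippet implies). It also shows ub.dzip's length-1 broadcasting and the optional cls argument:

import ubelt as ub  # NOQA  (needed by the wrapper above)
from collections import OrderedDict

print(数组_合并为字典(['a', 'b'], [1, 2]))               # {'a': 1, 'b': 2}
print(数组_合并为字典(['a', 'b'], [0]))                  # broadcasts: {'a': 0, 'b': 0}
print(数组_合并为字典(['a', 'b'], [1, 2], OrderedDict))  # OrderedDict([('a', 1), ('b', 2)])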