Example #1
    def update_to_latest_dev_branch(repo, dry=False):
        remote = repo._registered_remote()
        repo._cmd('git fetch {}'.format(remote.name))
        repo.info('Fetch was successful')
        remote_branchnames = [ref.remote_head for ref in remote.refs]
        print('remote_branchnames = {!r}'.format(remote_branchnames))

        # Find all the dev branches
        dev_branches = [
            ref for ref in remote.refs if ref.remote_head.startswith('dev/')
        ]

        version_tuples = [
            tuple(map(int,
                      ref.remote_head.split('dev/')[1].split('.')))
            for ref in dev_branches
        ]
        latest_ref = dev_branches[ub.argmax(version_tuples)]
        latest_branch = latest_ref.remote_head

        if repo.pygit.active_branch.name == latest_branch:
            repo.info('Already on the latest dev branch')
        else:
            try:
                repo._cmd('git checkout {}'.format(latest_branch))
            except ShellException:
                repo.debug(
                    'Checkout failed. Branch name might be ambiguous. Trying again'
                )
                try:
                    repo._cmd('git checkout -b {} {}/{}'.format(
                        latest_branch, remote.name, latest_branch))
                except ShellException:
                    raise Exception('Checkout failed twice; does the branch exist on the remote?')
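
A minimal sketch of the selection logic above, with hypothetical branch names: versions are compared as integer tuples, so 'dev/1.10.0' correctly ranks above 'dev/1.2.0' (a plain string comparison would get this wrong).

    import ubelt as ub
    branches = ['dev/1.2.0', 'dev/1.10.0', 'dev/0.9.3']
    version_tuples = [
        tuple(map(int, name.split('dev/')[1].split('.')))
        for name in branches
    ]
    assert branches[ub.argmax(version_tuples)] == 'dev/1.10.0'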
Example #2
    def upgrade(repo, dry=False):
        """
        Look for a "dev" branch with a higher version number and switch to that.

        Example:
            >>> from super_setup import *
            >>> import ubelt as ub
            >>> repo = Repo.demo()
            >>> print('repo = {}'.format(repo))
            >>> repo.upgrade()
        """
        remote = repo._registered_remote()
        repo._cmd('git fetch {}'.format(remote.name))
        repo.info('Fetch was successful')
        remote_branchnames = [ref.remote_head for ref in remote.refs]
        print('remote_branchnames = {!r}'.format(remote_branchnames))

        # Find all the dev branches
        dev_branches_ = [
            ref for ref in remote.refs if ref.remote_head.startswith('dev/')
        ]

        dev_branches = []
        version_tuples = []
        for ref in dev_branches_:
            try:
                tup = tuple(
                    map(int,
                        ref.remote_head.split('dev/')[1].split('.')))
                dev_branches.append(ref)
                version_tuples.append(tup)
            except Exception:
                pass

        latest_ref = dev_branches[ub.argmax(version_tuples)]
        latest_branch = latest_ref.remote_head

        if repo.pygit.active_branch.name == latest_branch:
            repo.info('Already on the latest dev branch')
        else:
            try:
                repo._cmd('git checkout {}'.format(latest_branch))
            except ShellException:
                repo.debug(
                    'Checkout failed. Branch name might be ambiguous. Trying again'
                )
                try:
                    repo._cmd('git checkout -b {} {}/{}'.format(
                        latest_branch, remote.name, latest_branch))
                except ShellException:
                    raise Exception('Checkout failed twice; does the branch exist on the remote?')
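
Compared to the previous example, the try/except around the version parsing tolerates dev branches whose suffix is not a dotted version. A small sketch of that filtering, with hypothetical branch names:

    names = ['dev/1.2.0', 'dev/1.10.0', 'dev/experimental']
    version_tuples = []
    for name in names:
        try:
            version_tuples.append(
                tuple(map(int, name.split('dev/')[1].split('.'))))
        except ValueError:
            pass  # skip non-version branches like 'dev/experimental'
    assert max(version_tuples) == (1, 10, 0)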
Example #3
def _best_prefix_transform(set1, target_set2):
    """
    Find a way to transform prefixes of items in set1 to match target_set2

    Example:
        >>> set1 = {'mod.f.0.w',
        >>>         'mod.f.1.b',
        >>>         'mod.f.1.n',
        >>>         'mod.f.1.rm',
        >>>         'mod.f.1.rv',}
        >>> #
        >>> target_set2 = {
        >>>      'bar.foo.extra.f.1.b',
        >>>      'bar.foo.extra.f.1.n',
        >>>      'bar.foo.extra.f.1.w',
        >>>      'bar.foo.extra.f.3.w',
        >>> }
        >>> _best_prefix_transform(set1, target_set2)
        >>> target_set2.add('JUNK')
        >>> _best_prefix_transform(set1, target_set2)
    """

    # There is probably an efficient way to do this with a trie

    # NOTE: In general this is a graph-isomorphism problem or a maximum common
    # subgraph problem. However, we can look only at the special case of
    # "maximum common subtrees". Given two directory structures (as trees)
    # we find the common bits.
    # https://perso.ensta-paris.fr/~diam/ro/online/viggo_wwwcompendium/node168.html
    # We can approximate to O(log log n / log^2 n)
    # Can get algorithm from maximum independent set
    # https://arxiv.org/abs/1602.07210

    # The most efficient algorithm here would be for solving
    # "Maximum common labeled subtrees"
    # APX-hard for unordered trees, but polytime solvable for ordered trees.
    # For directory structures we can induce an order, and hence obtain a
    # polytime solution.
    #
    # On the Maximum Common Embedded Subtree Problem for Ordered Trees
    # https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf

    import ubelt as ub
    from os.path import commonprefix
    prefixes1 = commonprefix(list(set1)).split('.')
    prefixes2 = commonprefix(list(target_set2)).split('.')

    # Remove the trailing prefix parts that are the same
    num_same = 0
    for i in range(1, min(len(prefixes1), len(prefixes2))):
        if prefixes1[-i] == prefixes2[-i]:
            num_same = i
        else:
            break
    if num_same:
        # guard: slicing with [:-0] would wrongly empty the lists
        prefixes1 = prefixes1[:-num_same]
        prefixes2 = prefixes2[:-num_same]

    ALLOW_FUZZY = 1
    if ALLOW_FUZZY and len(prefixes2) == 0:
        # SUPER HACK FOR CASE WHERE THERE IS JUST ONE SPOILER ELEMENT IN THE
        # TARGET SET. THE ALGORITHM NEEDS TO BE RETHOUGHT FOR THAT CASE
        possible_prefixes = [k.split('.') for k in target_set2]
        prefix_hist = ub.ddict(lambda: 0)
        for item in possible_prefixes:
            for i in range(1, len(item)):
                prefix_hist[tuple(item[0:i])] += 1
        prefixes2 = ['.'.join(ub.argmax(prefix_hist))]
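        # For instance (hypothetical input): with target_set2 =
        # {'bar.foo.x', 'bar.foo.y', 'JUNK'}, each dotted item votes for its
        # leading prefixes, so ('bar',) and ('bar', 'foo') get two votes each
        # while 'JUNK' contributes nothing, and argmax returns a real prefix.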

    def add_prefix(items, prefix):
        return {prefix + k for k in items}

    def remove_prefix(items, prefix):
        return {k[len(prefix):] if k.startswith(prefix) else k for k in items}

    import itertools as it
    found_cand = []
    for i1, i2 in it.product(range(len(prefixes1) + 1),
                             range(len(prefixes2) + 1)):
        if i1 == 0 and i2 == 0:
            continue
        # Very inefficient, we should be able to do better
        prefix1 = '.'.join(prefixes1[:i1])
        prefix2 = '.'.join(prefixes2[:i2])
        if prefix1:
            prefix1 = prefix1 + '.'
        if prefix2:
            prefix2 = prefix2 + '.'

        # We are allowed to remove a prefix from a set, add the other
        # prefix to the set, or remove and then add.
        set1_cand1 = remove_prefix(set1, prefix1)
        set1_cand2 = add_prefix(set1, prefix2)
        set1_cand3 = add_prefix(set1_cand1, prefix2)

        common1 = set1_cand1 & target_set2
        common2 = set1_cand2 & target_set2
        common3 = set1_cand3 & target_set2
        if common1:
            found_cand.append({
                'transform': [('remove', prefix1)],
                'value': len(common1),
            })
        if common2:
            found_cand.append({
                'transform': [('add', prefix2)],
                'value': len(common2),
            })
        if common3:
            found_cand.append({
                'transform': [('remove', prefix1), ('add', prefix2)],
                'value':
                len(common3),
            })
    if len(found_cand):
        found = max(found_cand, key=lambda x: x['value'])
    else:
        found = None
    return found
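
A usage sketch on a trimmed version of the docstring data; given the candidate scoring above, the expected result removes the 'mod.' prefix and adds the targets' 'bar.foo.extra.' prefix:

    set1 = {'mod.f.0.w', 'mod.f.1.b', 'mod.f.1.n'}
    target_set2 = {'bar.foo.extra.f.1.b', 'bar.foo.extra.f.1.n',
                   'bar.foo.extra.f.3.w'}
    found = _best_prefix_transform(set1, target_set2)
    # expected: {'transform': [('remove', 'mod.'), ('add', 'bar.foo.extra.')],
    #            'value': 2}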
Example #4
def coco_from_viame_csv(csv_fpaths, images=None):
    import ubelt as ub
    from os.path import isdir, join

    @ub.memoize
    def lazy_image_list():
        if images is None:
            raise Exception('must specify where the image root is')
        if isdir(images):
            image_dpath = images
            all_gpaths = []
            import os
            for root, ds, fs in os.walk(image_dpath):
                IMG_EXT = {'png', 'jpg', 'jpeg', 'tif', 'tiff'}
                gpaths = [join(root, f) for f in fs if f.split('.')[-1].lower() in IMG_EXT]
                if len(gpaths) > 1 and len(ds) != 0:
                    raise Exception('Images must be in a leaf directory')
                if len(all_gpaths) > 0:
                    raise Exception('Images cannot be nested ATM')
                all_gpaths += gpaths
            all_gpaths = sorted(all_gpaths)
        else:
            raise NotImplementedError

        return all_gpaths

    indexed_images = None

    import kwcoco
    dset = kwcoco.CocoDataset()
    for csv_fpath in csv_fpaths:
        with open(csv_fpath, 'r') as file:
            text = file.read()
        lines = [line.strip() for line in text.split('\n')]
        lines = [line for line in lines if line and not line.startswith('#')]
        for line in lines:
            parts = line.split(',')
            tid = int(parts[0])
            gname = parts[1]
            frame_index = int(parts[2])

            if gname == '':
                if len(dset.imgs) == 0 or indexed_images:
                    # I GUESS WE ARE SUPPOSED TO GUESS WHAT IMAGE IS WHICH
                    if not indexed_images:
                        indexed_images = lazy_image_list()
                    try:
                        gname = indexed_images[frame_index]
                    except IndexError:
                        continue
                else:
                    # Also, VIAME-CSV lets the annotations run longer than the
                    # image sequence, so account for that.
                    # Skip this annotation
                    continue

            tl_x, tl_y, br_x, br_y = map(float, parts[3:7])
            w = br_x - tl_x
            h = br_y - tl_y
            bbox = [tl_x, tl_y, w, h]
            score = float(parts[7])
            target_len = float(parts[8])

            # The remaining parts alternate (category-name, score) pairs;
            # parenthesized attribute parts are collected but not yet handled
            rest = parts[9:]
            catparts = []
            rest_iter = iter(rest)
            for p in rest_iter:
                if p.startswith('('):
                    catparts.append(p)

            # NOTE: the for-loop above exhausts rest_iter, so this is empty
            final_parts = list(rest_iter)
            if final_parts:
                raise NotImplementedError

            catnames = rest[0::2]
            catscores = list(map(float, rest[1::2]))

            cat_to_score = ub.dzip(catnames, catscores)
            if cat_to_score:
                catname = ub.argmax(cat_to_score)
                cid = dset.ensure_category(name=catname)
            else:
                cid = None

            gid = dset.ensure_image(file_name=gname, frame_index=frame_index)
            kw = {}
            if target_len >= 0:
                kw['target_len'] = target_len
            if score >= 0:
                kw['score'] = score

            dset.add_annotation(
                image_id=gid, category_id=cid, track_id=tid, bbox=bbox, **kw
            )
    return dset
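
A minimal usage sketch with a hypothetical file, assuming the standard VIAME-CSV column layout that the parser above expects (track id, image name, frame index, tl_x, tl_y, br_x, br_y, confidence, target length, then repeated category/score pairs):

    lines = [
        '# comments and blank lines are ignored',
        '1,img_00001.png,0,10,20,30,60,0.9,-1,fish,0.9',
        '1,img_00002.png,1,12,22,32,62,0.8,-1,fish,0.8',
    ]
    with open('demo_viame.csv', 'w') as file:
        file.write('\n'.join(lines))
    dset = coco_from_viame_csv(['demo_viame.csv'])
    assert len(dset.anns) == 2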
Example #5
    def try_auto_review(infr, edge):
        review = {
            'user_id': 'algo:auto_clf',
            'confidence': const.CONFIDENCE.CODE.PRETTY_SURE,
            'evidence_decision': None,
            'meta_decision': NULL,
            'timestamp_s1': None,
            'timestamp_c1': None,
            'timestamp_c2': None,
            'tags': [],
        }
        if infr.is_recovering():
            # Do not autoreview if we are in an inconsistent state
            infr.print('Must manually review inconsistent edge', 3)
            return None
        # Determine if anything passes the match threshold
        primary_task = 'match_state'

        try:
            decision_probs = infr.task_probs[primary_task][edge]
        except KeyError:
            if infr.verifiers is None:
                return None
            if infr.verifiers.get(primary_task, None) is None:
                return None
            # Compute probs if they haven't been done yet
            infr.ensure_priority_scores([edge])
            try:
                decision_probs = infr.task_probs[primary_task][edge]
            except KeyError:
                return None

        primary_thresh = infr.task_thresh[primary_task]
        decision_flags = {
            k: decision_probs[k] > thresh
            for k, thresh in primary_thresh.items()
        }
        hasone = sum(decision_flags.values()) == 1
        auto_flag = False
        if hasone:
            try:
                # Check to see if it might be confounded by a photobomb
                pb_probs = infr.task_probs['photobomb_state'][edge]
                # pb_probs = infr.task_probs['photobomb_state'].loc[edge]
                # pb_probs = data['task_probs']['photobomb_state']
                pb_thresh = infr.task_thresh['photobomb_state']['pb']
                confounded = pb_probs['pb'] > pb_thresh
            except KeyError:
                print(
                    'Warning: confounding task probs not set (i.e. photobombs)'
                )
                confounded = False
            if not confounded:
                # decision = decision_flags.argmax()
                evidence_decision = ub.argmax(decision_probs)
                review['evidence_decision'] = evidence_decision
                # truth = infr.match_state_gt(edge)
                truth = infr.dummy_verif._get_truth(edge)
                if review['evidence_decision'] != truth:
                    infr.print(
                        'AUTOMATIC ERROR edge={}, truth={}, decision={}, probs={}'
                        .format(edge, truth, review['evidence_decision'],
                                decision_probs),
                        2,
                        color='darkred')
                auto_flag = True
        if auto_flag and infr.verbose > 1:
            infr.print('Automatic review success')

        if auto_flag:
            return review
        else:
            return None
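
The thresholding above reduces to: auto-review only when exactly one state clears its threshold, then take the argmax over the probabilities as the evidence decision. A toy sketch with hypothetical state names and thresholds:

    import ubelt as ub
    decision_probs = {'match': 0.95, 'nomatch': 0.03, 'notcomp': 0.02}
    primary_thresh = {'match': 0.9, 'nomatch': 0.9, 'notcomp': 0.9}
    decision_flags = {k: decision_probs[k] > t
                      for k, t in primary_thresh.items()}
    assert sum(decision_flags.values()) == 1  # unambiguous
    assert ub.argmax(decision_probs) == 'match'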
Example #6
    def update_visual_attrs(infr,
                            graph=None,
                            show_reviewed_edges=True,
                            show_unreviewed_edges=False,
                            show_inferred_diff=True,
                            show_inferred_same=True,
                            show_recent_review=False,
                            highlight_reviews=True,
                            show_inconsistency=True,
                            wavy=False,
                            simple_labels=False,
                            show_labels=True,
                            reposition=True,
                            use_image=False,
                            edge_overrides=None,
                            node_overrides=None,
                            colorby='name_label',
                            **kwargs
                            # hide_unreviewed_inferred=True
                            ):
        infr.print('update_visual_attrs', 3)
        if graph is None:
            graph = infr.graph
        # if hide_cuts is not None:
        #     # show_unreviewed_cuts = not hide_cuts
        #     show_reviewed_cuts = not hide_cuts

        if not getattr(infr, '_viz_init_nodes', False):
            infr._viz_init_nodes = True
            nx.set_node_attributes(graph, name='shape', values='circle')
            # infr.set_node_attrs('shape', 'circle')

        if getattr(infr, '_viz_image_config_dirty', True):
            infr.update_node_image_attribute(graph=graph, use_image=use_image)

        def get_any(dict_, keys, default=None):
            for key in keys:
                if key in dict_:
                    return dict_[key]
            return default
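
        # e.g. get_any({'show_cand': 1}, ['show_candidates', 'show_cand']) -> 1
        #      get_any({}, ['show_candidates', 'show_cand']) -> None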

        show_cand = get_any(
            kwargs, ['show_candidate_edges', 'show_candidates', 'show_cand'])
        if show_cand is not None:
            show_cand = True
            show_reviewed_edges = True
            show_unreviewed_edges = True
            show_inferred_diff = True
            show_inferred_same = True

        if kwargs.get('show_all'):
            show_cand = True

        # alpha_low = .5
        alpha_med = .9
        alpha_high = 1.0

        dark_background = graph.graph.get('dark_background', None)

        # Ensure we are starting from a clean slate
        # if reposition:
        util.nx_delete_edge_attr(graph, infr.visual_edge_attrs_appearance)

        # Set annotation node labels
        node_to_nid = None
        if not show_labels:
            nx.set_node_attributes(graph,
                                   name='label',
                                   values=ub.dzip(graph.nodes(), ['']))
        else:
            if simple_labels:
                nx.set_node_attributes(
                    graph,
                    name='label',
                    values={n: str(n)
                            for n in graph.nodes()})
            else:
                if node_to_nid is None:
                    node_to_nid = nx.get_node_attributes(graph, 'name_label')
                node_to_view = nx.get_node_attributes(graph, 'viewpoint')
                if node_to_view:
                    annotnode_to_label = {
                        aid: 'aid=%r%s\nnid=%r' %
                        (aid, node_to_view[aid], node_to_nid[aid])
                        for aid in graph.nodes()
                    }
                else:
                    annotnode_to_label = {
                        aid: 'aid=%r\nnid=%r' % (aid, node_to_nid[aid])
                        for aid in graph.nodes()
                    }
                nx.set_node_attributes(graph,
                                       name='label',
                                       values=annotnode_to_label)

        # NODE_COLOR: based on name_label
        color_nodes(graph,
                    labelattr=colorby,
                    outof=kwargs.get('outof', None),
                    sat_adjust=-.4)

        # EDGES:
        # Grab different types of edges
        edges, edge_colors = infr.get_colored_edge_weights(
            graph, highlight_reviews)

        # reviewed_states = nx.get_edge_attributes(graph, 'evidence_decision')
        reviewed_states = {
            e: infr.edge_decision(e)
            for e in infr.graph.edges()
        }
        edge_to_inferred_state = nx.get_edge_attributes(
            graph, 'inferred_state')
        # dummy_edges = [edge for edge, flag in
        #                nx.get_edge_attributes(graph, '_dummy_edge').items()
        #                if flag]
        edge_to_reviewid = nx.get_edge_attributes(graph, 'review_id')
        recheck_edges = [
            edge for edge, split in nx.get_edge_attributes(
                graph, 'maybe_error').items() if split
        ]
        decision_to_edge = util.group_pairs(reviewed_states.items())
        neg_edges = decision_to_edge[NEGTV]
        pos_edges = decision_to_edge[POSTV]
        incomp_edges = decision_to_edge[INCMP]
        unreviewed_edges = decision_to_edge[UNREV]

        inferred_same = [
            edge for edge, state in edge_to_inferred_state.items()
            if state == 'same'
        ]
        inferred_diff = [
            edge for edge, state in edge_to_inferred_state.items()
            if state == 'diff'
        ]
        inconsistent_external = [
            edge for edge, state in edge_to_inferred_state.items()
            if state == 'inconsistent_external'
        ]
        inferred_notcomp = [
            edge for edge, state in edge_to_inferred_state.items()
            if state == 'notcomp'
        ]

        reviewed_edges = incomp_edges + pos_edges + neg_edges
        compared_edges = pos_edges + neg_edges
        uncompared_edges = util.setdiff(edges, compared_edges)
        nontrivial_inferred_same = util.setdiff(
            inferred_same, pos_edges + neg_edges + incomp_edges)
        nontrivial_inferred_diff = util.setdiff(
            inferred_diff, pos_edges + neg_edges + incomp_edges)
        nontrivial_inferred_edges = (nontrivial_inferred_same +
                                     nontrivial_inferred_diff)

        # EDGE_COLOR: based on edge_weight
        nx.set_edge_attributes(graph,
                               name='color',
                               values=ub.dzip(edges, edge_colors))

        # LINE_WIDTH: based on review_state
        # unreviewed_width = 2.0
        # reviewed_width = 5.0
        unreviewed_width = 1.0
        reviewed_width = 2.0
        if highlight_reviews:
            nx.set_edge_attributes(graph,
                                   name='linewidth',
                                   values=ub.dzip(reviewed_edges,
                                                  [reviewed_width]))
            nx.set_edge_attributes(graph,
                                   name='linewidth',
                                   values=ub.dzip(unreviewed_edges,
                                                  [unreviewed_width]))
        else:
            nx.set_edge_attributes(graph,
                                   name='linewidth',
                                   values=ub.dzip(edges, [unreviewed_width]))

        # EDGE_STROKE: based on decision and maybe_error
        # fg = util.WHITE if dark_background else util.BLACK
        # nx.set_edge_attributes(graph, name='stroke', values=ub.dzip(reviewed_edges, [{'linewidth': 3, 'foreground': fg}]))
        if show_inconsistency:
            nx.set_edge_attributes(
                graph,
                name='stroke',
                values=ub.dzip(recheck_edges, [{
                    'linewidth': 5,
                    'foreground': infr._error_color
                }]))

        # Set linestyles to emphasize PCCs
        # Dash lines between PCCs inferred to be different
        nx.set_edge_attributes(graph,
                               name='linestyle',
                               values=ub.dzip(inferred_diff, ['dashed']))

        # Treat incomparable/incon-external inference as different
        nx.set_edge_attributes(graph,
                               name='linestyle',
                               values=ub.dzip(inferred_notcomp, ['dashed']))
        nx.set_edge_attributes(graph,
                               name='linestyle',
                               values=ub.dzip(inconsistent_external,
                                              ['dashed']))

        # Dot lines that we are unsure of
        nx.set_edge_attributes(graph,
                               name='linestyle',
                               values=ub.dzip(unreviewed_edges, ['dotted']))

        # Cut edges are implicit and dashed
        # nx.set_edge_attributes(graph, name='implicit', values=ub.dzip(cut_edges, [True]))
        # nx.set_edge_attributes(graph, name='linestyle', values=ub.dzip(cut_edges, ['dashed']))
        # nx.set_edge_attributes(graph, name='alpha', values=ub.dzip(cut_edges, [alpha_med]))

        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(uncompared_edges, [True]))

        # Only matching edges should impose constraints on the graph layout
        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(neg_edges, [True]))
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(neg_edges, [alpha_med]))
        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(incomp_edges, [True]))
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(incomp_edges, [alpha_med]))

        # Ensure reviewed edges are visible
        nx.set_edge_attributes(graph,
                               name='implicit',
                               values=ub.dzip(reviewed_edges, [False]))
        nx.set_edge_attributes(graph,
                               name='alpha',
                               values=ub.dzip(reviewed_edges, [alpha_high]))

        if True:
            # Inferred-same edges can be allowed to constrain the layout in
            # order to make things look nice sometimes
            nx.set_edge_attributes(graph,
                                   name='implicit',
                                   values=ub.dzip(inferred_same, [False]))
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(inferred_same, [alpha_high]))

        if not kwargs.get('show_same', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(inferred_same, [0]))

        if not kwargs.get('show_diff', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(inferred_diff, [0]))

        if not kwargs.get('show_positive_edges', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(pos_edges, [0]))

        if not kwargs.get('show_negative_edges', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(neg_edges, [0]))

        if not kwargs.get('show_incomparable_edges', True):
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(incomp_edges, [0]))

        if not kwargs.get('show_between', True):
            if node_to_nid is None:
                node_to_nid = nx.get_node_attributes(graph, 'name_label')
            between_edges = [(u, v) for u, v in edges
                             if node_to_nid[u] != node_to_nid[v]]
            nx.set_edge_attributes(graph,
                                   name='alpha',
                                   values=ub.dzip(between_edges, [0]))

        # SKETCH: based on inferred_edges
        # Make inferred edges wavy
        if wavy:
            # dict(scale=3.0, length=18.0, randomness=None)]
            nx.set_edge_attributes(
                graph,
                name='sketch',
                values=ub.dzip(
                    nontrivial_inferred_edges,
                    [dict(scale=10.0, length=64.0, randomness=None)]))

        # Make dummy edges more transparent
        # nx.set_edge_attributes(graph, name='alpha', values=ub.dzip(dummy_edges, [alpha_low]))
        selected_edges = kwargs.pop('selected_edges', None)

        # SHADOW: based on most recent
        # Increase visibility of nodes with the most recently changed timestamp
        if show_recent_review and edge_to_reviewid and selected_edges is None:
            review_ids = list(edge_to_reviewid.values())
            recent_idxs = ub.argmax(review_ids, multi=True)
            recent_edges = list(
                ub.take(list(edge_to_reviewid.keys()), recent_idxs))
            selected_edges = recent_edges

        if selected_edges is not None:
            # TODO: add photoshop-like parameters like
            # spread and size. offset is the same as angle and distance.
            nx.set_edge_attributes(
                graph,
                name='shadow',
                values=ub.dzip(
                    selected_edges,
                    [{
                        'rho': .3,
                        'alpha': .6,
                        'shadow_color': 'w' if dark_background else 'k',
                        'offset': (0, 0),
                        'scale': 3.0,
                    }]))

        # Z_ORDER: make sure nodes are on top
        nodes = list(graph.nodes())
        nx.set_node_attributes(graph,
                               name='zorder',
                               values=ub.dzip(nodes, [10]))
        nx.set_edge_attributes(graph,
                               name='zorder',
                               values=ub.dzip(edges, [0]))
        nx.set_edge_attributes(graph,
                               name='picker',
                               values=ub.dzip(edges, [10]))

        # VISIBILITY: Set visibility of edges based on arguments
        if not show_reviewed_edges:
            infr.print('Making reviewed edges invisible', 10)
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(reviewed_edges, ['invis']))

        if not show_unreviewed_edges:
            infr.print('Making un-reviewed edges invisible', 10)
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(unreviewed_edges, ['invis']))

        if not show_inferred_same:
            infr.print('Making nontrivial_same edges invisible', 10)
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(nontrivial_inferred_same,
                                                  ['invis']))

        if not show_inferred_diff:
            infr.print('Making nontrivial_diff edges invisible', 10)
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(nontrivial_inferred_diff,
                                                  ['invis']))

        if selected_edges is not None:
            # Always show the most recent review (remove setting of invis)
            # infr.print('recent_edges = %r' % (recent_edges,))
            nx.set_edge_attributes(graph,
                                   name='style',
                                   values=ub.dzip(selected_edges, ['']))

        if reposition:
            # LAYOUT: update the positioning layout
            def get_layoutkw(key, default):
                return kwargs.get(key, graph.graph.get(key, default))

            layoutkw = dict(prog='neato',
                            splines=get_layoutkw('splines', 'line'),
                            fontsize=get_layoutkw('fontsize', None),
                            fontname=get_layoutkw('fontname', None),
                            sep=10 / 72,
                            esep=1 / 72,
                            nodesep=.1)
            layoutkw.update(kwargs)
            # print(ub.repr2(graph.edges))
            try:
                util.nx_agraph_layout(graph, inplace=True, **layoutkw)
            except AttributeError:
                print('WARNING: errors may occur')

        if edge_overrides:
            for key, edge_to_attr in edge_overrides.items():
                nx.set_edge_attributes(graph, name=key, values=edge_to_attr)
        if node_overrides:
            for key, node_to_attr in node_overrides.items():
                nx.set_node_attributes(graph, name=key, values=node_to_attr)
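
Most of the attribute assignments above rely on ubelt's dzip broadcasting: a length-1 list of values is paired with every key in the first argument. A minimal sketch:

    import ubelt as ub
    assert ub.dzip(['e1', 'e2'], [0.9]) == {'e1': 0.9, 'e2': 0.9}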
Example #7
    def _training_sample_weights(self):
        """
        Assigns weighting to each image to includence sample probability.

        We want to see very frequent categories less often,
        but we also don't really care about the rarest classes to the point
        where we should smaple them more than uncommon classes.  We also don't
        want to sample images without any or with too many annotations very
        often.
        """
        index_to_gid = [img['id'] for img in self.dset.dataset['images']]
        index_to_aids = list(ub.take(self.dset.gid_to_aids, index_to_gid))
        index_to_cids = [[self.dset.anns[aid]['category_id'] for aid in aids]
                         for aids in index_to_aids]

        catname_to_cid = {
            cat['name']: cid
            for cid, cat in self.dset.cats.items()}

        # median frequency weighting with minimum threshold
        min_examples = 20
        cat_freq = pd.Series(self.dset.category_annotation_frequency())

        valid_freq = cat_freq[cat_freq > min_examples]
        normal_mfw = valid_freq.median() / valid_freq

        # Draw anything under the threshold with probability equal to the median
        too_few = cat_freq[(cat_freq <= min_examples) & (cat_freq > 0)]
        too_few[:] = 1.0
        category_mfw = pd.concat([normal_mfw, too_few])

        cid_to_mfw = category_mfw.rename(catname_to_cid)

        cid_to_mfw_dict = cid_to_mfw.to_dict()

        index_to_weights = [list(ub.take(cid_to_mfw_dict, cids)) for cids in index_to_cids]
        index_to_nannots = np.array(list(map(len, index_to_weights)))

        # Each image becomes represented by the category with maximum median
        # frequency weight. This allows us to assign each image a proxy class
        # We make another proxy class to represent images without anything in
        # them.
        EMPTY_PROXY_CID = -1
        index_to_proxyid = [
            # cid_to_mfw.loc[cids].idxmax()
            ub.argmax(ub.dict_subset(cid_to_mfw_dict, cids))
            if len(cids) else EMPTY_PROXY_CID
            for cids in index_to_cids
        ]

        proxy_freq = pd.Series(ub.dict_hist(index_to_proxyid))
        proxy_root_mfw = proxy_freq.median() / proxy_freq
        power = 0.878
        proxy_root_mfw = proxy_root_mfw ** power
        # We now have a weight for each item in our dataset
        index_to_weight = np.array(list(ub.take(proxy_root_mfw.to_dict(), index_to_proxyid)))

        if False:
            # Figure out how the likelihoods of each class change
            xy = {}
            for power in [0, .5, .878, 1]:
                proxy_root_mfw = proxy_freq.median() / proxy_freq
                # don't let the weights get too high
                # proxy_root_mfw = np.sqrt(proxy_root_mfw)
                # power = .88
                proxy_root_mfw = proxy_root_mfw ** power
                # proxy_root_mfw = np.clip(proxy_root_mfw, a_min=None, a_max=3)

                index_to_weight = list(ub.take(proxy_root_mfw.to_dict(), index_to_proxyid))

                if 1:
                    # what is the probability we draw an empty image?
                    df = pd.DataFrame({
                        'nannots': index_to_nannots,
                        'weight': index_to_weight,
                    })
                    df['prob'] = df.weight / df.weight.sum()

                    prob_empty = df.prob[df.nannots == 0].sum()

                    probs = {'empty': prob_empty}
                    for cid in cid_to_mfw.index:
                        flags = [cid in cids for cids in index_to_cids]
                        catname = self.dset.cats[cid]['name']
                        p = df[flags].prob.sum()
                        probs[catname] = p
                    xy['p{}'.format(power)] = pd.Series(probs)
            xy['freq'] = {}
            for cid in cid_to_mfw.index:
                catname = self.dset.cats[cid]['name']
                xy['freq'][catname] = proxy_freq[cid]
            print(pd.DataFrame(xy))

        # index_to_prob = index_to_weight / index_to_weight.sum()
        return index_to_weight
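
A toy sketch of the median-frequency weighting used above, with hypothetical counts: classes rarer than the median frequency get weights above 1, frequent classes get weights below 1, and classes under the minimum-example threshold are clamped.

    import pandas as pd
    cat_freq = pd.Series({'common': 1000, 'uncommon': 100, 'rare': 5})
    min_examples = 20
    valid_freq = cat_freq[cat_freq > min_examples]
    normal_mfw = valid_freq.median() / valid_freq
    # common -> 0.55, uncommon -> 5.5; 'rare' falls below min_examples and
    # would instead be drawn with weight 1.0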
Example #8
def _setcover_greedy_old(candidate_sets_dict,
                         items=None,
                         set_weights=None,
                         item_values=None,
                         max_weight=None):
    """
    Benchmark:
        items = np.arange(10000)
        candidate_sets_dict = {}
        for i in range(1000):
            candidate_sets_dict[i] = np.random.choice(items, 200).tolist()

        _setcover_greedy_new(candidate_sets_dict) == _setcover_greedy_old(candidate_sets_dict)
        _ = nh.util.profile_onthefly(_setcover_greedy_new)(candidate_sets_dict)
        _ = nh.util.profile_onthefly(_setcover_greedy_old)(candidate_sets_dict)

        import ubelt as ub
        for timer in ub.Timerit(3, bestof=1, label='time'):
            with timer:
                len(_setcover_greedy_new(candidate_sets_dict))

        import ubelt as ub
        for timer in ub.Timerit(3, bestof=1, label='time'):
            with timer:
                len(_setcover_greedy_old(candidate_sets_dict))
    """
    import ubelt as ub

    solution_cover = {}

    if len(candidate_sets_dict) == 0:
        # O(1) optimal solution, we did it!
        return solution_cover

    # If set_weights or item_values are not given, use length as the default
    if set_weights is None:
        get_weight = len
    else:

        def get_weight(solution_cover):
            return sum(set_weights[key] for key in solution_cover.keys())

    if item_values is None:
        get_value = len
    else:

        def get_value(vals):
            return sum(item_values[v] for v in vals)

    if max_weight is None:
        max_weight = get_weight(candidate_sets_dict)

    avail_covers = {key: set(val) for key, val in candidate_sets_dict.items()}
    avail_keys, avail_vals = zip(*sorted(avail_covers.items()))
    avail_keys = list(avail_keys)
    avail_vals = list(avail_vals)

    # While we still need covers
    while get_weight(solution_cover) < max_weight and len(avail_keys) > 0:
        # Find the candidate set with the most uncovered items
        uncovered_values = list(map(get_value, avail_vals))
        chosen_idx = ub.argmax(uncovered_values)
        if uncovered_values[chosen_idx] <= 0:
            # needlessly adding value-less items
            break
        chosen_key = avail_keys[chosen_idx]
        # Add values in this key to the cover
        chosen_set = avail_covers[chosen_key]
        solution_cover[chosen_key] = candidate_sets_dict[chosen_key]
        # Remove chosen set from available options and covered items
        # from remaining available sets
        del avail_keys[chosen_idx]
        del avail_vals[chosen_idx]
        for vals in avail_vals:
            vals.difference_update(chosen_set)
    return solution_cover
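
A small usage sketch with toy data: the greedy loop first takes the largest set, then the set covering the most still-uncovered items, and stops once no candidate adds value.

    candidate_sets_dict = {'a': [1, 2, 3], 'b': [3, 4], 'c': [4, 5]}
    soln = _setcover_greedy_old(candidate_sets_dict)
    assert sorted(soln) == ['a', 'c']  # 'b' covers nothing new after 'a', 'c'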
Example #9
def _setcover_greedy_new(candidate_sets_dict,
                         items=None,
                         set_weights=None,
                         item_values=None,
                         max_weight=None):
    """
    Implements Johnson's / Chvatal's greedy set-cover approximation algorithm.

    The approximation guarantees depend on the specification of set weights
    and item values.

    Running time:
        N = number of universe items
        C = number of candidate covering sets

        Worst case running time is: O(C^2 * CN)
            (note: this is via a simple analysis; the big-oh might be better)

    Set Cover: log(len(items) + 1) approximation algorithm
    Weighted Maximum Cover: 1 - 1/e ~= 0.632 approximation algorithm
    Generalized maximum coverage is not implemented

    References:
        https://en.wikipedia.org/wiki/Maximum_coverage_problem

    Notes:
        # TODO: wrap https://github.com/martin-steinegger/setcover/blob/master/SetCover.cpp
        # pip install SetCoverPy
        # SetCoverPy is actually much slower than this implementation
        from SetCoverPy import setcover
        g = setcover.SetCover(full_overlaps, cost=np.ones(len(full_overlaps)))
        g.greedy()
        keep = np.where(g.s)[0]

    Example:
        >>> candidate_sets_dict = {
        >>>     'a': [1, 2, 3, 8, 9, 0],
        >>>     'b': [1, 2, 3, 4, 5],
        >>>     'c': [4, 5, 7],
        >>>     'd': [5, 6, 7],
        >>>     'e': [6, 7, 8, 9, 0],
        >>> }
        >>> greedy_soln = _setcover_greedy_new(candidate_sets_dict)
        >>> #print(repr(greedy_soln))
        ...
        >>> print('greedy_soln = {}'.format(ub.repr2(greedy_soln, nl=0)))
        greedy_soln = {'a': [1, 2, 3, 8, 9, 0], 'c': [4, 5, 7], 'd': [5, 6, 7]}

    Example:
        >>> candidate_sets_dict = {
        >>>     'a': [1, 2, 3, 8, 9, 0],
        >>>     'b': [1, 2, 3, 4, 5],
        >>>     'c': [4, 5, 7],
        >>>     'd': [5, 6, 7],
        >>>     'e': [6, 7, 8, 9, 0],
        >>> }
        >>> items = list(set(it.chain(*candidate_sets_dict.values())))
        >>> set_weights = {i: 1 for i in candidate_sets_dict.keys()}
        >>> item_values = {e: 1 for e in items}
        >>> greedy_soln = _setcover_greedy_new(candidate_sets_dict,
        >>>                             item_values=item_values,
        >>>                             set_weights=set_weights)
        >>> print('greedy_soln = {}'.format(ub.repr2(greedy_soln, nl=0)))
        greedy_soln = {'a': [1, 2, 3, 8, 9, 0], 'c': [4, 5, 7], 'd': [5, 6, 7]}

    Example:
        >>> candidate_sets_dict = {}
        >>> greedy_soln = _setcover_greedy_new(candidate_sets_dict)
        >>> print('greedy_soln = {}'.format(ub.repr2(greedy_soln, nl=0)))
        greedy_soln = {}
    """
    import itertools as it
    import ubelt as ub
    from collections import OrderedDict

    if len(candidate_sets_dict) == 0:
        # O(1) optimal solution, we did it!
        return {}

    solution_cover = {}
    solution_weight = 0

    if items is None:
        items = list(set(it.chain(*candidate_sets_dict.values())))

    # Inverted index
    item_to_keys = {item: set() for item in items}
    # This is actually a fair bit faster than the non-comprehension version
    [
        item_to_keys[item].add(key)
        for key, vals in candidate_sets_dict.items() for item in vals
    ]

    # If set_weights or item_values are not given, use length as the default
    if set_weights is None:
        get_weight = len
    else:
        # TODO: we can improve this with bookkeeping
        def get_weight(solution_cover):
            return sum(set_weights[key] for key in solution_cover.keys())

    if item_values is None:
        get_value = len
    else:

        def get_value(vals):
            return sum(item_values[v] for v in vals)

    if max_weight is None:
        max_weight = get_weight(candidate_sets_dict)

    avail_covers = OrderedDict([
        (key, set(vals)) for key, vals in sorted(candidate_sets_dict.items())
    ])
    avail_totals = OrderedDict([(key, get_value(vals))
                                for key, vals in avail_covers.items()])

    print('avail_covers = {}'.format(ub.repr2(avail_covers, nl=1)))
    print('avail_totals = {}'.format(ub.repr2(avail_totals, nl=1)))

    # While we still need covers
    while solution_weight < max_weight and len(avail_covers) > 0:
        # Find the candidate set with the most valuable uncovered items
        chosen_key = ub.argmax(avail_totals)
        if avail_totals[chosen_key] <= 0:
            # needlessly adding value-less covering set
            break

        print('-----')
        print('CHOOSE COVER SET = {!r}'.format(chosen_key))

        # Add values in this key to the cover
        chosen_items = avail_covers[chosen_key]
        solution_cover[chosen_key] = candidate_sets_dict[chosen_key]

        # Update the solution weight
        chosen_weight = (1 if set_weights is None else set_weights[chosen_key])
        solution_weight += chosen_weight

        # Remove chosen covering set from available options
        del avail_covers[chosen_key]
        del avail_totals[chosen_key]

        # For each chosen item, find the other sets that it belongs to
        modified_keys = set()
        for item in chosen_items:
            # Update the inverted index
            new_keys = item_to_keys[item]
            new_keys.remove(chosen_key)
            item_to_keys[item] = new_keys
            # And mark the non-chosen remaining cover sets as modified
            modified_keys.update(new_keys)
        # Then update and recompute the value of the modified sets
        for key in modified_keys:
            avail_covers[key].difference_update(chosen_items)
            newval = get_value(avail_covers[key])
            avail_totals[key] = newval

        print('avail_covers = {}'.format(ub.repr2(avail_covers, nl=1)))
        print('avail_totals = {}'.format(ub.repr2(avail_totals, nl=1)))

    print('solution_cover = {!r}'.format(solution_cover))
    return solution_cover
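
The max_weight budget caps the greedy loop; with the default unit weight per chosen set it bounds how many covering sets may be selected. A sketch on a subset of the docstring's toy data:

    candidate_sets_dict = {'a': [1, 2, 3, 8, 9, 0], 'b': [1, 2, 3, 4, 5]}
    soln = _setcover_greedy_new(candidate_sets_dict, max_weight=1)
    assert list(soln) == ['a']  # the budget stops after the single best set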