Example #1
    def init_test_mode(infr):
        from ibeis.algo.graph import nx_dynamic_graph
        infr.print('init_test_mode')
        infr.test_mode = True
        # infr.edge_truth = {}
        infr.metrics_list = []
        infr.test_state = {
            'n_decision': 0,
            'n_algo': 0,
            'n_manual': 0,
            'n_true_merges': 0,
            'n_error_edges': 0,
            'confusion': None,
        }
        infr.test_gt_pos_graph = nx_dynamic_graph.DynConnGraph()
        infr.test_gt_pos_graph.add_nodes_from(infr.aids)
        infr.nid_to_gt_cc = ut.group_items(infr.aids, infr.orig_name_labels)
        infr.node_truth = ut.dzip(infr.aids, infr.orig_name_labels)

        # infr.real_n_pcc_mst_edges = sum(
        #     len(cc) - 1 for cc in infr.nid_to_gt_cc.values())
        # ut.cprint('real_n_pcc_mst_edges = %r' % (
        #     infr.real_n_pcc_mst_edges,), 'red')

        infr.metrics_list = []
        infr.nid_to_gt_cc = ut.group_items(infr.aids, infr.orig_name_labels)
        infr.real_n_pcc_mst_edges = sum(
            len(cc) - 1 for cc in infr.nid_to_gt_cc.values())
        infr.print('real_n_pcc_mst_edges = %r' % (
            infr.real_n_pcc_mst_edges,), color='red')
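
All of the snippets on this page revolve around ut.group_items from utool, which maps each distinct key to the list of items that share it. As a reference point for Example #1, here is a minimal hedged sketch of that call with made-up annotation ids and name labels (the exact mapping type returned may vary between utool versions):

    import utool as ut
    aids = [1, 2, 3, 4, 5]
    orig_name_labels = ['a', 'b', 'a', 'c', 'b']
    # group annotation ids by their ground-truth name label
    nid_to_gt_cc = ut.group_items(aids, orig_name_labels)
    # Expected (up to ordering): {'a': [1, 3], 'b': [2, 5], 'c': [4]}
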
Example #2
 def compute_annot_occurrence_ids(ibs, aid_list):
     from ibeis.algo.preproc import preproc_occurrence
     gid_list = ibs.get_annot_gids(aid_list)
     gid2_aids = ut.group_items(aid_list, gid_list)
     flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False)
     occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
     occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()}
     return occurid2_aids
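
Example #2 composes two groupings: annotations are grouped by image, images are grouped by occurrence, and the per-image annotation lists are then flattened into per-occurrence lists. A hedged sketch of that composition with plain data (the ibs controller and the occurrence computation are mocked as literal dicts):

    import utool as ut
    gid2_aids = {10: [1, 2], 11: [3], 12: [4, 5]}     # image -> annotations
    occurid2_gids = {'occ1': [10, 11], 'occ2': [12]}  # occurrence -> images
    occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids))
                     for oid, gids in occurid2_gids.items()}
    # Expected: {'occ1': [1, 2, 3], 'occ2': [4, 5]}
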
Example #3
    def find_duplicates(index):
        # fpaths = list(index.files.keys())
        files = list(index.files.values())
        print('Grouping {} files'.format(len(files)))
        grouped = ut.group_items(files, [f.nbytes for f in files])
        print('Found {} groups'.format(len(grouped)))
        potential_dups = {k: v for k, v in grouped.items() if len(v) > 1}
        print('Found {} potential dups by nbytes'.format(len(potential_dups)))

        GB = 2**30  # NOQA
        MB = 2**20  # NOQA
        max_bytes = 10 * MB
        min_bytes = 64 * MB

        duplicates = []
        for k, fs in ut.ProgIter(potential_dups.items(), freq=1):
            names = [f.n for f in fs]
            if ut.allsame(names):
                # Don't do big files yet
                if k < max_bytes and k > min_bytes:
                    if ut.allsame([f.hashid for f in fs]):
                        duplicates.extend(fs)
                        for f1, f2 in ut.combinations(fs, 2):
                            f1.duplicates.add(f2)
                            f2.duplicates.add(f1)

        def dpath_similarity(index, dpath1, dpath2):
            d1 = index[dpath1]
            d2 = index[dpath2]
            set1 = {f.hashid for f in ut.ProgIter(d1.files)}
            set2 = {f.hashid for f in ut.ProgIter(d2.files)}
            # n_isect = len(set1.intersection(set2))
            size1, size2 = map(len, (set1, set2))
            # minsize = min(size1, size2)
            # sim_measures = (n_isect, n_isect / minsize)
            return ut.set_overlaps(set1, set2)
            # return sim_measures

        similarities = {}
        r_to_dup = ut.group_items(duplicates, [p.r for p in duplicates])
        for dpath, dups in r_to_dup.items():
            # Check to see if the duplicates all point to the same dir
            f = dups[0]  # NOQA
            common_dpath = set.intersection(*[{_.r
                                               for _ in f.duplicates}
                                              for f in dups])

            for other in common_dpath:
                sim_measures = dpath_similarity(index, dpath, other)
                similarities[(dpath, other)] = sim_measures

        print(ut.repr4(similarities, si=True, nl=2))
Example #4
    def find_duplicates(index):
        # fpaths = list(index.files.keys())
        files = list(index.files.values())
        print('Grouping {} files'.format(len(files)))
        grouped = ut.group_items(files, [f.nbytes for f in files])
        print('Found {} groups'.format(len(grouped)))
        potential_dups = {k: v for k, v in grouped.items() if len(v) > 1}
        print('Found {} potential dups by nbytes'.format(len(potential_dups)))

        GB = 2 ** 30  # NOQA
        MB = 2 ** 20  # NOQA
        max_bytes = 10 * MB
        min_bytes = 64 * MB

        duplicates = []
        for k, fs in ut.ProgIter(potential_dups.items(), freq=1):
            names = [f.n for f in fs]
            if ut.allsame(names):
                # Don't do big files yet
                if k < max_bytes and k > min_bytes:
                    if ut.allsame([f.hashid for f in fs]):
                        duplicates.extend(fs)
                        for f1, f2 in ut.combinations(fs, 2):
                            f1.duplicates.add(f2)
                            f2.duplicates.add(f1)

        def dpath_similarity(index, dpath1, dpath2):
            d1 = index[dpath1]
            d2 = index[dpath2]
            set1 = {f.hashid for f in ut.ProgIter(d1.files)}
            set2 = {f.hashid for f in ut.ProgIter(d2.files)}
            # n_isect = len(set1.intersection(set2))
            size1, size2 = map(len, (set1, set2))
            # minsize = min(size1, size2)
            # sim_measures = (n_isect, n_isect / minsize)
            return ut.set_overlaps(set1, set2)
            # return sim_measures

        similarities = {}
        r_to_dup = ut.group_items(duplicates, [p.r for p in duplicates])
        for dpath, dups in r_to_dup.items():
            # Check to see if the duplicates all point to the same dir
            f = dups[0]  # NOQA
            common_dpath = set.intersection(*[
                {_.r for _ in f.duplicates} for f in dups])

            for other in common_dpath:
                sim_measures = dpath_similarity(index, dpath, other)
                similarities[(dpath, other)] = sim_measures

        print(ut.repr4(similarities, si=True, nl=2))
Example #5
def testdb2_stuff():
    """
    tar -zcvf testdb2.tar.gz testdb2/
    """
    import ibeis
    ibs = ibeis.opendb('testdb2')

    #ibs.ensure_contributor_rowids()

    gid_list = ibs.get_valid_gids()

    # Group gids by species
    image_species_list = ut.get_list_column(
        ibs.unflat_map(ibs.get_annot_species_rowids, ibs.get_image_aids(gid_list)), 0)

    new_contrib_rowid1 = ibs.add_new_temp_contributor(offset=len(ibs.get_valid_contrib_rowids()))
    new_contrib_rowid2 = ibs.add_new_temp_contributor(offset=len(ibs.get_valid_contrib_rowids()))

    gids1, gids2 = list(ut.group_items(gid_list, image_species_list).values())

    party_rowids = ibs.add_party(['TestCar1', 'TestCar2'])
    partyid1, partyid2 = party_rowids
    ibs.set_image_contributor_rowid(gids1, [new_contrib_rowid1] * len(gids1))
    ibs.set_image_contributor_rowid(gids2, [new_contrib_rowid2] * len(gids2))
    ibs.set_image_party_rowids(gids1, [partyid1] * len(gids1))
    ibs.set_image_party_rowids(gids2, [partyid2] * len(gids2))
Example #6
def group_review():
    prefill = request.args.get('prefill', '')
    if len(prefill) > 0:
        ibs = current_app.ibs
        aid_list = ibs.get_valid_aids()
        bad_species_list, bad_viewpoint_list = ibs.validate_annot_species_viewpoint_cnn(aid_list)

        GROUP_BY_PREDICTION = True
        if GROUP_BY_PREDICTION:
            grouped_dict = ut.group_items(bad_viewpoint_list, ut.get_list_column(bad_viewpoint_list, 3))
            grouped_list = grouped_dict.values()
            regrouped_items = ut.flatten(ut.sortedby(grouped_list, map(len, grouped_list)))
            candidate_aid_list = ut.get_list_column(regrouped_items, 0)
        else:
            candidate_aid_list = [bad_viewpoint[0] for bad_viewpoint in bad_viewpoint_list]
    elif request.args.get('aid_list', None) is not None:
        aid_list = request.args.get('aid_list', '')
        if len(aid_list) > 0:
            aid_list = aid_list.replace('[', '')
            aid_list = aid_list.replace(']', '')
            aid_list = aid_list.strip().split(',')
            candidate_aid_list = [int(aid_.strip()) for aid_ in aid_list]
        else:
            candidate_aid_list = ''
    else:
        candidate_aid_list = ''

    return appf.template(None, 'group_review', candidate_aid_list=candidate_aid_list, mode_list=appf.VALID_TURK_MODES)
Example #7
def get_encounter_num_names_with_exemplar(ibs, eid_list):
    r"""
    RESTful:
        Method: GET
        URL:    /api/encounter/num_names_with_exemplar/

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_encounter_funcs import *  # NOQA
        >>> import ibeis  # NOQA
        >>> ibs = ibeis.opendb('testdb1')
        >>> eid_list = ibs._get_all_encounter_rowids()
        >>> num_annots_reviewed_list = ibs.get_encounter_num_annotmatch_reviewed(eid_list)
    """
    aids_list = ibs.get_encounter_custom_filtered_aids(eid_list)
    exflags_list = ibs.unflat_map(ibs.get_annot_exemplar_flags, aids_list)
    nids_list = ibs.unflat_map(ibs.get_annot_name_rowids, aids_list)
    groups_list = [ut.group_items(exflags, nids)
                   for exflags, nids in zip(exflags_list, nids_list)]
    #num_names_list = [len(groups) for groups in groups_list]
    num_exemplared_names_list = [
        sum([any(exflags) for exflags in six.itervalues(groups)])
        for groups in groups_list
    ]
    return num_exemplared_names_list
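
Examples #7 and #8 share one pattern: group exemplar flags by name id, then count how many names have at least one exemplar. A hedged standalone illustration with made-up flags and name ids:

    import utool as ut
    exflags = [True, False, False, True, False]
    nids = [1, 1, 2, 3, 3]
    groups = ut.group_items(exflags, nids)
    num_names_with_exemplar = sum(any(flags) for flags in groups.values())
    # Expected: 2 (names 1 and 3 have an exemplar, name 2 does not)
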
Example #8
def get_imageset_num_names_with_exemplar(ibs, imgsetid_list):
    r"""
    RESTful:
        Method: GET
        URL:    /api/imageset/num_names_with_exemplar/

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_imageset_funcs import *  # NOQA
        >>> import ibeis  # NOQA
        >>> ibs = ibeis.opendb('testdb1')
        >>> imgsetid_list = ibs._get_all_imageset_rowids()
        >>> num_annots_reviewed_list = ibs.get_imageset_num_annotmatch_reviewed(imgsetid_list)
    """
    aids_list = ibs.get_imageset_custom_filtered_aids(imgsetid_list)
    exflags_list = ibs.unflat_map(ibs.get_annot_exemplar_flags, aids_list)
    nids_list = ibs.unflat_map(ibs.get_annot_name_rowids, aids_list)
    groups_list = [
        ut.group_items(exflags, nids)
        for exflags, nids in zip(exflags_list, nids_list)
    ]
    #num_names_list = [len(groups) for groups in groups_list]
    num_exemplared_names_list = [
        sum([any(exflags) for exflags in six.itervalues(groups)])
        for groups in groups_list
    ]
    return num_exemplared_names_list
Example #9
    def find_needsmove_to_other(self, other):
        hash1 = self.get_prop('md5_stride')
        hash2 = other.get_prop('md5_stride')
        idxs1 = list(range(len(hash1)))

        hash_to_idxs = ut.group_items(idxs1, hash1)
        # Find what we have that other doesn't have and move it there
        other_missing = set(hash1).difference(hash2)
        missing_idxs1 = ut.flatten(ut.take(hash_to_idxs, other_missing))

        data = ut.ColumnLists({
            'idx':
            missing_idxs1,
            'fname':
            self.get_prop('fname', missing_idxs1),
            'dname':
            self.get_prop('dname', missing_idxs1),
            'full_path':
            self.get_prop('full_path', missing_idxs1),
            'nbytes':
            self.get_prop('nbytes', missing_idxs1),
        })
        data = data.compress([f != 'Thumbs.db' for f in data['fname']])
        data['ext'] = self.get_prop('ext', data['idx'])
        ut.dict_hist(data['ext'])
        data.print(ignore=['full_path', 'dname'])
Example #10
def report_partitioning_statistics(new_reduced_joint):
    # compute partitioning statistics
    import vtool as vt
    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    #groupsize = list(map(len, idxs))
    #groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    clusterstats = [tuple(sorted(list(ut.dict_hist(a).values())))
                    for a in all_states]
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(),
                                  clusterstats)

    #probs_assigned_to_clustertype = [(
    #    sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #    for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [(
        ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()]
    sortx = ut.argsort([max(c[0].keys())
                        for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype, sortx)

    # This is a list of 2-tuples: the first item holds the unique
    # probabilities assigned to a cluster type along with the number of
    # times each was assigned; the cluster type is the second item. Every
    # number in a cluster type represents how many annotations were
    # assigned to a specific label, and the length of that tuple is the
    # total number of labels. For all low scores you will see
    # [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]], indicating that the
    # assignment of everyone to a different label happened once with
    # probability somenum and 800 times with probability 0.

    #print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    #z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
Example #11
 def _print_previous_loop_statistics(infr, count):
     # Print stats about what happened in this loop
     history = infr.metrics_list[-count:]
     block_pairs = [
         (k, sum(1 for i in g))
         for k, g in it.groupby(ut.take_column(history, 'recovering'))
     ]
     recover_blocks = ut.group_items(
         ut.take_column(block_pairs, 1), ut.take_column(block_pairs, 0)
     ).get(True, [])
     infr.print((
         'Recovery mode entered {} times, '
         'made {} recovery decisions.').format(
             len(recover_blocks), sum(recover_blocks)), color='green')
     testaction_hist = ut.dict_hist(ut.take_column(history, 'test_action'))
     infr.print(
         'Test Action Histogram: {}'.format(
             ut.repr4(testaction_hist, si=True)), color='yellow')
     if infr.params['inference.enabled']:
         action_hist = ut.dict_hist(
             ut.emap(frozenset, ut.take_column(history, 'action')))
         infr.print(
             'Inference Action Histogram: {}'.format(
                 ub.repr2(action_hist, si=True)), color='yellow')
     infr.print(
         'Decision Histogram: {}'.format(ut.repr2(ut.dict_hist(
             ut.take_column(history, 'pred_decision')
         ), si=True)), color='yellow')
     infr.print(
         'User Histogram: {}'.format(ut.repr2(ut.dict_hist(
             ut.take_column(history, 'user_id')
         ), si=True)), color='yellow')
Example #12
def report_partitioning_statistics(new_reduced_joint):
    # compute partitioning statistics
    import vtool as vt
    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    #groupsize = list(map(len, idxs))
    #groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    clusterstats = [tuple(sorted(list(ut.dict_hist(a).values())))
                    for a in all_states]
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(),
                                  clusterstats)

    #probs_assigned_to_clustertype = [(
    #    sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #    for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [(
        ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()]
    sortx = ut.argsort([max(c[0].keys())
                        for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype, sortx)

    # This is a list of 2-tuples: the first item holds the unique
    # probabilities assigned to a cluster type along with the number of
    # times each was assigned; the cluster type is the second item. Every
    # number in a cluster type represents how many annotations were
    # assigned to a specific label, and the length of that tuple is the
    # total number of labels. For all low scores you will see
    # [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]], indicating that the
    # assignment of everyone to a different label happened once with
    # probability somenum and 800 times with probability 0.

    #print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    #z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
Example #13
    def find_connecting_edges(infr):
        """
        Searches for a small set of edges, which if reviewed as positive would
        ensure that each PCC is k-connected. Note that in some cases this is
        not possible.
        """
        label = 'name_label'
        node_to_label = infr.get_node_attrs(label)
        label_to_nodes = ut.group_items(node_to_label.keys(), node_to_label.values())

        # k = infr.params['redun.pos']
        k = 1
        new_edges = []
        prog = ut.ProgIter(
            list(label_to_nodes.keys()),
            label='finding connecting edges',
            enabled=infr.verbose > 0,
        )
        for nid in prog:
            nodes = set(label_to_nodes[nid])
            G = infr.pos_graph.subgraph(nodes, dynamic=False)
            impossible = nxu.edges_inside(infr.neg_graph, nodes)
            impossible |= nxu.edges_inside(infr.incomp_graph, nodes)

            candidates = set(nx.complement(G).edges())
            candidates.difference_update(impossible)

            aug_edges = nxu.k_edge_augmentation(G, k=k, avail=candidates)
            new_edges += aug_edges
        prog.ensure_newline()
        return new_edges
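
Example #13 groups nodes by name label and asks for the edges that would make each positive subgraph k-connected. A hedged, self-contained sketch of the same idea using plain networkx (nxu is wbia's wrapper around networkx; the toy nodes, labels, and positive edges below are invented):

    import networkx as nx
    from networkx.algorithms.connectivity import k_edge_augmentation
    import utool as ut

    nodes = [1, 2, 3, 4, 5, 6]
    labels = ['a', 'a', 'a', 'b', 'b', 'b']
    pos_edges = [(1, 2), (4, 5)]  # existing positive edges

    label_to_nodes = ut.group_items(nodes, labels)
    new_edges = []
    for label, cc in label_to_nodes.items():
        G = nx.Graph()
        G.add_nodes_from(cc)
        G.add_edges_from(e for e in pos_edges if e[0] in cc and e[1] in cc)
        # edges that, once added, make this labeled group 1-connected
        new_edges.extend(k_edge_augmentation(G, k=1))
    # One possible result (the edge choice is not unique): [(2, 3), (5, 6)]
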
Example #14
def groundtruth_merge_loop(infr):
    """
    Finds edges to make sure the ground truth is merged
    """
    from wbia.algo.graph import nx_utils as nxu

    infr.print('==============================', color='white')
    infr.print('--- GROUNDTRUTH MERGE LOOP ---', color='white')
    assert infr.test_mode, 'only run this in test mode'

    group = ut.group_items(infr.aids, infr.orig_name_labels)
    fix_edges = []

    # Tell the oracle it's time to get serious
    # infr.oracle.normal_accuracy = 1.0
    # infr.oracle.recover_accuracy = 1.0

    for gt_nid, aids in group.items():
        pos_sub = infr.pos_graph.subgraph(aids)
        aug_edges = nxu.edge_augmentation(pos_sub, k=1, partial=True)
        fix_edges.extend(aug_edges)

    if infr.test_mode:
        infr.ensure_edges_from(fix_edges)
        infr.apply_edge_truth(fix_edges)

    for edge in fix_edges:
        try:
            feedback = infr.request_user_review(edge)
        except ReviewCanceled:
            raise
        infr.add_feedback(edge=edge, **feedback)
        infr.recovery_review_loop(verbose=0)
Example #15
def testdb2_stuff():
    """
    tar -zcvf testdb2.tar.gz testdb2/
    """
    import ibeis
    ibs = ibeis.opendb('testdb2')

    #ibs.ensure_contributor_rowids()

    gid_list = ibs.get_valid_gids()

    # Group gids by species
    image_species_list = ut.get_list_column(
        ibs.unflat_map(ibs.get_annot_species_rowids,
                       ibs.get_image_aids(gid_list)), 0)

    new_contributor_rowid1 = ibs.add_new_temp_contributor(
        offset=len(ibs.get_valid_contributor_rowids()))
    new_contributor_rowid2 = ibs.add_new_temp_contributor(
        offset=len(ibs.get_valid_contributor_rowids()))

    gids1, gids2 = list(ut.group_items(gid_list, image_species_list).values())

    party_rowids = ibs.add_party(['TestCar1', 'TestCar2'])
    partyid1, partyid2 = party_rowids
    ibs.set_image_contributor_rowid(gids1,
                                    [new_contributor_rowid1] * len(gids1))
    ibs.set_image_contributor_rowid(gids2,
                                    [new_contributor_rowid2] * len(gids2))
    ibs.set_image_party_rowids(gids1, [partyid1] * len(gids1))
    ibs.set_image_party_rowids(gids2, [partyid2] * len(gids2))
Example #16
    def oracle_review(sim):
        queue_params = {
            'pos_diameter': None,
            'neg_diameter': None,
        }
        infr = sim.infr
        prev = infr.verbose
        infr.verbose = 0
        # rng = np.random.RandomState(0)
        infr = sim.infr
        primary_truth = sim.primary_truth
        review_edges = infr.generate_reviews(**queue_params)
        max_reviews = 1000
        for count, (aid1, aid2) in enumerate(ut.ProgIter(review_edges)):
            state = primary_truth.loc[(aid1, aid2)].idxmax()
            tags = []
            infr.add_feedback(aid1,
                              aid2,
                              state,
                              tags,
                              apply=True,
                              rectify=False,
                              user_id='oracle',
                              confidence='absolutely_sure')
            if count > max_reviews:
                break
        infr.verbose = prev

        sim.results['max_reviews'] = max_reviews

        n_clusters, n_inconsistent = infr.relabel_using_reviews(rectify=False)
        assert n_inconsistent == 0, 'should not create any inconsistencies'

        sim.results['n_user_clusters'] = n_clusters
        # infr.apply_review_inference()

        curr_decisions = infr.edge_attr_df('decision')
        curr_truth = primary_truth.loc[curr_decisions.index].idxmax(axis=1)
        n_user_mistakes = curr_decisions != curr_truth
        sim.results['n_user_mistakes'] = sum(n_user_mistakes)

        gt_clusters = ut.group_pairs(infr.gen_node_attrs('orig_name_label'))
        curr_clusters = ut.group_pairs(infr.gen_node_attrs('name_label'))

        compare_results = compare_groups(list(gt_clusters.values()),
                                         list(curr_clusters.values()))
        sim.results.update(ut.map_vals(len, compare_results))

        common_per_num = ut.group_items(compare_results['common'],
                                        map(len, compare_results['common']))
        sumafter = 3
        greater = [i for i in common_per_num.keys() if i > sumafter]
        common_per_num['>%s' % sumafter] = ut.flatten(
            ut.take(common_per_num, greater))
        ut.delete_keys(common_per_num, greater)
        for k, v in common_per_num.items():
            sim.results['common@' + str(k)] = len(v)

        sim.results['n_names_common'] = len(compare_results['common'])
Example #17
def view_file_in_directory(fpaths):
    import utool as ut
    fpaths = ut.ensure_iterable(fpaths)
    fnames = [basename(f) for f in fpaths]
    dpaths = [dirname(f) for f in fpaths]
    dpath_to_fnames = ut.group_items(fnames, dpaths)
    for dpath, fnames in dpath_to_fnames.items():
        ut.view_directory(dpath, fnames[0], verbose=False)
Example #18
 def get_annot_sex_stats(aid_list):
     annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
     sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
     sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
     assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys))
     sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys])
     # Filter 0's
     sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0}
     return sextext2_nAnnots
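
Example #18 is the "group then count" idiom: group annotation ids by a categorical attribute and report the group sizes. A hedged toy version (the sex labels below are invented):

    import utool as ut
    aids = [1, 2, 3, 4, 5]
    sextexts = ['Male', 'Female', 'Male', 'UNKNOWN SEX', 'Male']
    sextext2_aids = ut.group_items(aids, sextexts)
    sextext2_nAnnots = {key: len(vals) for key, vals in sextext2_aids.items()}
    # Expected: {'Male': 3, 'Female': 1, 'UNKNOWN SEX': 1}
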
Example #19
    def predict_proba_df(verif, edges):
        """
        CommandLine:
            python -m wbia.algo.graph.demo DummyVerif.predict_edges

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.graph.demo import *  # NOQA
            >>> from wbia.algo.graph import demo
            >>> import networkx as nx
            >>> kwargs = dict(num_pccs=40, size=2)
            >>> infr = demo.demodata_infr(**kwargs)
            >>> verif = infr.dummy_verif
            >>> edges = list(infr.graph.edges())
            >>> probs = verif.predict_proba_df(edges)
            >>> #print('scores = %r' % (scores,))
            >>> #hashid = ut.hash_data(scores)
            >>> #print('hashid = %r' % (hashid,))
            >>> #assert hashid == 'cdlkytilfeqgmtsihvhqwffmhczqmpil'
        """
        infr = verif.infr
        edges = list(it.starmap(verif.infr.e_, edges))
        prob_cache = infr.task_probs['match_state']
        is_miss = np.array([e not in prob_cache for e in edges])
        # is_hit = ~is_miss
        if np.any(is_miss):
            miss_edges = ut.compress(edges, is_miss)
            miss_truths = [verif._get_truth(edge) for edge in miss_edges]
            grouped_edges = ut.group_items(miss_edges,
                                           miss_truths,
                                           sorted_=False)
            # Need to make this deterministic too
            states = [POSTV, NEGTV, INCMP]
            for key in sorted(grouped_edges.keys()):
                group = grouped_edges[key]
                probs0 = randn(
                    shape=[len(group)],
                    rng=verif.rng,
                    a_max=1,
                    a_min=0,
                    **verif.dummy_params[key],
                )
                # Just randomly assign other probs
                probs1 = verif.rng.rand(len(group)) * (1 - probs0)
                probs2 = 1 - (probs0 + probs1)
                for edge, probs in zip(group, zip(probs0, probs1, probs2)):
                    prob_cache[edge] = ut.dzip(states, probs)

        from wbia.algo.graph import nx_utils as nxu
        import pandas as pd

        probs = pd.DataFrame(
            ut.take(prob_cache, edges),
            index=nxu.ensure_multi_index(edges, ('aid1', 'aid2')),
        )
        return probs
Example #20
def level_order(graph):
    import utool as ut
    node_to_level = ut.nx_dag_node_rank(graph)
    #source = ut.nx_source_nodes(graph)[0]
    #longest_paths = dict([(target, dag_longest_path(graph, source, target))
    #                      for target in graph.nodes()])
    #node_to_level = ut.map_dict_vals(len, longest_paths)
    grouped = ut.group_items(node_to_level.keys(), node_to_level.values())
    levels = ut.take(grouped, range(1, len(grouped) + 1))
    return levels
Example #21
 def assign_to_words(invindex, idx2_vec):
     idx2_wx, _idx2_wdist = invindex.wordflann.nn_index(idx2_vec, 1)
     if True:
         assign_df = pd.DataFrame(idx2_wx, columns=['wordindex'])
         grouping = assign_df.groupby('wordindex')
         wx2_idxs = grouping.wordindex.indices
     else:
         # TODO: replace with pandas groupby
         idx_list = list(range(len(idx2_wx)))
         wx2_idxs = utool.group_items(idx_list, idx2_wx.tolist())
     return wx2_idxs, idx2_wx
Example #22
def print_factors(model, factor_list):
    if hasattr(model, 'var2_cpd'):
        semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list]
    else:
        semtypes = [0] * len(factor_list)
    for type_, factors in ut.group_items(factor_list, semtypes).items():
        logger.info('Result Factors (%r)' % (type_, ))
        factors = ut.sortedby(factors, [f.variables[0] for f in factors])
        for fs_ in ut.ichunks(factors, 4):
            ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                          'yellow')
Example #23
 def print_dataset_info(data, labels, key):
     labelhist = {key: len(val) for key, val in ut.group_items(labels, labels).items()}
     stats_dict = ut.get_stats(data.ravel())
     ut.delete_keys(stats_dict, ['shape', 'nMax', 'nMin'])
     print('[dataset] Dataset Info: ')
     print('[dataset] * Data:')
     print('[dataset]     %s_data(shape=%r, dtype=%r)' % (key, data.shape, data.dtype))
     print('[dataset]     %s_memory(data) = %r' % (key, ut.get_object_size_str(data),))
     print('[dataset]     %s_stats(data) = %s' % (key, ut.repr2(stats_dict, precision=2),))
     print('[dataset] * Labels:')
     print('[dataset]     %s_labels(shape=%r, dtype=%r)' % (key, labels.shape, labels.dtype))
     print('[dataset]     %s_label histogram = %s' % (key, ut.repr2(labelhist)))
Example #24
def print_factors(model, factor_list):
    if hasattr(model, 'var2_cpd'):
        semtypes = [model.var2_cpd[f.variables[0]].ttype
                    for f in factor_list]
    else:
        semtypes = [0] * len(factor_list)
    for type_, factors in ut.group_items(factor_list, semtypes).items():
        print('Result Factors (%r)' % (type_,))
        factors = ut.sortedby(factors, [f.variables[0] for f in factors])
        for fs_ in ut.ichunks(factors, 4):
            ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                          'yellow')
Example #25
 def find_nonunique_names(self):
     fnames = map(basename, self.rel_fpath_list)
     duplicate_map = ut.find_duplicate_items(fnames)
     groups = []
     for dupname, idxs in duplicate_map.items():
         uuids = self.get_prop('uuids', idxs)
         fpaths = self.get_prop('abs', idxs)
         groups = ut.group_items(fpaths, uuids)
         if len(groups) > 1:
             if all(x == 1 for x in map(len, groups.values())):
                 # All groups are different; this is a simpler case
                 print(ut.repr2(groups, nl=3))
             else:
                 # Need to handle the multi-item groups first
                 pass
Example #26
 def find_nonunique_names(self):
     fnames = map(basename, self.rel_fpath_list)
     duplicate_map = ut.find_duplicate_items(fnames)
     groups = []
     for dupname, idxs in duplicate_map.items():
         uuids = self.get_prop('uuids', idxs)
         fpaths = self.get_prop('abs', idxs)
         groups = ut.group_items(fpaths, uuids)
         if len(groups) > 1:
             if all(x == 1 for x in map(len, groups.values())):
                 # All groups are different; this is a simpler case
                 print(ut.repr2(groups, nl=3))
             else:
                 # Need to handle the multi-item groups first
                 pass
Example #27
    def get_dependencies(depc, tablename):
        """
        gets level dependences from root to tablename

        CommandLine:
            python -m ibeis.depends_cache --exec-get_dependencies --show

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.depends_cache import *  # NOQA
            >>> depc = testdata_depc()
            >>> tablename = 'fgweight'
            >>> result = ut.repr3(depc.get_dependencies(tablename), nl=1)
            >>> print(result)
            [
                ['dummy_annot'],
                ['chip', 'probchip'],
                ['keypoint'],
                ['fgweight'],
            ]

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.depends_cache import *  # NOQA
            >>> depc = testdata_depc()
            >>> tablename = 'spam'
            >>> result = ut.repr3(depc.get_dependencies(tablename), nl=1)
            >>> print(result)
            [
                ['dummy_annot'],
                ['chip', 'probchip'],
                ['keypoint'],
                ['fgweight'],
                ['spam'],
            ]
        """
        root = depc.root_tablename
        children_, parents_ = list(zip(*depc.get_edges()))
        child_to_parents = ut.group_items(children_, parents_)
        to_root = {tablename: ut.paths_to_root(tablename, root, child_to_parents)}
        from_root = ut.reverse_path(to_root, root, child_to_parents)
        dependency_levels_ = ut.get_levels(from_root)
        dependency_levels = ut.longest_levels(dependency_levels_)
        #print('child_to_parents = %s' % (ut.repr3(child_to_parents),))
        #print('to_root = %r' % (to_root,))
        #print('from_root = %r' % (from_root,))
        return dependency_levels
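
Example #27 builds its child-to-parents table with a single ut.group_items call over the dependency edge list. A hedged toy version of that inversion, assuming the edges are (parent, child) pairs as the doctest above suggests:

    import utool as ut
    edges = [('dummy_annot', 'chip'), ('dummy_annot', 'probchip'),
             ('chip', 'keypoint'), ('chip', 'fgweight'), ('probchip', 'fgweight')]
    parents_, children_ = zip(*edges)
    child_to_parents = ut.group_items(list(parents_), list(children_))
    # Expected: {'chip': ['dummy_annot'], 'probchip': ['dummy_annot'],
    #            'keypoint': ['chip'], 'fgweight': ['chip', 'probchip']}
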
Example #28
    def check_baseline_results(sim):
        import networkx as nx
        infr = sim.infr
        n_names_possible = 0
        real_groups = ut.group_pairs(infr.gen_node_attrs('orig_name_label'))
        possible_clusters = []
        for nid, nodes in real_groups.items():
            if len(nodes) == 1:
                possible_clusters.append(nodes)
                n_names_possible += 1
                continue
            cc_cand_edges = list(ut.nx_edges_between(infr.graph, nodes))
            cc = ut.nx_from_node_edge(nodes, cc_cand_edges)
            mst = nx.minimum_spanning_tree(cc)
            ccs = list(nx.connected_components(mst))
            possible_clusters.extend(ccs)
            n_names_possible += (len(ccs))

        sumafter = 3

        best_possible_compare_results = compare_groups(
            list(real_groups.values()), list(possible_clusters))
        possible_per_num = ut.map_vals(
            len,
            ut.group_items(best_possible_compare_results['common'],
                           map(len, best_possible_compare_results['common'])))
        greater = [i for i in possible_per_num.keys() if i > sumafter]
        possible_per_num['>%s' % sumafter] = sum(
            ut.take(possible_per_num, greater))
        ut.delete_keys(possible_per_num, greater)
        for k, v in possible_per_num.items():
            sim.results['possible@' + str(k)] = v
        sim.results['possible'] = len(best_possible_compare_results['common'])

        # Measure the number of real names in the test (per number of annots)
        real_per_num = ut.dict_hist(map(len, real_groups.values()))
        greater = [i for i in real_per_num.keys() if i > sumafter]
        real_per_num['>%s' % sumafter] = sum(ut.take(real_per_num, greater))
        ut.delete_keys(real_per_num, greater)
        for k, v in real_per_num.items():
            sim.results['real@' + str(k)] = v

        sim.results['n_names_possible'] = n_names_possible
        sim.results['n_names_real'] = len(real_groups)
        sim.results['real'] = len(real_groups)
Example #29
    def print_graph_connections(infr, label='orig_name_label'):
        """
        label = 'orig_name_label'
        """
        node_to_label = infr.get_node_attrs(label)
        label_to_nodes = ut.group_items(node_to_label.keys(), node_to_label.values())
        logger.info('CC info')
        for name, cc in label_to_nodes.items():
            logger.info('\nname = %r' % (name,))
            edges = list(nxu.edges_between(infr.graph, cc))
            logger.info(infr.get_edge_df_text(edges))

        logger.info('CC pair info')
        for (n1, cc1), (n2, cc2) in it.combinations(label_to_nodes.items(), 2):
            if n1 == n2:
                continue
            logger.info('\nname_pair = {}-vs-{}'.format(n1, n2))
            edges = list(nxu.edges_between(infr.graph, cc1, cc2))
            logger.info(infr.get_edge_df_text(edges))
Example #30
    def find_clique_edges(infr, label='name_label'):
        """
        Augmenting edges that would complete each of the specified cliques.
        (based on the group inferred from `label`)

        Args:
            label (str): node attribute to use as the group id to form the
                cliques.
        """
        node_to_label = infr.get_node_attrs(label)
        label_to_nodes = ut.group_items(node_to_label.keys(), node_to_label.values())
        new_edges = []
        for label, nodes in label_to_nodes.items():
            for edge in it.combinations(nodes, 2):
                if infr.edge_decision(edge) == UNREV:
                    new_edges.append(edge)
                # if infr.has_edge(edge):
                # else:
                #     new_edges.append(edge)
        return new_edges
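
Example #30 turns each labeled group into a clique by proposing every pair inside the group that has not been reviewed yet. A hedged sketch of the same combination step, with review state reduced to a plain set of already-decided pairs:

    import itertools as it
    import utool as ut
    nodes = [1, 2, 3, 4, 5]
    labels = ['a', 'a', 'a', 'b', 'b']
    existing = {(1, 2)}  # pairs that already have a decision
    label_to_nodes = ut.group_items(nodes, labels)
    new_edges = [edge for group in label_to_nodes.values()
                 for edge in it.combinations(group, 2) if edge not in existing]
    # Expected (up to ordering): [(1, 3), (2, 3), (4, 5)]
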
Example #31
def __debug_win_msvcr():
    import utool as ut
    fname = 'msvcr*.dll'
    key_list = ['PATH']
    found = ut.search_env_paths(fname, key_list)
    fpaths = ut.unique(ut.flatten(found.values()))
    fpaths = ut.lmap(ut.ensure_unixslash, fpaths)
    from os.path import basename
    dllnames = [basename(x) for x in fpaths]
    grouped = dict(ut.group_items(fpaths, dllnames))
    print(ut.dict_str(grouped, nl=4))

    keytoid = {}

    for key, vals in grouped.items():
        infos = ut.lmap(ut.get_file_nBytes, vals)
        #infos = ut.lmap(ut.get_file_uuid, vals)
        #uuids = [ut.get_file_uuid(val) for val in vals]
        keytoid[key] = list(zip(infos, vals))
    ut.print_dict(keytoid, nl=2)
Example #32
def __debug_win_msvcr():
    import utool as ut
    fname = 'msvcr*.dll'
    key_list = ['PATH']
    found = ut.search_env_paths(fname, key_list)
    fpaths = ut.unique(ut.flatten(found.values()))
    fpaths = ut.lmap(ut.ensure_unixslash, fpaths)
    from os.path import basename
    dllnames = [basename(x) for x in fpaths]
    grouped = dict(ut.group_items(fpaths, dllnames))
    print(ut.repr4(grouped, nl=4))

    keytoid = {}

    for key, vals in grouped.items():
        infos = ut.lmap(ut.get_file_nBytes, vals)
        #infos = ut.lmap(ut.get_file_uuid, vals)
        #uuids = [ut.get_file_uuid(val) for val in vals]
        keytoid[key] = list(zip(infos, vals))
    ut.print_dict(keytoid, nl=2)
Example #33
    def find_needsmove_to_other(self, other):
        hash1 = self.get_prop('md5_stride')
        hash2 = other.get_prop('md5_stride')
        idxs1 = list(range(len(hash1)))

        hash_to_idxs = ut.group_items(idxs1, hash1)
        # Find what we have that other doesn't have and move it there
        other_missing = set(hash1).difference(hash2)
        missing_idxs1 = ut.flatten(ut.take(hash_to_idxs, other_missing))

        data = ut.ColumnLists({
            'idx': missing_idxs1,
            'fname': self.get_prop('fname', missing_idxs1),
            'dname': self.get_prop('dname', missing_idxs1),
            'full_path': self.get_prop('full_path', missing_idxs1),
            'nbytes': self.get_prop('nbytes', missing_idxs1),
        })
        data = data.compress([f != 'Thumbs.db' for f in data['fname']])
        data['ext'] = self.get_prop('ext', data['idx'])
        ut.dict_hist(data['ext'])
        data.print(ignore=['full_path', 'dname'])
Example #34
    def __init__(split_index, ibs, daid_list, num_forests=8):
        print('[nnsindex] make NNSplitIndex over %d annots' % (len(daid_list),))
        aid_list = daid_list
        nid_list = ibs.get_annot_nids(aid_list)
        #flag_list = ibs.get_annot_exemplar_flag(aid_list)
        nid2_aids = utool.group_items(aid_list, nid_list)
        key_list = nid2_aids.keys()
        aids_list = nid2_aids.values()
        isunknown_list = ibs.is_nid_unknown(key_list)

        known_aids  = utool.filterfalse_items(aids_list, isunknown_list)
        uknown_aids = utool.flatten(utool.filter_items(aids_list, isunknown_list))

        num_forests_ = min(max(map(len, aids_list)), num_forests)

        # Put one name per forest
        forest_aids, overflow_aids = utool.sample_zip(known_aids, num_forests_,
                                                      allow_overflow=True,
                                                      per_bin=1)

        forest_indexes = []
        extra_indexes = []
        for tx, aids in enumerate(forest_aids):
            print('[nnsindex] building forest %d/%d with %d aids' % (tx + 1, num_forests_, len(aids)))
            if len(aids) > 0:
                nn_index = NNIndex(ibs, aids)
                forest_indexes.append(nn_index)

        if len(overflow_aids) > 0:
            print('[nnsindex] building overflow forest')
            overflow_index = NNIndex(ibs, overflow_aids)
            extra_indexes.append(overflow_index)
        if len(uknown_aids) > 0:
            print('[nnsindex] building unknown forest')
            unknown_index = NNIndex(ibs, uknown_aids)
            extra_indexes.append(unknown_index)
        #print('[nnsindex] building normalizer forest')  # TODO

        split_index.forest_indexes = forest_indexes
        split_index.extra_indexes = extra_indexes
Example #35
    def get_dependants(depc, tablename):
        """
        gets level dependencies from the table to the leaves

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.depends_cache import *  # NOQA
            >>> depc = testdata_depc()
            >>> tablename = 'chip'
            >>> result = ut.repr3(depc.get_dependants(tablename), nl=1)
            >>> print(result)
            [
                ['chip'],
                ['keypoint'],
                ['fgweight', 'descriptor'],
                ['spam'],
            ]

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.depends_cache import *  # NOQA
            >>> depc = testdata_depc()
            >>> tablename = 'spam'
            >>> result = ut.repr3(depc.get_dependants(tablename), nl=1)
            >>> print(result)
            [
                ['spam'],
            ]

        """
        children_, parents_ = list(zip(*depc.get_edges()))
        parent_to_children = ut.group_items(parents_, children_)
        to_leafs = {tablename: ut.path_to_leafs(tablename, parent_to_children)}
        dependency_levels_ = ut.get_levels(to_leafs)
        dependency_levels = ut.longest_levels(dependency_levels_)
        return dependency_levels
Example #36
    def find_mst_edges(infr, label='name_label'):
        """
        Returns edges to augment existing PCCs (by label) in order to ensure
        they are connected with positive edges.

        CommandLine:
            python -m wbia.algo.graph.mixin_helpers find_mst_edges --profile

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.graph.mixin_helpers import *  # NOQA
            >>> import wbia
            >>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
            >>> infr = wbia.AnnotInference(ibs, 'all', autoinit=True)
            >>> label = 'orig_name_label'
            >>> label = 'name_label'
            >>> infr.find_mst_edges()
            >>> infr.ensure_mst()

        Ignore:
            old_mst_edges = [
                e for e, d in infr.edges(data=True)
                if d.get('user_id', None) == 'algo:mst'
            ]
            infr.graph.remove_edges_from(old_mst_edges)
            infr.pos_graph.remove_edges_from(old_mst_edges)
            infr.neg_graph.remove_edges_from(old_mst_edges)
            infr.incomp_graph.remove_edges_from(old_mst_edges)

        """
        # Find clusters by labels
        node_to_label = infr.get_node_attrs(label)
        label_to_nodes = ut.group_items(node_to_label.keys(), node_to_label.values())

        weight_heuristic = infr.ibs is not None
        if weight_heuristic:
            annots = infr.ibs.annots(infr.aids)
            node_to_time = ut.dzip(annots, annots.time)
            node_to_view = ut.dzip(annots, annots.viewpoint_code)
            enabled_heuristics = {
                'view_weight',
                'time_weight',
            }

        def _heuristic_weighting(nodes, avail_uv):
            avail_uv = np.array(avail_uv)
            weights = np.ones(len(avail_uv))

            if 'view_weight' in enabled_heuristics:
                from vtool import _rhomb_dist

                view_edge = [(node_to_view[u], node_to_view[v]) for (u, v) in avail_uv]
                view_weight = np.array(
                    [_rhomb_dist.VIEW_CODE_DIST[(v1, v2)] for (v1, v2) in view_edge]
                )
                # Assume comparable by default and prefer undefined
                # more than probably not, but less than definitely so.
                view_weight[np.isnan(view_weight)] = 1.5
                # Prefer viewpoint 10x more than time
                weights += 10 * view_weight

            if 'time_weight' in enabled_heuristics:
                # Prefer linking annotations closer in time
                times = ut.take(node_to_time, nodes)
                maxtime = vt.safe_max(times, fill=1, nans=False)
                mintime = vt.safe_min(times, fill=0, nans=False)
                time_denom = maxtime - mintime
                # Try linking by time for lynx data
                time_delta = np.array(
                    [abs(node_to_time[u] - node_to_time[v]) for u, v in avail_uv]
                )
                time_weight = time_delta / time_denom
                weights += time_weight

            weights = np.array(weights)
            weights[np.isnan(weights)] = 1.0

            avail = [(u, v, {'weight': w}) for (u, v), w in zip(avail_uv, weights)]
            return avail

        new_edges = []
        prog = ut.ProgIter(
            list(label_to_nodes.keys()),
            label='finding mst edges',
            enabled=infr.verbose > 0,
        )
        for nid in prog:
            nodes = set(label_to_nodes[nid])
            if len(nodes) == 1:
                continue
            # We want to make this CC connected
            pos_sub = infr.pos_graph.subgraph(nodes, dynamic=False)
            impossible = set(
                it.starmap(
                    e_,
                    it.chain(
                        nxu.edges_inside(infr.neg_graph, nodes),
                        nxu.edges_inside(infr.incomp_graph, nodes),
                        # nxu.edges_inside(infr.unknown_graph, nodes),
                    ),
                )
            )
            if len(impossible) == 0 and not weight_heuristic:
                # Simple mst augmentation
                aug_edges = list(nxu.k_edge_augmentation(pos_sub, k=1))
            else:
                complement = it.starmap(e_, nxu.complement_edges(pos_sub))
                avail_uv = [(u, v) for u, v in complement if (u, v) not in impossible]
                if weight_heuristic:
                    # Can do heuristic weighting to improve the MST
                    avail = _heuristic_weighting(nodes, avail_uv)
                else:
                    avail = avail_uv
                # logger.info(len(pos_sub))
                try:
                    aug_edges = list(nxu.k_edge_augmentation(pos_sub, k=1, avail=avail))
                except nx.NetworkXUnfeasible:
                    logger.info('Warning: MST augmentation is not feasible')
                    logger.info('explicit negative edges might disconnect a PCC')
                    aug_edges = list(
                        nxu.k_edge_augmentation(pos_sub, k=1, avail=avail, partial=True)
                    )
            new_edges.extend(aug_edges)
        prog.ensure_newline()

        for edge in new_edges:
            assert not infr.graph.has_edge(*edge), 'already have edge={}'.format(edge)
        return new_edges
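
Example #36 weights the candidate edges before running the augmentation so that the spanning edges prefer close viewpoints and small time deltas. A hedged minimal sketch of weighted 1-edge-augmentation with plain networkx (the toy graph and weights are invented; wbia's nxu wrapper is assumed to defer to the same routine):

    import networkx as nx
    from networkx.algorithms.connectivity import k_edge_augmentation

    G = nx.Graph()
    G.add_nodes_from([1, 2, 3])
    G.add_edge(1, 2)
    # candidate edges with heuristic weights; lower weight is preferred
    avail = [(1, 3, {'weight': 5.0}), (2, 3, {'weight': 1.0})]
    aug_edges = list(k_edge_augmentation(G, k=1, avail=avail))
    # Expected: [(2, 3)], the cheaper edge that connects node 3
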
Example #37
 def __init__(self, tokens):
     if isinstance(tokens, six.string_types):
         tokens = tokenize_manacost(tokens)
     vals = ut.get_list_column(tokens, 0)
     types = ut.get_list_column(tokens, 1)
     self.type2_manas = dict(ut.group_items(vals, types))
Example #38
def ingest_serengeti_mamal_cameratrap(species):
    """
    Downloads data from Serengeti dryad server

    References:
        http://datadryad.org/resource/doi:10.5061/dryad.5pt92
        Swanson AB, Kosmala M, Lintott CJ, Simpson RJ, Smith A, Packer C (2015)
        Snapshot Serengeti, high-frequency annotated camera trap images of 40
        mammalian species in an African savanna. Scientific Data 2: 150026.
        http://dx.doi.org/10.1038/sdata.2015.26
        Swanson AB, Kosmala M, Lintott CJ, Simpson RJ, Smith A, Packer C (2015)
        Data from: Snapshot Serengeti, high-frequency annotated camera trap
        images of 40 mammalian species in an African savanna. Dryad Digital
        Repository. http://dx.doi.org/10.5061/dryad.5pt92

    Args:
        species (?):

    CommandLine:
        python -m ibeis.dbio.ingest_database --test-ingest_serengeti_mamal_cameratrap --species zebra_plains
        python -m ibeis.dbio.ingest_database --test-ingest_serengeti_mamal_cameratrap --species cheetah

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> import ibeis
        >>> species = ut.get_argval('--species', type_=str, default=ibeis.const.TEST_SPECIES.ZEB_PLAIN)
        >>> # species = ut.get_argval('--species', type_=str, default='cheetah')
        >>> result = ingest_serengeti_mamal_cameratrap(species)
        >>> print(result)
    """
    'https://snapshotserengeti.s3.msi.umn.edu/'
    import ibeis

    if species is None:
        code = 'ALL'
    elif species == 'zebra_plains':
        code = 'PZ'
    elif species == 'cheetah':
        code = 'CHTH'
    else:
        raise NotImplementedError()

    if species == 'zebra_plains':
        serengeti_sepcies = 'zebra'
    else:
        serengeti_sepcies = species

    print('species = %r' % (species,))
    print('serengeti_sepcies = %r' % (serengeti_sepcies,))

    dbname = code + '_Serengeti'
    print('dbname = %r' % (dbname,))
    dbdir = ut.ensuredir(join(ibeis.sysres.get_workdir(), dbname))
    print('dbdir = %r' % (dbdir,))
    image_dir = ut.ensuredir(join(dbdir, 'images'))

    base_url = 'http://datadryad.org/bitstream/handle/10255'
    all_images_url         = base_url + '/dryad.86392/all_images.csv'
    consensus_metadata_url = base_url + '/dryad.86348/consensus_data.csv'
    search_effort_url      = base_url + '/dryad.86347/search_effort.csv'
    gold_standard_url      = base_url + '/dryad.76010/gold_standard_data.csv'

    all_images_fpath         = ut.grab_file_url(all_images_url, download_dir=dbdir)
    consensus_metadata_fpath = ut.grab_file_url(consensus_metadata_url, download_dir=dbdir)
    search_effort_fpath      = ut.grab_file_url(search_effort_url, download_dir=dbdir)
    gold_standard_fpath      = ut.grab_file_url(gold_standard_url, download_dir=dbdir)

    print('all_images_fpath         = %r' % (all_images_fpath,))
    print('consensus_metadata_fpath = %r' % (consensus_metadata_fpath,))
    print('search_effort_fpath      = %r' % (search_effort_fpath,))
    print('gold_standard_fpath      = %r' % (gold_standard_fpath,))

    def read_csv(csv_fpath):
        import utool as ut
        csv_text = ut.read_from(csv_fpath)
        csv_lines = csv_text.split('\n')
        print(ut.list_str(csv_lines[0:2]))
        csv_data = [[field.strip('"').strip('\r') for field in line.split(',')]
                    for line in csv_lines if len(line) > 0]
        csv_header = csv_data[0]
        csv_data = csv_data[1:]
        return csv_data, csv_header

    def download_image_urls(image_url_info_list):
        # Find ones that we already have
        print('Requested %d downloaded images' % (len(image_url_info_list)))
        full_gpath_list = [join(image_dir, basename(gpath)) for gpath in image_url_info_list]
        exists_list = [ut.checkpath(gpath) for gpath in full_gpath_list]
        image_url_info_list_ = ut.compress(image_url_info_list, ut.not_list(exists_list))
        print('Already have %d/%d downloaded images' % (
            len(image_url_info_list) - len(image_url_info_list_), len(image_url_info_list)))
        print('Need to download %d images' % (len(image_url_info_list_)))
        #import sys
        #sys.exit(0)
        # Download the rest
        imgurl_prefix = 'https://snapshotserengeti.s3.msi.umn.edu/'
        image_url_list = [imgurl_prefix + suffix for suffix in image_url_info_list_]
        for img_url in ut.ProgressIter(image_url_list, lbl='Downloading image'):
            ut.grab_file_url(img_url, download_dir=image_dir)
        return full_gpath_list

    # Data contains information about which events have which animals
    if False:
        species_class_csv_data, species_class_header = read_csv(gold_standard_fpath)
        species_class_eventid_list    = ut.get_list_column(species_class_csv_data, 0)
        #gold_num_species_annots_list = ut.get_list_column(gold_standard_csv_data, 2)
        species_class_species_list    = ut.get_list_column(species_class_csv_data, 2)
        #gold_count_list              = ut.get_list_column(gold_standard_csv_data, 3)
    else:
        species_class_csv_data, species_class_header = read_csv(consensus_metadata_fpath)
        species_class_eventid_list    = ut.get_list_column(species_class_csv_data, 0)
        species_class_species_list    = ut.get_list_column(species_class_csv_data, 7)

    # Find the zebra events
    serengeti_sepcies_set = sorted(list(set(species_class_species_list)))
    print('serengeti_sepcies_hist = %s' %
          ut.dict_str(ut.dict_hist(species_class_species_list), key_order_metric='val'))
    #print('serengeti_sepcies_set = %s' % (ut.list_str(serengeti_sepcies_set),))

    assert serengeti_sepcies in serengeti_sepcies_set, 'not a known serengeti species'
    species_class_chosen_idx_list = ut.list_where(
        [serengeti_sepcies == species_ for species_ in species_class_species_list])
    chosen_eventid_list = ut.take(species_class_eventid_list, species_class_chosen_idx_list)

    print('Number of chosen species:')
    print(' * len(species_class_chosen_idx_list) = %r' % (len(species_class_chosen_idx_list),))
    print(' * len(chosen_eventid_list) = %r' % (len(chosen_eventid_list),))

    # Read info about which events have which images
    images_csv_data, image_csv_header = read_csv(all_images_fpath)
    capture_event_id_list = ut.get_list_column(images_csv_data, 0)
    image_url_info_list = ut.get_list_column(images_csv_data, 1)
    # Group photos by eventid
    eventid_to_photos = ut.group_items(image_url_info_list, capture_event_id_list)

    # Filter to only chosens
    unflat_chosen_url_infos = ut.dict_take(eventid_to_photos, chosen_eventid_list)
    chosen_url_infos = ut.flatten(unflat_chosen_url_infos)
    image_url_info_list = chosen_url_infos
    chosen_path_list = download_image_urls(chosen_url_infos)

    ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=True)
    gid_list_ = ibs.add_images(chosen_path_list, auto_localize=False)  # NOQA

    # Attempt to automatically detect the annotations
    #aids_list = ibs.detect_random_forest(gid_list_, species)
    #aids_list

    #if False:
    #    # remove non-zebra photos
    #    from os.path import basename
    #    base_gname_list = list(map(basename, zebra_url_infos))
    #    all_gname_list = ut.list_images(image_dir)
    #    nonzebra_gname_list = ut.setdiff_ordered(all_gname_list, base_gname_list)
    #    nonzebra_gpath_list = ut.fnames_to_fpaths(nonzebra_gname_list, image_dir)
    #    ut.remove_fpaths(nonzebra_gpath_list)
    return ibs
Example #39
def reasign_names1(ibs, aid_list=None, old_img2_names=None, common_prefix=''):
    r"""
    Changes the names in the IA-database to correspond to an older naming
    convention. If splits and merges were performed, it tries to find the
    maximally consistent renaming scheme.

    Notes:
        For each annotation:
        * get the image
        * get the image full path
        * strip the full path down to the file name prefix:
             [ example /foo/bar/pic.jpg -> pic ]
        * make the name of the individual associated with that annotation be the
          file name prefix
        * save the new names to the image analysis database
        * wildbook will make a request to get all of the annotations, image
          file names, image names and animal ids

    CommandLine:
        python -m ibeis.scripts.name_recitifer rectify_names --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> aid_list = None
        >>> common_prefix = ''
        >>> old_img2_names = None #['img_fred.png', ']
        >>> result = reasign_names1(ibs, aid_list, old_img2_names, common_prefix)
    """
    if aid_list is None:
        aid_list = ibs.get_valid_aids()
    # Group annotations by their current IA-name
    nid_list = ibs.get_annot_name_rowids(aid_list)
    nid2_aids = ut.group_items(aid_list, nid_list)
    unique_nids = list(nid2_aids.keys())
    grouped_aids = list(nid2_aids.values())

    # Get grouped images
    grouped_imgnames = ibs.unflat_map(ibs.get_annot_image_names, grouped_aids)

    # Assume a mapping from old image names to old names is given.
    # Or just hack it in the Lewa case.
    if old_img2_names is None:
        def get_name_from_gname(gname):
            from os.path import splitext
            gname_, ext = splitext(gname)
            assert gname_.startswith(common_prefix), 'prefix assumption is invalidated'
            gname_ = gname_[len(common_prefix):]
            return gname_
        # Create mapping from image name to the desired "name" for the image.
        old_img2_names = {gname: get_name_from_gname(gname)
                          for gname in ut.flatten(grouped_imgnames)}

    # Make the name of the individual associated with that annotation be the file name prefix
    grouped_oldnames = [ut.take(old_img2_names, gnames) for gnames in grouped_imgnames]

    # The task is now to map each name in unique_nids to one of these names
    # subject to the constraint that each name can only be used once.  This is
    # solved using a maximum bipartite matching. The new names are the left
    # nodes, the old names are the right nodes, and grouped_oldnames defines
    # the adjacency matrix.
    # NOTE: In rare cases it may be impossible to find a correct labeling using
    # only old names.  In this case new names will be created.
    new_name_text = find_consistent_labeling(grouped_oldnames)

    dry = False
    if not dry:
        # Save the new names to the image analysis database
        ibs.set_name_texts(unique_nids, new_name_text)
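
find_consistent_labeling itself is not shown in this example; the comment above describes it as a maximum bipartite matching between new name ids and candidate old names. A hedged sketch of that idea using scipy's Hungarian solver (scipy is an assumption here, and the real function also falls back to creating new names when no consistent old name exists):

import numpy as np
from scipy.optimize import linear_sum_assignment  # assumption: scipy available

# Each group lists the old names of its member images.
grouped_oldnames = [['fred', 'fred', 'sue'], ['sue'], ['tom', 'fred']]
candidates = sorted({n for group in grouped_oldnames for n in group})
# More votes for a name -> lower cost for assigning it to that group.
cost = np.zeros((len(grouped_oldnames), len(candidates)))
for i, group in enumerate(grouped_oldnames):
    for j, name in enumerate(candidates):
        cost[i, j] = -group.count(name)
rows, cols = linear_sum_assignment(cost)
assignment = [candidates[j] for j in cols]
print(assignment)  # ['fred', 'sue', 'tom'] -- one distinct old name per group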
Example #40
0
def download_sharks(XMLdata, number):
    """
    cd ~/work/WS_ALL
    python -m ibeis.scripts.getshark

    >>> from ibeis.scripts.getshark import *  # NOQA
    >>> url = 'www.whaleshark.org/listImages.jsp'
    >>> XMLdata = ut.url_read(url)
    >>> number = None
    """
    # Prepare the output directory for writing, if it doesn't exist
    output_dir = 'sharkimages'
    ut.ensuredir(output_dir)

    dom = parseString(XMLdata)

    # Download files
    if number:
        maxCount = min(number, len(dom.getElementsByTagName('img')))
    else:
        maxCount = len(dom.getElementsByTagName('img'))

    parsed_info = dict(
        img_url_list=[],
        localid_list=[],
        nameid_list=[],
        orig_fname_list=[],
        new_fname_list=[],
    )

    print('Preparing to fetch %i files...' % maxCount)

    for shark in dom.getElementsByTagName('shark'):
        localCount = 0
        for imageset in shark.getElementsByTagName('imageset'):
            for img in imageset.getElementsByTagName('img'):
                localCount += 1

                img_url = img.getAttribute('href')
                orig_fname = split(img_url)[1]
                ext = splitext(orig_fname)[1].lower()
                nameid = shark.getAttribute('number')

                new_fname = '%s-%i%s' % (
                    nameid, localCount, ext)

                parsed_info['img_url_list'].append(img_url)
                parsed_info['nameid_list'].append(nameid)
                parsed_info['localid_list'].append(localCount)
                parsed_info['orig_fname_list'].append(orig_fname)
                parsed_info['new_fname_list'].append(new_fname)

                print('Parsed %i / %i files.' % (len(parsed_info['orig_fname_list']), maxCount))

                if number is not None and len(parsed_info['orig_fname_list']) == number:
                    break
    parsed_info['new_fpath_list'] = [join(output_dir, _fname)
                                     for _fname in parsed_info['new_fname_list']]

    print('Filtering parsed images')

    # Filter based on image type (keep only jpgs)
    ext_flags = [_fname.endswith('.jpg') or _fname.endswith('.jpeg')
                 for _fname in parsed_info['new_fname_list']]
    parsed_info = {key: ut.compress(list_, ext_flags) for key, list_ in parsed_info.items()}

    # Filter to only images matching the appropriate tags
    from ibeis import tag_funcs
    parsed_info['tags_list'] = parse_shark_tags(parsed_info['orig_fname_list'])
    tag_flags = tag_funcs.filterflags_general_tags(
        parsed_info['tags_list'],
        has_any=['view-left'],
        none_match=['qual.*', 'view-top', 'part-.*', 'cropped'],
    )
    parsed_info = {key: ut.compress(list_, tag_flags) for key, list_ in parsed_info.items()}
    print('Tags in chosen images:')
    print(ut.dict_hist(ut.flatten(parsed_info['tags_list'] )))

    # Download selected subset
    print('Downloading selected subset')
    _iter = list(zip(parsed_info['img_url_list'],
                     parsed_info['new_fpath_list']))
    _iter = ut.ProgressIter(_iter, lbl='downloading sharks')
    for img_url, new_fpath in _iter:
        if not exists(new_fpath):
            ut.download_url(img_url, new_fpath)

    # Remove corrupted or ill-formatted images
    print('Checking for corrupted images')
    import vtool as vt
    noncorrupt_flags = vt.filterflags_valid_images(parsed_info['new_fpath_list'])
    parsed_info = {
        key: ut.compress(list_, noncorrupt_flags)
        for key, list_ in parsed_info.items()
    }

    print('Removing small images')
    import numpy as np
    imgsize_list = np.array([vt.open_image_size(gpath) for gpath in parsed_info['new_fpath_list']])
    sqrt_area_list = np.sqrt(np.prod(imgsize_list, axis=1))
    areq_flags_list = sqrt_area_list >= 750
    parsed_info = {key: ut.compress(list_, areq_flags_list)
                   for key, list_ in parsed_info.items()}

    grouped_idxs = ut.group_items(list(range(len(parsed_info['nameid_list']))),
                                  parsed_info['nameid_list'])
    keep_idxs = sorted(ut.flatten([idxs for key, idxs in grouped_idxs.items() if len(idxs) >= 2]))
    parsed_info = {key: ut.take(list_, keep_idxs) for key, list_ in parsed_info.items()}

    print('Moving images to secondary directory')
    named_outputdir = 'named-left-sharkimages'
    # Build names
    parsed_info['namedir_fpath_list'] = [
        join(named_outputdir, _nameid, _fname)
        for _fname, _nameid in zip(parsed_info['new_fname_list'],
                                   parsed_info['nameid_list'])]
    # Create directories
    ut.ensuredir(named_outputdir)
    named_dirs = ut.unique_ordered(list(map(dirname, parsed_info['namedir_fpath_list'])))
    for dir_ in named_dirs:
        ut.ensuredir(dir_)
    # Copy
    ut.copy_files_to(src_fpath_list=parsed_info['new_fpath_list'],
                     dst_fpath_list=parsed_info['namedir_fpath_list'])
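
Several steps above filter a dictionary of parallel lists with the same boolean flags so the lists stay aligned. A minimal plain-Python sketch of that pattern, without utool:

parsed_info = {
    'img_url_list': ['a.jpg', 'b.png', 'c.jpg'],
    'nameid_list': ['n1', 'n2', 'n3'],
}
flags = [fname.endswith('.jpg') for fname in parsed_info['img_url_list']]
# compress every parallel list with the same flags so they stay aligned
parsed_info = {key: [x for x, keep in zip(vals, flags) if keep]
               for key, vals in parsed_info.items()}
print(parsed_info)
# {'img_url_list': ['a.jpg', 'c.jpg'], 'nameid_list': ['n1', 'n3']}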
Example #41
0
def nx_agraph_layout(graph, orig_graph=None, inplace=False, verbose=None, **kwargs):
    r"""
    orig_graph = graph
    graph = layout_graph

    References:
        http://www.graphviz.org/content/attrs
        http://www.graphviz.org/doc/info/attrs.html
    """
    import networkx as nx
    import pygraphviz

    kwargs = kwargs.copy()
    prog = kwargs.pop('prog', 'dot')
    if prog != 'dot':
        kwargs['overlap'] = kwargs.get('overlap', 'false')
    kwargs['splines'] = kwargs.get('splines', 'spline')
    kwargs['notranslate'] = 'true'  # for neato postprocessing
    argparts = ['-G%s=%s' % (key, str(val))
                for key, val in kwargs.items()]
    args = ' '.join(argparts)
    splines = kwargs['splines']
    if verbose is None:
        verbose = ut.VERBOSE
    if verbose:
        print('args = %r' % (args,))
    # Convert to agraph format
    graph_ = graph.copy()

    ut.nx_ensure_agraph_color(graph_)

    # Reduce size to be in inches not pixels
    # FIXME: make robust to param settings
    # Hack to make the w/h of the node take the max instead of
    # dot, which takes the minimum
    shaped_nodes = [n for n, d in graph_.nodes(data=True) if 'width' in d]
    node_attrs = ut.dict_take(graph_.node, shaped_nodes)
    width_px = np.array(ut.take_column(node_attrs, 'width'))
    height_px = np.array(ut.take_column(node_attrs, 'height'))
    scale = np.array(ut.dict_take_column(node_attrs, 'scale', default=1.0))

    width_in = width_px / 72.0 * scale
    height_in = height_px / 72.0 * scale
    width_in_dict = dict(zip(shaped_nodes, width_in))
    height_in_dict = dict(zip(shaped_nodes, height_in))
    nx.set_node_attributes(graph_, 'width', width_in_dict)
    nx.set_node_attributes(graph_, 'height', height_in_dict)
    ut.nx_delete_node_attr(graph_, 'scale')

    # Check for any nodes with groupids
    node_to_groupid = nx.get_node_attributes(graph_, 'groupid')
    if node_to_groupid:
        groupid_to_nodes = ut.group_items(*zip(*node_to_groupid.items()))
    else:
        groupid_to_nodes = {}
    # Initialize agraph format
    #import utool
    #utool.embed()
    ut.nx_delete_None_edge_attr(graph_)
    agraph = nx.nx_agraph.to_agraph(graph_)
    # Add subgraphs labels
    # TODO: subgraph attrs
    group_attrs = graph.graph.get('groupattrs', {})
    for groupid, nodes in groupid_to_nodes.items():
        # subgraph_attrs = {}
        subgraph_attrs = group_attrs.get(groupid, {}).copy()
        cluster_flag = True
        # FIXME: make this more natural to specify
        if 'cluster' in subgraph_attrs:
            cluster_flag = subgraph_attrs['cluster']
            del subgraph_attrs['cluster']
        # subgraph_attrs = dict(rankdir='LR')
        # subgraph_attrs = dict(rankdir='LR')
        # subgraph_attrs['rank'] = 'min'
        # subgraph_attrs['rank'] = 'source'
        if cluster_flag:
            # graphviz treats subgraphs labeled with 'cluster' differently
            name = 'cluster_' + groupid
        else:
            name = groupid
        agraph.add_subgraph(nodes, name, **subgraph_attrs)
    for node in graph_.nodes():
        # force pinning of node points
        anode = pygraphviz.Node(agraph, node)
        if anode.attr['pin'] == 'true':
            if anode.attr['pos'] is not None and len(anode.attr['pos']) > 0 and not anode.attr['pos'].endswith('!'):
                import re
                #utool.embed()
                ptstr_ = anode.attr['pos']
                #print('ptstr_ = %r' % (ptstr_,))
                ptstr = ptstr_.strip('[]').strip(' ').strip('()')
                #print('ptstr = %r' % (ptstr,))
                ptstr_list = [x.rstrip(',') for x in re.split(r'\s+', ptstr)]
                #print('ptstr_list = %r' % (ptstr_list,))
                pt_list = list(map(float, ptstr_list))
                #print('pt_list = %r' % (pt_list,))
                pt_arr = np.array(pt_list) / 72.0
                #print('pt_arr = %r' % (pt_arr,))
                new_ptstr_list = list(map(str, pt_arr))
                new_ptstr = ','.join(new_ptstr_list) + '!'
                #print('new_ptstr = %r' % (new_ptstr,))
                anode.attr['pos'] = new_ptstr

    # Run layout
    #print('prog = %r' % (prog,))
    if ut.VERBOSE or verbose > 0:
        print('BEFORE LAYOUT\n' + str(agraph))
    agraph.layout(prog=prog, args=args)
    agraph.draw(ut.truepath('~/test_graphviz_draw.png'))
    if ut.VERBOSE or verbose > 1:
        print('AFTER LAYOUT\n' + str(agraph))

    # TODO: just replace with a single dict of attributes
    node_layout_attrs = ut.ddict(dict)
    edge_layout_attrs = ut.ddict(dict)

    #for node in agraph.nodes():
    for node in graph_.nodes():
        anode = pygraphviz.Node(agraph, node)
        node_attrs = parse_anode_layout_attrs(anode)
        for key, val in node_attrs.items():
            node_layout_attrs[key][node] = val

    edges = list(ut.nx_edges(graph_, keys=True))

    for edge in edges:
        aedge = pygraphviz.Edge(agraph, *edge)
        edge_attrs = parse_aedge_layout_attrs(aedge)
        for key, val in edge_attrs.items():
            edge_layout_attrs[key][edge] = val

    if orig_graph is not None and kwargs.get('draw_implicit', True):
        # ADD IN IMPLICIT EDGES
        layout_edges = set(ut.nx_edges(graph_, keys=True))
        orig_edges = set(ut.nx_edges(orig_graph, keys=True))
        implicit_edges = list(orig_edges - layout_edges)
        #all_edges = list(set.union(orig_edges, layout_edges))
        needs_implicit = len(implicit_edges) > 0
        if needs_implicit:
            # Pin down positions
            for node in agraph.nodes():
                anode = pygraphviz.Node(agraph, node)
                anode.attr['pin'] = 'true'
                anode.attr['pos'] += '!'

            # Add new edges to route
            for iedge in implicit_edges:
                data = orig_graph.get_edge_data(*iedge)
                agraph.add_edge(*iedge, **data)

            if ut.VERBOSE or verbose:
                print('BEFORE IMPLICIT LAYOUT\n' + str(agraph))
            # Route the implicit edges (must use neato)

            control_node = pygraphviz.Node(agraph, node)
            #print('control_node = %r' % (control_node,))
            node1_attr1 = parse_anode_layout_attrs(control_node)
            #print('node1_attr1 = %r' % (node1_attr1,))

            implicit_kw = kwargs.copy()
            implicit_kw['overlap'] = 'true'
            #del implicit_kw['overlap']  # can cause node positions to change
            argparts = ['-G%s=%s' % (key, str(val))
                        for key, val in implicit_kw.items()]
            args = ' '.join(argparts)
            #print('args = %r' % (args,))

            #import utool
            #utool.embed()

            agraph.layout(prog='neato', args='-n ' + args)
            agraph.draw(ut.truepath('~/implicit_test_graphviz_draw.png'))
            if ut.VERBOSE or verbose:
                print('AFTER IMPLICIT LAYOUT\n' + str(agraph))

            control_node = pygraphviz.Node(agraph, node)
            print('control_node = %r' % (control_node,))
            node1_attr2 = parse_anode_layout_attrs(control_node)
            print('node1_attr2 = %r' % (node1_attr2,))

            # graph positions shifted
            # This is not the right place to divide by 72
            translation = (node1_attr1['pos'] - node1_attr2['pos'] )
            #print('translation = %r' % (translation,))
            #translation = np.array([0, 0])
            print('translation = %r' % (translation,))

            #for iedge in all_edges:
            for iedge in implicit_edges:
                aedge = pygraphviz.Edge(agraph, *iedge)
                iedge_attrs = parse_aedge_layout_attrs(aedge, translation)
                for key, val in iedge_attrs.items():
                    edge_layout_attrs[key][iedge] = val

    graph_layout_attrs = dict(
        splines=splines
    )

    layout_info = {
        'graph': graph_layout_attrs,
        'edge': dict(edge_layout_attrs),
        'node': dict(node_layout_attrs),
    }

    if inplace:
        if orig_graph is not None:
            graph = orig_graph
        apply_graph_layout_attrs(graph, layout_info)

    return graph, layout_info
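
A hedged, minimal sketch of the core layout call this function builds on, assuming networkx and pygraphviz are installed: convert the graph to agraph form, run the dot layout, and read back the node positions (in points) that nx_agraph_layout then parses into layout attributes:

import networkx as nx  # assumption: pygraphviz is installed for nx_agraph

g = nx.path_graph(4)
agraph = nx.nx_agraph.to_agraph(g)
agraph.layout(prog='dot', args='-Gsplines=spline')
for node in agraph.nodes():
    # positions come back as 'x,y' strings in points (1/72 inch)
    print(node, node.attr['pos'])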
Example #42
0
def reasign_names2(ibs, gname_name_pairs, aid_list=None):
    """

    Notes:
        * Given a list of pairs:  image file names (full path), animal name.
        * Go through all the images in the database and create a dictionary
          that associates the file name (full path) of the image in the
          database with the annotation or annotations associated with that
          image.
        * Go through the list of pairs:
          For each image file name, look up in the dictionary the image file
          name and assign the annotation associated with the image file name
          the animal name
        * Throughout this, keep a list of annotations that have been changed
        * Wildbook will issue a pull request to get these annotations.

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> aid_list = None
        >>> common_prefix = ''
        >>> gname_name_pairs = [
        >>>     ('easy1.JPG', 'easy'),
        >>>     ('easy2.JPG', 'easy'),
        >>>     ('easy3.JPG', 'easy'),
        >>>     ('hard1.JPG', 'hard')
        >>> ]
        >>> changed_pairs = reasign_names2(ibs, gname_name_pairs)
    """
    from os.path import basename
    if aid_list is None:
        aid_list = ibs.get_valid_aids()
    annot_gnames = ibs.get_annot_image_names(aid_list)
    # Other image name getters that may be useful
    # ibs.get_annot_image_paths(aid_list)
    # ibs.get_image_uris_original(ibs.get_annot_gids(aid_list))
    gname2_aids = ut.group_items(aid_list, annot_gnames)

    changed_aids = []
    changed_names = []

    for gname, name in gname_name_pairs:
        # make sure it's just the last part of the name.
        # Ignore preceding path
        gname = basename(gname)
        aids = gname2_aids[gname]
        texts = ibs.get_annot_name_texts(aids)
        flags = [text != name for text in texts]
        aids_ = ut.compress(aids, flags)
        if len(aids_):
            changed_aids.extend(aids_)
            changed_names.extend([name] * len(aids_))

    dry = False
    if not dry:
        # Save the new names to the image analysis database
        ibs.set_annot_name_texts(changed_aids, changed_names)

    # Returned list tells you who was changed.
    changed_pairs = list(zip(changed_names, changed_aids))
    return changed_pairs
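
A minimal sketch of the rename-by-filename step above, with hypothetical toy data: look up the annotations for each image file name and keep only those whose current name differs, so only real changes are written back:

# hypothetical toy data standing in for gname2_aids and the name lookup
gname2_aids = {'easy1.JPG': [11], 'hard1.JPG': [12, 13]}
current_names = {11: 'easy', 12: 'unknown', 13: 'hard'}

changed_aids, changed_names = [], []
for gname, new_name in [('easy1.JPG', 'easy'), ('hard1.JPG', 'hard')]:
    for aid in gname2_aids[gname]:
        if current_names[aid] != new_name:  # only record actual changes
            changed_aids.append(aid)
            changed_names.append(new_name)
print(list(zip(changed_names, changed_aids)))  # [('hard', 12)]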
Example #43
0
def reasign_names1(ibs, aid_list=None, old_img2_names=None, common_prefix=''):
    r"""
    Changes the names in the IA-database to correspond to an older
    naming convention.  If splits and merges were performed, this tries to
    find the maximally consistent renaming scheme.

    Notes:
        For each annotation:
        * get the image
        * get the image full path
        * strip the full path down to the file name prefix:
             [ example /foo/bar/pic.jpg -> pic ]
        * make the name of the individual associated with that annotation be the
          file name prefix
        * save the new names to the image analysis database
        * wildbook will make a request to get all of the annotations, image
          file names, image names and animal ids

    CommandLine:
        python -m ibeis.scripts.name_recitifer rectify_names --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> aid_list = None
        >>> common_prefix = ''
        >>> old_img2_names = None #['img_fred.png', ']
        >>> result = reasign_names1(ibs, aid_list, old_img2_names, common_prefix)
    """
    if aid_list is None:
        aid_list = ibs.get_valid_aids()
    # Group annotations by their current IA-name
    nid_list = ibs.get_annot_name_rowids(aid_list)
    nid2_aids = ut.group_items(aid_list, nid_list)
    unique_nids = list(nid2_aids.keys())
    grouped_aids = list(nid2_aids.values())

    # Get grouped images
    grouped_imgnames = ibs.unflat_map(ibs.get_annot_image_names, grouped_aids)

    # Assume a mapping from old image names to old names is given.
    # Or just hack it in the Lewa case.
    if old_img2_names is None:

        def get_name_from_gname(gname):
            from os.path import splitext
            gname_, ext = splitext(gname)
            assert gname_.startswith(common_prefix), (
                'prefix assumption is invalidated')
            gname_ = gname_[len(common_prefix):]
            return gname_

        # Create mapping from image name to the desired "name" for the image.
        old_img2_names = {
            gname: get_name_from_gname(gname)
            for gname in ut.flatten(grouped_imgnames)
        }

    # Make the name of the individual associated with that annotation be the
    # file name prefix
    grouped_oldnames = [
        ut.take(old_img2_names, gnames) for gnames in grouped_imgnames
    ]

    # The task is now to map each name in unique_nids to one of these names
    # subject to the constraint that each name can only be used once.  This is
    # solved using a maximum bipartite matching. The new names are the left
    # nodes, the old names are the right nodes, and grouped_oldnames defines
    # the adjacency matrix.
    # NOTE: In rare cases it may be impossible to find a correct labeling using
    # only old names.  In this case new names will be created.
    new_name_text = find_consistent_labeling(grouped_oldnames)

    dry = False
    if not dry:
        # Save the new names to the image analysis database
        ibs.set_name_texts(unique_nids, new_name_text)
Example #44
0
def reasign_names2(ibs, gname_name_pairs, aid_list=None):
    """

    Notes:
        * Given a list of pairs:  image file names (full path), animal name.
        * Go through all the images in the database and create a dictionary that
          associates the file name (full path) of the image in the database with the
          annotation or annotations associated with that image.
        * Go through the list of pairs:
          For each image file name, look up in the dictionary the image file
          name and assign the annotation associated with the image file name
          the animal name
        * Throughout this, keep a list of annotations that have been changed
        * Wildbook will issue a pull request to get these annotations.

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> aid_list = None
        >>> common_prefix = ''
        >>> gname_name_pairs = [
        >>>     ('easy1.JPG', 'easy'),
        >>>     ('easy2.JPG', 'easy'),
        >>>     ('easy3.JPG', 'easy'),
        >>>     ('hard1.JPG', 'hard')
        >>> ]
        >>> changed_pairs = reasign_names2(ibs, gname_name_pairs)
    """
    from os.path import basename
    if aid_list is None:
        aid_list = ibs.get_valid_aids()
    annot_gnames = ibs.get_annot_image_names(aid_list)
    # Other image name getters that may be useful
    # ibs.get_annot_image_paths(aid_list)
    # ibs.get_image_uris_original(ibs.get_annot_gids(aid_list))
    gname2_aids = ut.group_items(aid_list, annot_gnames)

    changed_aids = []
    changed_names = []

    for gname, name in gname_name_pairs:
        # make sure it's just the last part of the name.
        # Ignore preceding path
        gname = basename(gname)
        aids = gname2_aids[gname]
        texts = ibs.get_annot_name_texts(aids)
        flags = [text != name for text in texts]
        aids_ = ut.compress(aids, flags)
        if len(aids_):
            changed_aids.extend(aids_)
            changed_names.extend([name] * len(aids_))

    dry = False
    if not dry:
        # Save the new names to the image analysis database
        ibs.set_annot_name_texts(changed_aids, changed_names)

    # Returned list tells you who was changed.
    changed_pairs = list(zip(changed_names, changed_aids))
    return changed_pairs
Example #45
0
def estimate_twoday_count(ibs, day1, day2, filter_kw):
    #gid_list = ibs.get_valid_gids()
    all_images = ibs.images()
    dates = [dt.date() for dt in all_images.datetime]
    date_to_images = all_images.group_items(dates)
    date_to_images = ut.sort_dict(date_to_images)
    #date_hist = ut.map_dict_vals(len, date2_gids)
    #print('date_hist = %s' % (ut.repr2(date_hist, nl=2),))
    verbose = 0

    visit_dates = [day1, day2]
    visit_info_list_ = []
    for day in visit_dates:
        images = date_to_images[day]
        aids = ut.flatten(images.aids)
        aids = ibs.filter_annots_general(aids, filter_kw=filter_kw,
                                         verbose=verbose)
        nids = ibs.get_annot_name_rowids(aids)
        grouped_aids = ut.group_items(aids, nids)
        unique_nids = ut.unique(list(grouped_aids.keys()))

        if False:
            aids_list = ut.take(grouped_aids, unique_nids)
            for aids in aids_list:
                if len(aids) > 30:
                    break
            timedeltas_list = ibs.get_unflat_annots_timedelta_list(aids_list)
            # Do the five second rule
            marked_thresh = 5
            flags = []
            for nid, timedeltas in zip(unique_nids, timedeltas_list):
                flags.append(timedeltas.max() > marked_thresh)
            print('Unmarking %d names' % (len(flags) - sum(flags)))
            unique_nids = ut.compress(unique_nids, flags)
            grouped_aids = ut.dict_subset(grouped_aids, unique_nids)

        unique_aids = ut.flatten(list(grouped_aids.values()))
        info = {
            'unique_nids': unique_nids,
            'grouped_aids': grouped_aids,
            'unique_aids': unique_aids,
        }
        visit_info_list_.append(info)

    # Estimate statistics
    from ibeis.other import dbinfo
    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'unique_aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    resight_nids = ut.isect(nids_day1, nids_day2)
    nsight1 = len(nids_day1)
    nsight2 = len(nids_day2)
    resight = len(resight_nids)
    lp_index, lp_error = dbinfo.sight_resight_count(nsight1, nsight2, resight)

    if False:
        from ibeis.other import dbinfo
        print('DAY 1 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1)  # NOQA
        print('DAY 2 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day2)  # NOQA
        print('COMBINED STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1 + aids_day2)  # NOQA

    print('%d annots on day 1' % (len(aids_day1)) )
    print('%d annots on day 2' % (len(aids_day2)) )
    print('%d names on day 1' % (nsight1,))
    print('%d names on day 2' % (nsight2,))
    print('resight = %r' % (resight,))
    print('lp_index = %r ± %r' % (lp_index, lp_error))
    return nsight1, nsight2, resight, lp_index, lp_error
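
The final counts feed a sight-resight estimate via dbinfo.sight_resight_count. A hedged sketch of the classic Lincoln-Petersen form that such an estimate typically uses (the exact formula inside dbinfo is an assumption here):

import math

def lincoln_petersen(n1, n2, resight):
    # classic estimate: population ~= n1 * n2 / m, where m = individuals seen on both days
    estimate = n1 * n2 / resight
    # one common standard-error form for the simple estimator (assumption:
    # dbinfo.sight_resight_count may use a corrected variant)
    stderr = math.sqrt(n1 ** 2 * n2 * (n2 - resight) / resight ** 3)
    return estimate, stderr

print(lincoln_petersen(50, 60, 20))  # (150.0, ~27.4)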
Example #46
0
    def mark_unreviewed_above_score_as_correct(qres_wgt):
        selected_qtindex_list = qres_wgt.selectedRows()
        if len(selected_qtindex_list) == 1:
            qtindex = selected_qtindex_list[0]
            # aid1, aid2 = qres_wgt.get_aidpair_from_qtindex(qtindex)
            thresh = qtindex.model().get_header_data('score', qtindex)
            logger.info('thresh = %r' % (thresh, ))

            rows = qres_wgt.review_api.ider()
            scores_ = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('score'), rows)
            valid_rows = ut.compress(rows, scores_ >= thresh)
            aids1 = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('qaid'), valid_rows)
            aids2 = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('aid'), valid_rows)
            # ibs = qres_wgt.ibs
            ibs = qres_wgt.ibs
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(
                aids1, aids2)
            reviewed = ibs.get_annotmatch_reviewed(am_rowids)
            unreviewed = ut.not_list(reviewed)

            valid_rows = ut.compress(valid_rows, unreviewed)
            aids1 = ut.compress(aids1, unreviewed)
            aids2 = ut.compress(aids2, unreviewed)

            import networkx as nx

            graph = nx.Graph()
            graph.add_edges_from(list(zip(aids1, aids2)),
                                 user_thresh_match=True)
            review_groups = list(nx.connected_component_subgraphs(graph))

            changing_aids = list(graph.nodes())
            nids = ibs.get_annot_nids(changing_aids)
            nid2_aids = ut.group_items(changing_aids, nids)
            for nid, aids in nid2_aids.items():
                # Connect all original names in the database to denote merges
                for u, v in ut.itertwo(aids):
                    graph.add_edge(u, v)
            dbside_groups = list(nx.connected_component_subgraphs(graph))

            options = [
                'Accept',
                # 'Review More'
            ]
            msg = (ut.codeblock("""
                There are %d names and %d annotations in this mass review set.
                Mass review has discovered %d internal groups.
                Accepting will induce a database grouping of %d names.
                """) % (
                len(nid2_aids),
                len(changing_aids),
                len(review_groups),
                len(dbside_groups),
            ))

            reply = gt.user_option(msg=msg, options=options)

            if reply == options[0]:
                # This is not the smartest way to group names.
                # Ideally what will happen here is that reviewed edges will go into
                # the new graph name inference algorithm.
                # then the chosen point will be used as the threshold. Then
                # the graph cut algorithm will be applied.
                logger_ = qres_wgt.logger
                logger_.debug(msg)
                logger_.info('START MASS_THRESHOLD_MERGE')
                logger_.info('num_groups=%d thresh=%r' % (
                    len(dbside_groups),
                    thresh,
                ))
                for count, subgraph in enumerate(dbside_groups):
                    thresh_aid_pairs = [
                        edge for edge, flag in nx.get_edge_attributes(
                            graph, 'user_thresh_match').items() if flag
                    ]
                    thresh_uuid_pairs = ibs.unflat_map(ibs.get_annot_uuids,
                                                       thresh_aid_pairs)
                    aids = list(subgraph.nodes())
                    nids = ibs.get_annot_name_rowids(aids)
                    flags = ut.not_list(ibs.is_aid_unknown(aids))
                    previous_names = ibs.get_name_texts(nids)
                    valid_nids = ut.compress(nids, flags)
                    if len(valid_nids) == 0:
                        merge_nid = ibs.make_next_nids(num=1)[0]
                        type_ = 'new'
                    else:
                        merge_nid = min(valid_nids)
                        type_ = 'existing'

                    # Need to find other non-exemplar / query names that may
                    # need merging
                    other_aids = ibs.get_name_aids(valid_nids)
                    other_aids = set(ut.flatten(other_aids)) - set(aids)
                    other_auuids = ibs.get_annot_uuids(other_aids)
                    other_previous_names = ibs.get_annot_names(other_aids)

                    merge_name = ibs.get_name_texts(merge_nid)
                    annot_uuids = ibs.get_annot_uuids(aids)
                    ###
                    # Set as reviewed (so we don't see them again), but mark it
                    # with a different code to denote that it was a MASS review
                    aid1_list = ut.take_column(thresh_aid_pairs, 0)
                    aid2_list = ut.take_column(thresh_aid_pairs, 1)
                    am_rowids = ibs.add_annotmatch_undirected(
                        aid1_list, aid2_list)
                    ibs.set_annotmatch_reviewer(
                        am_rowids, ['algo:lnbnn_thresh'] * len(am_rowids))

                    logger_.info('START GROUP %d' % (count, ))
                    logger_.info(
                        'GROUP BASED ON %d ANNOT_PAIRS WITH SCORE ABOVE (thresh=%r)'
                        % (
                            len(thresh_uuid_pairs),
                            thresh,
                        ))
                    logger_.debug('(uuid_pairs=%r)' % (thresh_uuid_pairs))
                    logger_.debug('(merge_name=%r)' % (merge_name))
                    logger_.debug(
                        'CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)'
                        % (
                            len(annot_uuids),
                            annot_uuids,
                            previous_names,
                            type_,
                            merge_name,
                        ))
                    logger_.debug(
                        'ADDITIONAL CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)'
                        % (
                            len(other_auuids),
                            other_auuids,
                            other_previous_names,
                            type_,
                            merge_name,
                        ))
                    logger_.info('END GROUP %d' % (count, ))
                    new_nids = [merge_nid] * len(aids)
                    ibs.set_annot_name_rowids(aids, new_nids)
                logger_.info('END MASS_THRESHOLD_MERGE')
        else:
            logger.info('[context] Multiple %d selection' %
                        (len(selected_qtindex_list), ))
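
A minimal sketch of the grouping step above: edges accepted in the mass review form a graph, and each connected component becomes one merge group (nx.connected_components is used here in place of the deprecated connected_component_subgraphs):

import networkx as nx

graph = nx.Graph()
graph.add_edges_from([(1, 2), (2, 3), (7, 8)])
# each connected component of accepted edges becomes one merge group
groups = [sorted(cc) for cc in nx.connected_components(graph)]
print(groups)  # [[1, 2, 3], [7, 8]]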
Example #47
0
def wildbook_signal_annot_name_changes(ibs, aid_list=None, wb_target=None,
                                       dryrun=False):
    r"""
    Args:
        aid_list (list):  list of annotation ids (default = None)
        tomcat_dpath (None): (default = None)
        wb_target (None): (default = None)
        dryrun (bool): (default = False)

    CommandLine:
        python -m ibeis wildbook_signal_annot_name_changes:0 --dryrun
        python -m ibeis wildbook_signal_annot_name_changes:1 --dryrun
        python -m ibeis wildbook_signal_annot_name_changes:1
        python -m ibeis wildbook_signal_annot_name_changes:2

    Setup:
        >>> wb_target = None
        >>> dryrun = ut.get_argflag('--dryrun')

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.control.manual_wildbook_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> #gid_list = ibs.get_valid_gids()[0:10]
        >>> gid_list = ibs.get_valid_gids()[3:5]
        >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list))
        >>> # Test case where some names change, some do not. There are no new names.
        >>> old_nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> new_nid_list = ut.list_roll(old_nid_list, 1)
        >>> ibs.set_annot_name_rowids(aid_list, new_nid_list)
        >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun)
        >>> ibs.set_annot_name_rowids(aid_list, old_nid_list)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.control.manual_wildbook_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> #gid_list = ibs.get_valid_gids()[0:10]
        >>> gid_list = ibs.get_valid_gids()[3:5]
        >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list))
        >>> # Test case where all names change to one known name
        >>> #old_nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> #new_nid_list = [old_nid_list[0]] * len(old_nid_list)
        >>> old_nid_list = [1, 2]
        >>> new_nid_list = [1, 1]
        >>> print('old_nid_list = %r' % (old_nid_list,))
        >>> print('new_nid_list = %r' % (new_nid_list,))
        >>> ibs.set_annot_name_rowids(aid_list, new_nid_list)
        >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun)
        >>> # Undo changes here (not undone in wildbook)
        >>> #ibs.set_annot_name_rowids(aid_list, old_nid_list)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.control.manual_wildbook_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> gid_list = ibs.get_valid_gids()[3:5]
        >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list))
        >>> old_nid_list = [1, 2]
        >>> ibs.set_annot_name_rowids(aid_list, old_nid_list)
        >>> # Signal what currently exists (should put them back to normal)
        >>> result = ibs.wildbook_signal_annot_name_changes(aid_list, wb_target, dryrun)
    """
    print('[ibs.wildbook_signal_imgsetid_list] signaling annot name changes to wildbook')
    wb_url = ibs.get_wildbook_base_url(wb_target)
    try:
        ibs.assert_ia_available_for_wb(wb_target)
    except Exception:
        pass
    if aid_list is None:
        aid_list = ibs.get_valid_aids(is_known=True)

    annot_uuid_list = ibs.get_annot_uuids(aid_list)
    annot_name_text_list = ibs.get_annot_name_texts(aid_list)
    grouped_uuids = ut.group_items(annot_uuid_list, annot_name_text_list)
    url = wb_url + '/ia'
    payloads = [
        {'resolver': {'assignNameToAnnotations': {
            'name': new_name,
            'annotationIds' : ut.lmap(str, annot_uuids),
        }}}
        for new_name, annot_uuids in grouped_uuids.items()
    ]
    status_list = []
    for json_payload in ut.ProgressIter(payloads, lbl='submitting URL', freq=1):
        print('[_send] URL=%r with json_payload=%r' % (url, json_payload))
        if dryrun:
            status = False
        else:
            response = requests.post(url, json=json_payload)
            status = response.status_code == 200
            if not status:
                print('Failed to push new names')
                print(response.text)
        status_list.append(status)
    return status_list
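
A minimal sketch of how the payloads are assembled: annotation UUIDs are grouped by their (possibly new) name and one JSON payload is built per name, matching the schema used in the function above (toy values, no network call):

# hypothetical annotation UUIDs and their (new) names
annot_uuid_list = ['uuid-1', 'uuid-2', 'uuid-3']
annot_name_text_list = ['zebra_a', 'zebra_a', 'zebra_b']

grouped_uuids = {}
for uuid, name in zip(annot_uuid_list, annot_name_text_list):
    grouped_uuids.setdefault(name, []).append(uuid)

payloads = [
    {'resolver': {'assignNameToAnnotations': {
        'name': new_name,
        'annotationIds': [str(u) for u in annot_uuids],
    }}}
    for new_name, annot_uuids in grouped_uuids.items()
]
print(payloads[0])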
Example #48
0
def split_analysis(ibs):
    """
    CommandLine:
        python -m ibeis.other.dbinfo split_analysis --show
        python -m ibeis split_analysis --show
        python -m ibeis split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    #nid_list = ibs.get_valid_nids(filter_empty=True)
    import datetime
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        #'view': ['right'],
        #'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }
    aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)),
        })
    )
    aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        })
    )
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    print('%d annots on day 1' % (len(aids1)) )
    print('%d annots on day 2' % (len(aids2)) )
    print('%d annots overall' % (len(all_annots)) )
    print('%d names overall' % (len(ut.unique(all_annots.nids))) )

    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool_ibeis as vt
    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])

    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    print('#nan_idx = %r' % (len(nan_idx),))
    print('#inf_idx = %r' % (len(inf_idx),))
    print('#ok_idx = %r' % (len(ok_idx),))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    sortx = np.argsort(ok_speeds)[::-1]

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    #idx = vt.find_elbow_point(sorted_speeds)
    #EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    #ZEBRA_SPEED_MAX  = 64  # km/h
    #ZEBRA_SPEED_RUN  = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN  = 20  # km/h
    #ZEBRA_SPEED_FAST_WALK = 10  # km/h
    #ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    #MAX_SPEED = ZEBRA_SPEED_WALK
    #MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    print('MAX_SPEED = %r km/h' % (MAX_SPEED,))
    print('%d annots with infinite speed' % (len(inf_annots),))
    print('%d annots with large speed' % (len(flagged_ok_annots),))
    print('Marking all pairs of annots above the threshold as non-matching')

    from ibeis.algo.graph import graph_iden
    import networkx as nx
    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]
        good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    all_bad_edges = ut.flatten(bad_edges_list)
    good_edges_list = ut.flatten(good_edges_list)
    print('num_bad_edges = %r' % (len(all_bad_edges),))
    print('num_good_edges = %r' % (len(good_edges_list),))

    if 1:
        from ibeis.viz import viz_graph2
        import guitool_ibeis as gt
        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            print('Looking at GOOD (no speed problems) edges')
            aid_pairs = good_edges_list
        else:
            print('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(list(set(ut.flatten(aid_pairs))))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            edge_sample_size = 250
            pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs))
            flags = ut.not_list(ut.flag_None_items(am_rowids))
            #am_rowids = ut.compress(am_rowids, flags)
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                          for tag in positive_tags]
            print('edge_case_hist: ' + ut.repr3(
                ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)]))
            is_positive = ut.or_lists(*flags_list)
            num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),))
            print('--- Sampling wrt edges ---')
            print('edge_sample_size  = %r' % (edge_sample_size,))
            print('edge_population_size = %r' % (len(aid_pairs),))
            print('num_positive_edges = %r' % (sum(is_positive)))
            print('--- Sampling wrt names ---')
            print('name_population_size = %r' % (pop,))
            vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95)

        nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win
        # Make review interface for only bad edges

    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    # Check which ones are user defined as incorrect
    #num_positive = 0
    #for infr in infr_list:
    #    flag = np.any(infr.get_feedback_probs()[0] == 0)
    #    num_positive += flag
    #print('num_positive = %r' % (num_positive,))
    #pop = len(infr_list)
    #print('pop = %r' % (pop,))

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges})
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges})

    #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw):
    #    annots = ibs.annots(infr.aids)
    #    edge_to_speeds = annots.get_speeds()
    #    bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]

    def inference_stats(infr_list_):
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                ('frac_nonmatch_edges',  state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    print('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    print('\nTrival Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (
            key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges'))
    num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3
              for sizes in new_sizes_list]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    print('#reasonable_infr = %r' % (len(reasonable_infr),))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    #import scipy.stats as st
    #conf_interval = .95
    #st.norm.cdf(conf_interval)
    # view-source:http://www.surveysystem.com/sscalc.htm
    #zval = 1.96  # 95 percent confidence
    #zValC = 3.8416  #
    #zValC = 6.6564

    #import statsmodels.stats.api as sms
    #es = sms.proportion_effectsize(0.5, 0.75)
    #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1)

    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = .95
    #conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level)
    print('---')

    vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95)

    pop = 279
    #err_frac = .05  # 5%
    err_frac = .10  # 10%
    conf_level = .95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2)
    vt.calc_sample_from_error_bars(.10, pop, conf_level=.68)

    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
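
A minimal sketch of the speed-threshold flagging at the heart of this analysis: given per-edge speeds within a name, pairs above the threshold are flagged as probable split errors (toy values):

MAX_SPEED = 20  # km/h, slow-running zebra threshold used above

# hypothetical per-edge speeds within one name
edge_to_speeds = {(1, 2): 3.5, (1, 3): 42.0, (2, 3): 40.1}
bad_edges = [e for e, s in edge_to_speeds.items() if s > MAX_SPEED]
good_edges = [e for e, s in edge_to_speeds.items() if s <= MAX_SPEED]
print(bad_edges)   # [(1, 3), (2, 3)]
print(good_edges)  # [(1, 2)]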
Example #49
0
def check_results(ibs_gt, ibs2, aid1_to_aid2, aids_list1_, incinfo):
    """
    reports how well the incremental query ran when the oracle was calling the
    shots.
    """
    print('--------- CHECKING RESULTS ------------')
    testcases = incinfo.get('testcases')
    if testcases is not None:
        count_dict = ut.count_dict_vals(testcases)
        print('+--')
        #print(ut.dict_str(testcases))
        print('---')
        print(ut.dict_str(count_dict))
        print('L__')
    # TODO: dont include initially added aids in the result reporting
    aid_list1 = aids_list1_  # ibs_gt.get_valid_aids()
    #aid_list1 = ibs_gt.get_aids_with_groundtruth()
    aid_list2 = ibs2.get_valid_aids()

    nid_list1 = ibs_gt.get_annot_nids(aid_list1)
    nid_list2 = ibs2.get_annot_nids(aid_list2)

    # Group annotations from test and gt database by their respective names
    grouped_dict1 = ut.group_items(aid_list1, nid_list1)
    grouped_dict2 = ut.group_items(aid_list2, nid_list2)
    grouped_aids1 = list(six.itervalues(grouped_dict1))
    grouped_aids2 = list(map(tuple, six.itervalues(grouped_dict2)))
    #group_nids1 = list(six.iterkeys(grouped_dict1))
    #group_nids2 = list(six.iterkeys(grouped_dict2))

    # Transform annotation ids from database1 space to database2 space
    grouped_aids1_t = [tuple(ut.dict_take_list(aid1_to_aid2, aids1)) for aids1 in grouped_aids1]

    set_grouped_aids1_t = set(grouped_aids1_t)
    set_grouped_aids2   = set(grouped_aids2)

    # Find names we got right. (correct groupings of annotations)
    # these are the annotation groups that are intersecting between
    # the test database and groundtruth database
    perfect_groups = set_grouped_aids2.intersection(set_grouped_aids1_t)
    # Find names we got wrong. (incorrect groupings of annotations)
    # The test database sets that were not perfect
    nonperfect_groups = set_grouped_aids2.difference(perfect_groups)
    # What we should have got
    # The ground truth database sets that were not fully identified
    missed_groups = set_grouped_aids1_t.difference(perfect_groups)

    # Mark non perfect groups by their error type
    false_negative_groups = []  # failed to link enough
    false_positive_groups = []  # linked too much
    for nonperfect_group in nonperfect_groups:
        if ut.is_subset_of_any(nonperfect_group, missed_groups):
            false_negative_groups.append(nonperfect_group)
        else:
            false_positive_groups.append(nonperfect_group)

    # Get some more info on the nonperfect groups
    # find which groups should have been linked
    aid2_to_aid1 = ut.invert_dict(aid1_to_aid2)
    false_negative_groups_t = [tuple(ut.dict_take_list(aid2_to_aid1, aids2)) for aids2 in false_negative_groups]
    false_negative_group_nids_t = ibs_gt.unflat_map(ibs_gt.get_annot_nids, false_negative_groups_t)
    assert all(map(ut.allsame, false_negative_group_nids_t)), 'inconsistent nids'
    false_negative_group_nid_t = ut.get_list_column(false_negative_group_nids_t, 0)
    # These are the links that should have been made
    missed_links = ut.group_items(false_negative_groups, false_negative_group_nid_t)

    print(ut.dict_str(missed_links))

    print('# Names with failed links (FN) = %r' % len(false_negative_groups))
    print('... should have reduced to %d names.' % (len(missed_links)))
    print('# Names with wrong links (FP)  = %r' % len(false_positive_groups))
    print('# Names correct (TP)           = %r' % len(perfect_groups))
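
A minimal sketch of the grouping comparison above: groupings are compared as sets of annotation tuples, so exactly matching groups count as perfect, and the rest split into false-negative-like (under-merged) and false-positive-like (over-merged) groups (toy values):

# hypothetical groupings, already mapped into the same id space
gt_groups = {(1, 2, 3), (4, 5), (6,)}
test_groups = {(1, 2, 3), (4,), (5,), (6,)}

perfect_groups = test_groups & gt_groups
nonperfect_groups = test_groups - perfect_groups
missed_groups = gt_groups - perfect_groups
print(sorted(perfect_groups))     # [(1, 2, 3), (6,)]
print(sorted(nonperfect_groups))  # [(4,), (5,)]
print(sorted(missed_groups))      # [(4, 5)]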
Example #50
0
def temp_model(num_annots,
               num_names,
               score_evidence=[],
               name_evidence=[],
               other_evidence={},
               noquery=False,
               verbose=None,
               **kwargs):
    if verbose is None:
        verbose = ut.VERBOSE

    method = kwargs.pop('method', None)
    model = make_name_model(num_annots, num_names, verbose=verbose, **kwargs)

    if verbose:
        model.print_priors(ignore_ttypes=[MATCH_TTYPE, SCORE_TTYPE])

    model, evidence, soft_evidence = update_model_evidence(
        model, name_evidence, score_evidence, other_evidence)

    if verbose and len(soft_evidence) != 0:
        model.print_priors(ignore_ttypes=[MATCH_TTYPE, SCORE_TTYPE],
                           title='Soft Evidence',
                           color='green')

    # if verbose:
    #    ut.colorprint('\n --- Soft Evidence ---', 'white')
    #    for ttype, cpds in model.ttype2_cpds.items():
    #        if ttype != MATCH_TTYPE:
    #            for fs_ in ut.ichunks(cpds, 4):
    #                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]),
    #                              'green')

    if verbose:
        ut.colorprint('\n --- Inference ---', 'red')

    if (len(evidence) > 0 or len(soft_evidence) > 0) and not noquery:
        evidence = model._ensure_internal_evidence(evidence)
        query_vars = []
        query_vars += ut.list_getattr(model.ttype2_cpds[NAME_TTYPE],
                                      'variable')
        # query_vars += ut.list_getattr(model.ttype2_cpds[MATCH_TTYPE], 'variable')
        query_vars = ut.setdiff(query_vars, evidence.keys())
        # query_vars = ut.setdiff(query_vars, soft_evidence.keys())
        query_results = cluster_query(model, query_vars, evidence,
                                      soft_evidence, method)
    else:
        query_results = {}

    factor_list = query_results.get('factor_list', [])

    if verbose:
        logger.info('+--------')
        semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list]
        for type_, factors in ut.group_items(factor_list, semtypes).items():
            logger.info('Result Factors (%r)' % (type_, ))
            factors = ut.sortedby(factors, [f.variables[0] for f in factors])
            for fs_ in ut.ichunks(factors, 4):
                ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                              'yellow')
        logger.info('MAP assignments')
        top_assignments = query_results.get('top_assignments', [])
        tmp = []
        for lbl, val in top_assignments:
            tmp.append('%s : %.4f' % (ut.repr2(lbl), val))
        logger.info(ut.align('\n'.join(tmp), ' :'))
        logger.info('L_____\n')

    showkw = dict(evidence=evidence,
                  soft_evidence=soft_evidence,
                  **query_results)

    from wbia.algo.hots import pgm_viz

    pgm_viz.show_model(model, **showkw)
    return (model, evidence, query_results)
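
A hedged sketch of the factor-grouping step near the end of temp_model: result factors are bucketed by the ttype of their first variable. The Factor namedtuple and the var2_ttype mapping below are simplified stand-ins, not the real pgmpy/wbia objects.

from collections import defaultdict, namedtuple

Factor = namedtuple('Factor', ['variables'])
var2_ttype = {'N0': 'name', 'N1': 'name', 'S01': 'score'}  # hypothetical variable -> ttype map
factor_list = [Factor(['N0']), Factor(['S01']), Factor(['N1'])]

grouped = defaultdict(list)   # plain-Python analogue of ut.group_items(factor_list, semtypes)
for factor in factor_list:
    grouped[var2_ttype[factor.variables[0]]].append(factor)

for ttype, factors in grouped.items():
    print(ttype, [f.variables[0] for f in factors])
# name ['N0', 'N1']
# score ['S01']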
Example #51
0
def get_dbinfo(ibs, verbose=True,
               with_imgsize=False,
               with_bytes=False,
               with_contrib=False,
               with_agesex=False,
               with_header=True,
               short=False,
               tag='dbinfo',
               aid_list=None):
    """

    Returns a dictionary of digestible database information.
    Infostr is a string summary of all the stats. Prints infostr in addition to
    returning locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    CommandLine:
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0
        python -m ibeis.other.dbinfo --test-get_dbinfo:1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example0:
        >>> # SCRIPT
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from ibeis.expt import cfghelpers
        >>> #from ibeis.expt import annotation_configs
        >>> #from ibeis.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> verbose = True
        >>> short = True
        >>> #ibs = ibeis.opendb(db='GZ_ALL')
        >>> #ibs = ibeis.opendb(db='PZ_Master0')
        >>> ibs = ibeis.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = ibeis.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            print('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from ibeis.expt import experiment_helpers
            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list)
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            #aid_list =
        if verbose:
            print('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) -
            {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    #associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    FILTER_HACK = True
    if FILTER_HACK:
        # HUGE HACK - get only images and names with filtered aids
        valid_aids_ = ibs.filter_aids_custom(valid_aids)
        valid_nids_ = ibs.filter_nids_custom(valid_nids)
        valid_gids_ = ibs.filter_gids_custom(valid_gids)
        if verbose:
            print('Filtered %d names' % (len(valid_nids) - len(valid_nids_)))
            print('Filtered %d images' % (len(valid_gids) - len(valid_gids_)))
            print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_)))
        valid_gids = valid_gids_
        valid_nids = valid_nids_
        valid_aids = valid_aids_
        #associated_nids = ut.compress(associated_nids, map(any,
        #ibs.unflat_map(ibs.get_annot_custom_filterflags,
        #               ibs.get_name_aids(associated_nids))))

    # Image info
    if verbose:
        print('Checking Image Info')
    gx2_aids = ibs.get_image_aids(valid_gids)
    if FILTER_HACK:
        gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats  = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True)
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        print('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if FILTER_HACK:
        nx2_aids =  [ibs.filter_aids_custom(aids) for aids in nx2_aids]    # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    # Occurrence Info
    def compute_annot_occurrence_ids(ibs, aid_list):
        from ibeis.algo.preproc import preproc_occurrence
        gid_list = ibs.get_annot_gids(aid_list)
        gid2_aids = ut.group_items(aid_list, gid_list)
        flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False)
        occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
        occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()}
        return occurid2_aids

    import utool
    with utool.embed_on_exception_context:
        occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
        occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
        occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
        nid2_occurxs = ut.ddict(list)
        for occurx, nids in enumerate(occur_unique_nids):
            for nid in nids:
                nid2_occurxs[nid].append(occurx)

    nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1}
    nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1}
    singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

    singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True)
    resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True)

    try:
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0)
        undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False)
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)

        num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1]))
        pair_tag_info['num_reviewed'] = num_reviewed_pairs
    except Exception:
        pair_tag_info = {}

    #print(ut.dict_str(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        print('Checking Annot Species')
    unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids))
    species_list = ibs.get_annot_species_texts(valid_aids)
    species2_aids = ut.group_items(valid_aids, species_list)
    species2_nAids = {key: len(val) for key, val in species2_aids.items()}

    if verbose:
        print('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs  = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs      = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        print('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            print('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)
        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = OrderedDict(
                [( 'max', wh_list.max(0)),
                 ( 'min', wh_list.min(0)),
                 ('mean', wh_list.mean(0)),
                 ( 'std', wh_list.std(0))])
            def arr2str(var):
                return ('[' + (
                    ', '.join(list(map(lambda x: '%.1f' % x, var)))
                ) + ']')
            ret = (',\n    '.join([
                '%s:%s' % (key, arr2str(val))
                for key, val in stat_dict.items()
            ]))
            return '{\n    ' + ret + '\n}'

        print('reading image sizes')
        # Image size stats
        img_size_list  = ibs.get_image_sizes(valid_gids)
        img_size_stats  = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        print('Building Stats String')

    multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True)

    # Time stats
    unixtime_list = ibs.get_image_unixtime(valid_gids)
    unixtime_list = ut.list_replace(unixtime_list, -1, float('nan'))
    #valid_unixtime_list = [time for time in unixtime_list if time != -1]
    #unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list  = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda : 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (36 <= max_age or max_age is None):
                age_dict['Adult'] += 1
            else:
                print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, ))
                age_dict['UNKNOWN'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys))
        sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys])
        # Filter 0's
        sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0}
        return sextext2_nAnnots

    if verbose:
        print('Checking Other Annot Stats')

    qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids)
    yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        print('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]
    image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags)
    contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags)

    contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}
    contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}

    contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)}
    contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)}

    if verbose:
        print('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton =  len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_aids)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            print('Checking Disk Space')
        ibsdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space    = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            print('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_aids)
            _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton
            _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            #if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(ex, keys=[
                '_num_names_total_check',
                'num_names',
                '_num_annots_total_check',
                'num_annots',
                'num_names_singleton',
                'num_names_multiton',
                'num_unknown_annots',
                'num_multiton_annots',
                'num_singleton_annots',
            ])
            raise

    # Get contributor statistics
    contrib_rowids = ibs.get_valid_contrib_rowids()
    num_contributors = len(contrib_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.dict_str(dict_)
        return align2(str_)

    header_block_lines = (
        [('+============================'), ] + (
            [
                ('+ singleton := single sighting'),
                ('+ multiton  := multiple sightings'),
                ('--' * num_tabs),
            ] if not short and with_header else []
        )
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = [
        ('--' * num_tabs),
        ('DB Bytes: '),
        ('     +- dbdir nBytes:         ' + dbdir_space),
        ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
        ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
        ('     |  |  +-cachedir nBytes: ' + cachedir_space),
    ] if with_bytes else []

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = [
        ('--' * num_tabs),
        ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
        ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
        ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
    ] if not short else []

    occurrence_block_lines = [
        ('--' * num_tabs),
        ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
        ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
        ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
    ] if not short else []

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = [
        '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
        '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
    ] if not short  and with_agesex else []

    contrib_block_lines = [
        '# Images per contributor       = ' + align_dict2(contrib_tag_to_nImages),
        '# Annots per contributor       = ' + align_dict2(contrib_tag_to_nAnnots),
        '# Quality per contributor      = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True),
        '# Viewpoint per contributor    = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True),
    ] if with_contrib else []

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None if short else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        #('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None if short else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines +
        bytes_block_lines +
        source_block_lines +
        name_block_lines +
        annot_block_lines +
        annot_per_basic_block_lines +
        occurrence_block_lines +
        annot_per_qualview_block_lines +
        annot_per_agesex_block_lines +
        img_block_lines +
        contrib_block_lines +
        imgsize_stat_lines +
        [('L============================'), ]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        print(info_str2)
    locals_ = locals()
    return locals_
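
An illustrative, controller-free sketch of the compute_annot_occurrence_ids pattern used inside get_dbinfo: annotations are grouped by image, images by occurrence, and the two groupings are composed. group_items is re-implemented in plain Python and every id below is invented.

from collections import defaultdict

def group_items(items, keys):
    # plain-Python analogue of ut.group_items
    groups = defaultdict(list)
    for item, key in zip(items, keys):
        groups[key].append(item)
    return dict(groups)

aid_list = [1, 2, 3, 4, 5]
gid_list = [10, 10, 11, 12, 12]                          # image of each annotation
occur_of_gid = {10: 'occ_A', 11: 'occ_A', 12: 'occ_B'}   # occurrence of each image

gid2_aids = group_items(aid_list, gid_list)
occurid2_gids = group_items(list(occur_of_gid.keys()), list(occur_of_gid.values()))
occurid2_aids = {oid: [aid for gid in gids for aid in gid2_aids[gid]]
                 for oid, gids in occurid2_gids.items()}
print(occurid2_aids)  # {'occ_A': [1, 2, 3], 'occ_B': [4, 5]}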
Example #52
0
def test_model(num_annots, num_names, score_evidence=[], name_evidence=[],
               other_evidence={}, noquery=False, verbose=None,
               **kwargs):
    if verbose is None:
        verbose = ut.VERBOSE

    method = kwargs.pop('method', None)
    model = make_name_model(num_annots, num_names, verbose=verbose, **kwargs)

    if verbose:
        model.print_priors(ignore_ttypes=['match', 'score'])

    model, evidence, soft_evidence = update_model_evidence(
        model, name_evidence, score_evidence, other_evidence)

    if verbose and len(soft_evidence) != 0:
        model.print_priors(ignore_ttypes=['match', 'score'],
                           title='Soft Evidence', color='green')

    #if verbose:
    #    ut.colorprint('\n --- Soft Evidence ---', 'white')
    #    for ttype, cpds in model.ttype2_cpds.items():
    #        if ttype != 'match':
    #            for fs_ in ut.ichunks(cpds, 4):
    #                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]),
    #                              'green')

    if verbose:
        ut.colorprint('\n --- Inference ---', 'red')

    if (len(evidence) > 0 or len(soft_evidence) > 0) and not noquery:
        evidence = model._ensure_internal_evidence(evidence)
        query_vars = []
        query_vars += ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        #query_vars += ut.list_getattr(model.ttype2_cpds['match'], 'variable')
        query_vars = ut.setdiff(query_vars, evidence.keys())
        #query_vars = ut.setdiff(query_vars, soft_evidence.keys())
        query_results = cluster_query(model, query_vars, evidence,
                                      soft_evidence, method)
    else:
        query_results = {}

    factor_list = query_results.get('factor_list', [])

    if verbose:
        print('+--------')
        semtypes = [model.var2_cpd[f.variables[0]].ttype
                    for f in factor_list]
        for type_, factors in ut.group_items(factor_list, semtypes).items():
            print('Result Factors (%r)' % (type_,))
            factors = ut.sortedby(factors, [f.variables[0] for f in factors])
            for fs_ in ut.ichunks(factors, 4):
                ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                              'yellow')
        print('MAP assignments')
        top_assignments = query_results.get('top_assignments', [])
        tmp = []
        for lbl, val in top_assignments:
            tmp.append('%s : %.4f' % (ut.repr2(lbl), val))
        print(ut.align('\n'.join(tmp), ' :'))
        print('L_____\n')

    showkw = dict(evidence=evidence,
                  soft_evidence=soft_evidence,
                  **query_results)

    pgm_viz.show_model(model, **showkw)
    return (model, evidence, query_results)
Example #53
0
QUALITY_INT_TO_TEXT = OrderedDict([
    (5,  QUAL_EXCELLENT,),
    (4,  QUAL_GOOD,),
    (3,  QUAL_OK,),
    (2,  QUAL_POOR,),
    # oops forgot 1. will be mapped to poor
    (0,  QUAL_JUNK,),
    (-1, QUAL_UNKNOWN,),
])

QUALITY_TEXT_TO_INT       = ut.invert_dict(QUALITY_INT_TO_TEXT)
QUALITY_INT_TO_TEXT[1]    = QUAL_JUNK
#QUALITY_TEXT_TO_INTS      = ut.invert_dict(QUALITY_INT_TO_TEXT)
QUALITY_TEXT_TO_INTS = ut.group_items(
    list(QUALITY_INT_TO_TEXT.keys()),
    list(QUALITY_INT_TO_TEXT.values()))
QUALITY_TEXT_TO_INTS[QUAL_UNKNOWN] = -1
QUALITY_INT_TO_TEXT[None] = QUALITY_INT_TO_TEXT[-1]
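
A short sketch of why group_items is used here instead of invert_dict: once QUAL_JUNK is registered for both 0 and 1, the int-to-text map is no longer one-to-one, so a plain inversion would silently drop a key. String literals stand in for the QUAL_* constants.

from collections import OrderedDict, defaultdict

int_to_text = OrderedDict([(2, 'poor'), (1, 'junk'), (0, 'junk')])

text_to_ints = defaultdict(list)   # analogue of ut.group_items(keys, values)
for val, txt in int_to_text.items():
    text_to_ints[txt].append(val)

print(dict(text_to_ints))                               # {'poor': [2], 'junk': [1, 0]}
print({txt: val for val, txt in int_to_text.items()})   # naive inversion: {'poor': 2, 'junk': 0}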


SEX_INT_TO_TEXT = {
    None: 'UNKNOWN NAME',
    -1  : 'UNKNOWN SEX',
    0   : 'Female',
    1   : 'Male',
}
SEX_TEXT_TO_INT = ut.invert_dict(SEX_INT_TO_TEXT)


class PATH_NAMES(object):
Example #54
0
QUALITY_INT_TO_TEXT = OrderedDict([
    (
        5,
        QUAL_EXCELLENT,
    ),
    (
        4,
        QUAL_GOOD,
    ),
    (
        3,
        QUAL_OK,
    ),
    (
        2,
        QUAL_POOR,
    ),
    # oops forgot 1. will be mapped to poor
    (
        0,
        QUAL_JUNK,
    ),
    (
        -1,
        QUAL_UNKNOWN,
    ),
])

QUALITY_TEXT_TO_INT = ut.invert_dict(QUALITY_INT_TO_TEXT)
QUALITY_INT_TO_TEXT[1] = QUAL_JUNK
# QUALITY_TEXT_TO_INTS      = ut.invert_dict(QUALITY_INT_TO_TEXT)
QUALITY_TEXT_TO_INTS = ut.group_items(list(QUALITY_INT_TO_TEXT.keys()),
                                      list(QUALITY_INT_TO_TEXT.values()))
QUALITY_TEXT_TO_INTS[QUAL_UNKNOWN] = -1
QUALITY_INT_TO_TEXT[None] = QUALITY_INT_TO_TEXT[-1]

SEX_INT_TO_TEXT = {
    None: 'UNKNOWN NAME',
    -1: 'UNKNOWN SEX',
    0: 'Female',
    1: 'Male',
    2: 'INDETERMINATE SEX',
}
SEX_TEXT_TO_INT = ut.invert_dict(SEX_INT_TO_TEXT)


class PATH_NAMES(object):  # NOQA
    """ Path names for internal IBEIS database """
Example #55
0
def edges_to_adjacency_list(edges):
    import utool as ut
    children_, parents_ = list(zip(*edges))
    parent_to_children = ut.group_items(parents_, children_)
    #to_leafs = {tablename: path_to_leafs(tablename, parent_to_children)}
    return parent_to_children
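
A usage sketch for edges_to_adjacency_list, assuming each edge is a (child, parent) pair as unpacked above; a defaultdict version is shown so it runs without utool, and the table names are hypothetical.

from collections import defaultdict

edges = [('chips', 'annotations'), ('feats', 'chips'), ('annotations', 'images')]
adjacency = defaultdict(list)
for child, parent in edges:          # mirrors ut.group_items(parents_, children_)
    adjacency[child].append(parent)
print(dict(adjacency))
# {'chips': ['annotations'], 'feats': ['chips'], 'annotations': ['images']}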
Example #56
0
def check_results(ibs_gt, ibs2, aid1_to_aid2, aids_list1_, incinfo):
    """
    reports how well the incremental query ran when the oracle was calling the
    shots.
    """
    print('--------- CHECKING RESULTS ------------')
    testcases = incinfo.get('testcases')
    if testcases is not None:
        count_dict = ut.count_dict_vals(testcases)
        print('+--')
        #print(ut.dict_str(testcases))
        print('---')
        print(ut.dict_str(count_dict))
        print('L__')
    # TODO: dont include initially added aids in the result reporting
    aid_list1 = aids_list1_  # ibs_gt.get_valid_aids()
    #aid_list1 = ibs_gt.get_aids_with_groundtruth()
    aid_list2 = ibs2.get_valid_aids()

    nid_list1 = ibs_gt.get_annot_nids(aid_list1)
    nid_list2 = ibs2.get_annot_nids(aid_list2)

    # Group annotations from test and gt database by their respective names
    grouped_dict1 = ut.group_items(aid_list1, nid_list1)
    grouped_dict2 = ut.group_items(aid_list2, nid_list2)
    grouped_aids1 = list(six.itervalues(grouped_dict1))
    grouped_aids2 = list(map(tuple, six.itervalues(grouped_dict2)))
    #group_nids1 = list(six.iterkeys(grouped_dict1))
    #group_nids2 = list(six.iterkeys(grouped_dict2))

    # Transform annotation ids from database1 space to database2 space
    grouped_aids1_t = [
        tuple(ut.dict_take_list(aid1_to_aid2, aids1))
        for aids1 in grouped_aids1
    ]

    set_grouped_aids1_t = set(grouped_aids1_t)
    set_grouped_aids2 = set(grouped_aids2)

    # Find names we got right. (correct groupings of annotations)
    # these are the annotation groups that are intersecting between
    # the test database and groundtruth database
    perfect_groups = set_grouped_aids2.intersection(set_grouped_aids1_t)
    # Find names we got wrong. (incorrect groupings of annotations)
    # The test database sets that were not perfect
    nonperfect_groups = set_grouped_aids2.difference(perfect_groups)
    # What we should have got
    # The ground truth database sets that were not fully identified
    missed_groups = set_grouped_aids1_t.difference(perfect_groups)

    # Mark non perfect groups by their error type
    false_negative_groups = []  # failed to link enough
    false_positive_groups = []  # linked too much
    for nonperfect_group in nonperfect_groups:
        if ut.is_subset_of_any(nonperfect_group, missed_groups):
            false_negative_groups.append(nonperfect_group)
        else:
            false_positive_groups.append(nonperfect_group)

    # Get some more info on the nonperfect groups
    # find which groups should have been linked
    aid2_to_aid1 = ut.invert_dict(aid1_to_aid2)
    false_negative_groups_t = [
        tuple(ut.dict_take_list(aid2_to_aid1, aids2))
        for aids2 in false_negative_groups
    ]
    false_negative_group_nids_t = ibs_gt.unflat_map(ibs_gt.get_annot_nids,
                                                    false_negative_groups_t)
    assert all(map(ut.allsame,
                   false_negative_group_nids_t)), 'inconsistent nids'
    false_negative_group_nid_t = ut.get_list_column(
        false_negative_group_nids_t, 0)
    # These are the links that should have been made
    missed_links = ut.group_items(false_negative_groups,
                                  false_negative_group_nid_t)

    print(ut.dict_str(missed_links))

    print('# Name with failed links (FN) = %r' % len(false_negative_groups))
    print('... should have reduced to %d names.' % (len(missed_links)))
    print('# Name with wrong links (FP)  = %r' % len(false_positive_groups))
    print('# Name correct names (TP)     = %r' % len(perfect_groups))
Example #57
0
def make_agraph(graph):
    # FIXME: use this in nx_agraph_layout instead to compartmentalize more
    import networkx as nx
    import pygraphviz
    # Convert to agraph format
    graph_ = graph.copy()

    ut.nx_ensure_agraph_color(graph_)
    # Reduce size to be in inches not pixels
    # FIXME: make robust to param settings
    # Hack to make the w/h of the node take the max instead of
    # dot which takes the minimum
    shaped_nodes = [n for n, d in graph_.nodes(data=True) if 'width' in d]
    node_attrs = ut.dict_take(graph_.node, shaped_nodes)
    width_px = np.array(ut.take_column(node_attrs, 'width'))
    height_px = np.array(ut.take_column(node_attrs, 'height'))
    scale = np.array(ut.dict_take_column(node_attrs, 'scale', default=1.0))

    width_in = width_px / 72.0 * scale
    height_in = height_px / 72.0 * scale
    width_in_dict = dict(zip(shaped_nodes, width_in))
    height_in_dict = dict(zip(shaped_nodes, height_in))
    nx.set_node_attributes(graph_, 'width', width_in_dict)
    nx.set_node_attributes(graph_, 'height', height_in_dict)
    ut.nx_delete_node_attr(graph_, 'scale')

    # Check for any nodes with groupids
    node_to_groupid = nx.get_node_attributes(graph_, 'groupid')
    if node_to_groupid:
        groupid_to_nodes = ut.group_items(*zip(*node_to_groupid.items()))
    else:
        groupid_to_nodes = {}
    # Initialize agraph format
    #import utool
    #utool.embed()
    ut.nx_delete_None_edge_attr(graph_)
    agraph = nx.nx_agraph.to_agraph(graph_)
    # Add subgraphs labels
    # TODO: subgraph attrs
    for groupid, nodes in groupid_to_nodes.items():
        subgraph_attrs = {}
        #subgraph_attrs = dict(rankdir='LR')
        #subgraph_attrs['rank'] = 'min'
        subgraph_attrs['rank'] = 'same'
        name = 'cluster_' + groupid
        agraph.add_subgraph(nodes, name, **subgraph_attrs)
    for node in graph_.nodes():
        # force pinning of node points
        anode = pygraphviz.Node(agraph, node)
        if anode.attr['pin'] == 'true':
            if anode.attr['pos'] is not None and not anode.attr['pos'].endswith('!'):
                import re
                #utool.embed()
                ptstr = anode.attr['pos'].strip('[]').strip(' ')
                ptstr_list = re.split(r'\s+', ptstr)
                pt_arr = np.array(list(map(float, ptstr_list))) / 72.0
                #print('pt_arr = %r' % (pt_arr,))
                new_ptstr_list = list(map(str, pt_arr))
                new_ptstr = ','.join(new_ptstr_list) + '!'
                #print('new_ptstr = %r' % (new_ptstr,))
                anode.attr['pos'] = new_ptstr
    return agraph
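
A sketch of the groupid handling in make_agraph: node-to-groupid attributes are inverted into groupid-to-nodes lists so each group can become a 'cluster_*' subgraph. The networkx/pygraphviz calls are omitted; only the grouping step is shown, with made-up node names.

from collections import defaultdict

node_to_groupid = {'a1': 'name1', 'a2': 'name1', 'a3': 'name2'}
groupid_to_nodes = defaultdict(list)   # ut.group_items(*zip(*node_to_groupid.items()))
for node, groupid in node_to_groupid.items():
    groupid_to_nodes[groupid].append(node)

for groupid, nodes in groupid_to_nodes.items():
    print('cluster_' + groupid, sorted(nodes))
# cluster_name1 ['a1', 'a2']
# cluster_name2 ['a3']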