Example #1
def get_namescore_nonvoting_feature_flags(fm_list, fs_list, dnid_list, name_groupxs, kpts1=None):
    r"""
    DEPRECATE

    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list  = vt.apply_grouping_(fs_list,  name_groupxs)
    # Stack up all matches to a particular name; keep track of original indices via offsets
    name_invertable_flat_fx1_list = list(map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list, 0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_comboid_flat = list(kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_comboid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs)
        for fs_flat, fid_groupxs in zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]

    # Flag which features are valid in this grouped space. Only one keypoint should be able to vote
    # for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array([fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]

    # Go back to being grouped only in name space
    #dtype = np.bool
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list, fid_groupxs, dtype=bool)
        for fid_grouped_isvalid_list, fid_groupxs in zip(name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list)
    ]

    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(name_grouped_isvalid_flat_list, name_grouped_invertable_cumsum_list)
    ]

    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list, name_groupxs)
    return featflag_list
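
The example above leans entirely on the vtool_ibeis grouping helpers. As a rough aid for reading this page, here is a minimal pure-NumPy sketch that mimics the observed behavior of vt.group_indices, vt.apply_grouping, and vt.invert_apply_grouping; it is a stand-in under those assumptions, not the vtool_ibeis implementation.

# Pure-NumPy mimics of the grouping round-trip (illustrative only).
import numpy as np

def group_indices(idx2_groupid):
    # unique keys, plus the positions that hold each key
    sortx = np.argsort(idx2_groupid, kind='mergesort')
    sorted_ids = np.asarray(idx2_groupid)[sortx]
    unique_ids, first = np.unique(sorted_ids, return_index=True)
    groupxs = np.split(sortx, first[1:])
    return unique_ids, groupxs

def apply_grouping(items, groupxs):
    # gather items into one chunk per group
    items = np.asarray(items)
    return [items.take(xs, axis=0) for xs in groupxs]

def invert_apply_grouping(grouped_items, groupxs, fill=np.nan):
    # scatter grouped chunks back to their original flat positions
    total = sum(len(xs) for xs in groupxs)
    flat = np.full(total, fill)
    for chunk, xs in zip(grouped_items, groupxs):
        flat[xs] = chunk
    return flat

labels = np.array([21, 21, 22, 21, 22])
scores = np.array([0.5, 0.9, 0.4, 0.1, 0.7])
uniq, groupxs = group_indices(labels)          # uniq -> [21, 22]
grouped = apply_grouping(scores, groupxs)      # [[0.5, 0.9, 0.1], [0.4, 0.7]]
roundtrip = invert_apply_grouping(grouped, groupxs)
assert np.allclose(roundtrip, scores)
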
Example #2
    def done_part(cand, num_neighbs):
        # Find the first `num_neighbs` complete columns in each row
        rowxs, colxs = np.where(cand.validflags)
        unique_rows, groupxs = vt.group_indices(rowxs, assume_sorted=True)
        first_k_groupxs = [groupx[0:num_neighbs] for groupx in groupxs]
        if DEBUG_REQUERY:
            assert all(ut.issorted(groupx) for groupx in groupxs)
            assert all(
                [len(group) == num_neighbs for group in first_k_groupxs])
        chosen_xs = np.array(ut.flatten(first_k_groupxs), dtype=int)
        # chosen_xs = np.hstack(first_k_groupxs)
        # then convert these to multi-indices
        done_rows = rowxs.take(chosen_xs)
        done_cols = colxs.take(chosen_xs)
        multi_index = (done_rows, done_cols)
        # done_shape = (cand.validflags.shape[0], num_neighbs)
        # flat_xs = np.ravel_multi_index(multi_index, done_shape)
        flat_xs = np.ravel_multi_index(multi_index, cand.idxs.shape)
        _shape = (-1, num_neighbs)
        idxs = cand.idxs.take(flat_xs).reshape(_shape)
        dists = cand.dists.take(flat_xs).reshape(_shape)

        trueks = colxs.take(chosen_xs).reshape(_shape)
        if DEBUG_REQUERY:
            # dists2 = dists.copy()
            for count, (row, cols) in enumerate(zip(unique_rows, groupxs)):
                pass
            assert np.all(np.diff(dists, axis=1) >= 0)
            valid = cand.validflags.take(flat_xs).reshape(_shape)
            assert np.all(valid)
        return idxs, dists, trueks
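
For reference, the "first num_neighbs valid columns per row" selection in done_part can be reproduced with plain NumPy on toy data. This sketch assumes every row has at least num_neighbs valid entries (the same condition the DEBUG_REQUERY asserts check) and uses made-up arrays in place of cand.idxs, cand.dists, and cand.validflags.

import numpy as np

validflags = np.array([[True, False, True,  True],
                       [True, True,  False, True]])
idxs  = np.arange(validflags.size).reshape(validflags.shape)   # stand-in for cand.idxs
dists = idxs / 10.0                                            # stand-in for cand.dists
num_neighbs = 2

rowxs, colxs = np.where(validflags)             # row-major, so rowxs is sorted
_, first = np.unique(rowxs, return_index=True)  # start of each row's run
chosen = np.hstack([np.arange(s, s + num_neighbs) for s in first])
flat_xs = np.ravel_multi_index((rowxs[chosen], colxs[chosen]), idxs.shape)
print(idxs.take(flat_xs).reshape(-1, num_neighbs))    # [[0 2] [4 5]]
print(dists.take(flat_xs).reshape(-1, num_neighbs))   # [[0.  0.2] [0.4 0.5]]
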
Example #3
File: bayes.py Project: warunanc/ibeis
def report_partitioning_statistics(new_reduced_joint):
    # compute partitioning statistics
    import vtool_ibeis as vt
    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    #groupsize = list(map(len, idxs))
    #groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    clusterstats = [tuple(sorted(list(ut.dict_hist(a).values())))
                    for a in all_states]
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(),
                                  clusterstats)

    #probs_assigned_to_clustertype = [(
    #    sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #    for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [(
        ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()]
    sortx = ut.argsort([max(c[0].keys())
                        for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype, sortx)

    # This is a list of 2-tuples: the first item maps each unique
    # probability assigned to a cluster type to the number of times it was
    # assigned; the second item is the cluster type itself. Every number in
    # a cluster type records how many annotations were assigned to a
    # specific label, and the length of that tuple is the total number of
    # labels. For all low scores you will see something like
    # [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]], indicating that the
    # assignment of everyone to a different label happened once with
    # probability somenum and 800 times with probability 0.

    #print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    #z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
Example #4
    def _make_anygroup_hashes(annots, nids):
        """ helper function

            import ibeis
            qreq_ = ibeis.testdata_qreq_(
                defaultdb='PZ_MTEST',
                qaid_override=[1, 2, 3, 4, 5, 6, 10, 11],
                daid_override=[2, 3, 5, 6, 20, 21, 22, 23, 24],
                )

            import ibeis
            qreq_ = ibeis.testdata_qreq_(defaultdb='PZ_Master1')
            %timeit qreq_._make_namegroup_data_hashes()
            %timeit qreq_._make_namegroup_data_uuids()

        """
        # make sure items are sorted to ensure same assignment
        # gives same uuids
        # annots = qreq_.ibs.annots(sorted(qreq_.daids))
        unique_nids, groupxs = vt.group_indices(nids)
        grouped_visual_uuids = ut.apply_grouping(annots.visual_uuids, groupxs)
        group_hashes = [
            ut.combine_hashes(sorted(u.bytes for u in uuids),
                              hasher=hashlib.sha1())
            for uuids in grouped_visual_uuids
        ]
        nid_to_grouphash = dict(zip(unique_nids, group_hashes))
        return nid_to_grouphash
Example #5
def group_images_by_label(label_arr, gid_arr):
    """
    Input: Length N list of labels and ids
    Output: Length M list of unique labels, and length M list of lists of ids
    """
    # Reverse the image-to-cluster index mapping
    import vtool_ibeis as vt
    labels_, groupxs_ = vt.group_indices(label_arr)
    sortx = np.array(list(map(len, groupxs_))).argsort()[::-1]
    labels  = labels_.take(sortx, axis=0)
    groupxs = ut.take(groupxs_, sortx)
    label_gids = vt.apply_grouping(gid_arr, groupxs)
    return labels, label_gids
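
A hypothetical call on toy data shows the contract (assuming vtool_ibeis and utool are importable and that vt.group_indices returns the unique labels in sorted order, as the other examples on this page suggest): groups come back sorted by size, largest first.

import numpy as np
label_arr = np.array([3, 1, 1, 2, 1, 2])
gid_arr   = np.array([10, 11, 12, 13, 14, 15])
labels, label_gids = group_images_by_label(label_arr, gid_arr)
# labels     -> array([1, 2, 3])   (label 1 has the biggest group)
# label_gids -> [array([11, 12, 14]), array([13, 15]), array([10])]
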
Example #6
def get_aidpair_tags(ibs, aid1_list, aid2_list, directed=True):
    r"""
    Args:
        ibs (IBEISController):  ibeis controller object
        aid1_list (list):
        aid2_list (list):
        directed (bool): (default = True)

    Returns:
        list: tags_list

    CommandLine:
        python -m ibeis.tag_funcs --exec-get_aidpair_tags --db PZ_Master1 --tags Hard interesting

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.tag_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> has_any = ut.get_argval('--tags', type_=list, default=None)
        >>> min_num = ut.get_argval('--min_num', type_=int, default=1)
        >>> aid_pairs = filter_aidpairs_by_tags(ibs, has_any=has_any, min_num=1)
        >>> aid1_list = aid_pairs.T[0]
        >>> aid2_list = aid_pairs.T[1]
        >>> undirected_tags = get_aidpair_tags(ibs, aid1_list, aid2_list, directed=False)
        >>> tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        >>> print(ut.repr2(tagged_pairs))
        >>> tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        >>> print(ut.repr2(tag_dict, nl=2))
        >>> print(ut.repr2(ut.map_dict_vals(len, tag_dict)))
    """
    aid_pairs = np.vstack([aid1_list, aid2_list]).T
    if directed:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
    else:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_undirected_superkey(aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
        if False:
            expanded_aid_pairs = np.vstack([aid_pairs, aid_pairs[:, ::-1]])
            expanded_annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
                expanded_aid_pairs.T[0], expanded_aid_pairs.T[1])
            expanded_edgeids = vt.get_undirected_edge_ids(expanded_aid_pairs)
            unique_edgeids, groupxs = vt.group_indices(expanded_edgeids)
            expanded_tags_list = ibs.get_annotmatch_case_tags(expanded_annotmatch_rowid)
            grouped_tags = vt.apply_grouping(np.array(expanded_tags_list, dtype=object), groupxs)
            undirected_tags = [list(set(ut.flatten(tags))) for tags in grouped_tags]
            edgeid2_tags = dict(zip(unique_edgeids, undirected_tags))
            input_edgeids = expanded_edgeids[:len(aid_pairs)]
            tags_list = ut.dict_take(edgeid2_tags, input_edgeids)
    return tags_list
Example #7
File: scoring.py Project: warunanc/ibeis
def get_name_shortlist_aids(daid_list, dnid_list, annot_score_list,
                            name_score_list, nid2_nidx,
                            nNameShortList, nAnnotPerName):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.scoring --test-get_name_shortlist_aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scoring import *  # NOQA
        >>> daid_list        = np.array([11, 12, 13, 14, 15, 16, 17])
        >>> dnid_list        = np.array([21, 21, 21, 22, 22, 23, 24])
        >>> annot_score_list = np.array([ 6,  2,  3,  5,  6,  3,  2])
        >>> name_score_list  = np.array([ 8,  9,  5,  4])
        >>> nid2_nidx        = {21:0, 22:1, 23:2, 24:3}
        >>> nNameShortList, nAnnotPerName = 3, 2
        >>> args = (daid_list, dnid_list, annot_score_list, name_score_list,
        ...         nid2_nidx, nNameShortList, nAnnotPerName)
        >>> top_daids = get_name_shortlist_aids(*args)
        >>> result = str(top_daids)
        >>> print(result)
        [15, 14, 11, 13, 16]
    """
    unique_nids, groupxs    = vt.group_indices(np.array(dnid_list))
    grouped_annot_scores    = vt.apply_grouping(annot_score_list, groupxs)
    grouped_daids           = vt.apply_grouping(np.array(daid_list), groupxs)
    # Ensure name score list is aligned with the unique_nids
    aligned_name_score_list = name_score_list.take(ut.dict_take(nid2_nidx, unique_nids))
    # Sort each group by the name score
    group_sortx             = aligned_name_score_list.argsort()[::-1]
    _top_daid_groups        = ut.take(grouped_daids, group_sortx)
    _top_annot_score_groups = ut.take(grouped_annot_scores, group_sortx)
    top_daid_groups         = ut.listclip(_top_daid_groups, nNameShortList)
    top_annot_score_groups  = ut.listclip(_top_annot_score_groups, nNameShortList)
    # Sort within each group by the annotation score
    top_daid_sortx_groups   = [annot_score_group.argsort()[::-1]
                               for annot_score_group in top_annot_score_groups]
    top_sorted_daid_groups  = vt.ziptake(top_daid_groups, top_daid_sortx_groups)
    top_clipped_daids = [ut.listclip(sorted_daid_group, nAnnotPerName)
                         for sorted_daid_group in top_sorted_daid_groups]
    top_daids = ut.flatten(top_clipped_daids)
    return top_daids
Example #8
File: pgm_ext.py Project: warunanc/ibeis
    def consolidate(self, inplace=False):
        """ removes duplicate entries

        Example:
            >>> # UNSTABLE_DOCTEST
            >>> from ibeis.algo.hots.pgm_ext import *  # NOQA
            >>> state_idxs = [[1, 0, 1], [1, 0, 1], [1, 0, 2]]
            >>> weights = [.1, .2, .1]
            >>> variables = ['v1', 'v2', 'v3']
            >>> self = ApproximateFactor(state_idxs, weights, variables)
            >>> inplace = False
            >>> phi = self.consolidate(inplace)
            >>> result = str(phi)
            >>> print(result)
            +------+------+------+-----------------------+
            | v1   | v2   | v3   |   \hat{phi}(v1,v2,v3) |
            |------+------+------+-----------------------|
            | v1_1 | v2_0 | v3_1 |                0.3000 |
            | v1_1 | v2_0 | v3_2 |                0.1000 |
            +------+------+------+-----------------------+
        """
        import vtool_ibeis as vt

        phi = self.copy() if inplace else self
        #data_ids = vt.compute_ndarray_unique_rowids_unsafe(self.state_idxs)
        data_ids = self._compute_unique_state_ids()
        unique_ids, groupxs = vt.group_indices(data_ids)
        #assert len(unique_ids) == len(np.unique(vt.compute_unique_data_ids_(list(map(tuple, phi.state_idxs)))))
        if len(data_ids) != len(unique_ids):
            # Sum the values in the cpd to marginalize the duplicate probs
            # Take only the unique rows under this induced labeling
            unique_tmp_groupxs = np.array([gxs[0] for gxs in groupxs])
            self.state_idxs = self.state_idxs.take(unique_tmp_groupxs, axis=0)
            self.weights = np.array(
                [g.sum() for g in vt.apply_grouping(self.weights, groupxs)])
            #print('[pgm] Consolidated %r states into %r states' % (len(data_ids), len(unique_ids),))
        #else:
        #    print('[pgm] Cannot consolidated %r unique states' % (len(data_ids),))
        if not inplace:
            return phi
Example #9
File: bayes.py Project: warunanc/ibeis
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool_ibeis as vt
    #assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]

    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if NAME_TTYPE not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = [NAME_TTYPE]
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.

        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        other_colxs, = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        print('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))
        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx
        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
        return new_state_idxs, new_values
Example #10
def cluster_timespace_sec(posixtimes, latlons, thresh_sec=5, km_per_sec=KM_PER_SEC):
    """
    Args:
        posixtimes (ndarray) : N POSIX timestamps in seconds
        latlons (ndarray) : Nx2 array of (lat, lon) coordinates
        thresh_sec (float) : threshold in seconds
        km_per_sec (float) : (default = KM_PER_SEC)

    Doctest:
        >>> from ibeis.algo.preproc.occurrence_blackbox import *  # NOQA
        >>> # Nx1 matrix denoting groundtruth locations (for testing)
        >>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2])
        >>> # Nx3 matrix where each columns are (time, lat, lon)
        >>> X_data = np.array([
        >>>     (0, 42.727985, -73.683994),  # MRC
        >>>     (0, 42.657414, -73.774448),  # Park1
        >>>     (0, 42.658333, -73.770993),  # Park2
        >>>     (0, 42.654384, -73.768919),  # Park3
        >>>     (0, 42.655039, -73.769048),  # Park4
        >>>     (0, 42.657872, -73.764148),  # Park5
        >>>     (0, 42.876974, -73.819311),  # CP1
        >>>     (0, 42.862946, -73.804977),  # CP2
        >>>     (0, 42.849809, -73.758486),  # CP3
        >>> ])
        >>> posixtimes = X_data.T[0]
        >>> latlons = X_data.T[1:3].T
        >>> thresh_sec = 250  # seconds
        >>> X_labels = cluster_timespace_sec(posixtimes, latlons, thresh_sec)
        >>> result = ('X_labels = %r' % (X_labels,))
        >>> print(result)
        X_labels = array([6, 4, 4, 4, 4, 5, 1, 2, 3])

    Doctest:
        >>> from ibeis.algo.preproc.occurrence_blackbox import *  # NOQA
        >>> # Nx1 matrix denoting groundtruth locations (for testing)
        >>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2])
        >>> # Nx3 matrix where each columns are (time, lat, lon)
        >>> X_data = np.array([
        >>>     (np.nan, 42.657414, -73.774448),  # Park1
        >>>     (0, 42.658333, -73.770993),  # Park2
        >>>     (np.nan, np.nan, np.nan),  # Park3
        >>>     (np.nan, np.nan, np.nan),  # Park3.5
        >>>     (0, 42.655039, -73.769048),  # Park4
        >>>     (0, 42.657872, -73.764148),  # Park5
        >>> ])
        >>> posixtimes = X_data.T[0]
        >>> latlons = X_data.T[1:3].T
        >>> thresh_sec = 250  # seconds
        >>> km_per_sec = KM_PER_SEC
        >>> X_labels = cluster_timespace_sec(posixtimes, latlons, thresh_sec)
        >>> result = 'X_labels = {}'.format(ut.repr2(X_labels))
        >>> print(result)
        X_labels = np.array([3, 4, 1, 2, 4, 5])
    """
    X_data, dist_func, columns = prepare_data(posixtimes, latlons, km_per_sec,
                                              'seconds')
    if X_data is None:
        return None

    # Cluster nan distributions differently
    X_bools = ~np.isnan(X_data)
    group_id = (X_bools * np.power(2, [2, 1, 0])).sum(axis=1)
    import vtool_ibeis as vt
    unique_ids, groupxs = vt.group_indices(group_id)
    grouped_labels = []
    for xs in groupxs:
        X_part = X_data.take(xs, axis=0)
        labels = _cluster_part(X_part, dist_func, columns, thresh_sec,
                               km_per_sec)
        grouped_labels.append((labels, xs))
    # Undo grouping and rectify overlaps
    X_labels = _recombine_labels(grouped_labels)
    # Do clustering
    return X_labels
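
The NaN handling above relies on a small encoding trick: each row's pattern of missing (time, lat, lon) values is packed into a 3-bit integer, so vt.group_indices places rows with identical missingness patterns in the same group and each group can be clustered on the columns it actually has. A toy illustration:

import numpy as np
X_data = np.array([
    [np.nan, 42.65, -73.77],   # missing time     -> bits 011 -> 3
    [0.0,    42.66, -73.77],   # fully observed   -> bits 111 -> 7
    [np.nan, np.nan, np.nan],  # nothing observed -> bits 000 -> 0
])
X_bools = ~np.isnan(X_data)
group_id = (X_bools * np.power(2, [2, 1, 0])).sum(axis=1)
print(group_id)   # [3 7 0]
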
Example #11
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    #infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool_ibeis as vt
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ')
        # Compute MAP joints
        # There is a bug here.
        #map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        #joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP

        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        #evidence_h = ut.delete_keys(evidence.copy(), ['Na'])

        joint = model.joint_distribution()
        joint.evidence_based_reduction(
            query_name_vars, evidence, inplace=True)

        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [joint.statename_dict[var][idx]
                          for var, idx in zip(given_name_vars, given_name_idx)]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]
        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]
        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names
        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array([
            g.sum() for g in vt.apply_grouping(new_vals, groupxs)
        ])
        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)

        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]

        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(sort_reduced_row_lbls, slice(len(given_name_vars), None))

        sort_reduced_row_lbls_[0]

        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values, statename_dict=statename_dict)
        print(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        print(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()

        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))

        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True,
                     strvals=True)
            for lbls in sort_reduced_row_lbls_
        ]

        top_assignments = list(zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]

        # import utool
        # utool.embed()

        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        #probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()

        ## Marginalize over non-query, non-evidence
        #irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        #joint.marginalize(irrelevant_vars)
        #joint.normalize()
        #new_rows = joint._row_labels()
        #new_vals = joint.values.ravel()
        #map_vals = new_rows[new_vals.argmax()]
        #map_assign = dict(zip(joint.variables, map_vals))
        # Compute Marginalized MAP joints
        #marginalized_joints = {}
        #for ttype in interest_ttypes:
        #    other_vars = [v for v in joint_factor.scope()
        #                  if model.var2_cpd[v].ttype != ttype]
        #    marginal = joint_factor.marginalize(other_vars, inplace=False)
        #    marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
Example #12
def flow():
    """
    http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin

    pip install PyMaxFlow
    pip install pystruct
    pip install hdbscan
    """
    # Toy problem: attempt to discover names via pairwise annotation
    # scores

    import pystruct  # NOQA
    import pystruct.models  # NOQA
    import networkx as netx  # NOQA

    import vtool_ibeis as vt
    num_annots = 10
    num_names = num_annots
    hidden_nids = np.random.randint(0, num_names, num_annots)
    unique_nids, groupxs = vt.group_indices(hidden_nids)

    toy_params = {
        True: {'mu': 1.0, 'sigma': 2.2},
        False: {'mu': 7.0, 'sigma': .9}
    }

    if True:
        import vtool_ibeis as vt
        import plottool_ibeis as pt
        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array([hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)
    if num_annots <= 10:
        print(ut.repr2(pairwise_scores_mat, precision=1))

    #aids = list(range(num_annots))
    #g = netx.DiGraph()
    #g.add_nodes_from(aids)
    #g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]])
    #netx.draw_graphviz(g)
    #pr = netx.pagerank(g)

    X = pairwise_scores
    Y = pairwise_labels

    encoder = vt.ScoreNormalizer()
    encoder.fit(X, Y)
    encoder.visualize()

    # meanshift clustering
    import sklearn
    bandwidth = sklearn.cluster.estimate_bandwidth(X[:, None])  # , quantile=quantile, n_samples=500)
    assert bandwidth != 0, ('[] bandwidth is 0. Cannot cluster')
    # bandwidth is with respect to the RBF used in clustering
    #ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True)
    ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X[:, None])
    label_arr = ms.labels_
    unique_labels = np.unique(label_arr)
    max_label = max(0, unique_labels.max())
    num_orphans = (label_arr == -1).sum()
    label_arr[label_arr == -1] = np.arange(max_label + 1, max_label + 1 + num_orphans)

    X_data = np.arange(num_annots)[:, None].astype(np.int64)

    #graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method='lp',
    #    class_weight=None,
    #    directed=False,
    #)

    import scipy
    import scipy.cluster
    import scipy.cluster.hierarchy

    thresh = 2.0
    labels = scipy.cluster.hierarchy.fclusterdata(X_data, thresh, metric=metric)
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),))
    print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),))
    #X_data, seconds_thresh, criterion='distance')

    #help(hdbscan.HDBSCAN)

    import hdbscan
    alg = hdbscan.HDBSCAN(metric=metric, min_cluster_size=1, p=1, gen_min_span_tree=1, min_samples=2)
    labels = alg.fit_predict(X_data)
    labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),))
    print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),))

    #import ddbscan
    #help(ddbscan.DDBSCAN)
    #alg = ddbscan.DDBSCAN(2, 2)

    #D = np.zeros((len(aids), len(aids) + 1))
    #D.T[-1] = np.arange(len(aids))

    ## Can alpha-expansion be used when the pairwise potentials are not in a grid?

    #hidden_ut.group_items(aids, hidden_nids)
    if False:
        import maxflow
        #from maxflow import fastmin
        # Create a graph with integer capacities.
        g = maxflow.Graph[int](2, 2)
        # Add two (non-terminal) nodes. Get the index to the first one.
        nodes = g.add_nodes(2)
        # Create two edges (forwards and backwards) with the given capacities.
        # The indices of the nodes are always consecutive.
        g.add_edge(nodes[0], nodes[1], 1, 2)
        # Set the capacities of the terminal edges...
        # ...for the first node.
        g.add_tedge(nodes[0], 2, 5)
        # ...for the second node.
        g.add_tedge(nodes[1], 9, 4)
        g = maxflow.Graph[float](2, 2)
        g.maxflow()
        g.get_nx_graph()
        g.get_segment(nodes[0])
Example #13
def invert_assigns(idx_to_wxs, idx_to_maws, verbose=False):
    r"""
    Inverts the assignment of vectors->words into words->vectors by
    grouping on word indexes.

    This gives a HUGE speedup over the old invert_assigns

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.smk.smk_funcs import *  # NOQA
        >>> idx_to_wxs = np.ma.array([
        >>>     (0, 4),
        >>>     (2, -1),
        >>>     (2, 0)], dtype=np.int32)
        >>> idx_to_wxs[1, 1] = np.ma.masked
        >>> idx_to_maws = np.ma.array(
        >>>     [(.5, 1.), (1., np.nan), (.5, .5)], dtype=np.float32)
        >>> idx_to_maws[1, 1] = np.ma.masked
        >>> tup = invert_assigns(idx_to_wxs, idx_to_maws)
        >>> wx_to_idxs, wx_to_maws = tup
        >>> result = 'wx_to_idxs = %s' % (ut.repr4(wx_to_idxs, with_dtype=True),)
        >>> result += '\nwx_to_maws = %s' % (ut.repr4(wx_to_maws, with_dtype=True),)
        >>> print(result)
        wx_to_idxs = {
            0: np.array([0, 2], dtype=np.int32),
            2: np.array([1, 2], dtype=np.int32),
            4: np.array([0], dtype=np.int32),
        }
        wx_to_maws = {
            0: np.array([0.5, 0.5], dtype=np.float32),
            2: np.array([1. , 0.5], dtype=np.float32),
            4: np.array([1.], dtype=np.float32),
        }
    """
    assert isinstance(idx_to_wxs, np.ma.masked_array)
    assert isinstance(idx_to_maws, np.ma.masked_array)

    nrows, ncols = idx_to_wxs.shape
    if len(idx_to_wxs.mask.shape) == 0:
        valid_mask = np.ones((nrows, ncols), dtype=bool)
    else:
        valid_mask = ~idx_to_maws.mask
        # idx_to_nAssign = (valid_mask).sum(axis=1)

    _valid_x2d = np.flatnonzero(valid_mask)
    flat_idxs = np.floor_divide(_valid_x2d, ncols, dtype=np.int32)
    flat_wxs = idx_to_wxs.compressed()
    flat_maws = idx_to_maws.compressed()

    sortx = flat_wxs.argsort()
    flat_wxs = flat_wxs.take(sortx)
    flat_idxs = flat_idxs.take(sortx)
    flat_maws = flat_maws.take(sortx)

    wx_keys, groupxs = vt.group_indices(flat_wxs)
    idxs_list = vt.apply_grouping(flat_idxs, groupxs)
    maws_list = vt.apply_grouping(flat_maws, groupxs)

    wx_to_idxs = dict(zip(wx_keys, idxs_list))
    wx_to_maws = dict(zip(wx_keys, maws_list))

    if verbose:
        print('[vocab] L___ End Assign vecs to words.')
    return (wx_to_idxs, wx_to_maws)
Example #14
def compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets):
    """
    More efficient version of agg on a stacked structure

    Args:
        words (ndarray): entire vocabulary of words
        flat_wxs_assign (ndarray): maps a stacked index to word index
        flat_vecs (ndarray): stacked SIFT descriptors
        flat_offsets (ndarray): offset positions per annotation

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.smk.smk_funcs import *  # NOQA
        >>> data = testdata_rvecs(dim=2, nvecs=1000, nannots=10)
        >>> words = data['words']
        >>> flat_offsets = data['offset_list']
        >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs'])
        >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets)
        >>> all_agg_vecs, all_error_flags, agg_offset_list = tup
        >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> assert len(agg_flags_list) == len(flat_offsets) - 1

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.smk.smk_funcs import *  # NOQA
        >>> data = testdata_rvecs(dim=2, nvecs=100, nannots=5)
        >>> words = data['words']
        >>> flat_offsets = data['offset_list']
        >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs'])
        >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets)
        >>> all_agg_vecs, all_error_flags, agg_offset_list = tup
        >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> assert len(agg_flags_list) == len(flat_offsets) - 1
    """
    grouped_wxs = [flat_wxs_assign[l:r] for l, r in ut.itertwo(flat_offsets)]

    # Assume single assignment, aggregate everything
    # across the entire database
    flat_offsets = np.array(flat_offsets)

    idx_to_dx = (np.searchsorted(
        flat_offsets, np.arange(len(flat_wxs_assign)), side='right') -
                 1).astype(np.int32)

    if isinstance(flat_wxs_assign, np.ma.masked_array):
        wx_list = flat_wxs_assign.T[0].compressed()
    else:
        wx_list = flat_wxs_assign.T[0].ravel()
    unique_wx, groupxs = vt.group_indices(wx_list)

    dim = flat_vecs.shape[1]
    if isinstance(flat_wxs_assign, np.ma.masked_array):
        dx_to_wxs = [np.unique(wxs.compressed()) for wxs in grouped_wxs]
    else:
        dx_to_wxs = [np.unique(wxs.ravel()) for wxs in grouped_wxs]
    dx_to_nagg = [len(wxs) for wxs in dx_to_wxs]
    num_agg_vecs = sum(dx_to_nagg)
    # all_agg_wxs = np.hstack(dx_to_wxs)
    agg_offset_list = np.array([0] + ut.cumsum(dx_to_nagg))
    # Preallocate agg residuals for all dxs
    all_agg_vecs = np.empty((num_agg_vecs, dim), dtype=np.float32)
    all_agg_vecs[:, :] = np.nan

    # precompute agg residual stack
    i_to_dxs = vt.apply_grouping(idx_to_dx, groupxs)
    subgroup = [vt.group_indices(dxs) for dxs in ut.ProgIter(i_to_dxs)]
    i_to_unique_dxs = ut.take_column(subgroup, 0)
    i_to_dx_groupxs = ut.take_column(subgroup, 1)
    num_words = len(unique_wx)

    # Overall this takes 5 minutes and 21 seconds
    # I think the other method takes about 12 minutes
    for i in ut.ProgIter(range(num_words), 'agg'):
        wx = unique_wx[i]
        xs = groupxs[i]
        dxs = i_to_unique_dxs[i]
        dx_groupxs = i_to_dx_groupxs[i]
        word = words[wx:wx + 1]

        offsets1 = agg_offset_list.take(dxs)
        offsets2 = [np.where(dx_to_wxs[dx] == wx)[0][0] for dx in dxs]
        offsets = np.add(offsets1, offsets2, out=offsets1)

        # if __debug__:
        #     assert np.bincount(dxs).max() < 2
        #     offset = agg_offset_list[dxs[0]]
        #     assert np.all(dx_to_wxs[dxs[0]] == all_agg_wxs[offset:offset +
        #                                                    dx_to_nagg[dxs[0]]])

        # Compute residuals
        rvecs = flat_vecs[xs] - word
        vt.normalize(rvecs, axis=1, out=rvecs)
        rvecs[np.all(np.isnan(rvecs), axis=1)] = 0
        # Aggregate across same images
        grouped_rvecs = vt.apply_grouping(rvecs, dx_groupxs, axis=0)
        agg_rvecs_ = [rvec_group.sum(axis=0) for rvec_group in grouped_rvecs]
        # agg_rvecs = np.vstack(agg_rvecs_)
        all_agg_vecs[offsets, :] = agg_rvecs_

    assert not np.any(np.isnan(all_agg_vecs))
    print('Apply normalization')
    vt.normalize(all_agg_vecs, axis=1, out=all_agg_vecs)
    all_error_flags = np.all(np.isnan(all_agg_vecs), axis=1)
    all_agg_vecs[all_error_flags, :] = 0

    # ndocs_per_word1 = np.array(ut.lmap(len, wx_to_unique_dxs))
    # ndocs_total1 = len(flat_offsets) - 1
    # idf1 = smk_funcs.inv_doc_freq(ndocs_total1, ndocs_per_word1)

    tup = all_agg_vecs, all_error_flags, agg_offset_list
    return tup
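
The inner loop above boils down to a residual-aggregation step. Here is a hedged pure-NumPy sketch of that step for a single word on toy data (no masking, offsets, or vtool_ibeis helpers): subtract the word center, L2-normalize each residual, sum the residuals that belong to the same annotation, then renormalize.

import numpy as np

def l2_normalize_rows(arr):
    # normalize each row; rows with zero norm are left as zeros
    norms = np.linalg.norm(arr, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    return arr / norms

word = np.array([[1.0, 0.0]])                            # one word center (dim=2)
vecs = np.array([[1.5, 0.5], [0.5, -0.5], [2.0, 0.0]])   # vectors assigned to it
dxs  = np.array([0, 0, 1])                               # owning annotation of each vector

rvecs = l2_normalize_rows(vecs - word)                   # normalized residuals
agg = np.stack([rvecs[dxs == dx].sum(axis=0) for dx in np.unique(dxs)])
agg = l2_normalize_rows(agg)                             # one aggregated rvec per annotation
print(agg)   # row 0 cancels to zeros; row 1 stays [1., 0.]
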
Example #15
def get_review_edges(cm_list, ibs=None, review_cfg={}):
    r"""
    Needs to be moved to a better file. Maybe something to do with
    identification.

    Returns a list of matches that should be inspected.
    This function is more lightweight than orgres or allres.
    Used in id_review_api and interact_qres2.

    Args:
        cm_list (list): list of chip match objects
        ranks_top (int): put all ranks less than this number into the graph
        directed (bool):

    Returns:
        tuple: review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.gui.id_review_api get_review_edges:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = qreq_.execute()
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, ibs=ibs, review_cfg=review_cfg)
        >>> print(review_edges)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=5,dsize=20')
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=1,dsize=100')
        >>> review_cfg = dict(ranks_top=1, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=10,dsize=10')
        >>> ranks_top = 3
        >>> review_cfg = dict(ranks_top=3, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)
    """
    import vtool_ibeis as vt
    from ibeis.algo.hots import chip_match
    automatch_kw = REVIEW_CFG_DEFAULTS.copy()
    automatch_kw = ut.update_existing(automatch_kw, review_cfg)
    print('[resorg] get_review_edges(%s)' % (ut.repr2(automatch_kw)))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack = []
    daids_stack = []
    ranks_stack = []
    scores_stack = []

    # For each QueryResult, Extract inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    if len(cm_list) == 0:
        return ([], [], [], [])

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch):
            daids = cm.get_top_aids(ntop=automatch_kw['ranks_top'])
            scores = cm.get_top_scores(ntop=automatch_kw['ranks_top'])
            ranks = np.arange(len(daids))
            qaids = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_top=automatch_kw['ranks_top'],
                name_scoring=automatch_kw['name_scoring'],
                ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    qaid_arr = np.hstack(qaids_stack)
    daid_arr = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr = qaid_arr[sortx]
    daid_arr = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr = rank_arr[sortx]

    # IS_REVIEWED DOES NOT WORK
    if automatch_kw['filter_reviewed']:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(),
                                                      daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=bool)
        qaid_arr = qaid_arr.compress(is_unreviewed)
        daid_arr = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not automatch_kw['directed']:
        #nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])

        unique_rowx = vt.find_best_undirected_edge_indexes(
            directed_edges, score_arr)

        qaid_arr = qaid_arr.take(unique_rowx)
        daid_arr = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr = rank_arr.take(unique_rowx)

    # Filter Double Name Matches
    if automatch_kw['filter_duplicate_true_matches']:
        # filter_dup_namepairs
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not automatch_kw['directed']:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(
                directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(
                vt.compute_unique_data_ids_(list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(
                namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr,
                                                      namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()] for groupx, score_group in zip(
                    namepair_groupxs, score_namepair_groups)
            ]),
                                    dtype=np.int32)
        qaid_arr = qaid_arr.take(unique_rowx2)
        daid_arr = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr = rank_arr.take(unique_rowx2)

    # Filter all true matches
    if automatch_kw['filter_true_matches']:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        valid_flags = qnid_arr != dnid_arr
        qaid_arr = qaid_arr.compress(valid_flags)
        daid_arr = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr = rank_arr.compress(valid_flags)

    if automatch_kw['filter_photobombs']:
        unique_aids = ut.unique(ut.flatten([qaid_arr, daid_arr]))
        #grouped_aids, unique_nids = ibs.group_annots_by_name(unique_aids)
        invalid_nid_map = get_photobomber_map(ibs, qaid_arr)

        nid2_aids = ut.group_items(unique_aids,
                                   ibs.get_annot_nids(unique_aids))

        expanded_aid_map = ut.ddict(set)
        for nid1, other_nids in invalid_nid_map.items():
            for aid1 in nid2_aids[nid1]:
                for nid2 in other_nids:
                    for aid2 in nid2_aids[nid2]:
                        expanded_aid_map[aid1].add(aid2)
                        expanded_aid_map[aid2].add(aid1)

        valid_flags = [
            daid not in expanded_aid_map[qaid]
            for qaid, daid in zip(qaid_arr, daid_arr)
        ]
        qaid_arr = qaid_arr.compress(valid_flags)
        daid_arr = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr = rank_arr.compress(valid_flags)

    review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)
    return review_edges
Example #16
def compute_fmech_score(cm, qreq_=None, hack_single_ori=False):
    r"""
    nsum. This is the fmech scoring mechanism.


    Args:
        cm (ibeis.ChipMatch):

    Returns:
        tuple: (unique_nids, nsum_score_list)

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:0
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2
        utprof.py -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2
        utprof.py -m ibeis.algo.hots.pipeline --test-request_ibeis_query_L0:0 --db PZ_Master1 -a timectrl:qindex=0:256

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> cm = testdata_chipmatch()
        >>> nsum_score_list = compute_fmech_score(cm)
        >>> assert np.all(nsum_score_list == [ 4.,  7.,  5.])

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_dnids(qreq_)
        >>> cm._cast_scores()
        >>> #cm.qnid = 1   # Hack for testdb1 names
        >>> nsum_score_list = compute_fmech_score(cm, qreq_)
        >>> #assert np.all(nsum_nid_list == cm.unique_nids), 'nids out of alignment'
        >>> flags = (cm.unique_nids == cm.qnid)
        >>> max_true = nsum_score_list[flags].max()
        >>> max_false = nsum_score_list[~flags].max()
        >>> assert max_true > max_false, 'is this truely a hard case?'
        >>> assert max_true > 1.2, 'score=%r should be higher for aid=18' % (max_true,)

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18], cfgdict=dict(query_rotation_heuristic=True))
        >>> cm = cm_list[0]
        >>> cm.score_name_nsum(qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)

    Example3:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('testdb1', qaid_list=[1], cfgdict=dict(query_rotation_heuristic=True))
        >>> cm = cm_list[0]
        >>> cm.score_name_nsum(qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)
    """
    #assert qreq_ is not None
    if hack_single_ori is None:
        try:
            hack_single_ori =  qreq_ is not None and (
                qreq_.qparams.query_rotation_heuristic or qreq_.qparams.rotation_invariance
            )
        except AttributeError:
            hack_single_ori =  True
    # The core for each feature match
    #
    # The query feature index for each feature match
    fm_list = cm.fm_list
    fs_list = cm.get_fsv_prod_list()
    fx1_list = [fm.T[0] for fm in fm_list]
    if hack_single_ori:
        # Group keypoints with the same xy-coordinate.
        # Combine these features so each one receives only one vote
        kpts1 = qreq_.ibs.get_annot_kpts(
            cm.qaid, config2_=qreq_.extern_query_config2)
        xys1_ = vt.get_xys(kpts1).T
        fx1_to_comboid = vt.compute_unique_arr_dataids(xys1_)
        fcombo_ids = [fx1_to_comboid.take(fx1) for fx1 in fx1_list]
    else:
        # use the feature index itself as a combo id
        # so each feature only receives one vote
        fcombo_ids = fx1_list

    if False:
        import ubelt as ub
        for ids in fcombo_ids:
            ub.find_duplicates(ids)

    # Group annotation matches by name
    # nsum_nid_list, name_groupxs = vt.group_indices(cm.dnid_list)
    # nsum_nid_list = cm.unique_nids
    name_groupxs = cm.name_groupxs

    nsum_score_list = []
    # For all indices matched to a particular name
    for name_idxs in name_groupxs:
        # Get feat indices and scores corresponding to the name's annots
        name_combo_ids = ut.take(fcombo_ids, name_idxs)
        name_fss = ut.take(fs_list, name_idxs)
        # Flatten over annots in the name
        fs  = np.hstack(name_fss)
        if len(fs) == 0:
            nsum_score_list.append(0)
            continue
        combo_ids = np.hstack(name_combo_ids)
        # Features (with the same id) can't vote for this name twice
        group_idxs = vt.group_indices(combo_ids)[1]
        flagged_idxs = [idxs[fs.take(idxs).argmax()] for idxs in group_idxs]
        # Detail: sorting the idxs preserves summation order
        # this fixes the numerical issue where nsum and csum were off
        flagged_idxs = np.sort(flagged_idxs)
        name_score = fs.take(flagged_idxs).sum()

        nsum_score_list.append(name_score)
    nsum_score_list = np.array(nsum_score_list)

    return nsum_score_list
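
The core of the loop above is the one-vote-per-feature rule: within a name, feature matches that share a combo id contribute only their single best score. A small stand-alone sketch of just that selection, with np.unique standing in for vt.group_indices:

import numpy as np
combo_ids = np.array([7, 7, 9, 9, 9])
fs        = np.array([0.2, 0.6, 0.1, 0.4, 0.3])
group_idxs = [np.flatnonzero(combo_ids == u) for u in np.unique(combo_ids)]
flagged_idxs = np.sort([idxs[fs.take(idxs).argmax()] for idxs in group_idxs])
name_score = fs.take(flagged_idxs).sum()
print(name_score)   # 0.6 + 0.4 = 1.0
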
Example #17
def get_annot_kpts_distinctiveness(ibs, aid_list, config2_=None, **kwargs):
    """
    very hacky, but cute way to cache keypoint distinctiveness

    Args:
        ibs (IBEISController):  ibeis controller object
        aid_list (list):
        dstncvs_normer (None):

    Returns:
        list: dstncvs_list

    CommandLine:
        python -m ibeis.control.manual_ibeiscontrol_funcs --test-get_annot_kpts_distinctiveness

    Example:
        >>> # SLOW_DOCTEST
        >>> # xdoctest: +SKIP
        >>> from ibeis.control.manual_ibeiscontrol_funcs import *  # NOQA
        >>> from ibeis.algo.hots import distinctiveness_normalizer
        >>> import ibeis
        >>> import numpy as np
        >>> config2_ = None
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN)
        >>> # execute function
        >>> aid_list1 = aid_list[::2]
        >>> aid_list2 = aid_list[1::3]
        >>> dstncvs_list1 = get_annot_kpts_distinctiveness(ibs, aid_list1)
        >>> dstncvs_list2 = get_annot_kpts_distinctiveness(ibs, aid_list2)
        >>> dstncvs_list = get_annot_kpts_distinctiveness(ibs, aid_list)
        >>> print(ut.depth_profile(dstncvs_list1))
        >>> stats_dict = ut.dict_stack([ut.get_stats(dstncvs) for dstncvs in dstncvs_list])
        >>> print(ut.repr2(stats_dict))
        >>> assert np.all(np.array(stats_dict['min']) >= 0), 'distinctiveness was out of bounds'
        >>> assert np.all(np.array(stats_dict['max']) <= 1), 'distinctiveness was out of bounds'
    """
    from ibeis.algo.hots import distinctiveness_normalizer as dcvs_normer

    # per-species distinctiveness wrapper around the ibeis cached function
    # get feature rowids
    aid_list = np.array(aid_list)
    fid_list = np.array(
        ibs.get_annot_feat_rowids(aid_list,
                                  ensure=True,
                                  eager=True,
                                  nInput=None,
                                  config2_=config2_))
    species_rowid_list = np.array(ibs.get_annot_species_rowids(aid_list))
    # Compute distinctiveness separately for each species
    unique_sids, groupxs = vt.group_indices(species_rowid_list)
    fids_groups = vt.apply_grouping(fid_list, groupxs)
    species_text_list = ibs.get_species_texts(unique_sids)
    # Map distinctiveness computation
    normer_list = [
        dcvs_normer.request_species_distinctiveness_normalizer(species)
        for species in species_text_list
    ]
    # Reduce to get results
    dstncvs_groups = [
        get_feat_kpts_distinctiveness(ibs,
                                      fids,
                                      dstncvs_normer=dstncvs_normer,
                                      species_rowid=sid,
                                      **kwargs) for dstncvs_normer, fids, sid
        in zip(normer_list, fids_groups, unique_sids)
    ]
    dstncvs_list = vt.invert_apply_grouping(dstncvs_groups, groupxs)
    return dstncvs_list
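
Most of the examples on this page follow the same group / map / invert pattern used here: split items by a key, run a per-group computation, then scatter the per-group results back into the original order. A hedged pure-NumPy sketch of that pattern (the species names and per-group normalization are invented for illustration; np.unique and fancy indexing stand in for vt.group_indices, vt.apply_grouping, and vt.invert_apply_grouping):

import numpy as np

species = np.array(['zebra', 'giraffe', 'zebra', 'giraffe'])
values  = np.array([1.0, 10.0, 3.0, 30.0])

uniq, inverse = np.unique(species, return_inverse=True)
groupxs = [np.flatnonzero(inverse == i) for i in range(len(uniq))]

# per-group computation (here: normalize within each species)
grouped_out = [values[xs] / values[xs].max() for xs in groupxs]

# invert the grouping so results line up with the original order
out = np.empty_like(values)
for xs, chunk in zip(groupxs, grouped_out):
    out[xs] = chunk
print(out)   # [0.333... 0.333... 1. 1.]
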