Example #1
def group_aids_by_featweight_species(ibs, aid_list, config2_=None):
    """ helper

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = None
        >>> aid_list = ibs.get_valid_aids()
        >>> grouped_aids, unique_species, groupxs = group_aids_by_featweight_species(ibs, aid_list, config2_)
    """
    if config2_ is None:
        featweight_species = ibs.cfg.featweight_cfg.featweight_species
    else:
        featweight_species = config2_.get('featweight_species')
        assert featweight_species is not None
    if featweight_species == 'uselabel':
        # Use the labeled species for the detector
        species_list = ibs.get_annot_species_texts(aid_list)
    else:
        species_list = [featweight_species]
    aid_list = np.array(aid_list)
    species_list = np.array(species_list)
    species_rowid = np.array(ibs.get_species_rowids_from_text(species_list))
    unique_species_rowids, groupxs = vtool.group_indices(species_rowid)
    grouped_aids    = vtool.apply_grouping(aid_list, groupxs)
    grouped_species = vtool.apply_grouping(species_list, groupxs)
    unique_species = ut.get_list_column(grouped_species, 0)
    return grouped_aids, unique_species, groupxs
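
All of these examples revolve around the same vtool pair: group_indices splits an array of keys into per-key index groups, and apply_grouping gathers a parallel array with those index groups. For readers without vtool installed, here is a minimal numpy-only sketch of that behaviour; the *_sketch names are invented for illustration, and the real vtool functions handle more edge cases (empty input, dtype preservation, etc.).

import numpy as np

def group_indices_sketch(keys):
    # returns (unique_keys, list of index arrays), one index array per unique key
    keys = np.asarray(keys)
    sortx = keys.argsort(kind='mergesort')      # stable: keeps original order within a group
    sorted_keys = keys.take(sortx)
    boundaries = np.flatnonzero(sorted_keys[1:] != sorted_keys[:-1]) + 1
    groupxs = np.split(sortx, boundaries)
    unique_keys = sorted_keys[np.r_[0, boundaries]]
    return unique_keys, groupxs

def apply_grouping_sketch(values, groupxs):
    # gathers the rows of values that belong to each group
    values = np.asarray(values)
    return [values.take(xs, axis=0) for xs in groupxs]

species = np.array(['zebra', 'giraffe', 'zebra', 'zebra', 'giraffe'])
aids = np.array([1, 2, 3, 4, 5])
unique_species, groupxs = group_indices_sketch(species)
print(unique_species)                           # ['giraffe' 'zebra']
print(apply_grouping_sketch(aids, groupxs))     # [array([2, 5]), array([1, 3, 4])]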
Example #2
    def get_patches(invassign, wx):
        ax_list = invassign.wx2_axs[wx]
        fx_list = invassign.wx2_fxs[wx]
        config = invassign.fstack.config
        ibs = invassign.fstack.ibs

        unique_axs, groupxs = vt.group_indices(ax_list)
        fxs_groups = vt.apply_grouping(fx_list, groupxs)

        unique_aids = ut.take(invassign.fstack.ax2_aid, unique_axs)

        all_kpts_list = ibs.depc.d.get_feat_kpts(unique_aids, config=config)
        sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)

        chip_list = ibs.depc_annot.d.get_chips_img(unique_aids)
        # convert to appropriate colorspace
        #if colorspace is not None:
        #    chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
        # ut.print_object_size(chip_list, 'chip_list')
        patch_size = 64
        grouped_patches_list = [
            vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
            for chip, kpts in ut.ProgIter(zip(chip_list, sub_kpts_list),
                                          nTotal=len(unique_aids),
                                          lbl='warping patches')
        ]
        # Make it correspond with original fx_list and ax_list
        word_patches = vt.invert_apply_grouping(grouped_patches_list, groupxs)
        return word_patches
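
The get_patches helper above follows a recurring pattern in this codebase: group work by annotation, compute something per group, then scatter the per-group results back so they line up with the original flat lists. A minimal sketch of the scatter step (a stand-in for vt.invert_apply_grouping, written from its observed usage rather than the vtool source) looks like this:

import numpy as np

def invert_apply_grouping_sketch(grouped_results, groupxs):
    # place each per-group result back at the flat positions recorded in groupxs
    total = sum(len(xs) for xs in groupxs)
    flat = [None] * total
    for results, xs in zip(grouped_results, groupxs):
        for item, idx in zip(results, xs):
            flat[idx] = item
    return flat

ax_list = np.array([7, 3, 7, 3, 9])
groupxs = [np.array([1, 3]), np.array([0, 2]), np.array([4])]   # groups for keys 3, 7, 9
grouped = [['a3', 'b3'], ['a7', 'b7'], ['a9']]                   # per-group outputs
print(invert_apply_grouping_sketch(grouped, groupxs))
# ['a7', 'a3', 'b7', 'b3', 'a9']  (aligned with ax_list again)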
Example #3
def get_annotmatch_rowids_from_aid2(ibs, aid2_list, eager=True, nInput=None,
                                    force_method=None):
    """
    # This one is slow because aid2 is the second part of the index

    TODO autogenerate

    Returns, for each input aid, the annotmatch rowids in which that aid appears as aid2 (i.e. the pairs where it was reviewed as a candidate match)

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid2 --show

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid2_list = ibs.get_valid_aids()
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid2_list=aid2_list, num_list=num_list):
        >>>    return (aid2_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid2',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if force_method != 2 and (nInput is None or nInput < 128 or force_method == 1):
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID,)
        # FIXME: col_rowid is not correct
        params_iter = zip(aid2_list)
        andwhere_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID2]
        annotmatch_rowid_list = ibs.db.get_where2(
            ibs.const.ANNOTMATCH_TABLE, colnames, params_iter, andwhere_colnames,
            eager=eager, nInput=nInput, unpack_scalars=False)
    else:
        # force_method == 2 or large inputs: vectorized grouping over all rowids
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids2_ = [_.tolist() for _ in rowids2_]
        mapping2 = ut.defaultdict(list, zip(unique_aid2, rowids2_))
        annotmatch_rowid_list = ut.dict_take(mapping2, aid2_list)
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
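
The second branch above avoids issuing one SQL query per aid by grouping every annotmatch rowid by its aid2 column once, then answering each lookup from a dictionary. A toy sketch of that idea (all *_demo names are made up):

import numpy as np
from collections import defaultdict

all_rowids_demo = np.array([100, 101, 102, 103, 104])   # pretend annotmatch rowids
aid2_demo = np.array([5, 7, 5, 9, 7])                    # their aid2 column

mapping = defaultdict(list)
for rowid, aid2 in zip(all_rowids_demo.tolist(), aid2_demo.tolist()):
    mapping[aid2].append(rowid)

queries = [7, 5, 42]                                     # 42 has no reviewed matches
print([mapping[aid] for aid in queries])                 # [[101, 104], [100, 102], []]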
Example #4
File: scoring.py  Project: Erotemic/ibeis
def get_name_shortlist_aids(daid_list, dnid_list, annot_score_list,
                            name_score_list, nid2_nidx,
                            nNameShortList, nAnnotPerName):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.scoring --test-get_name_shortlist_aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scoring import *  # NOQA
        >>> # build test data
        >>> daid_list        = np.array([11, 12, 13, 14, 15, 16, 17])
        >>> dnid_list        = np.array([21, 21, 21, 22, 22, 23, 24])
        >>> annot_score_list = np.array([ 6,  2,  3,  5,  6,  3,  2])
        >>> name_score_list  = np.array([ 8,  9,  5,  4])
        >>> nid2_nidx        = {21:0, 22:1, 23:2, 24:3}
        >>> nNameShortList, nAnnotPerName = 3, 2
        >>> # execute function
        >>> args = (daid_list, dnid_list, annot_score_list, name_score_list,
        ...         nid2_nidx, nNameShortList, nAnnotPerName)
        >>> top_daids = get_name_shortlist_aids(*args)
        >>> # verify results
        >>> result = str(top_daids)
        >>> print(result)
        [15, 14, 11, 13, 16]
    """
    unique_nids, groupxs    = vt.group_indices(np.array(dnid_list))
    grouped_annot_scores    = vt.apply_grouping(annot_score_list, groupxs)
    grouped_daids           = vt.apply_grouping(np.array(daid_list), groupxs)
    # Ensure name score list is aligned with the unique_nids
    aligned_name_score_list = name_score_list.take(ut.dict_take(nid2_nidx, unique_nids))
    # Sort each group by the name score
    group_sortx             = aligned_name_score_list.argsort()[::-1]
    _top_daid_groups        = ut.take(grouped_daids, group_sortx)
    _top_annot_score_groups = ut.take(grouped_annot_scores, group_sortx)
    top_daid_groups         = ut.listclip(_top_daid_groups, nNameShortList)
    top_annot_score_groups  = ut.listclip(_top_annot_score_groups, nNameShortList)
    # Sort within each group by the annotation score
    top_daid_sortx_groups   = [annot_score_group.argsort()[::-1]
                               for annot_score_group in top_annot_score_groups]
    top_sorted_daid_groups  = vt.ziptake(top_daid_groups, top_daid_sortx_groups)
    top_clipped_daids = [ut.listclip(sorted_daid_group, nAnnotPerName)
                         for sorted_daid_group in top_sorted_daid_groups]
    top_daids = ut.flatten(top_clipped_daids)
    return top_daids
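
The doctest above can be re-derived with plain Python/numpy to make the two-level selection explicit: first rank names by name score and keep the top nNameShortList, then rank annotations inside each kept name and keep the top nAnnotPerName. This is only an illustrative reproduction of the expected output, not the vtool-based implementation.

import numpy as np

daid_list        = np.array([11, 12, 13, 14, 15, 16, 17])
dnid_list        = np.array([21, 21, 21, 22, 22, 23, 24])
annot_score_list = np.array([ 6,  2,  3,  5,  6,  3,  2])
name_score_list  = np.array([ 8,  9,  5,  4])
nid2_nidx        = {21: 0, 22: 1, 23: 2, 24: 3}
nNameShortList, nAnnotPerName = 3, 2

unique_nids = sorted(set(dnid_list.tolist()))
ranked_nids = sorted(unique_nids, key=lambda nid: -name_score_list[nid2_nidx[nid]])
top_daids = []
for nid in ranked_nids[:nNameShortList]:
    xs = np.flatnonzero(dnid_list == nid)               # annots belonging to this name
    order = annot_score_list[xs].argsort()[::-1]        # best annot first
    top_daids.extend(daid_list[xs][order][:nAnnotPerName].tolist())
print(top_daids)   # [15, 14, 11, 13, 16] -- same as the doctest result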
Example #5
def get_namescore_nonvoting_feature_flags(fm_list, fs_list, dnid_list, name_groupxs, kpts1=None):
    r"""
    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list  = vt.apply_grouping_(fs_list,  name_groupxs)
    # Stack up all matches to a particular name, keep track of original indices via offsets
    name_invertable_flat_fx1_list = list(map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list, 0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_xyid_flat = list(kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_xyid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs)
        for fs_flat, fid_groupxs in zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]

    # Flag which features are valid in this grouped space. Only one keypoint should be able to vote
    # for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array([fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]

    # Go back to being grouped only in name space
    #dtype = np.bool
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list, fid_groupxs, dtype=bool)
        for fid_grouped_isvalid_list, fid_groupxs in zip(name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list)
    ]

    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(name_grouped_isvalid_flat_list, name_grouped_invertable_cumsum_list)
    ]

    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list, name_groupxs)
    return featflag_list
Example #6
def group_images_by_label(label_arr, gid_arr):
    """
    Input: Length N list of labels and ids
    Output: Length M list of unique labels, and length M list of lists of ids
    """
    # Reverse the image to cluster index mapping
    import vtool as vt
    labels_, groupxs_ = vt.group_indices(label_arr)
    sortx = np.array(list(map(len, groupxs_))).argsort()[::-1]
    labels  = labels_.take(sortx, axis=0)
    groupxs = ut.take(groupxs_, sortx)
    label_gids = vt.apply_grouping(gid_arr, groupxs)
    return labels, label_gids
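
The only extra twist here is ordering the groups by size (largest cluster first). A compact numpy sketch of that ordering, independent of vtool:

import numpy as np

label_arr = np.array(['b', 'a', 'b', 'c', 'b', 'a'])
gid_arr = np.array([10, 11, 12, 13, 14, 15])

labels_ = np.unique(label_arr)
groupxs_ = [np.flatnonzero(label_arr == lbl) for lbl in labels_]
sortx = np.array([len(xs) for xs in groupxs_]).argsort()[::-1]   # biggest group first
labels = labels_.take(sortx)
label_gids = [gid_arr.take(groupxs_[i]) for i in sortx]
print(labels.tolist())                    # ['b', 'a', 'c']
print([g.tolist() for g in label_gids])   # [[10, 12, 14], [11, 15], [13]]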
Example #7
def score_chipmatch_true_nsum(qaid, chipmatch, qreq_, return_wrt_aids=False):
    """
    Sums scores over all annots with those names.
    Dupvote weighting should be on to combat double counting
    """
    # Nonhacky version of name scoring
    #(aid2_fm, aid2_fsv, aid2_fk, aid2_score, aid2_H) = chipmatch
    aid2_fsv = chipmatch.aid2_fsv
    NEW_WAY = True
    if NEW_WAY:
        # New version
        aid_list = list(six.iterkeys(aid2_fsv))
        fsv_list = ut.dict_take(aid2_fsv, aid_list)
        #fs_list = [fsv.prod(axis=1) if fsv.shape[1] > 1 else fsv.T[0] for fsv in fsv_list]
        fs_list = [fsv.prod(axis=1) for fsv in fsv_list]
        annot_score_list = np.array([fs.sum() for fs in fs_list])
        annot_nid_list = np.array(qreq_.ibs.get_annot_name_rowids(aid_list))
        nid_list, groupxs = vtool.group_indices(annot_nid_list)
        grouped_scores = vtool.apply_grouping(annot_score_list, groupxs)
    else:
        aid2_fs = {aid: fsv.prod(axis=1) for aid, fsv in six.iteritems(aid2_fsv)}
        aid_list = list(six.iterkeys(aid2_fs))
        annot_score_list = np.array([fs.sum() for fs in six.itervalues(aid2_fs)])
        annot_nid_list = np.array(qreq_.ibs.get_annot_name_rowids(aid_list))
        nid_list, groupxs = vtool.group_indices(annot_nid_list)
        grouped_scores = vtool.apply_grouping(annot_score_list, groupxs)
    if return_wrt_aids:
        def indicator_array(size, pos, value):
            """ creates zero array and places value at pos """
            arr = np.zeros(size)
            arr[pos] = value
            return arr
        grouped_nscores = [indicator_array(scores.size, scores.argmax(), scores.sum()) for scores in grouped_scores]
        nscore_list = vtool.clustering2.invert_apply_grouping(grouped_nscores, groupxs)
        #nscore_list = ut.flatten(grouped_nscores)
        return aid_list, nscore_list
    else:
        score_list = [scores.sum() for scores in grouped_scores]
        return nid_list, score_list
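
The return_wrt_aids branch spreads each name's summed score back over the annotation domain by crediting the whole sum to that name's best-scoring annotation (the indicator_array trick). A toy numpy sketch of the idea, with made-up scores:

import numpy as np

aid_list = [4, 5, 6, 7]
annot_score_list = np.array([0.25, 0.75, 0.5, 0.25])
annot_nid_list = np.array([1, 1, 2, 2])

nscore_per_aid = np.zeros(len(aid_list))
for nid in np.unique(annot_nid_list):
    xs = np.flatnonzero(annot_nid_list == nid)
    scores = annot_score_list[xs]
    # the name's summed score goes to its best-scoring annotation, zeros elsewhere
    nscore_per_aid[xs[scores.argmax()]] = scores.sum()
print(dict(zip(aid_list, nscore_per_aid.tolist())))
# {4: 0.0, 5: 1.0, 6: 0.75, 7: 0.0}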
Example #8
    def compute_agg_rvecs(invassign, wx):
        """
        Sums and normalizes all rvecs that belong to the same word and the same
        annotation id
        """
        rvecs_list, error_flags = invassign.compute_nonagg_rvecs(wx)
        ax_list = invassign.wx2_axs[wx]
        maw_list = invassign.wx2_maws[wx]
        # group members of each word by aid, we will collapse these groups
        unique_ax, groupxs = vt.group_indices(ax_list)
        # (weighted aggregation with multi-assign-weights)
        grouped_maws = vt.apply_grouping(maw_list, groupxs)
        grouped_rvecs = vt.apply_grouping(rvecs_list, groupxs)
        grouped_flags = vt.apply_grouping(~error_flags, groupxs)

        grouped_rvecs2_ = vt.zipcompress(grouped_rvecs, grouped_flags, axis=0)
        grouped_maws2_ = vt.zipcompress(grouped_maws, grouped_flags)
        is_good = [len(rvecs) > 0 for rvecs in grouped_rvecs2_]
        # only aggregate groups that still contain at least one usable rvec,
        # so aggvecs stays aligned with unique_ax2_ below
        aggvecs = [aggregate_rvecs(rvecs, maws)[0]
                   for rvecs, maws, flag in zip(grouped_rvecs2_, grouped_maws2_, is_good)
                   if flag]
        unique_ax2_ = unique_ax.compress(is_good)
        ax2_aggvec = dict(zip(unique_ax2_, aggvecs))
        # Need to recompute flags for consistency
        # flag is true when aggvec is all zeros
        return ax2_aggvec
Example #9
File: tag_funcs.py  Project: Erotemic/ibeis
def get_aidpair_tags(ibs, aid1_list, aid2_list, directed=True):
    r"""
    Args:
        ibs (IBEISController):  ibeis controller object
        aid1_list (list):
        aid2_list (list):
        directed (bool): (default = True)

    Returns:
        list: tags_list

    CommandLine:
        python -m ibeis.tag_funcs --exec-get_aidpair_tags --db PZ_Master1 --tags Hard interesting

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.tag_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> has_any = ut.get_argval('--tags', type_=list, default=None)
        >>> min_num = ut.get_argval('--min_num', type_=int, default=1)
        >>> aid_pairs = filter_aidpairs_by_tags(ibs, has_any=has_any, min_num=1)
        >>> aid1_list = aid_pairs.T[0]
        >>> aid2_list = aid_pairs.T[1]
        >>> undirected_tags = get_aidpair_tags(ibs, aid1_list, aid2_list, directed=False)
        >>> tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        >>> print(ut.list_str(tagged_pairs))
        >>> tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        >>> print(ut.dict_str(tag_dict, nl=2))
        >>> print(ut.dict_str(ut.map_dict_vals(len, tag_dict)))
    """
    aid_pairs = np.vstack([aid1_list, aid2_list]).T
    if directed:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
    else:
        expanded_aid_pairs = np.vstack([aid_pairs, aid_pairs[:, ::-1]])
        expanded_annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
            expanded_aid_pairs.T[0], expanded_aid_pairs.T[1])
        expanded_edgeids = vt.get_undirected_edge_ids(expanded_aid_pairs)
        unique_edgeids, groupxs = vt.group_indices(expanded_edgeids)
        expanded_tags_list = ibs.get_annotmatch_case_tags(expanded_annotmatch_rowid)
        grouped_tags = vt.apply_grouping(np.array(expanded_tags_list, dtype=object), groupxs)
        undirected_tags = [list(set(ut.flatten(tags))) for tags in grouped_tags]
        edgeid2_tags = dict(zip(unique_edgeids, undirected_tags))
        input_edgeids = expanded_edgeids[:len(aid_pairs)]
        tags_list = ut.dict_take(edgeid2_tags, input_edgeids)
    return tags_list
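
For the undirected case the key idea is to give (a, b) and (b, a) the same lookup key and merge the tags from both directions. A toy sketch of that canonicalisation without the ibeis controller (the real code derives the key via vt.get_undirected_edge_ids; here a sorted pair is used instead):

import numpy as np

aid_pairs = np.array([[1, 2], [3, 4], [2, 1]])
tags_list = [['hard'], ['interesting'], ['photobomb']]   # tags per directed pair

edge_keys = [tuple(sorted(pair)) for pair in aid_pairs.tolist()]   # undirected key
merged = {}
for key, tags in zip(edge_keys, tags_list):
    merged.setdefault(key, set()).update(tags)
undirected_tags = [sorted(merged[key]) for key in edge_keys]
print(undirected_tags)
# [['hard', 'photobomb'], ['interesting'], ['hard', 'photobomb']]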
Example #10
def get_match_results(depc, qaid_list, daid_list, score_list, config):
    """ converts table results into format for ipython notebook """
    #qaid_list, daid_list = request.get_parent_rowids()
    #score_list = request.score_list
    #config = request.config

    unique_qaids, groupxs = ut.group_indices(qaid_list)
    #grouped_qaids_list = ut.apply_grouping(qaid_list, groupxs)
    grouped_daids = ut.apply_grouping(daid_list, groupxs)
    grouped_scores = ut.apply_grouping(score_list, groupxs)

    ibs = depc.controller
    unique_qnids = ibs.get_annot_nids(unique_qaids)
    # FIXME: decision should not be part of the config for the one-vs-one
    # scores
    decision_func = getattr(np, config['decision'])
    _iter = zip(unique_qaids, unique_qnids, grouped_daids, grouped_scores)
    for qaid, qnid, daids, scores in _iter:
        dnids = ibs.get_annot_nids(daids)

        # Remove distance to self
        annot_scores = np.array(scores)
        daid_list_ = np.array(daids)
        dnid_list_ = np.array(dnids)

        is_valid = (daid_list_ != qaid)
        daid_list_ = daid_list_.compress(is_valid)
        dnid_list_ = dnid_list_.compress(is_valid)
        annot_scores = annot_scores.compress(is_valid)

        # Hacked in version of creating an annot match object
        match_result = ibeis.AnnotMatch()
        match_result.qaid = qaid
        match_result.qnid = qnid
        match_result.daid_list = daid_list_
        match_result.dnid_list = dnid_list_
        match_result._update_daid_index()
        match_result._update_unique_nid_index()

        grouped_annot_scores = vt.apply_grouping(annot_scores, match_result.name_groupxs)
        name_scores = np.array([decision_func(dists) for dists in grouped_annot_scores])
        match_result.set_cannonical_name_score(annot_scores, name_scores)
        yield match_result
Example #11
File: pgm_ext.py  Project: heroinlin/ibeis
    def consolidate(self, inplace=False):
        """ removes duplicate entries

        Example:
            >>> # UNSTABLE_DOCTEST
            >>> from ibeis.algo.hots.pgm_ext import *  # NOQA
            >>> state_idxs = [[1, 0, 1], [1, 0, 1], [1, 0, 2]]
            >>> weights = [.1, .2, .1]
            >>> variables = ['v1', 'v2', 'v3']
            >>> self = ApproximateFactor(state_idxs, weights, variables)
            >>> inplace = False
            >>> phi = self.consolidate(inplace)
            >>> result = str(phi)
            >>> print(result)
            +------+------+------+-----------------------+
            | v1   | v2   | v3   |   \hat{phi}(v1,v2,v3) |
            |------+------+------+-----------------------|
            | v1_1 | v2_0 | v3_1 |                0.3000 |
            | v1_1 | v2_0 | v3_2 |                0.1000 |
            +------+------+------+-----------------------+
        """
        import vtool as vt

        # operate on a copy unless an in-place update was requested
        phi = self if inplace else self.copy()

        data_ids = vt.compute_ndarray_unique_rowids_unsafe(phi.state_idxs)
        unique_ids, groupxs = vt.group_indices(data_ids)
        #assert len(unique_ids) == len(np.unique(vt.compute_unique_data_ids_(list(map(tuple, phi.state_idxs)))))
        if len(data_ids) != len(unique_ids):
            # Sum the values in the cpd to marginalize the duplicate probs
            # Take only the unique rows under this induced labeling
            unique_tmp_groupxs = np.array([gxs[0] for gxs in groupxs])
            phi.state_idxs = phi.state_idxs.take(unique_tmp_groupxs, axis=0)
            phi.weights = np.array([
                g.sum() for g in vt.apply_grouping(phi.weights, groupxs)
            ])
            #print('[pgm] Consolidated %r states into %r states' % (len(data_ids), len(unique_ids),))
        #else:
        #    print('[pgm] Cannot consolidated %r unique states' % (len(data_ids),))

        if not inplace:
            return phi
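
The consolidation step itself (collapse duplicate state rows, sum their weights) can be reproduced with numpy alone; the numbers below match the doctest output above. Note this sketch orders the unique rows the way np.unique does, whereas the ApproximateFactor code keeps the first occurrence of each group.

import numpy as np

state_idxs = np.array([[1, 0, 1], [1, 0, 1], [1, 0, 2]])
weights = np.array([0.1, 0.2, 0.1])

unique_rows, inverse = np.unique(state_idxs, axis=0, return_inverse=True)
summed = np.zeros(len(unique_rows))
np.add.at(summed, inverse.ravel(), weights)     # accumulate weights of duplicate rows
print(unique_rows.tolist())                     # [[1, 0, 1], [1, 0, 2]]
print(np.round(summed, 4).tolist())             # [0.3, 0.1]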
Example #12
File: bayes.py  Project: heroinlin/ibeis
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool as vt
    #assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]

    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if 'name' not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = ['name']
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.

        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        other_colxs, = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        print('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))
        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx
        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
        return new_state_idxs, new_values
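
The core of the relabeling above is that two rows describe the same situation when they differ only in which concrete name indices they use, so each row is mapped onto a canonical first-seen numbering before deduplication. The helper below is a simplified, hypothetical variant of make_temp_state (the real bayes.py helper uses negative temporary indices so they cannot collide with true ones):

import numpy as np

def make_temp_state_sketch(row):
    # first distinct value -> 0, second -> 1, ... regardless of the actual name index
    mapping = {}
    return [mapping.setdefault(v, len(mapping)) for v in row]

rows = np.array([[7, 7, 3], [3, 3, 7], [7, 3, 3]])
canonical = np.array([make_temp_state_sketch(r) for r in rows])
print(canonical.tolist())   # [[0, 0, 1], [0, 0, 1], [0, 1, 1]] -> rows 0 and 1 collapse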
Example #13
def get_annot_kpts_distinctiveness(ibs, aid_list, config2_=None, **kwargs):
    """
    very hacky, but cute way to cache keypoint distinctiveness

    Args:
        ibs (IBEISController):  ibeis controller object
        aid_list (list):
        dstncvs_normer (None):

    Returns:
        list: dstncvs_list

    CommandLine:
        python -m ibeis.control.manual_ibeiscontrol_funcs --test-get_annot_kpts_distinctiveness

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.control.manual_ibeiscontrol_funcs import *  # NOQA
        >>> from ibeis.algo.hots import distinctiveness_normalizer
        >>> import ibeis
        >>> import numpy as np
        >>> config2_ = None
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN)
        >>> # execute function
        >>> aid_list1 = aid_list[::2]
        >>> aid_list2 = aid_list[1::3]
        >>> dstncvs_list1 = get_annot_kpts_distinctiveness(ibs, aid_list1)
        >>> dstncvs_list2 = get_annot_kpts_distinctiveness(ibs, aid_list2)
        >>> dstncvs_list = get_annot_kpts_distinctiveness(ibs, aid_list)
        >>> print(ut.depth_profile(dstncvs_list1))
        >>> stats_dict = ut.dict_stack([ut.get_stats(dstncvs) for dstncvs in dstncvs_list])
        >>> print(ut.dict_str(stats_dict))
        >>> assert np.all(np.array(stats_dict['min']) >= 0), 'distinctiveness was out of bounds'
        >>> assert np.all(np.array(stats_dict['max']) <= 1), 'distinctiveness was out of bounds'
    """
    from ibeis.algo.hots import distinctiveness_normalizer as dcvs_normer

    # per-species distinctiveness wrapper around the ibeis cached function
    # get feature rowids
    aid_list = np.array(aid_list)
    fid_list = np.array(ibs.get_annot_feat_rowids(aid_list, ensure=True,
                                                  eager=True, nInput=None,
                                                  config2_=config2_))
    species_rowid_list = np.array(ibs.get_annot_species_rowids(aid_list))
    # Compute distinctiveness separately for each species
    unique_sids, groupxs = vt.group_indices(species_rowid_list)
    fids_groups          = vt.apply_grouping(fid_list, groupxs)
    species_text_list    = ibs.get_species_texts(unique_sids)
    # Map distinctiveness computation
    normer_list = [dcvs_normer.request_species_distinctiveness_normalizer(species)
                   for species in species_text_list]
    # Reduce to get results
    dstncvs_groups = [
        get_feat_kpts_distinctiveness(ibs, fids, dstncvs_normer=dstncvs_normer,
                                      species_rowid=sid, **kwargs)
        for dstncvs_normer, fids, sid in zip(normer_list, fids_groups, unique_sids)
    ]
    dstncvs_list = vt.invert_apply_grouping(dstncvs_groups, groupxs)
    return dstncvs_list
Example #14
def get_namescore_nonvoting_feature_flags(fm_list,
                                          fs_list,
                                          dnid_list,
                                          name_groupxs,
                                          kpts1=None):
    r"""
    DEPRECATE

    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list = vt.apply_grouping_(fs_list, name_groupxs)
    # Stack up all matches to a particular name, keep track of original indices via offsets
    name_invertable_flat_fx1_list = list(
        map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list,
                                               0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(
        name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_comboid_flat = list(
            kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(
            vt.group_indices(xyid_flat)[1]
            for xyid_flat in name_grouped_comboid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [
            vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat
        ]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs) for fs_flat, fid_groupxs in
        zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]

    # Flag which features are valid in this grouped space. Only one keypoint should be able to vote
    # for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array(
            [fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]

    # Go back to being grouped only in name space
    # dtype = np.bool
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list,
                                  fid_groupxs,
                                  dtype=bool)
        for fid_grouped_isvalid_list, fid_groupxs in zip(
            name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list)
    ]

    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(
            name_grouped_isvalid_flat_list,
            name_grouped_invertable_cumsum_list)
    ]

    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list,
                                             name_groupxs)
    return featflag_list
Example #15
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    CommandLine:
        python -m wbia.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    # infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool as vt

        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(
            query_vars, ut.list_getattr(model.ttype2_cpds['score'],
                                        'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            logger.info('P(' + ', '.join(query_vars) + ' | ' + evidence_str +
                        ') = ')
        # Compute MAP joints
        # There is a bug here.
        # map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        # joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP

        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        # evidence_h = ut.delete_keys(evidence.copy(), ['Na'])

        joint = model.joint_distribution()
        joint.evidence_based_reduction(query_name_vars, evidence, inplace=True)

        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [
            joint.statename_dict[var][idx]
            for var, idx in zip(given_name_vars, given_name_idx)
        ]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]
        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]

        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names

        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows,
                                   ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array(
            [g.sum() for g in vt.apply_grouping(new_vals, groupxs)])
        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars,
                                              given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)

        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]

        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(
            sort_reduced_row_lbls, slice(len(given_name_vars), None))

        sort_reduced_row_lbls_[0]

        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(
            zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_,
                                      cardinality,
                                      values,
                                      statename_dict=statename_dict)
        logger.info(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        logger.info(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()

        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))

        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)),
                     explicit=True,
                     nobraces=True,
                     strvals=True) for lbls in sort_reduced_row_lbls_
        ]

        top_assignments = list(
            zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]

        # import utool
        # utool.embed()

        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        # probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()

        ## Marginalize over non-query, non-evidence
        # irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        # joint.marginalize(irrelevant_vars)
        # joint.normalize()
        # new_rows = joint._row_labels()
        # new_vals = joint.values.ravel()
        # map_vals = new_rows[new_vals.argmax()]
        # map_assign = dict(zip(joint.variables, map_vals))
        # Compute Marginalized MAP joints
        # marginalized_joints = {}
        # for ttype in interest_ttypes:
        #    other_vars = [v for v in joint_factor.scope()
        #                  if model.var2_cpd[v].ttype != ttype]
        #    marginal = joint_factor.marginalize(other_vars, inplace=False)
        #    marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
Example #16
File: bayes.py  Project: whaozl/ibeis
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool as vt
    #assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]

    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if NAME_TTYPE not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = [NAME_TTYPE]
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.

        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        other_colxs, = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        print('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))
        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx
        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
        return new_state_idxs, new_values
Example #17
def align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx,
                                  name_groupxs, name_score_list):
    r"""
    takes name scores and gives them to the best annotation

    Returns:
        score_list: list of scores aligned with cm.daid_list and cm.dnid_list

    Args:
        annot_score_list (list): score associated with each annot
        name_groupxs (list): groups annot_score lists into groups compatible with name_score_list
        name_score_list (list): score associated with name
        nid2_nidx (dict): mapping from nids to index in name score list

    CommandLine:
        python -m wbia.algo.hots.name_scoring --test-align_name_scores_with_annots
        python -m wbia.algo.hots.name_scoring --test-align_name_scores_with_annots --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_csum_annot_score(qreq_)
        >>> cm.evaluate_nsum_name_score(qreq_)
        >>> # Annot aligned lists
        >>> annot_score_list = cm.algo_annot_scores['csum']
        >>> annot_aid_list   = cm.daid_list
        >>> daid2_idx        = cm.daid2_idx
        >>> # Name aligned lists
        >>> name_score_list  = cm.algo_name_scores['nsum']
        >>> name_groupxs     = cm.name_groupxs
        >>> # Execute Function
        >>> score_list = align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list)
        >>> # Check that the correct name gets the highest score
        >>> target = name_score_list[cm.nid2_nidx[cm.qnid]]
        >>> test_index = np.where(score_list == target)[0][0]
        >>> cm.score_list = score_list
        >>> ut.assert_eq(ibs.get_annot_name_rowids(cm.daid_list[test_index]), cm.qnid)
        >>> assert ut.isunique(cm.dnid_list[score_list > 0]), 'bad name score'
        >>> top_idx = cm.algo_name_scores['nsum'].argmax()
        >>> assert cm.get_top_nids()[0] == cm.unique_nids[top_idx], 'bug in alignment'
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_)
        >>> ut.show_if_requested()

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.name_scoring import *  # NOQA
        >>> annot_score_list = []
        >>> annot_aid_list   = []
        >>> daid2_idx        = {}
        >>> # Name aligned lists
        >>> name_score_list  = np.array([], dtype=np.float32)
        >>> name_groupxs     = []
        >>> # Execute Function
        >>> score_list = align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list)
    """
    if len(name_groupxs) == 0:
        score_list = np.empty(0, dtype=name_score_list.dtype)
        return score_list
    else:
        # Group annot aligned indices by nid
        annot_aid_list = np.array(annot_aid_list)
        # nid_list, groupxs  = vt.group_indices(annot_nid_list)
        grouped_scores = vt.apply_grouping(annot_score_list, name_groupxs)
        grouped_annot_aids = vt.apply_grouping(annot_aid_list, name_groupxs)
        flat_grouped_aids = np.hstack(grouped_annot_aids)
        # flat_groupxs  = np.hstack(name_groupxs)
        # if __debug__:
        #    sum_scores = np.array([scores.sum() for scores in grouped_scores])
        #    max_scores = np.array([scores.max() for scores in grouped_scores])
        #    assert np.all(name_score_list <= sum_scores)
        #    assert np.all(name_score_list > max_scores)
        # +------------
        # Find the position of the highest name_scoring annotation for each name
        # IN THE FLATTENED GROUPED ANNOT_AID_LIST (this was the bug)
        offset_list = np.array(
            [annot_scores.argmax() for annot_scores in grouped_scores])
        # Find the starting position of each group; use chain to start the offsets at 0
        _padded_scores = itertools.chain([[]], grouped_scores[:-1])
        sizeoffset_list = np.array(
            [len(annot_scores) for annot_scores in _padded_scores])
        baseindex_list = sizeoffset_list.cumsum()
        # Augment starting position with offset index
        annot_idx_list = np.add(baseindex_list, offset_list)
        # L______________
        best_aid_list = flat_grouped_aids[annot_idx_list]
        best_idx_list = ut.dict_take(daid2_idx, best_aid_list)
        # give the annotation domain a name score
        # score_list = np.zeros(len(annot_score_list), dtype=name_score_list.dtype)
        score_list = np.full(len(annot_score_list),
                             fill_value=-np.inf,
                             dtype=name_score_list.dtype)
        # score_list = np.full(len(annot_score_list), fill_value=np.nan, dtype=name_score_list.dtype)
        # score_list = np.nan(len(annot_score_list), dtype=name_score_list.dtype)
        # HACK: we need to set these to 'low' values and we also have to respect negatives
        # score_list[:] = -np.inf
        # make sure that the nid_list from group_indices and the nids belonging to
        # name_score_list (cm.unique_nids) are in alignment
        # nidx_list = np.array(ut.dict_take(nid2_nidx, nid_list))

        # THIS ASSUMES name_score_list IS IN ALIGNMENT WITH BOTH cm.unique_nids and
        # nid_list (which should be == cm.unique_nids)
        score_list[best_idx_list] = name_score_list
        return score_list
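
The offset arithmetic in the middle of this function is the subtle part: each group's argmax is a position inside its own group, and the cumulative group sizes turn it into a position inside the flattened grouped array. A small standalone illustration with toy data:

import numpy as np

grouped_scores = [np.array([3, 9, 1]), np.array([4, 2]), np.array([7])]
grouped_aids = [np.array([11, 12, 13]), np.array([14, 15]), np.array([16])]

offset_list = np.array([scores.argmax() for scores in grouped_scores])    # [1, 0, 0]
sizes = np.array([0] + [len(s) for s in grouped_scores[:-1]])             # [0, 3, 2]
baseindex_list = sizes.cumsum()                                           # [0, 3, 5]
annot_idx_list = baseindex_list + offset_list                             # [1, 3, 5]

flat_grouped_aids = np.hstack(grouped_aids)
print(flat_grouped_aids[annot_idx_list].tolist())   # [12, 14, 16] -> best aid per name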
Example #18
def align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list):
    r"""
    takes name scores and gives them to the best annotation

    Returns:
        score_list: list of scores aligned with cm.daid_list and cm.dnid_list

    Args:
        annot_score_list (list): score associated with each annot
        name_groupxs (list): groups annot_score lists into groups compatible with name_score_list
        name_score_list (list): score associated with name
        nid2_nidx (dict): mapping from nids to index in name score list

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-align_name_scores_with_annots
        python -m ibeis.algo.hots.name_scoring --test-align_name_scores_with_annots --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('PZ_MTEST', qaid_list=[18])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_csum_score(qreq_)
        >>> cm.evaluate_nsum_score(qreq_)
        >>> # Annot aligned lists
        >>> annot_score_list = cm.algo_annot_scores['csum']
        >>> annot_aid_list   = cm.daid_list
        >>> daid2_idx        = cm.daid2_idx
        >>> # Name aligned lists
        >>> name_score_list  = cm.algo_name_scores['nsum']
        >>> name_groupxs     = cm.name_groupxs
        >>> # Execute Function
        >>> score_list = align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list)
        >>> # Check that the correct name gets the highest score
        >>> target = name_score_list[cm.nid2_nidx[cm.qnid]]
        >>> test_index = np.where(score_list == target)[0][0]
        >>> cm.score_list = score_list
        >>> ut.assert_eq(ibs.get_annot_name_rowids(cm.daid_list[test_index]), cm.qnid)
        >>> assert ut.isunique(cm.dnid_list[score_list > 0]), 'bad name score'
        >>> assert cm.get_top_nids()[0] == cm.unique_nids[cm.nsum_score_list.argmax()], 'bug in alignment'
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_)
        >>> ut.show_if_requested()

    Example:
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> annot_score_list = []
        >>> annot_aid_list   = []
        >>> daid2_idx        = {}
        >>> # Name aligned lists
        >>> name_score_list  = np.array([], dtype=np.float32)
        >>> name_groupxs     = []
        >>> # Execute Function
        >>> score_list = align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list)

    Ignore:
        dict(zip(cm.dnid_list, cm.score_list))
        dict(zip(cm.unique_nids, cm.nsum_score_list))
        np.all(nid_list == cm.unique_nids)
    """
    if len(name_groupxs) == 0:
        score_list = np.empty(0, dtype=name_score_list.dtype)
        return score_list
    else:
        # Group annot aligned indices by nid
        annot_aid_list = np.array(annot_aid_list)
        #nid_list, groupxs  = vt.group_indices(annot_nid_list)
        grouped_scores     = vt.apply_grouping(annot_score_list, name_groupxs)
        grouped_annot_aids = vt.apply_grouping(annot_aid_list, name_groupxs)
        flat_grouped_aids  = np.hstack(grouped_annot_aids)
        #flat_groupxs  = np.hstack(name_groupxs)
        #if __debug__:
        #    sum_scores = np.array([scores.sum() for scores in grouped_scores])
        #    max_scores = np.array([scores.max() for scores in grouped_scores])
        #    assert np.all(name_score_list <= sum_scores)
        #    assert np.all(name_score_list > max_scores)
        # +------------
        # Find the position of the highest name_scoring annotation for each name
        # IN THE FLATTENED GROUPED ANNOT_AID_LIST (this was the bug)
        offset_list = np.array([annot_scores.argmax() for annot_scores in grouped_scores])
        # Find the starting position of each group; use chain to start the offsets at 0
        _padded_scores  = itertools.chain([[]], grouped_scores[:-1])
        sizeoffset_list = np.array([len(annot_scores) for annot_scores in _padded_scores])
        baseindex_list  = sizeoffset_list.cumsum()
        # Augment starting position with offset index
        annot_idx_list = np.add(baseindex_list, offset_list)
        # L______________
        best_aid_list = flat_grouped_aids[annot_idx_list]
        best_idx_list = ut.dict_take(daid2_idx, best_aid_list)
        # give the annotation domain a name score
        #score_list = np.zeros(len(annot_score_list), dtype=name_score_list.dtype)
        score_list = np.full(len(annot_score_list), fill_value=-np.inf, dtype=name_score_list.dtype)
        #score_list = np.full(len(annot_score_list), fill_value=np.nan, dtype=name_score_list.dtype)
        #score_list = np.nan(len(annot_score_list), dtype=name_score_list.dtype)
        # HACK: we need to set these to 'low' values and we also have to respect negatives
        #score_list[:] = -np.inf
        # make sure that the nid_list from group_indices and the nids belonging to
        # name_score_list (cm.unique_nids) are in alignment
        #nidx_list = np.array(ut.dict_take(nid2_nidx, nid_list))

        # THIS ASSUMES name_score_list IS IN ALIGNMENT WITH BOTH cm.unique_nids and
        # nid_list (which should be == cm.unique_nids)
        score_list[best_idx_list] = name_score_list
        return score_list
Example #19
def group_scores_by_name(ibs, aid_list, score_list):
    r"""
    Converts annotation scores to name scores.
    Over multiple annotations, finds each keypoint's best match and uses that score.

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-group_scores_by_name

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *   # NOQA
        >>> import ibeis
        >>> cm, qreq_ = ibeis.testdata_cm('PZ_MTEST')
        >>> ibs = qreq_.ibs
        >>> #print(cm.get_inspect_str(qreq_))
        >>> aid_list = cm.daid_list
        >>> score_list = cm.annot_score_list
        >>> nscoretup = group_scores_by_name(ibs, aid_list, score_list)
        >>> (sorted_nids, sorted_nscore, sorted_aids, sorted_scores) = nscoretup
        >>> ut.assert_eq(sorted_nids[0], cm.qnid)

    TODO:
        # TODO: this code needs a really good test case
        #>>> result = np.array_repr(sorted_nids[0:2])
        #>>> print(result)
        #array([1, 5])

        Ignore::
            # hack in dict of Nones prob for testing
            import six
            qres.aid2_prob = {aid:None for aid in six.iterkeys(qres.aid2_score)}

        array([ 1,  5, 26])
        [2 6 5]

        Timeit::
            import ibeis
            ibs = ibeis.opendb('PZ_MTEST')
            aid_list = ibs.get_valid_aids()
            aid_arr = np.array(aid_list)
            %timeit ibs.get_annot_name_rowids(aid_list)
            %timeit ibs.get_annot_name_rowids(aid_arr)


    """
    assert len(score_list) == len(aid_list), 'scores and aids must be associated'
    score_arr = np.array(score_list)
    nid_list  = np.array(ibs.get_annot_name_rowids(aid_list))
    aid_list  = np.array(aid_list)
    # Group scores by name
    unique_nids, groupxs = vt.group_indices(nid_list)
    grouped_scores = np.array(vt.apply_grouping(score_arr, groupxs))
    grouped_aids   = np.array(vt.apply_grouping(aid_list, groupxs))
    # Build a representative score per group
    # (each name takes the score of its best-matching annotation)
    group_nscore = np.array([scores.max() for scores in grouped_scores])
    group_sortx = group_nscore.argsort()[::-1]
    # Top nids
    sorted_nids = unique_nids.take(group_sortx, axis=0)
    sorted_nscore = group_nscore.take(group_sortx, axis=0)
    # Initial sort of aids
    _sorted_aids   = grouped_aids.take(group_sortx, axis=0)
    _sorted_scores = grouped_scores.take(group_sortx, axis=0)
    # Secondary sort of aids
    sorted_sortx  = [scores.argsort()[::-1] for scores in _sorted_scores]
    sorted_scores = [scores.take(sortx) for scores, sortx in zip(_sorted_scores, sorted_sortx)]
    sorted_aids   = [aids.take(sortx) for aids, sortx in zip(_sorted_aids, sorted_sortx)]
    nscoretup     = NameScoreTup(sorted_nids, sorted_nscore, sorted_aids, sorted_scores)
    return nscoretup
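Nearly every example on this page leans on the same two vtool helpers, so a tiny sketch of their contract may help. The rowids and scores below are hypothetical toy data; the call signatures match how group_indices and apply_grouping are used throughout these snippets.

import numpy as np
import vtool as vt

# hypothetical name rowids and annotation scores
nids   = np.array([7, 3, 7, 7, 3])
scores = np.array([0.1, 0.9, 0.4, 0.8, 0.2])
unique_nids, groupxs = vt.group_indices(nids)        # unique keys + index groups
grouped_scores = vt.apply_grouping(scores, groupxs)  # one score array per key
name_scores = np.array([s.max() for s in grouped_scores])  # best score per name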
Example #20
def get_name_aids(ibs, nid_list, enable_unknown_fix=True):
    r"""
    # TODO: Rename to get_anot_rowids_from_name_rowid

    Returns:
         list: aids_list a list of list of aids in each name

    RESTful:
        Method: GET
        URL:    /api/name/aids/

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_name_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> # Map annotations to name ids
        >>> aid_list = ibs.get_valid_aids()
        >>> nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> # Get annotation ids for each name
        >>> aids_list = ibs.get_name_aids(nid_list)
        >>> # Run Assertion Test
        >>> groupid2_items = ut.group_items(aids_list, nid_list)
        >>> grouped_items = list(six.itervalues(groupid2_items))
        >>> passed_iter = map(ut.allsame, grouped_items)
        >>> passed_list = list(passed_iter)
        >>> assert all(passed_list), 'problem in get_name_aids'
        >>> # Print grouped items
        >>> print(ut.dict_str(groupid2_items, newlines=False))

    Ignore:
        from ibeis.control.manual_name_funcs import *  # NOQA
        import ibeis
        #ibs = ibeis.opendb('testdb1')
        #ibs = ibeis.opendb('PZ_MTEST')
        ibs = ibeis.opendb('PZ_Master0')
        #ibs = ibeis.opendb('GZ_ALL')

        nid_list = ibs.get_valid_nids()
        nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]

        with ut.Timer('sql'):
            #aids_list1 = ibs.get_name_aids(nid_list, enable_unknown_fix=False)
            aids_list1 = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,), nid_list_, id_colname=NAME_ROWID, unpack_scalars=False)

        with ut.Timer('hackquery + group'):
            opstr = '''
            SELECT annot_rowid, name_rowid
            FROM annotations
            WHERE name_rowid IN
                (%s)
                ORDER BY name_rowid ASC, annot_rowid ASC
            ''' % (', '.join(map(str, nid_list)))
            pair_list = ibs.db.connection.execute(opstr).fetchall()
            aids = np.array(ut.get_list_column(pair_list, 0))
            nids = np.array(ut.get_list_column(pair_list, 1))
            unique_nids, groupx = vt.group_indices(nids)
            grouped_aids_ = vt.apply_grouping(aids, groupx)
            aids_list5 = [sorted(arr.tolist()) for arr in grouped_aids_]

        for aids1, aids5 in zip(aids_list1, aids_list5):
            if (aids1) != (aids5):
                print(aids1)
                print(aids5)
                print('-----')

        ut.assert_lists_eq(list(map(tuple, aids_list5)), list(map(tuple, aids_list1)))

        with ut.Timer('numpy'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list2 = [valid_aids.take(np.flatnonzero(valid_nids == nid)).tolist() for nid in nid_list_]

        with ut.Timer('numpy2'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list3 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]

        with ut.Timer('numpy3'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            aids_list4 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]
        assert aids_list2 == aids_list3
        assert aids_list3 == aids_list4
        assert aids_list1 == aids_list2

        valid_aids = ibs.get_valid_aids()
        %timeit ibs.db.get_all_col_rows('annotations', 'rowid')
        %timeit ibs.db.get_all_col_rows('annotations', 'name_rowid')
        %timeit ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        %timeit ibs.get_valid_aids()
        %timeit ibs.get_annot_name_rowids(ibs.get_valid_aids(), distinguish_unknowns=False)
        valid_nids1 = ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        valid_nids2 = ibs.db.get_all_col_rows('annotations', 'name_rowid')
        assert valid_nids1 == valid_nids2

    ibs.db.fname
    ibs.db.fpath

    import sqlite3

    con = sqlite3.connect(ibs.db.fpath)

    opstr = '''
    SELECT annot_rowid, name_rowid
    FROM annotations
    WHERE name_rowid IN
        (SELECT name_rowid FROM name)
        ORDER BY name_rowid ASC, annot_rowid ASC
    '''

    annot_rowid_list = con.execute(opstr).fetchall()
    aid_list = ut.get_list_column(annot_rowid_list, 0)
    nid_list = ut.get_list_column(annot_rowid_list, 1)


    # HACKY HACKY HACK

    with ut.Timer('hackquery + group'):
        #nid_list = ibs.get_valid_nids()[10:15]
        nid_list = ibs.get_valid_nids()
        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM annotations
        WHERE name_rowid IN
            (%s)
            ORDER BY name_rowid ASC, annot_rowid ASC
        ''' % (', '.join(map(str, nid_list)))
        pair_list = ibs.db.connection.execute(opstr).fetchall()
        aids = np.array(ut.get_list_column(pair_list, 0))
        nids = np.array(ut.get_list_column(pair_list, 1))
        unique_nids, groupx = vt.group_indices(nids)
        grouped_aids_ = vt.apply_grouping(aids, groupx)
        grouped_aids = [arr.tolist() for arr in grouped_aids_]

    SELECT
       name_rowid, COUNT(annot_rowid) AS number, GROUP_CONCAT(annot_rowid) AS aid_list
    FROM annotations
    WHERE name_rowid in (SELECT name_rowid FROM name)
     GROUP BY name_rowid
    ORDER BY name_rowid ASC


    import vtool as vt
    vt
    vt.aid_list[0]


    annot_rowid_list = con.execute(opstr).fetchall()
    opstr = '''
        SELECT annot_rowid
        FROM annotations
        WHERE name_rowid=?
        '''

    cur = ibs.db.connection.cursor()

    cur = con.execute('BEGIN IMMEDIATE TRANSACTION')
    cur = ibs.db.connection
    res = [cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]
    cur.execute('COMMIT TRANSACTION')

    res = [ibs.db.cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]

    """
    # FIXME: THIS FUNCTION IS VERY SLOW
    # ADD A LOCAL CACHE TO FIX THIS SPEED
    # ALSO FIX GET_IMAGE_AIDS
    # really a getter for the annotation table not the name table
    #return [[] for nid in nid_list]
    # TODO: should a query of the UNKNOWN_NAME_ROWID return anything?
    # TODO: don't even run negative aids as queries
    nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]
    USE_GROUPING_HACK = False
    if USE_GROUPING_HACK:
        # This code doesn't work because it doesn't respect empty names
        input_list, inverse_unique = np.unique(nid_list_, return_inverse=True)
        input_str = ', '.join(list(map(str, input_list)))
        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM {ANNOTATION_TABLE}
        WHERE name_rowid IN
            ({input_str})
            ORDER BY name_rowid ASC, annot_rowid ASC
        '''.format(input_str=input_str, ANNOTATION_TABLE=const.ANNOTATION_TABLE)
        pair_list = ibs.db.connection.execute(opstr).fetchall()
        aidscol = np.array(ut.get_list_column(pair_list, 0))
        nidscol = np.array(ut.get_list_column(pair_list, 1))
        unique_nids, groupx = vt.group_indices(nidscol)
        grouped_aids_ = vt.apply_grouping(aidscol, groupx)
        #aids_list = [sorted(arr.tolist()) for arr in grouped_aids_]
        structured_aids_list = [arr.tolist() for arr in grouped_aids_]
        aids_list = np.array(structured_aids_list)[inverse_unique].tolist()
    else:
        USE_NUMPY_IMPL = True
        #USE_NUMPY_IMPL = False
        # Use qt if getting one at a time otherwise perform bulk operation
        USE_NUMPY_IMPL = len(nid_list_) > 1
        #USE_NUMPY_IMPL = len(nid_list_) > 10
        if USE_NUMPY_IMPL:
            # This seems to be 30x faster for bigger inputs
            valid_aids = np.array(ibs._get_all_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            #np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))

            # MEMORY HOG LIKE A SON OF A BITCH
            # aids_list = [
            #     valid_aids.take(np.flatnonzero(
            #         np.equal(valid_nids, nid))).tolist()
            #     for nid in nid_list_
            # ]

            temp = np.zeros((len(valid_nids), ), dtype=np.bool)
            aids_dict = {}
            nid_list_unique = np.unique(nid_list_)
            for nid in nid_list_unique:
                bool_list = np.equal(valid_nids, nid, out=temp)
                flattened = np.flatnonzero(bool_list)
                # guard the unknown-name case; a plain list has no tolist()
                aid_list = [] if nid < 0 else valid_aids.take(flattened).tolist()
                aids_dict[nid] = aid_list

            aids_list = ut.dict_take(aids_dict, nid_list_)
        else:
            # SQL IMPL
            aids_list = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,),
                                   nid_list_, id_colname=NAME_ROWID,
                                   unpack_scalars=False)
    if enable_unknown_fix:
        #enable_unknown_fix == distinguish_unknowns
        # negative name rowids correspond to unknown annotations where annot_rowid = -name_rowid
        #aids_list = [None if nid is None else ([-nid] if nid < 0 else aids)
        #             for nid, aids in zip(nid_list, aids_list)]
        # Not sure if this should fail or return empty list on None nid
        aids_list = [[] if nid is None else ([-nid] if nid < 0 else aids)
                     for nid, aids in zip(nid_list, aids_list)]
        #aids_list = [[-nid] if nid < 0 else aids
        #             for nid, aids in zip(nid_list, aids_list)]
    return aids_list
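The bulk numpy branch above amounts to building a name-to-annotations lookup once and answering every query from it. A condensed sketch with hypothetical rowids, using a plain dict and numpy in place of the ibeis controller:

import numpy as np

# hypothetical annotation rowids and their name rowids
valid_aids = np.array([1, 2, 3, 4, 5])
valid_nids = np.array([7, 3, 7, 0, 3])
query_nids = [3, 7, 3]

# build the nid -> aids mapping once, then answer every query by lookup
aids_dict = {nid: valid_aids[valid_nids == nid].tolist()
             for nid in np.unique(query_nids)}
aids_list = [aids_dict[nid] for nid in query_nids]
# aids_list -> [[2, 5], [1, 3], [2, 5]]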
Example #21
def analyze(ibsmap, qreq_dict, species_dict, path_to_file_list, params):
    print('[analyze] Beginning Analyze')
    print('[analyze] Received %d file paths' % (len(path_to_file_list)))
    # decompose the filename to get the car/person to whom this image belongs
    info_tup_list = [preprocess_fpath(ibsmap, species_dict, path_to_file, params) for path_to_file in path_to_file_list]
    is_valid_list = [tup_ is not None for tup_ in info_tup_list]

    # get the ungrouped tuples that were not None
    valid_tup_list_ug = ut.filter_items(info_tup_list, is_valid_list)
    valid_path_list_ug = ut.filter_items(path_to_file_list, is_valid_list)

    # group by species
    valid_species_list_ug = ut.get_list_column(valid_tup_list_ug, 3)
    seen_species = {}
    def get_species_tmpid(txt):
        if txt in seen_species:
            return seen_species[txt]
        else:
            seen_species[txt] = len(seen_species)
            return get_species_tmpid(txt)
    species_tmpid_list = np.array([get_species_tmpid(txt) for txt in valid_species_list_ug])
    #ibs.get_species_rowids_from_text(valid_species_list_ug)
    unique_species_rowids, groupxs = vt.group_indices(np.array(species_tmpid_list))

    grouped_valid_tup_list = vt.apply_grouping(np.array(valid_tup_list_ug, dtype=object), groupxs)
    grouped_path_list = vt.apply_grouping(np.array(valid_path_list_ug, dtype=object), groupxs)

    print('[analyze] Created  %d species groups' % (len(grouped_valid_tup_list)))
    print('[analyze] grouped_valid_tup_list = ' + ut.list_str(grouped_valid_tup_list))
    print('[analyze] grouped_path_list      = ' + ut.list_str(grouped_path_list))

    assert len(grouped_valid_tup_list) == len(grouped_path_list), 'lengths must match for zip'
    for groupx, (tup, valid_path_list) in enumerate(zip(grouped_valid_tup_list, grouped_path_list)):
        car_list, person_list, animal_list, species_list, offset_list, contributor_row_id_list = zip(*tup)

        assert ut.list_allsame(species_list)

        animal = animal_list[0]
        species = species_list[0]
        ibs = ibsmap[animal]
        with ut.Indenter('[GROUP-%d-%s]' % (groupx, species)):
            assert ((animal == 'zebra' and species == species_dict['zebra']) or
                    (animal == 'giraffe' and species == species_dict['giraffe'])), 'animal/species mismatch!'
            # Add image to database
            gid_list = ibs.add_images(valid_path_list, auto_localize=False)

            reported_time_list = list(map(vt.parse_exif_unixtime, valid_path_list))
            actual_unixtime_list = [
                reported_unixtime + offset
                for reported_unixtime, offset in
                zip(reported_time_list, offset_list)
            ]
            ibs.set_image_unixtime(gid_list, actual_unixtime_list, duplicate_behavior='filter')
            ibs.set_image_contributor_rowid(gid_list, contributor_row_id_list, duplicate_behavior='filter')

            print('[analyze] starting detection for %d images and species %s...' % (len(valid_path_list), species))
            qaids_list = ibs.detect_random_forest(gid_list, species=species)
            qaid_list, reverse_list = ut.invertible_flatten2(qaids_list)
            print('\n[analyze] detected %d animals of species %s' % (len(qaid_list), species))

            # if there were no detections, don't bother
            if not qaid_list:
                continue

            # because qreq_ is persistent we need only to update the qaid_list
            qreq_ = qreq_dict[animal]  # there is a qreq_ for each species
            qaid_list_unique, unique_inverse = np.unique(qaid_list, return_inverse=True)
            qreq_.set_external_qaids(qaid_list_unique)
            qres_list_unique = ibs.query_chips(qreq_=qreq_, verbose=False)
            qres_list = ut.list_take(qres_list_unique, unique_inverse)

            # so that we can draw a new bounding box for each detection
            detection_bbox_list = ibs.get_annot_verts(qaid_list)
            detection_bboxes_list = ut.unflatten2(detection_bbox_list, reverse_list)
            qreses_list = ut.unflatten2(qres_list, reverse_list)

            with ut.Indenter('[POSTPROCESS]'):
                for _tup in zip(valid_path_list, detection_bboxes_list, qreses_list,
                                car_list, person_list, animal_list, gid_list, qaids_list):
                    postprocess_result(ibs, _tup, params)

            with ut.Indenter('[REVIEW_CHECK]'):
                for car, person in zip(car_list, person_list):
                    check_if_need_review(person, car, params)
Example #22
def get_annotmatch_rowids_from_aid(ibs, aid_list, eager=True, nInput=None, force_method=None):
    """
    Undirected version

    TODO autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid:1 --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> # setup_pzmtest_subgraph()
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> aid_list = ibs.get_valid_aids()[0:4]
        >>> eager = True
        >>> nInput = None
        >>> annotmatch_rowid_list = get_annotmatch_rowids_from_aid(ibs, aid_list,
        >>>                                                        eager, nInput)
        >>> result = ('annotmatch_rowid_list = %s' % (str(annotmatch_rowid_list),))
        >>> print(result)

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid_list = ibs.get_valid_aids()
        >>> from functools import partial
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid_list=aid_list, num_list=num_list):
        >>>    return (aid_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['combo', 'sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if nInput is None:
        nInput = len(aid_list)

    if force_method != 2 and (nInput < 256 or (force_method == 1)):
        rowids1 = ibs.get_annotmatch_rowids_from_aid1(aid_list)
        # This one is slow because aid2 is the second part of the index
        rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
        annotmatch_rowid_list = list(map(ut.flatten, zip(rowids1, rowids2)))  # NOQA
    else:
        # This is much much faster than the other methods for large queries
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids1 = np.array(ibs.get_annotmatch_aid1(all_annotmatch_rowids))
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid1, groupxs1 = vt.group_indices(aids1)
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids1_ = vt.apply_grouping(all_annotmatch_rowids, groupxs1)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids1_ = [_.tolist() for _ in rowids1_]
        rowids2_ = [_.tolist() for _ in rowids2_]
        mapping1 = dict(zip(unique_aid1, rowids1_))
        mapping2 = dict(zip(unique_aid2, rowids2_))
        mapping = ut.defaultdict(list, ut.dict_union3(mapping1, mapping2))
        annotmatch_rowid_list = ut.dict_take(mapping, aid_list)

    if False:
        # VERY SLOW
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID,)
        # FIXME: col_rowid is not correct
        params_iter = list(zip(aid_list, aid_list))
        where_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID1, _autogen_annotmatch_funcs.ANNOT_ROWID2]
        with ut.Timer('one'):
            annotmatch_rowid_list1 = ibs.db.get_where3(  # NOQA
                ibs.const.ANNOTMATCH_TABLE, colnames, params_iter, where_colnames,
                logicop='OR', eager=eager, nInput=nInput, unpack_scalars=False)
    # Ensure function output is consistent
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
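The fast branch above merges, for each annotation, the annotmatch rows where it appears as either endpoint. A toy sketch of that undirected merge with hypothetical rowids, using a plain defaultdict in place of the utool/vtool helpers:

from collections import defaultdict
import numpy as np

# hypothetical annotmatch rows: rowid, first aid, second aid
annotmatch_rowids = np.array([10, 11, 12, 13])
aids1 = np.array([1, 1, 2, 3])
aids2 = np.array([2, 3, 3, 4])

mapping = defaultdict(list)
for rowid, aid1, aid2 in zip(annotmatch_rowids, aids1, aids2):
    mapping[int(aid1)].append(int(rowid))  # rows where the aid is the first column
    mapping[int(aid2)].append(int(rowid))  # rows where the aid is the second column

aid_list = [1, 3, 5]
annotmatch_rowid_list = [sorted(mapping[aid]) for aid in aid_list]
# annotmatch_rowid_list -> [[10, 11], [11, 12, 13], []]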
Example #23
    def conditional_knn(nnindexer, qfx2_vec, num_neighbors, invalid_axs):
        """
            >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
            >>> qreq_ = ibeis.testdata_qreq_(defaultdb='seaturtles')
            >>> qreq_.load_indexer()
            >>> qfx2_vec = qreq_.ibs.get_annot_vecs(qreq_.qaids[0])
            >>> num_neighbors = 2
            >>> nnindexer = qreq_.indexer
            >>> ibs = qreq_.ibs
            >>> qaid = 1
            >>> qencid = ibs.get_annot_encounter_text([qaid])[0]
            >>> ax2_encid = np.array(ibs.get_annot_encounter_text(nnindexer.ax2_aid))
            >>> invalid_axs = np.where(ax2_encid == qencid)[0]
        """
        #import ibeis
        import itertools

        def in1d_shape(arr1, arr2):
            return np.in1d(arr1, arr2).reshape(arr1.shape)

        get_neighbors = ut.partial(nnindexer.flann.nn_index,
                                   checks=nnindexer.checks,
                                   cores=nnindexer.cores)

        # Alloc space for final results
        K = num_neighbors
        shape = (len(qfx2_vec), K)
        qfx2_idx = np.full(shape, -1, dtype=np.int32)
        qfx2_rawdist = np.full(shape, np.nan, dtype=np.float64)
        qfx2_truek = np.full(shape, -1, dtype=np.int32)

        # Make a set of temporary indexes and loop variables
        limit = None
        limit = 4
        K_ = K
        tx2_qfx = np.arange(len(qfx2_vec))
        tx2_vec = qfx2_vec
        iter_count = 0
        for iter_count in itertools.count():
            if limit is not None and iter_count >= limit:
                break
            # Find a set of neighbors
            (tx2_idx, tx2_rawdist) = get_neighbors(tx2_vec, K_)
            tx2_idx = vt.atleast_nd(tx2_idx, 2)
            tx2_rawdist = vt.atleast_nd(tx2_rawdist, 2)
            tx2_ax = nnindexer.get_nn_axs(tx2_idx)
            # Check to see if they meet the criteria
            tx2_invalid = in1d_shape(tx2_ax, invalid_axs)
            tx2_valid = np.logical_not(tx2_invalid)
            tx2_num_valid = tx2_valid.sum(axis=1)
            tx2_notdone = tx2_num_valid < K
            tx2_done = np.logical_not(tx2_notdone)

            # Move completely valid queries into the results
            if np.any(tx2_done):
                done_qfx = tx2_qfx.compress(tx2_done, axis=0)
                # Need to parse which columns are the completed ones
                done_valid_ = tx2_valid.compress(tx2_done, axis=0)
                done_rawdist_ = tx2_rawdist.compress(tx2_done, axis=0)
                done_idx_ = tx2_idx.compress(tx2_done, axis=0)
                # Get the complete valid indices
                rowxs, colxs = np.where(done_valid_)
                unique_rows, groupxs = vt.group_indices(rowxs)
                first_k_groupxs = [groupx[0:K] for groupx in groupxs]
                chosen_xs = np.hstack(first_k_groupxs)
                multi_index = (rowxs.take(chosen_xs), colxs.take(chosen_xs))
                flat_xs = np.ravel_multi_index(multi_index, done_valid_.shape)
                done_rawdist = done_rawdist_.take(flat_xs).reshape((-1, K))
                done_idx = done_idx_.take(flat_xs).reshape((-1, K))
                # Write done results in output
                qfx2_idx[done_qfx, :] = done_idx
                qfx2_rawdist[done_qfx, :] = done_rawdist
                qfx2_truek[done_qfx, :] = vt.apply_grouping(
                    colxs, first_k_groupxs)
            if np.all(tx2_done):
                break
            K_increase = (K - tx2_num_valid.min())
            K_ += K_increase
            tx2_qfx = tx2_qfx.compress(tx2_notdone, axis=0)
            tx2_vec = tx2_vec.compress(tx2_notdone, axis=0)

        if nnindexer.max_distance_sqrd is not None:
            qfx2_dist = np.divide(qfx2_rawdist, nnindexer.max_distance_sqrd)
        else:
            qfx2_dist = qfx2_rawdist
        return (qfx2_idx, qfx2_dist, iter_count)
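The per-row bookkeeping in the loop above, keeping only the first K valid neighbor columns of each finished row, can be shown in isolation. A small sketch with a hypothetical validity mask, assuming the vtool grouping helpers behave as in the other examples on this page:

import numpy as np
import vtool as vt

K = 2
# hypothetical mask: which neighbor columns are valid for each query row
done_valid_ = np.array([[True, False, True, True],
                        [True, True,  False, True]])
rowxs, colxs = np.where(done_valid_)
unique_rows, groupxs = vt.group_indices(rowxs)
first_k_groupxs = [groupx[0:K] for groupx in groupxs]
chosen_xs = np.hstack(first_k_groupxs)
# columns of the first K valid neighbors per unique row; here [[0, 2], [0, 1]]
first_k_cols = colxs.take(chosen_xs).reshape((-1, K))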
Example #24
def get_name_aids(ibs, nid_list, enable_unknown_fix=True):
    r"""
    # TODO: Rename to get_anot_rowids_from_name_rowid

    Returns:
         list: aids_list a list of list of aids in each name

    RESTful:
        Method: GET
        URL:    /api/name/aids/

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_name_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> # Map annotations to name ids
        >>> aid_list = ibs.get_valid_aids()
        >>> nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> # Get annotation ids for each name
        >>> aids_list = ibs.get_name_aids(nid_list)
        >>> # Run Assertion Test
        >>> groupid2_items = ut.group_items(aids_list, nid_list)
        >>> grouped_items = list(six.itervalues(groupid2_items))
        >>> passed_iter = map(ut.list_allsame, grouped_items)
        >>> passed_list = list(passed_iter)
        >>> assert all(passed_list), 'problem in get_name_aids'
        >>> # Print grouped items
        >>> print(ut.dict_str(groupid2_items, newlines=False))

    Ignore:
        from ibeis.control.manual_name_funcs import *  # NOQA
        import ibeis
        #ibs = ibeis.opendb('testdb1')
        #ibs = ibeis.opendb('PZ_MTEST')
        ibs = ibeis.opendb('PZ_Master0')
        #ibs = ibeis.opendb('GZ_ALL')

        nid_list = ibs.get_valid_nids()
        nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]

        with ut.Timer('sql'):
            #aids_list1 = ibs.get_name_aids(nid_list, enable_unknown_fix=False)
            aids_list1 = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,), nid_list_, id_colname=NAME_ROWID, unpack_scalars=False)

        with ut.Timer('hackquery + group'):
            opstr = '''
            SELECT annot_rowid, name_rowid
            FROM annotations
            WHERE name_rowid IN
                (%s)
                ORDER BY name_rowid ASC, annot_rowid ASC
            ''' % (', '.join(map(str, nid_list)))
            pair_list = ibs.db.connection.execute(opstr).fetchall()
            aids = np.array(ut.get_list_column(pair_list, 0))
            nids = np.array(ut.get_list_column(pair_list, 1))
            unique_nids, groupx = vt.group_indices(nids)
            grouped_aids_ = vt.apply_grouping(aids, groupx)
            aids_list5 = [sorted(arr.tolist()) for arr in grouped_aids_]

        for aids1, aids5 in zip(aids_list1, aids_list5):
            if (aids1) != (aids5):
                print(aids1)
                print(aids5)
                print('-----')

        ut.assert_lists_eq(list(map(tuple, aids_list5)), list(map(tuple, aids_list1)))

        with ut.Timer('numpy'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list2 = [valid_aids.take(np.flatnonzero(valid_nids == nid)).tolist() for nid in nid_list_]

        with ut.Timer('numpy2'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list3 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]

        with ut.Timer('numpy3'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            aids_list4 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]
        assert aids_list2 == aids_list3
        assert aids_list3 == aids_list4
        assert aids_list1 == aids_list2

        valid_aids = ibs.get_valid_aids()
        %timeit ibs.db.get_all_col_rows('annotations', 'rowid')
        %timeit ibs.db.get_all_col_rows('annotations', 'name_rowid')
        %timeit ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        %timeit ibs.get_valid_aids()
        %timeit ibs.get_annot_name_rowids(ibs.get_valid_aids(), distinguish_unknowns=False)
        valid_nids1 = ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        valid_nids2 = ibs.db.get_all_col_rows('annotations', 'name_rowid')
        assert valid_nids1 == valid_nids2

    ibs.db.fname
    ibs.db.fpath

    import sqlite3

    con = sqlite3.connect(ibs.db.fpath)

    opstr = '''
    SELECT annot_rowid, name_rowid
    FROM annotations
    WHERE name_rowid IN
        (SELECT name_rowid FROM name)
        ORDER BY name_rowid ASC, annot_rowid ASC
    '''

    annot_rowid_list = con.execute(opstr).fetchall()
    aid_list = ut.get_list_column(annot_rowid_list, 0)
    nid_list = ut.get_list_column(annot_rowid_list, 1)


    # HACKY HACKY HACK

    with ut.Timer('hackquery + group'):
        #nid_list = ibs.get_valid_nids()[10:15]
        nid_list = ibs.get_valid_nids()
        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM annotations
        WHERE name_rowid IN
            (%s)
            ORDER BY name_rowid ASC, annot_rowid ASC
        ''' % (', '.join(map(str, nid_list)))
        pair_list = ibs.db.connection.execute(opstr).fetchall()
        aids = np.array(ut.get_list_column(pair_list, 0))
        nids = np.array(ut.get_list_column(pair_list, 1))
        unique_nids, groupx = vt.group_indices(nids)
        grouped_aids_ = vt.apply_grouping(aids, groupx)
        grouped_aids = [arr.tolist() for arr in grouped_aids_]

    SELECT
       name_rowid, COUNT(annot_rowid) AS number, GROUP_CONCAT(annot_rowid) AS aid_list
    FROM annotations
    WHERE name_rowid in (SELECT name_rowid FROM name)
     GROUP BY name_rowid
    ORDER BY name_rowid ASC


    import vtool as vt
    vt
    vt.aid_list[0]


    annot_rowid_list = con.execute(opstr).fetchall()
    opstr = '''
        SELECT annot_rowid
        FROM annotations
        WHERE name_rowid=?
        '''

    cur = ibs.db.connection.cursor()

    cur = con.execute('BEGIN IMMEDIATE TRANSACTION')
    cur = ibs.db.connection
    res = [cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]
    cur.execute('COMMIT TRANSACTION')

    res = [ibs.db.cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]

    """
    # FIXME: THIS FUNCTION IS VERY SLOW
    # ADD A LOCAL CACHE TO FIX THIS SPEED
    # ALSO FIX GET_IMAGE_AIDS
    # really a getter for the annotation table not the name table
    #return [[] for nid in nid_list]
    # TODO: should a query of the UNKNOWN_NAME_ROWID return anything?
    # TODO: don't even run negative aids as queries
    nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]
    USE_GROUPING_HACK = False
    if USE_GROUPING_HACK:
        # This code doesn't work because it doesn't respect empty names
        input_list, inverse_unique = np.unique(nid_list_, return_inverse=True)
        input_str = ', '.join(list(map(str, input_list)))
        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM {ANNOTATION_TABLE}
        WHERE name_rowid IN
            ({input_str})
            ORDER BY name_rowid ASC, annot_rowid ASC
        '''.format(input_str=input_str, ANNOTATION_TABLE=const.ANNOTATION_TABLE)
        pair_list = ibs.db.connection.execute(opstr).fetchall()
        aidscol = np.array(ut.get_list_column(pair_list, 0))
        nidscol = np.array(ut.get_list_column(pair_list, 1))
        unique_nids, groupx = vt.group_indices(nidscol)
        grouped_aids_ = vt.apply_grouping(aidscol, groupx)
        #aids_list = [sorted(arr.tolist()) for arr in grouped_aids_]
        structured_aids_list = [arr.tolist() for arr in grouped_aids_]
        aids_list = np.array(structured_aids_list)[inverse_unique].tolist()
    else:
        USE_NUMPY_IMPL = True
        #USE_NUMPY_IMPL = False
        # Use qt if getting one at a time otherwise perform bulk operation
        USE_NUMPY_IMPL = len(nid_list_) > 1
        #USE_NUMPY_IMPL = len(nid_list_) > 10
        if USE_NUMPY_IMPL:
            # This seems to be 30x faster for bigger inputs
            valid_aids = np.array(ibs._get_all_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            #np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list = [
                valid_aids.take(np.flatnonzero(
                    np.equal(valid_nids, nid))).tolist()
                for nid in nid_list_
            ]
        else:
            # SQL IMPL
            aids_list = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,),
                                   nid_list_, id_colname=NAME_ROWID,
                                   unpack_scalars=False)
    if enable_unknown_fix:
        #enable_unknown_fix == distinguish_unknowns
        # negative name rowids correspond to unknown annotations where annot_rowid = -name_rowid
        #aids_list = [None if nid is None else ([-nid] if nid < 0 else aids)
        #             for nid, aids in zip(nid_list, aids_list)]
        # Not sure if this should fail or return empty list on None nid
        aids_list = [[] if nid is None else ([-nid] if nid < 0 else aids)
                     for nid, aids in zip(nid_list, aids_list)]
        #aids_list = [[-nid] if nid < 0 else aids
        #             for nid, aids in zip(nid_list, aids_list)]
    return aids_list
Example #25
def get_review_edges(cm_list, ibs=None, review_cfg={}):
    r"""
    Needs to be moved to a better file. Maybe something to do with
    identification.

    Returns a list of matches that should be inspected.
    This function is more lightweight than orgres or allres.
    Used in id_review_api and interact_qres2.

    Args:
        cm_list (list): list of chip match objects
        ranks_top (int): put all ranks less than this number into the graph
        directed (bool):

    Returns:
        tuple: review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.gui.id_review_api get_review_edges:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = qreq_.execute()
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, ibs=ibs, review_cfg=review_cfg)
        >>> print(review_edges)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=5,dsize=20')
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=qreq_.ibs)
        >>> print(review_edges)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=1,dsize=100')
        >>> review_cfg = dict(ranks_top=1, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=qreq_.ibs)
        >>> print(review_edges)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=10,dsize=10')
        >>> ranks_top = 3
        >>> review_cfg = dict(ranks_top=3, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=qreq_.ibs)
        >>> print(review_edges)
    """
    import vtool as vt
    from ibeis.algo.hots import chip_match
    automatch_kw = REVIEW_CFG_DEFAULTS.copy()
    automatch_kw = ut.update_existing(automatch_kw, review_cfg)
    print('[resorg] get_review_edges(%s)' % (ut.repr2(automatch_kw)))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack  = []
    daids_stack  = []
    ranks_stack  = []
    scores_stack = []

    # For each QueryResult, Extract inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    if len(cm_list) == 0:
        return ([], [], [], [])

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch):
            daids  = cm.get_top_aids(ntop=automatch_kw['ranks_top'])
            scores = cm.get_top_scores(ntop=automatch_kw['ranks_top'])
            ranks  = np.arange(len(daids))
            qaids  = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_top=automatch_kw['ranks_top'],
                name_scoring=automatch_kw['name_scoring'],
                ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    qaid_arr  = np.hstack(qaids_stack)
    daid_arr  = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr  = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr  = qaid_arr[sortx]
    daid_arr   = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr  = rank_arr[sortx]

    # IS_REVIEWED DOES NOT WORK
    if automatch_kw['filter_reviewed']:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(),
                                                      daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=np.bool)
        qaid_arr  = qaid_arr.compress(is_unreviewed)
        daid_arr   = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr  = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not automatch_kw['directed']:
        #nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])

        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges,
                                                           score_arr)

        qaid_arr  = qaid_arr.take(unique_rowx)
        daid_arr  = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr  = rank_arr.take(unique_rowx)

    # Filter Double Name Matches
    if automatch_kw['filter_duplicate_true_matches']:
        # filter_dup_namepairs
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not automatch_kw['directed']:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(
                directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(
                list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs, score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr  = qaid_arr.take(unique_rowx2)
        daid_arr  = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr  = rank_arr.take(unique_rowx2)

    # Filter all true matches
    if automatch_kw['filter_true_matches']:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        valid_flags = qnid_arr != dnid_arr
        qaid_arr  = qaid_arr.compress(valid_flags)
        daid_arr   = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr  = rank_arr.compress(valid_flags)

    if automatch_kw['filter_photobombs']:
        unique_aids = ut.unique(ut.flatten([qaid_arr, daid_arr]))
        #grouped_aids, unique_nids = ibs.group_annots_by_name(unique_aids)
        invalid_nid_map = get_photobomber_map(ibs, qaid_arr)

        nid2_aids = ut.group_items(unique_aids, ibs.get_annot_nids(unique_aids))

        expanded_aid_map = ut.ddict(set)
        for nid1, other_nids in invalid_nid_map.items():
            for aid1 in nid2_aids[nid1]:
                for nid2 in other_nids:
                    for aid2 in nid2_aids[nid2]:
                        expanded_aid_map[aid1].add(aid2)
                        expanded_aid_map[aid2].add(aid1)

        valid_flags = [daid not in expanded_aid_map[qaid]
                       for qaid, daid in zip(qaid_arr, daid_arr)]
        qaid_arr  = qaid_arr.compress(valid_flags)
        daid_arr   = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr  = rank_arr.compress(valid_flags)

    review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)
    return review_edges
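The duplicate-name-pair filter above keeps only the best-scoring row per (query name, database name) pair. A toy sketch of that reduction with hypothetical pair ids and scores, assuming the vtool helpers behave as elsewhere on this page:

import numpy as np
import vtool as vt

# hypothetical (qnid, dnid) pair ids and match scores, one per candidate edge
namepair_id_list = np.array([0, 1, 0, 2, 1])
score_arr        = np.array([0.4, 0.9, 0.7, 0.2, 0.1])
unique_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
score_groups = vt.apply_grouping(score_arr, namepair_groupxs)
unique_rowx = np.array(sorted(
    groupx[score_group.argmax()]
    for groupx, score_group in zip(namepair_groupxs, score_groups)
), dtype=np.int32)
# unique_rowx -> [1, 2, 3]: the best-scoring row for each name pair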
Example #26
def make_grid_coverage_mask(kpts, chipsize, weights, pxl_per_bin=4,
                            grid_steps=1, resize=False, out=None, grid_sigma=1.6):
    r"""
    Args:
        kpts (ndarray[float32_t, ndim=2]):  keypoint
        chipsize (tuple):  width, height
        weights (ndarray[float32_t, ndim=1]):
        pxl_per_bin (float):
        grid_steps (int):

    Returns:
        ndarray: weightgrid

    CommandLine:
        python -m vtool.coverage_grid --test-make_grid_coverage_mask --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.coverage_grid import *  # NOQA
        >>> import vtool as vt
        >>> # build test data
        >>> kpts, chipsize, weights = coverage_kpts.testdata_coverage('easy1.png')
        >>> pxl_per_bin = 4
        >>> grid_steps = 2
        >>> # execute function
        >>> weightgrid = make_grid_coverage_mask(kpts, chipsize, weights, pxl_per_bin, grid_steps)
        >>> # verify result
        >>> result = str(weightgrid)
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> pt.imshow(weightgrid)
        >>> ut.show_if_requested()
    """
    import vtool as vt
    coverage_gridtup = sparse_grid_coverage(
        kpts, chipsize, weights,
        pxl_per_bin=pxl_per_bin,
        grid_steps=grid_steps,
        grid_sigma=grid_sigma
    )
    gridshape = coverage_gridtup[0:2]
    neighbor_bin_weights, neighbor_bin_indices = coverage_gridtup[-2:]
    oldshape_indices = neighbor_bin_indices.shape
    newshape_indices = (np.prod(oldshape_indices[0:2]), oldshape_indices[2])
    neighbor_bin_indices =  neighbor_bin_indices.reshape(newshape_indices).T
    neighbor_bin_weights = neighbor_bin_weights.flatten()
    # Get flat indexing into gridbin
    neighbor_bin_flat_indices = np.ravel_multi_index(neighbor_bin_indices, gridshape)
    # Group by bins with weight
    unique_flatxs, grouped_flatxs = vt.group_indices(neighbor_bin_flat_indices)
    grouped_weights = vt.apply_grouping(neighbor_bin_weights, grouped_flatxs)
    # FIXME: boundary cases are not handled right because their vote is split
    # into the same bin and is fighting with itself during the max
    max_weights = list(map(np.max, grouped_weights))
    if out is None:
        weightgrid = np.zeros(gridshape)
    else:
        # outvar specified
        weightgrid = out
        weightgrid[:] = 0
    unique_rows, unique_cols = np.unravel_index(unique_flatxs, gridshape)
    weightgrid[unique_rows, unique_cols] = max_weights
    #flat_weightgrid = np.zeros(np.prod(gridshape))
    #flat_weightgrid[unique_flatxs] = max_weight
    #ut.embed()
    #weightgrid = np.reshape(flat_weightgrid, gridshape)
    if resize:
        weightgrid = cv2.resize(weightgrid, chipsize,
                                interpolation=cv2.INTER_NEAREST)
    return weightgrid
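A stripped-down sketch of the scatter-max into grid bins performed above, with hypothetical flat bin indices and weights (plain numpy plus the same vtool grouping helpers):

import numpy as np
import vtool as vt

gridshape = (3, 4)
# hypothetical flattened bin index and weight of each (keypoint, neighbor-bin) vote
flat_bins = np.array([0, 5, 5, 7, 0])
weights   = np.array([0.2, 0.9, 0.4, 0.1, 0.6])
unique_flatxs, grouped_flatxs = vt.group_indices(flat_bins)
grouped_weights = vt.apply_grouping(weights, grouped_flatxs)
weightgrid = np.zeros(gridshape)
rows, cols = np.unravel_index(unique_flatxs, gridshape)
weightgrid[rows, cols] = [w.max() for w in grouped_weights]
# each bin keeps only its strongest vote: bin 0 -> 0.6, bin 5 -> 0.9, bin 7 -> 0.1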
Example #27
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    #infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool as vt
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ')
        # Compute MAP joints
        # There is a bug here.
        #map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        #joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP

        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        #evidence_h = ut.delete_keys(evidence.copy(), ['Na'])

        joint = model.joint_distribution()
        joint.evidence_based_reduction(
            query_name_vars, evidence, inplace=True)

        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [joint.statename_dict[var][idx]
                          for var, idx in zip(given_name_vars, given_name_idx)]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]
        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]
        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names
        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array([
            g.sum() for g in vt.apply_grouping(new_vals, groupxs)
        ])
        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)

        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]

        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(sort_reduced_row_lbls, slice(len(given_name_vars), None))

        sort_reduced_row_lbls_[0]

        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_keep_order, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values, statename_dict=statename_dict)
        print(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        print(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()

        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))

        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True,
                     strvals=True)
            for lbls in sort_reduced_row_lbls_
        ]

        top_assignments = list(zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]

        # import utool
        # utool.embed()

        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        #probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()

        ## Marginalize over non-query, non-evidence
        #irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        #joint.marginalize(irrelevant_vars)
        #joint.normalize()
        #new_rows = joint._row_labels()
        #new_vals = joint.values.ravel()
        #map_vals = new_rows[new_vals.argmax()]
        #map_assign = dict(zip(joint.variables, map_vals))
        # Compute Marginalized MAP joints
        #marginalized_joints = {}
        #for ttype in interest_ttypes:
        #    other_vars = [v for v in joint_factor.scope()
        #                  if model.var2_cpd[v].ttype != ttype]
        #    marginal = joint_factor.marginalize(other_vars, inplace=False)
        #    marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
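The row-merging step above canonically relabels each assignment tuple and then sums the probabilities of rows that become identical. A toy sketch of that reduce, with hypothetical labels and values; Python's hash stands in for vt.other.compute_unique_data_ids_, and the vtool grouping contract is assumed to match the other examples here:

import numpy as np
import vtool as vt

def relabel_names(names):
    # map each distinct name to its order of first appearance
    mapping = {}
    for n in names:
        mapping.setdefault(n, len(mapping))
    return tuple(mapping[n] for n in names)

rows = [('fred', 'fred', 'sue'), ('sue', 'sue', 'fred'), ('fred', 'sue', 'sue')]
vals = np.array([0.2, 0.3, 0.4])
relabeled = list(map(relabel_names, rows))   # [(0, 0, 1), (0, 0, 1), (0, 1, 1)]
data_ids = np.array([hash(row) for row in relabeled])
unique_ids, groupxs = vt.group_indices(data_ids)
reduced_vals = np.array([g.sum() for g in vt.apply_grouping(vals, groupxs)])
# the first two rows collapse to one labeling, giving reduced values {0.5, 0.4}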
Example #28
def get_automatch_candidates(cm_list, ranks_lt=5, directed=True,
                             name_scoring=False, ibs=None, filter_reviewed=False,
                             filter_duplicate_namepair_matches=False):
    """
    THIS IS PROBABLY ONE OF THE ONLY THINGS IN THIS FILE THAT SHOULD NOT BE
    DEPRECATED

    Returns a list of matches that should be inspected.
    This function is more lightweight than orgres or allres.
    Used in inspect_gui and interact_qres2.

    Args:
        qaid2_qres (dict): mapping from query annotation id to query result object
        ranks_lt (int): put all ranks less than this number into the graph
        directed (bool):

    Returns:
        tuple: candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:2
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = ibs.query_chips(qreq_=qreq_, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = True
        >>> name_scoring = False
        >>> candidate_matches = get_automatch_candidates(cm_list, ranks_lt, directed, ibs=ibs)
        >>> print(candidate_matches)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:5]
        >>> daid_list = ibs.get_valid_aids()[0:20]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...    cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...    filter_reviewed=filter_reviewed,
        ...    filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...    ibs=ibs)
        >>> print(candidate_matches)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:1]
        >>> daid_list = ibs.get_valid_aids()[10:100]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 1
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...    cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...    filter_reviewed=filter_reviewed,
        ...    filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...    ibs=ibs)
        >>> print(candidate_matches)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:10]
        >>> daid_list = ibs.get_valid_aids()[0:10]
        >>> qres_list = ibs.query_chips(qaid_list, daid_list)
        >>> ranks_lt = 3
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...    qres_list, ranks_lt, directed, name_scoring=name_scoring,
        ...    filter_reviewed=filter_reviewed,
        ...    filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...    ibs=ibs)
        >>> print(candidate_matches)
    """
    import vtool as vt
    from ibeis.model.hots import chip_match
    print(('[resorg] get_automatch_candidates('
           'filter_reviewed={filter_reviewed},'
           'filter_duplicate_namepair_matches={filter_duplicate_namepair_matches},'
           'directed={directed},'
           'ranks_lt={ranks_lt},'
           ).format(**locals()))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack  = []
    daids_stack  = []
    ranks_stack  = []
    scores_stack = []

    # For each QueryResult, Extract inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch2):
            daids  = cm.get_top_aids(ntop=ranks_lt)
            scores = cm.get_top_scores(ntop=ranks_lt)
            ranks  = np.arange(len(daids))
            qaids  = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_lt=ranks_lt, name_scoring=name_scoring, ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    # utool.embed()
    qaid_arr  = np.hstack(qaids_stack)
    daid_arr  = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr  = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr  = qaid_arr[sortx]
    daid_arr   = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr  = rank_arr[sortx]

    if filter_reviewed:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(), daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=np.bool)
        qaid_arr  = qaid_arr.compress(is_unreviewed)
        daid_arr   = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr  = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not directed:
        #nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])

        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges, score_arr)

        qaid_arr  = qaid_arr.take(unique_rowx)
        daid_arr  = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr  = rank_arr.take(unique_rowx)

    # Filter Double Name Matches
    if filter_duplicate_namepair_matches:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not directed:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs, score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr  = qaid_arr.take(unique_rowx2)
        daid_arr  = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr  = rank_arr.take(unique_rowx2)

    candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)
    return candidate_matches