Exemplo n.º 1
0
def testdata_match_kernel_L0():
    """
    Builds small synthetic inputs for testing the L0 SMK match kernel.

    Returns:
        tuple: (core1, core2, extra) where
            core1 = (smk_alpha, smk_thresh, query_sccw, daids_list, daid2_sccw)
            core2 = (qrvecs_list, drvecs_list, qmaws_list, dmaws_list, idf_list)
            extra = (qaid2_sccw, qaids_list)
    """
    from ibeis.algo.hots.smk import smk_debug
    from ibeis.algo.hots import hstypes
    # Seed so the synthetic residual vectors are deterministic
    np.random.seed(0)
    smk_alpha = 3.0
    smk_thresh = 0.0
    # Number of residual vectors assigned to each word (query / database)
    num_qrvecs_per_word = [0, 1, 3, 4, 5]
    num_drvecs_per_word = [0, 1, 2, 4, 6]
    qrvecs_list = [
        smk_debug.get_test_rvecs(n, dim=2) for n in num_qrvecs_per_word
    ]
    drvecs_list = [
        smk_debug.get_test_rvecs(n, dim=2) for n in num_drvecs_per_word
    ]
    daids_list = [list(range(len(rvecs))) for rvecs in drvecs_list]
    qaids_list = [[42] * len(rvecs) for rvecs in qrvecs_list]
    qmaws_list = [smk_debug.get_test_maws(rvecs) for rvecs in qrvecs_list]
    dmaws_list = [
        np.ones(rvecs.shape[0], dtype=hstypes.FLOAT_TYPE)
        for rvecs in drvecs_list
    ]
    idf_list = [1.0 for _ in qrvecs_list]
    daid2_sccw = {daid: 1.0 for daid in range(10)}
    # FIXED: pass flags_list=None so the positional arguments line up with
    # the sccw_summation(rvecs, flags, idfs, maws, alpha, thresh) signature
    # used by every other call site in this module.
    query_sccw = smk_scoring.sccw_summation(qrvecs_list, None, idf_list,
                                            qmaws_list, smk_alpha, smk_thresh)
    qaid2_sccw = {42: query_sccw}
    core1 = smk_alpha, smk_thresh, query_sccw, daids_list, daid2_sccw
    core2 = qrvecs_list, drvecs_list, qmaws_list, dmaws_list, idf_list
    extra = qaid2_sccw, qaids_list
    return core1, core2, extra
Exemplo n.º 2
0
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    Legacy implementation: computes the sccw (self consistency criterion
    weight) normalization scalar for every database annotation.

    Args:
        idx2_daid: flat descriptor index -> database annot id
            (NOTE(review): unused here; kept for interface compatibility)
        wx2_drvecs (dict): word index -> stacked database residual vectors
        wx2_aids (dict): word index -> annot id per residual vector
        wx2_idf (dict): word index -> inverse document frequency scalar
        wx2_dmaws (dict): word index -> multi-assign weights
        smk_alpha (float): selectivity power
        smk_thresh (float): selectivity threshold
        verbose (bool): enables progress reporting

    Returns:
        dict: daid2_sccw mapping database annot id -> sccw scalar
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        # FIXED: wrap keys() in list() -- on Python 3 np.array(dict.keys())
        # produces a 0-d object array (the dict view is not a sequence),
        # matching the fix already present in the newer compute_data_sccw_.
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
            mark1, end1_ = ut.log_progress(
                '[smk_index.sccw] SCCW group (by present words): ', len(wx_sublist),
                freq=100, with_time=WITH_TOTALTIME)
        # Get list of aids and rvecs w.r.t. words
        aids_list   = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list   = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        if ut.VERBOSE or verbose:
            end1_()

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if ut.VERBOSE or verbose:
            mark2, end2_ = ut.log_progress(
                '[smk_index.sccw] SCCW Sum (over daid): ', len(daid2_wx2_drvecs),
                freq=25, with_time=WITH_TOTALTIME)
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter   = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        # NOTE: _aidwxs_iter is a one-shot generator; fully consumed here
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        # flags_list and maws_list are None: database side uses hard assignment
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
Exemplo n.º 3
0
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    Legacy implementation: computes the sccw (self consistency criterion
    weight) normalization scalar for every database annotation.

    Args:
        idx2_daid: flat descriptor index -> database annot id
            (NOTE(review): unused here; kept for interface compatibility)
        wx2_drvecs (dict): word index -> stacked database residual vectors
        wx2_aids (dict): word index -> annot id per residual vector
        wx2_idf (dict): word index -> inverse document frequency scalar
        wx2_dmaws (dict): word index -> multi-assign weights
        smk_alpha (float): selectivity power
        smk_thresh (float): selectivity threshold
        verbose (bool): enables progress reporting

    Returns:
        dict: daid2_sccw mapping database annot id -> sccw scalar
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        # FIXED: wrap keys() in list() -- on Python 3 np.array(dict.keys())
        # produces a 0-d object array (the dict view is not a sequence),
        # matching the fix already present in the newer compute_data_sccw_.
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        # Get list of aids and rvecs w.r.t. words
        aids_list   = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list   = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] SCCW Sum (over daid): ')
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter   = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        # NOTE: _aidwxs_iter is a one-shot generator; fully consumed here
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        # flags_list and maws_list are None: database side uses hard assignment
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
Exemplo n.º 4
0
def testdata_match_kernel_L0():
    """
    Builds small synthetic inputs for testing the L0 SMK match kernel.

    Returns:
        tuple: (core1, core2, extra) where
            core1 = (smk_alpha, smk_thresh, query_sccw, daids_list, daid2_sccw)
            core2 = (qrvecs_list, drvecs_list, qmaws_list, dmaws_list, idf_list)
            extra = (qaid2_sccw, qaids_list)
    """
    from ibeis.algo.hots.smk import smk_debug
    from ibeis.algo.hots import hstypes
    # Seed so the synthetic residual vectors are deterministic
    np.random.seed(0)
    smk_alpha = 3.0
    smk_thresh = 0.0
    # Number of residual vectors assigned to each word (query / database)
    num_qrvecs_per_word = [0, 1, 3, 4, 5]
    num_drvecs_per_word = [0, 1, 2, 4, 6]
    qrvecs_list = [smk_debug.get_test_rvecs(n, dim=2) for n in num_qrvecs_per_word]
    drvecs_list = [smk_debug.get_test_rvecs(n, dim=2) for n in num_drvecs_per_word]
    daids_list  = [list(range(len(rvecs))) for rvecs in drvecs_list]
    qaids_list  = [[42] * len(rvecs) for rvecs in qrvecs_list]
    qmaws_list  = [smk_debug.get_test_maws(rvecs) for rvecs in qrvecs_list]
    dmaws_list  = [np.ones(rvecs.shape[0], dtype=hstypes.FLOAT_TYPE) for rvecs in drvecs_list]
    idf_list = [1.0 for _ in qrvecs_list]
    daid2_sccw  = {daid: 1.0 for daid in range(10)}
    # FIXED: pass flags_list=None so the positional arguments line up with
    # the sccw_summation(rvecs, flags, idfs, maws, alpha, thresh) signature
    # used by every other call site in this module.
    query_sccw = smk_scoring.sccw_summation(qrvecs_list, None, idf_list, qmaws_list, smk_alpha, smk_thresh)
    qaid2_sccw  = {42: query_sccw}
    core1 = smk_alpha, smk_thresh, query_sccw, daids_list, daid2_sccw
    core2 = qrvecs_list, drvecs_list, qmaws_list, dmaws_list, idf_list
    extra = qaid2_sccw, qaids_list
    return core1, core2, extra
Exemplo n.º 5
0
def new_qindex(annots_df, qaid, invindex, qparams):
    r"""
    Gets the query ready for computations

    Args:
        annots_df (DataFrameProxy): pandas-like data interface
        qaid (int): query annotation id
        invindex (InvertedIndex): inverted index object
        qparams (QueryParams): query parameters object

    Returns:
        qindex: named tuple containing query information

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_repr --test-new_qindex

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, qaid, invindex, qparams = smk_debug.testdata_query_repr(db='PZ_Mothers', nWords=128000)
        >>> qindex = new_qindex(annots_df, qaid, invindex, qparams)
        >>> assert smk_debug.check_wx2_rvecs(qindex.wx2_qrvecs), 'has nan'
        >>> smk_debug.invindex_dbgstr(invindex)

    Ignore::
        idx2_vec = qfx2_vec
        idx2_aid = qfx2_aid
        idx2_fx  = qfx2_qfx
        wx2_idxs = _wx2_qfxs
        wx2_maws = _wx2_maws
        from ibeis.algo.hots.smk import smk_repr
        import utool as ut
        ut.rrrr()
        print(ut.make_default_docstr(smk_repr.new_qindex))
    """
    # TODO: Precompute and lookup residuals and assignments
    if not ut.QUIET:
        print('[smk_repr] Query Repr qaid=%r' % (qaid,))
    # Multi-assignment parameters
    nAssign               = qparams.nAssign
    massign_alpha         = qparams.massign_alpha
    massign_sigma         = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    # Scoring parameters
    aggregate             = qparams.aggregate
    smk_alpha             = qparams.smk_alpha
    smk_thresh            = qparams.smk_thresh
    # Inverted index data
    wx2_idf   = invindex.wx2_idf
    words     = invindex.words
    wordflann = invindex.wordflann
    #qfx2_vec  = annots_df['vecs'][qaid]
    # TODO: remove all mention of annot_df and ensure that qparams is passed correctly to config2_
    qfx2_vec  = annots_df.ibs.get_annot_vecs(qaid, config2_=qparams)
    #-------------------
    # Assign query to (multiple) words
    #-------------------
    _wx2_qfxs, _wx2_maws, qfx2_wxs = smk_index.assign_to_words_(
        wordflann, words, qfx2_vec, nAssign, massign_alpha,
        massign_sigma, massign_equal_weights)
    # Hack to make implementing asmk easier, very redundant
    qfx2_aid = np.array([qaid] * len(qfx2_wxs), dtype=hstypes.INTEGER_TYPE)
    qfx2_qfx = np.arange(len(qfx2_vec))
    #-------------------
    # Compute query residuals
    #-------------------
    wx2_qrvecs, wx2_qaids, wx2_qfxs, wx2_maws, wx2_qflags = smk_index.compute_residuals_(
        words, _wx2_qfxs, _wx2_maws, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    # each value in wx2_ dicts is a list with len equal to the number of rvecs
    if ut.VERBOSE:
        print('[smk_repr] Query SCCW smk_alpha=%r, smk_thresh=%r' % (smk_alpha, smk_thresh))
    #-------------------
    # Compute query sccw
    #-------------------
    # FIXED: wrap keys() in list() -- on Python 3 np.array(dict.keys(), ...)
    # fails because the dict view is not a sequence.
    wx_sublist  = np.array(list(wx2_qrvecs.keys()), dtype=hstypes.INDEX_TYPE)
    idf_list    = [wx2_idf[wx]    for wx in wx_sublist]
    rvecs_list  = [wx2_qrvecs[wx] for wx in wx_sublist]
    maws_list   = [wx2_maws[wx]   for wx in wx_sublist]
    flags_list  = [wx2_qflags[wx] for wx in wx_sublist]
    query_sccw = smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
    try:
        assert query_sccw > 0, 'query_sccw=%r is not positive!' % (query_sccw,)
    except Exception as ex:
        ut.printex(ex)
        raise
    #-------------------
    # Build query representation class/tuple
    #-------------------
    if DEBUG_SMK:
        from ibeis.algo.hots.smk import smk_debug
        qfx2_vec = annots_df['vecs'][qaid]
        assert smk_debug.check_wx2_rvecs2(
            invindex, wx2_qrvecs, wx2_qfxs, qfx2_vec), 'bad qindex'

    qindex = QueryIndex(wx2_qrvecs, wx2_qflags, wx2_maws, wx2_qaids, wx2_qfxs, query_sccw)
    return qindex
Exemplo n.º 6
0
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency critiron weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid (): flat descriptor index -> database annot id
            (NOTE(review): not referenced in this body; kept for interface
            compatibility with callers)
        wx2_drvecs (): word index -> stacked database residual vectors
        wx2_dflags (): word index -> per-rvec flags, same row grouping as
            wx2_drvecs
        wx2_aids (): word index -> annot id per residual vector row
        wx2_idf (): word index -> inverse document frequency scalar
        wx2_dmaws (): word index -> multi-assign weights per residual vector
        smk_alpha (): selectivity power
        smk_thresh (): selectivity threshold

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """

    #for wx in wx_sublist:
    #    print(len(wx2_dmaws

    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        mark1, end1_ = ut.log_progress(
            '[smk_index.sccw] SCCW group (by present words): ', len(wx2_drvecs),
            freq=100, with_time=WITH_TOTALTIME)

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword  = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws

    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    # tx = row position of an item inside its word's stacked array
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs  = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs  = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    # one-shot generator; consumed by the zip building txs_perword_perannot
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relavent data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        # Pick out the rows (txs) of each word's stacked array that belong
        # to a given annotation, per annotation
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        # Look up one scalar per word, per annotation
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws  = _vector_subgroup_by_wx(wx2_dmaws,  wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws  = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags  = iter(lambda: None, 1)
    subgrouped_idfs   = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        end1_()
        # NOTE(review): both log_progress and ProgressIter are created here;
        # only progiter wraps the loop, end2_ just closes the log marker
        mark2, end2_ = ut.log_progress(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                        total=len(unique_aids), freq=100, with_time=WITH_TOTALTIME)
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        # identity acts as a no-op wrapper when progress output is disabled
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    # One sccw scalar per annotation, summed over its words
    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
Exemplo n.º 7
0
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency critiron weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid (): flat descriptor index -> database annot id
            (NOTE(review): not referenced in this body; kept for interface
            compatibility with callers)
        wx2_drvecs (): word index -> stacked database residual vectors
        wx2_dflags (): word index -> per-rvec flags, same row grouping as
            wx2_drvecs
        wx2_aids (): word index -> annot id per residual vector row
        wx2_idf (): word index -> inverse document frequency scalar
        wx2_dmaws (): word index -> multi-assign weights per residual vector
        smk_alpha (): selectivity power
        smk_thresh (): selectivity threshold

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """

    #for wx in wx_sublist:
    #    print(len(wx2_dmaws

    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword  = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws

    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    # tx = row position of an item inside its word's stacked array
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs  = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs  = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    # one-shot generator; consumed by the zip building txs_perword_perannot
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relavent data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        # Pick out the rows (txs) of each word's stacked array that belong
        # to a given annotation, per annotation
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        # Look up one scalar per word, per annotation
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws  = _vector_subgroup_by_wx(wx2_dmaws,  wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws  = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags  = iter(lambda: None, 1)
    subgrouped_idfs   = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        # identity acts as a no-op wrapper when progress output is disabled
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    # One sccw scalar per annotation, summed over its words
    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
Exemplo n.º 8
0
def new_qindex(annots_df, qaid, invindex, qparams):
    r"""
    Gets the query ready for computations

    Args:
        annots_df (DataFrameProxy): pandas-like data interface
        qaid (int): query annotation id
        invindex (InvertedIndex): inverted index object
        qparams (QueryParams): query parameters object

    Returns:
        qindex: named tuple containing query information

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_repr --test-new_qindex

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, qaid, invindex, qparams = smk_debug.testdata_query_repr(db='PZ_Mothers', nWords=128000)
        >>> qindex = new_qindex(annots_df, qaid, invindex, qparams)
        >>> assert smk_debug.check_wx2_rvecs(qindex.wx2_qrvecs), 'has nan'
        >>> smk_debug.invindex_dbgstr(invindex)

    Ignore::
        idx2_vec = qfx2_vec
        idx2_aid = qfx2_aid
        idx2_fx  = qfx2_qfx
        wx2_idxs = _wx2_qfxs
        wx2_maws = _wx2_maws
        from ibeis.algo.hots.smk import smk_repr
        import utool as ut
        ut.rrrr()
        print(ut.make_default_docstr(smk_repr.new_qindex))
    """
    # TODO: Precompute and lookup residuals and assignments
    if not ut.QUIET:
        print('[smk_repr] Query Repr qaid=%r' % (qaid, ))
    # Multi-assignment parameters
    nAssign = qparams.nAssign
    massign_alpha = qparams.massign_alpha
    massign_sigma = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    # Scoring parameters
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    # Inverted index data
    wx2_idf = invindex.wx2_idf
    words = invindex.words
    wordflann = invindex.wordflann
    #qfx2_vec  = annots_df['vecs'][qaid]
    # TODO: remove all mention of annot_df and ensure that qparams is passed correctly to config2_
    qfx2_vec = annots_df.ibs.get_annot_vecs(qaid, config2_=qparams)
    #-------------------
    # Assign query to (multiple) words
    #-------------------
    _wx2_qfxs, _wx2_maws, qfx2_wxs = smk_index.assign_to_words_(
        wordflann, words, qfx2_vec, nAssign, massign_alpha, massign_sigma,
        massign_equal_weights)
    # Hack to make implementing asmk easier, very redundant
    qfx2_aid = np.array([qaid] * len(qfx2_wxs), dtype=hstypes.INTEGER_TYPE)
    qfx2_qfx = np.arange(len(qfx2_vec))
    #-------------------
    # Compute query residuals
    #-------------------
    wx2_qrvecs, wx2_qaids, wx2_qfxs, wx2_maws, wx2_qflags = smk_index.compute_residuals_(
        words, _wx2_qfxs, _wx2_maws, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    # each value in wx2_ dicts is a list with len equal to the number of rvecs
    if ut.VERBOSE:
        print('[smk_repr] Query SCCW smk_alpha=%r, smk_thresh=%r' %
              (smk_alpha, smk_thresh))
    #-------------------
    # Compute query sccw
    #-------------------
    # FIXED: wrap keys() in list() -- on Python 3 np.array(dict.keys(), ...)
    # fails because the dict view is not a sequence.
    wx_sublist = np.array(list(wx2_qrvecs.keys()), dtype=hstypes.INDEX_TYPE)
    idf_list = [wx2_idf[wx] for wx in wx_sublist]
    rvecs_list = [wx2_qrvecs[wx] for wx in wx_sublist]
    maws_list = [wx2_maws[wx] for wx in wx_sublist]
    flags_list = [wx2_qflags[wx] for wx in wx_sublist]
    query_sccw = smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list,
                                            maws_list, smk_alpha, smk_thresh)
    try:
        assert query_sccw > 0, 'query_sccw=%r is not positive!' % (
            query_sccw, )
    except Exception as ex:
        ut.printex(ex)
        raise
    #-------------------
    # Build query representation class/tuple
    #-------------------
    if DEBUG_SMK:
        from ibeis.algo.hots.smk import smk_debug
        qfx2_vec = annots_df['vecs'][qaid]
        assert smk_debug.check_wx2_rvecs2(invindex, wx2_qrvecs, wx2_qfxs,
                                          qfx2_vec), 'bad qindex'

    qindex = QueryIndex(wx2_qrvecs, wx2_qflags, wx2_maws, wx2_qaids, wx2_qfxs,
                        query_sccw)
    return qindex