def testdata_match_kernel_L0():
    """
    Builds small deterministic test fixtures for the L0 match kernel.

    Returns:
        tuple: (core1, core2, extra)
            core1 - (smk_alpha, smk_thresh, query_sccw, daids_list, daid2_sccw)
            core2 - (qrvecs_list, drvecs_list, qmaws_list, dmaws_list, idf_list)
            extra - (qaid2_sccw, qaids_list)
    """
    from ibeis.algo.hots.smk import smk_debug
    from ibeis.algo.hots import hstypes
    # Seed so the synthetic residual vectors are reproducible across runs
    np.random.seed(0)
    smk_alpha = 3.0
    smk_thresh = 0.0
    # Number of residual vectors assigned to each word (query / database side)
    num_qrvecs_per_word = [0, 1, 3, 4, 5]
    num_drvecs_per_word = [0, 1, 2, 4, 6]
    qrvecs_list = [smk_debug.get_test_rvecs(num, dim=2)
                   for num in num_qrvecs_per_word]
    drvecs_list = [smk_debug.get_test_rvecs(num, dim=2)
                   for num in num_drvecs_per_word]
    # One fake database annotation id per database rvec; the query aid is 42
    daids_list = [list(range(len(rvecs))) for rvecs in drvecs_list]
    qaids_list = [[42] * len(rvecs) for rvecs in qrvecs_list]
    qmaws_list = [smk_debug.get_test_maws(rvecs) for rvecs in qrvecs_list]
    dmaws_list = [np.ones(rvecs.shape[0], dtype=hstypes.FLOAT_TYPE)
                  for rvecs in drvecs_list]
    # Uniform idf and database sccw keep scoring easy to reason about
    idf_list = [1.0 for _ in qrvecs_list]
    daid2_sccw = {daid: 1.0 for daid in range(10)}
    # NOTE(review): this call passes 5 args while other sccw_summation call
    # sites in this file pass 6 (with flags) — confirm the expected signature
    query_sccw = smk_scoring.sccw_summation(
        qrvecs_list, idf_list, qmaws_list, smk_alpha, smk_thresh)
    qaid2_sccw = {42: query_sccw}
    core1 = smk_alpha, smk_thresh, query_sccw, daids_list, daid2_sccw
    core2 = qrvecs_list, drvecs_list, qmaws_list, dmaws_list, idf_list
    extra = qaid2_sccw, qaids_list
    return core1, core2, extra
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf,
                           wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Deprecated implementation: computes the self-consistency-criterion weight
    (sccw / gamma in the SMK paper) for each database annotation by grouping
    residual vectors per-annotation and summing over words.

    Args:
        idx2_daid: descriptor-index to database-aid mapping (kept for
            signature compatibility; not used directly here)
        wx2_drvecs (dict): word index -> stacked database residual vectors
        wx2_aids (dict): word index -> annotation id per residual vector
        wx2_idf (dict): word index -> inverse document frequency
        wx2_dmaws (dict): word index -> multi-assign weights
        smk_alpha (float): selectivity exponent
        smk_thresh (float): selectivity threshold
        verbose (bool): enables progress output

    Returns:
        dict: daid2_sccw mapping annotation id -> sccw scalar
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    with ut.Timer('timer_orig1'):
        # FIX: dict.keys() is a view in Python 3; np.array on a view builds a
        # useless 0-d object array — materialize it as a list first
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        mark1, end1_ = ut.log_progress(
            '[smk_index.sccw] SCCW group (by present words): ',
            len(wx_sublist), freq=100, with_time=WITH_TOTALTIME)
        # Get list of aids and rvecs w.r.t. words
        aids_list = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)
        if ut.VERBOSE or verbose:
            end1_()
    # For every daid, compute its sccw using pregrouped rvecs
    # Summation over words for each aid
    if ut.VERBOSE or verbose:
        mark2, end2_ = ut.log_progress(
            '[smk_index.sccw] SCCW Sum (over daid): ',
            len(daid2_wx2_drvecs), freq=25, with_time=WITH_TOTALTIME)
    # Get lists w.r.t daids
    aid_list = list(daid2_wx2_drvecs.keys())
    # list of mappings from words to rvecs foreach daid
    # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
    _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
    _aidwxs_iter = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
    aidrvecs_list = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
    aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]
    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        sccw_list = [
            smk_scoring.sccw_summation(rvecs_list, None, idf_list, None,
                                       smk_alpha, smk_thresh)
            for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]
        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf,
                           wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Deprecated implementation: computes the self-consistency-criterion weight
    (sccw / gamma in the SMK paper) for each database annotation by grouping
    residual vectors per-annotation and summing over words.

    Args:
        idx2_daid: descriptor-index to database-aid mapping (kept for
            signature compatibility; not used directly here)
        wx2_drvecs (dict): word index -> stacked database residual vectors
        wx2_aids (dict): word index -> annotation id per residual vector
        wx2_idf (dict): word index -> inverse document frequency
        wx2_dmaws (dict): word index -> multi-assign weights
        smk_alpha (float): selectivity exponent
        smk_thresh (float): selectivity threshold
        verbose (bool): enables progress output

    Returns:
        dict: daid2_sccw mapping annotation id -> sccw scalar
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    with ut.Timer('timer_orig1'):
        # FIX: dict.keys() is a view in Python 3; np.array on a view builds a
        # useless 0-d object array — materialize it as a list first
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        # Get list of aids and rvecs w.r.t. words
        aids_list = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)
    # For every daid, compute its sccw using pregrouped rvecs
    # Summation over words for each aid
    if ut.VERBOSE or verbose:
        print('[smk_index.sccw] SCCW Sum (over daid): ')
    # Get lists w.r.t daids
    aid_list = list(daid2_wx2_drvecs.keys())
    # list of mappings from words to rvecs foreach daid
    # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
    _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
    _aidwxs_iter = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
    aidrvecs_list = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
    aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]
    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        sccw_list = [
            smk_scoring.sccw_summation(rvecs_list, None, idf_list, None,
                                       smk_alpha, smk_thresh)
            for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]
        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
def testdata_match_kernel_L0():
    """
    Creates deterministic toy data for exercising the L0 match kernel.

    Returns:
        tuple: (core1, core2, extra) — scoring parameters, per-word
        vector/weight lists, and query-side bookkeeping respectively.
    """
    from ibeis.algo.hots.smk import smk_debug
    from ibeis.algo.hots import hstypes
    # Deterministic seed for reproducible synthetic vectors
    np.random.seed(0)
    smk_alpha, smk_thresh = 3.0, 0.0
    # Residual-vector counts per word for query and database sides
    num_qrvecs_per_word = [0, 1, 3, 4, 5]
    num_drvecs_per_word = [0, 1, 2, 4, 6]
    qrvecs_list = [smk_debug.get_test_rvecs(count, dim=2)
                   for count in num_qrvecs_per_word]
    drvecs_list = [smk_debug.get_test_rvecs(count, dim=2)
                   for count in num_drvecs_per_word]
    # Fake annotation ids: sequential for database, constant 42 for query
    daids_list = [list(range(len(rvecs))) for rvecs in drvecs_list]
    qaids_list = [[42] * len(rvecs) for rvecs in qrvecs_list]
    qmaws_list = [smk_debug.get_test_maws(rvecs) for rvecs in qrvecs_list]
    dmaws_list = [np.ones(rvecs.shape[0], dtype=hstypes.FLOAT_TYPE)
                  for rvecs in drvecs_list]
    # Uniform weights simplify hand-verification of scores
    idf_list = [1.0 for _ in qrvecs_list]
    daid2_sccw = {daid: 1.0 for daid in range(10)}
    # NOTE(review): 5-arg call; other sccw_summation call sites pass 6 args
    # (including flags) — verify against smk_scoring's signature
    query_sccw = smk_scoring.sccw_summation(
        qrvecs_list, idf_list, qmaws_list, smk_alpha, smk_thresh)
    qaid2_sccw = {42: query_sccw}
    core1 = smk_alpha, smk_thresh, query_sccw, daids_list, daid2_sccw
    core2 = qrvecs_list, drvecs_list, qmaws_list, dmaws_list, idf_list
    extra = qaid2_sccw, qaids_list
    return core1, core2, extra
def new_qindex(annots_df, qaid, invindex, qparams):
    r"""
    Gets query ready for computations

    Args:
        annots_df (DataFrameProxy): pandas-like data interface
        qaid (int): query annotation id
        invindex (InvertedIndex): inverted index object
        qparams (QueryParams): query parameters object

    Returns:
        qindex: named tuple containing query information

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_repr --test-new_qindex

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, qaid, invindex, qparams = smk_debug.testdata_query_repr(db='PZ_Mothers', nWords=128000)
        >>> qindex = new_qindex(annots_df, qaid, invindex, qparams)
        >>> assert smk_debug.check_wx2_rvecs(qindex.wx2_qrvecs), 'has nan'
        >>> smk_debug.invindex_dbgstr(invindex)

    Ignore::
        idx2_vec = qfx2_vec
        idx2_aid = qfx2_aid
        idx2_fx = qfx2_qfx
        wx2_idxs = _wx2_qfxs
        wx2_maws = _wx2_maws

        from ibeis.algo.hots.smk import smk_repr
        import utool as ut
        ut.rrrr()
        print(ut.make_default_docstr(smk_repr.new_qindex))
    """
    # TODO: Precompute and lookup residuals and assignments
    if not ut.QUIET:
        print('[smk_repr] Query Repr qaid=%r' % (qaid,))
    # Multi-assignment parameters
    nAssign = qparams.nAssign
    massign_alpha = qparams.massign_alpha
    massign_sigma = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    # SMK scoring parameters
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    # Inverted index data
    wx2_idf = invindex.wx2_idf
    words = invindex.words
    wordflann = invindex.wordflann
    #qfx2_vec = annots_df['vecs'][qaid]
    # TODO: remove all mention of annot_df and ensure that qparams is passed corectly to config2_
    qfx2_vec = annots_df.ibs.get_annot_vecs(qaid, config2_=qparams)
    #-------------------
    # Assign query to (multiple) words
    #-------------------
    _wx2_qfxs, _wx2_maws, qfx2_wxs = smk_index.assign_to_words_(
        wordflann, words, qfx2_vec, nAssign, massign_alpha,
        massign_sigma, massign_equal_weights)
    # Hack to make implementing asmk easier, very redundant
    qfx2_aid = np.array([qaid] * len(qfx2_wxs), dtype=hstypes.INTEGER_TYPE)
    qfx2_qfx = np.arange(len(qfx2_vec))
    #-------------------
    # Compute query residuals
    #-------------------
    wx2_qrvecs, wx2_qaids, wx2_qfxs, wx2_maws, wx2_qflags = smk_index.compute_residuals_(
        words, _wx2_qfxs, _wx2_maws, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    # each value in wx2_ dicts is a list with len equal to the number of rvecs
    if ut.VERBOSE:
        print('[smk_repr] Query SCCW smk_alpha=%r, smk_thresh=%r' % (smk_alpha, smk_thresh))
    #-------------------
    # Compute query sccw
    #-------------------
    # FIX: dict.keys() is a view in Python 3 — np.array needs a real sequence
    wx_sublist = np.array(list(wx2_qrvecs.keys()), dtype=hstypes.INDEX_TYPE)
    idf_list = [wx2_idf[wx] for wx in wx_sublist]
    rvecs_list = [wx2_qrvecs[wx] for wx in wx_sublist]
    maws_list = [wx2_maws[wx] for wx in wx_sublist]
    flags_list = [wx2_qflags[wx] for wx in wx_sublist]
    query_sccw = smk_scoring.sccw_summation(
        rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
    try:
        assert query_sccw > 0, 'query_sccw=%r is not positive!' % (query_sccw,)
    except Exception as ex:
        ut.printex(ex)
        raise
    #-------------------
    # Build query representation class/tuple
    #-------------------
    if DEBUG_SMK:
        from ibeis.algo.hots.smk import smk_debug
        qfx2_vec = annots_df['vecs'][qaid]
        assert smk_debug.check_wx2_rvecs2(
            invindex, wx2_qrvecs, wx2_qfxs, qfx2_vec), 'bad qindex'
    qindex = QueryIndex(wx2_qrvecs, wx2_qflags, wx2_maws, wx2_qaids,
                        wx2_qfxs, query_sccw)
    return qindex
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency critiron weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid ():
        wx2_drvecs ():
        wx2_aids ():
        wx2_idf ():
        wx2_dmaws ():
        smk_alpha ():
        smk_thresh ():

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """
    #for wx in wx_sublist:
    #    print(len(wx2_dmaws
    verbose_ = ut.VERBOSE or verbose
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        mark1, end1_ = ut.log_progress(
            '[smk_index.sccw] SCCW group (by present words): ',
            len(wx2_drvecs), freq=100, with_time=WITH_TOTALTIME)

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws
    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids)
                   for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relavent data for sccw measure by word for each annotation grouping
    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws = _vector_subgroup_by_wx(wx2_dmaws, wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws  = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags = iter(lambda: None, 1)
    subgrouped_idfs = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        end1_()
        mark2, end2_ = ut.log_progress(
            lbl='[smk_index.sccw] SCCW Sum (over daid): ',
            total=len(unique_aids), freq=100, with_time=WITH_TOTALTIME)
        progiter = ut.ProgressIter(
            lbl='[smk_index.sccw] SCCW Sum (over daid): ',
            total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list,
                                   maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags,
                     subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency critiron weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid ():
        wx2_drvecs ():
        wx2_aids ():
        wx2_idf ():
        wx2_dmaws ():
        smk_alpha ():
        smk_thresh ():

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """
    #for wx in wx_sublist:
    #    print(len(wx2_dmaws
    verbose_ = ut.VERBOSE or verbose
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws
    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids)
                   for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relavent data for sccw measure by word for each annotation grouping
    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws = _vector_subgroup_by_wx(wx2_dmaws, wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws  = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags = iter(lambda: None, 1)
    subgrouped_idfs = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        progiter = ut.ProgressIter(
            lbl='[smk_index.sccw] SCCW Sum (over daid): ',
            total=len(unique_aids), freq=10, with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list,
                                   maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags,
                     subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
def new_qindex(annots_df, qaid, invindex, qparams):
    r"""
    Gets query ready for computations

    Args:
        annots_df (DataFrameProxy): pandas-like data interface
        qaid (int): query annotation id
        invindex (InvertedIndex): inverted index object
        qparams (QueryParams): query parameters object

    Returns:
        qindex: named tuple containing query information

    CommandLine:
        python -m ibeis.algo.hots.smk.smk_repr --test-new_qindex

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, qaid, invindex, qparams = smk_debug.testdata_query_repr(db='PZ_Mothers', nWords=128000)
        >>> qindex = new_qindex(annots_df, qaid, invindex, qparams)
        >>> assert smk_debug.check_wx2_rvecs(qindex.wx2_qrvecs), 'has nan'
        >>> smk_debug.invindex_dbgstr(invindex)

    Ignore::
        idx2_vec = qfx2_vec
        idx2_aid = qfx2_aid
        idx2_fx = qfx2_qfx
        wx2_idxs = _wx2_qfxs
        wx2_maws = _wx2_maws

        from ibeis.algo.hots.smk import smk_repr
        import utool as ut
        ut.rrrr()
        print(ut.make_default_docstr(smk_repr.new_qindex))
    """
    # TODO: Precompute and lookup residuals and assignments
    if not ut.QUIET:
        print('[smk_repr] Query Repr qaid=%r' % (qaid,))
    # Multi-assignment parameters
    nAssign = qparams.nAssign
    massign_alpha = qparams.massign_alpha
    massign_sigma = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    # SMK scoring parameters
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    # Inverted index data
    wx2_idf = invindex.wx2_idf
    words = invindex.words
    wordflann = invindex.wordflann
    #qfx2_vec = annots_df['vecs'][qaid]
    # TODO: remove all mention of annot_df and ensure that qparams is passed corectly to config2_
    qfx2_vec = annots_df.ibs.get_annot_vecs(qaid, config2_=qparams)
    #-------------------
    # Assign query to (multiple) words
    #-------------------
    _wx2_qfxs, _wx2_maws, qfx2_wxs = smk_index.assign_to_words_(
        wordflann, words, qfx2_vec, nAssign, massign_alpha,
        massign_sigma, massign_equal_weights)
    # Hack to make implementing asmk easier, very redundant
    qfx2_aid = np.array([qaid] * len(qfx2_wxs), dtype=hstypes.INTEGER_TYPE)
    qfx2_qfx = np.arange(len(qfx2_vec))
    #-------------------
    # Compute query residuals
    #-------------------
    wx2_qrvecs, wx2_qaids, wx2_qfxs, wx2_maws, wx2_qflags = smk_index.compute_residuals_(
        words, _wx2_qfxs, _wx2_maws, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    # each value in wx2_ dicts is a list with len equal to the number of rvecs
    if ut.VERBOSE:
        print('[smk_repr] Query SCCW smk_alpha=%r, smk_thresh=%r' % (smk_alpha, smk_thresh))
    #-------------------
    # Compute query sccw
    #-------------------
    # FIX: dict.keys() is a view in Python 3 — np.array needs a real sequence
    wx_sublist = np.array(list(wx2_qrvecs.keys()), dtype=hstypes.INDEX_TYPE)
    idf_list = [wx2_idf[wx] for wx in wx_sublist]
    rvecs_list = [wx2_qrvecs[wx] for wx in wx_sublist]
    maws_list = [wx2_maws[wx] for wx in wx_sublist]
    flags_list = [wx2_qflags[wx] for wx in wx_sublist]
    query_sccw = smk_scoring.sccw_summation(
        rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
    try:
        assert query_sccw > 0, 'query_sccw=%r is not positive!' % (query_sccw,)
    except Exception as ex:
        ut.printex(ex)
        raise
    #-------------------
    # Build query representation class/tuple
    #-------------------
    if DEBUG_SMK:
        from ibeis.algo.hots.smk import smk_debug
        qfx2_vec = annots_df['vecs'][qaid]
        assert smk_debug.check_wx2_rvecs2(
            invindex, wx2_qrvecs, wx2_qfxs, qfx2_vec), 'bad qindex'
    qindex = QueryIndex(wx2_qrvecs, wx2_qflags, wx2_maws, wx2_qaids,
                        wx2_qfxs, query_sccw)
    return qindex