def testdata_raw_internals1_5(**kwargs):
    """
    Test fixture containing internal SMK data up to the idf weights.

    Extends ``testdata_raw_internals1`` by computing the per-word
    inverse-document-frequency weights and attaching them to the
    inverted index before returning the test-data tuple.

    Example:
        >>> from ibeis.algo.hots.smk.smk_debug import *  # NOQA
    """
    from ibeis.algo.hots.smk import smk_debug
    internals = smk_debug.testdata_raw_internals1(**kwargs)
    (ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams) = internals
    print('[smk_debug] testdata_raw_internals1_5')
    # Word indexes form a contiguous series over the vocabulary
    wx_series = np.arange(len(invindex.words))
    # Compute idf weights from database word assignments and attach in place
    invindex.wx2_idf = smk_index.compute_word_idf_(
        wx_series, wx2_idxs, invindex.idx2_daid, daids)
    return ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams
def testdata_raw_internals1_5(**kwargs):
    """
    Test fixture containing internal SMK data up to the idf weights.

    Builds on ``testdata_raw_internals1``: computes the per-word
    inverse-document-frequency weights and stores them on the inverted
    index (``invindex.wx2_idf``) before returning the test-data tuple.

    Example:
        >>> from ibeis.algo.hots.smk.smk_debug import *  # NOQA
    """
    from ibeis.algo.hots.smk import smk_debug
    # Base fixture: inverted index plus database word assignments
    ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1(
        **kwargs)
    print('[smk_debug] testdata_raw_internals1_5')
    words = invindex.words
    # Word indexes are a contiguous range over the vocabulary
    wx_series = np.arange(len(words))
    idx2_aid = invindex.idx2_daid
    # Per-word idf weights computed from the database word assignments
    wx2_idf = smk_index.compute_word_idf_(wx_series, wx2_idxs, idx2_aid, daids)
    # Attach to the inverted index in place
    invindex.wx2_idf = wx2_idf
    return ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams
def compute_data_internals_(invindex, qparams, memtrack=None, delete_rawvecs=True): """ Builds each of the inverted index internals. invindex (InvertedIndex): object for fast vocab lookup qparams (QueryParams): hyper-parameters memtrack (None): delete_rawvecs (bool): Returns: None Example: >>> from ibeis.algo.hots.smk.smk_repr import * # NOQA >>> from ibeis.algo.hots.smk import smk_debug >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0() >>> compute_data_internals_(invindex, qreq_.qparams) Ignore: idx2_vec = idx2_dvec wx2_maws = _wx2_maws # NOQA """ # Get information #if memtrack is None: # memtrack = ut.MemoryTracker('[DATA INTERNALS ENTRY]') #memtrack.report('[DATA INTERNALS1]') # aggregate = qparams.aggregate smk_alpha = qparams.smk_alpha smk_thresh = qparams.smk_thresh # massign_alpha = qparams.massign_alpha massign_sigma = qparams.massign_sigma massign_equal_weights = qparams.massign_equal_weights # vocab_weighting = qparams.vocab_weighting # nAssign = 1 # single assignment for database side idx2_vec = invindex.idx2_dvec idx2_dfx = invindex.idx2_dfx idx2_daid = invindex.idx2_daid daids = invindex.daids wordflann = invindex.wordflann words = invindex.words daid2_label = invindex.daid2_label wx_series = np.arange(len(words)) #memtrack.track_obj(idx2_vec, 'idx2_vec') if not ut.QUIET: print('[smk_repr] compute_data_internals_') if ut.VERBOSE: print('[smk_repr] * len(daids) = %r' % (len(daids),)) print('[smk_repr] * len(words) = %r' % (len(words),)) print('[smk_repr] * len(idx2_vec) = %r' % (len(idx2_vec),)) print('[smk_repr] * aggregate = %r' % (aggregate,)) print('[smk_repr] * smk_alpha = %r' % (smk_alpha,)) print('[smk_repr] * smk_thresh = %r' % (smk_thresh,)) # Try to use the cache #cfgstr = ut.hashstr_arr(words, 'words') + qparams.feat_cfgstr #cachekw = dict( #cfgstr=cfgstr, #appname='smk_test' #) #invindex_cache = ut.Cacher('inverted_index', **cachekw) #try: # raise IOError('cache is off') # #cachetup = 
invindex_cache.load() # #(idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_maws, daid2_sccw) = cachetup # invindex.idx2_dvec = None #except IOError as ex: # Database word assignments (perform single assignment on database side) wx2_idxs, _wx2_maws, idx2_wxs = smk_index.assign_to_words_( wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma, massign_equal_weights) if ut.DEBUG2: assert len(idx2_wxs) == len(idx2_vec) assert len(wx2_idxs.keys()) == len(_wx2_maws.keys()) assert len(wx2_idxs.keys()) <= len(words) try: assert len(wx2_idxs.keys()) == len(words) except AssertionError as ex: ut.printex(ex, iswarning=True) # Database word inverse-document-frequency (idf weights) wx2_idf = smk_index.compute_word_idf_( wx_series, wx2_idxs, idx2_daid, daids, daid2_label, vocab_weighting, verbose=True) if ut.DEBUG2: assert len(wx2_idf) == len(wx2_idf.keys()) # Compute (normalized) residual vectors and inverse mappings wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = smk_index.compute_residuals_( words, wx2_idxs, _wx2_maws, idx2_vec, idx2_daid, idx2_dfx, aggregate, verbose=True) if not ut.QUIET: print('[smk_repr] unloading idx2_vec') if delete_rawvecs: # Try to save some memory del _wx2_maws invindex.idx2_dvec = None del idx2_vec # Compute annotation normalization factor daid2_sccw = smk_index.compute_data_sccw_( idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose=True) # Cache save #cachetup = (idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, daid2_sccw) #invindex_cache.save(cachetup) # Store information invindex.idx2_wxs = idx2_wxs # stacked index -> word indexes (might not be needed) invindex.wx2_idxs = wx2_idxs invindex.wx2_idf = wx2_idf invindex.wx2_drvecs = wx2_drvecs invindex.wx2_dflags = wx2_dflags # flag nan rvecs invindex.wx2_aids = wx2_aids # needed for asmk invindex.wx2_fxs = wx2_fxs # needed for asmk invindex.wx2_dmaws = wx2_dmaws # needed for awx2_mawssmk 
invindex.daid2_sccw = daid2_sccw #memtrack.report('[DATA INTERNALS3]') if ut.DEBUG2: from ibeis.algo.hots.smk import smk_debug smk_debug.check_invindex_wx2(invindex)
def compute_data_internals_(invindex, qparams, memtrack=None, delete_rawvecs=True): """ Builds each of the inverted index internals. invindex (InvertedIndex): object for fast vocab lookup qparams (QueryParams): hyper-parameters memtrack (None): delete_rawvecs (bool): Returns: None Example: >>> from ibeis.algo.hots.smk.smk_repr import * # NOQA >>> from ibeis.algo.hots.smk import smk_debug >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0() >>> compute_data_internals_(invindex, qreq_.qparams) Ignore: idx2_vec = idx2_dvec wx2_maws = _wx2_maws # NOQA """ # Get information #if memtrack is None: # memtrack = ut.MemoryTracker('[DATA INTERNALS ENTRY]') #memtrack.report('[DATA INTERNALS1]') # aggregate = qparams.aggregate smk_alpha = qparams.smk_alpha smk_thresh = qparams.smk_thresh # massign_alpha = qparams.massign_alpha massign_sigma = qparams.massign_sigma massign_equal_weights = qparams.massign_equal_weights # vocab_weighting = qparams.vocab_weighting # nAssign = 1 # single assignment for database side idx2_vec = invindex.idx2_dvec idx2_dfx = invindex.idx2_dfx idx2_daid = invindex.idx2_daid daids = invindex.daids wordflann = invindex.wordflann words = invindex.words daid2_label = invindex.daid2_label wx_series = np.arange(len(words)) #memtrack.track_obj(idx2_vec, 'idx2_vec') if not ut.QUIET: print('[smk_repr] compute_data_internals_') if ut.VERBOSE: print('[smk_repr] * len(daids) = %r' % (len(daids), )) print('[smk_repr] * len(words) = %r' % (len(words), )) print('[smk_repr] * len(idx2_vec) = %r' % (len(idx2_vec), )) print('[smk_repr] * aggregate = %r' % (aggregate, )) print('[smk_repr] * smk_alpha = %r' % (smk_alpha, )) print('[smk_repr] * smk_thresh = %r' % (smk_thresh, )) # Try to use the cache #cfgstr = ut.hashstr_arr(words, 'words') + qparams.feat_cfgstr #cachekw = dict( #cfgstr=cfgstr, #appname='smk_test' #) #invindex_cache = ut.Cacher('inverted_index', **cachekw) #try: # raise IOError('cache is off') # #cachetup = 
invindex_cache.load() # #(idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_maws, daid2_sccw) = cachetup # invindex.idx2_dvec = None #except IOError as ex: # Database word assignments (perform single assignment on database side) wx2_idxs, _wx2_maws, idx2_wxs = smk_index.assign_to_words_( wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma, massign_equal_weights) if ut.DEBUG2: assert len(idx2_wxs) == len(idx2_vec) assert len(wx2_idxs.keys()) == len(_wx2_maws.keys()) assert len(wx2_idxs.keys()) <= len(words) try: assert len(wx2_idxs.keys()) == len(words) except AssertionError as ex: ut.printex(ex, iswarning=True) # Database word inverse-document-frequency (idf weights) wx2_idf = smk_index.compute_word_idf_(wx_series, wx2_idxs, idx2_daid, daids, daid2_label, vocab_weighting, verbose=True) if ut.DEBUG2: assert len(wx2_idf) == len(wx2_idf.keys()) # Compute (normalized) residual vectors and inverse mappings wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = smk_index.compute_residuals_( words, wx2_idxs, _wx2_maws, idx2_vec, idx2_daid, idx2_dfx, aggregate, verbose=True) if not ut.QUIET: print('[smk_repr] unloading idx2_vec') if delete_rawvecs: # Try to save some memory del _wx2_maws invindex.idx2_dvec = None del idx2_vec # Compute annotation normalization factor daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose=True) # Cache save #cachetup = (idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, daid2_sccw) #invindex_cache.save(cachetup) # Store information invindex.idx2_wxs = idx2_wxs # stacked index -> word indexes (might not be needed) invindex.wx2_idxs = wx2_idxs invindex.wx2_idf = wx2_idf invindex.wx2_drvecs = wx2_drvecs invindex.wx2_dflags = wx2_dflags # flag nan rvecs invindex.wx2_aids = wx2_aids # needed for asmk invindex.wx2_fxs = wx2_fxs # needed for asmk invindex.wx2_dmaws = wx2_dmaws # needed for awx2_mawssmk 
invindex.daid2_sccw = daid2_sccw #memtrack.report('[DATA INTERNALS3]') if ut.DEBUG2: from ibeis.algo.hots.smk import smk_debug smk_debug.check_invindex_wx2(invindex)