def index_data_annots(annots_df, daids, words):
    """Build a fully-computed InvertedIndex over the given database annotations.

    Args:
        annots_df: dataframe-like object exposing a 'vecs' column keyed by aid
        daids: database annotation ids whose descriptors are indexed
        words: visual vocabulary (word centroids) used for assignment

    Returns:
        InvertedIndex: index with internals already computed
    """
    # Cached FLANN structure for nearest-word queries over the vocabulary
    wordflann = vtool.nearest_neighbors.flann_cache(words, flann_params={})
    ax2_aid = np.array(daids)
    vecs_list = annots_df['vecs'][daids]
    # Stack per-annotation descriptors into flat arrays with inverse mappings
    idx2_vec, idx2_ax, idx2_fx = nntool.invertible_stack(
        vecs_list, np.arange(len(ax2_aid)))
    invindex = InvertedIndex(words, wordflann, idx2_vec, idx2_ax, idx2_fx,
                             ax2_aid)
    invindex.compute_internals()
    return invindex
def invert_index(vecs_list, ax_list):
    """ Aggregates descriptors of input annotations and returns inverted information """
    verbose = utool.NOT_QUIET
    if verbose:
        print('[hsnbrx] stacking descriptors from %d annotations' % len(ax_list))
    try:
        # Flatten the per-annotation descriptor lists; keep inverse mappings
        # back to the owning annotation (ax) and feature index (fx).
        idx2_vec, idx2_ax, idx2_fx = nntool.invertible_stack(vecs_list, ax_list)
        # Sanity check: every stacked vector has exactly one ax and one fx
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        utool.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if verbose:
        print('stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
            nVecs=len(idx2_vec), nAnnots=len(ax_list)))
    return idx2_vec, idx2_ax, idx2_fx
def index_data_annots(annots_df, daids, words, qparams, with_internals=True,
                      memtrack=None, delete_rawvecs=False):
    """
    Builds the initial inverted index from a dataframe, daids, and words.
    Optionally builds the internals of the inverted structure

    Args:
        annots_df: dataframe-like object with 'vecs' and 'labels' keyed by aid
        daids: database annotation ids to index
        words: visual vocabulary (word centroids)
        qparams: query parameters forwarded to internals computation
        with_internals (bool): when True, also run compute_data_internals_
        memtrack: memory debugging object
        delete_rawvecs (bool): forwarded to compute_data_internals_

    Returns:
        invindex

    Example:
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, qreq_, words = smk_debug.testdata_words()
        >>> qparams = qreq_.qparams
        >>> with_internals = False
        >>> invindex = index_data_annots(annots_df, daids, words, qparams, with_internals)
    """
    if not ut.QUIET:
        print('[smk_repr] index_data_annots')
    # Cached FLANN structure for fast nearest-word lookup over the vocabulary
    wordflann = nntool.flann_cache(words, flann_params={}, appname='smk')
    vecs_per_annot = annots_df['vecs'][daids]
    labels_per_annot = annots_df['labels'][daids]
    # Flatten per-annotation descriptors; keep inverse maps to aid and fx
    idx2_dvec, idx2_daid, idx2_dfx = nntool.invertible_stack(vecs_per_annot, daids)
    # TODO: individually cache residual vectors.
    # rvecs_list = annots_df['rvecs'][daids]
    # Residual vectors depend on:
    #  * nearest word (word assignment)
    #  * original vectors
    #  * multiassignment
    daid2_label = dict(zip(daids, labels_per_annot))
    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid, idx2_dfx,
                             daids, daid2_label)
    # Drop local references so the memory can be reclaimed while the
    # internals are computed in the next call.
    del words, idx2_dvec, idx2_daid, idx2_dfx, daids, daid2_label
    del vecs_per_annot, labels_per_annot
    if with_internals:
        compute_data_internals_(invindex, qparams, memtrack=memtrack,
                                delete_rawvecs=delete_rawvecs)  # 99%
    return invindex