def __init__(vocab, words=None):
    vocab.wx_to_word = words
    vocab.wordflann = None
    vocab.flann_params = vt.get_flann_params(random_seed=42)
    vocab.flann_params['checks'] = 1024
    vocab.flann_params['trees'] = 8
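
# --- Hedged usage sketch (not part of the original source) ---
# A minimal illustration of how flann_params like the ones set above are
# consumed by pyflann: 'trees' and 'random_seed' are build-time parameters,
# while 'checks' bounds search effort at query time. Only numpy and pyflann
# are assumed; the random vectors stand in for a real vocabulary.
import numpy as np
import pyflann

rng = np.random.RandomState(42)
words = rng.rand(1000, 128).astype(np.float32)    # stand-in visual words
queries = rng.rand(10, 128).astype(np.float32)

wordflann = pyflann.FLANN()
wordflann.build_index(words, algorithm='kdtree', trees=8, random_seed=42)
idxs, dists = wordflann.nn_index(queries, num_neighbors=1, checks=1024)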
def subindexer_time_experiment():
    """
    builds plot of number of annotations vs indexer build time.

    TODO: time experiment
    """
    import ibeis
    import utool as ut
    import numpy as np
    import vtool as vt
    import pyflann
    import plottool as pt
    ibs = ibeis.opendb(db='PZ_Master0')
    daid_list = ibs.get_valid_aids()
    count_list = []
    time_list = []
    flann_params = vt.get_flann_params()
    for count in ut.ProgressIter(range(1, 301)):
        daids_ = daid_list[:]
        np.random.shuffle(daids_)
        daids = daids_[0:count]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        with ut.Timer(verbose=False) as t:
            flann = pyflann.FLANN()
            flann.build_index(vecs, **flann_params)
        count_list.append(count)
        time_list.append(t.ellapsed)
    count_arr = np.array(count_list)
    time_arr = np.array(time_list)
    pt.plot2(count_arr, time_arr, marker='-',
             equal_aspect=False,
             x_label='num_annotations',
             y_label='FLANN build time')
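
# --- Hedged standalone sketch (assumption: no ibeis database available) ---
# The experiment above needs a populated ibeis database. The same
# build-time-vs-size curve can be approximated with random descriptors;
# only numpy, pyflann, and the stdlib are assumed here.
import time
import numpy as np
import pyflann

def time_flann_builds(sizes, dim=128, trees=8):
    rng = np.random.RandomState(0)
    times = []
    for num in sizes:
        vecs = rng.rand(num, dim).astype(np.float32)
        flann = pyflann.FLANN()
        tic = time.time()
        flann.build_index(vecs, algorithm='kdtree', trees=trees)
        times.append(time.time() - tic)
    return times

# Example: time_flann_builds([1000, 2000, 4000, 8000])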
def get_buildtime_data(**kwargs):
    flann_params = vt.get_flann_params(**kwargs)
    print('flann_params = %r' % (ut.dict_str(flann_params),))
    data_list = []
    num = 1000
    print('-----')
    for count in ut.ProgressIter(itertools.count(), nTotal=-1, freq=1,
                                 autoadjust=False):
        num = int(num * 1.2)
        print('num = %r' % (num,))
        #if num > 1E6:
        #    break
        data = pool.get_testdata(num)
        print('object size ' + ut.get_object_size_str(data, 'data'))
        flann = pyflann.FLANN(**flann_params)
        with ut.Timer(verbose=False) as t:
            flann.build_index(data)
        print('t.ellapsed = %r' % (t.ellapsed,))
        if t.ellapsed > 5 or count > 1000:
            break
        data_list.append((count, num, t.ellapsed))
        print('-----')
    return data_list, flann_params
def get_cfgstr(nnindexer, noquery=False):
    r"""
    returns a string which uniquely identifies the configuration and support data

    Args:
        noquery (bool): if True cfgstr is only relevant to building the
            index. No search params are returned (default = False)

    Returns:
        str: flann_cfgstr

    CommandLine:
        python -m wbia.algo.hots.neighbor_index --test-get_cfgstr

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> import wbia
        >>> cfgdict = dict(fg_on=False)
        >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
        >>> qreq_.load_indexer()
        >>> nnindexer = qreq_.indexer
        >>> noquery = True
        >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
        >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
        >>> print(result)
        flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
    """
    flann_cfgstr_list = []
    use_params_hash = True
    use_data_hash = True
    if use_params_hash:
        flann_defaults = vt.get_flann_params(nnindexer.flann_params['algorithm'])
        # flann_params_clean = flann_defaults.copy()
        flann_params_clean = ut.sort_dict(flann_defaults)
        ut.update_existing(flann_params_clean, nnindexer.flann_params)
        if noquery:
            ut.delete_dict_keys(flann_params_clean, ['checks'])
        shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t')
        short_params = ut.odict([
            (shortnames.get(key, key), str(val)[0:7])
            for key, val in six.iteritems(flann_params_clean)
        ])
        flann_valsig_ = ut.repr2(short_params, nl=False, explicit=True, strvals=True)
        flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
        # flann_valsig_ = str(list(flann_params.values()))
        # flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
        flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
    if use_data_hash:
        vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
        flann_cfgstr_list.append(vecs_hashstr)
    flann_cfgstr = ''.join(flann_cfgstr_list)
    return flann_cfgstr
def get_cfgstr(nnindexer, noquery=False):
    r"""
    returns a string which uniquely identifies the configuration and support data

    Args:
        noquery (bool): if True cfgstr is only relevant to building the
            index. No search params are returned (default = False)

    Returns:
        str: flann_cfgstr

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index --test-get_cfgstr

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> cfgdict = dict(fg_on=False)
        >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
        >>> qreq_.load_indexer()
        >>> nnindexer = qreq_.indexer
        >>> noquery = True
        >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
        >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
        >>> print(result)
        flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
    """
    flann_cfgstr_list = []
    use_params_hash = True
    use_data_hash = True
    if use_params_hash:
        flann_defaults = vt.get_flann_params(nnindexer.flann_params['algorithm'])
        #flann_params_clean = flann_defaults.copy()
        flann_params_clean = ut.sort_dict(flann_defaults)
        ut.updateif_haskey(flann_params_clean, nnindexer.flann_params)
        if noquery:
            ut.delete_dict_keys(flann_params_clean, ['checks'])
        shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t')
        short_params = ut.odict([
            (shortnames.get(key, key), str(val)[0:7])
            for key, val in six.iteritems(flann_params_clean)
        ])
        flann_valsig_ = ut.dict_str(short_params, nl=False, explicit=True, strvals=True)
        flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
        #flann_valsig_ = str(list(flann_params.values()))
        #flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
        flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
    if use_data_hash:
        vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
        flann_cfgstr_list.append(vecs_hashstr)
    flann_cfgstr = ''.join(flann_cfgstr_list)
    return flann_cfgstr
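
# --- Hedged illustration (not from the original source) ---
# How the shortnames mapping above compresses flann params into the
# `_FLANN((algo=kdtree,seed=42,t=8,))` signature shown in the doctest;
# plain Python stands in for the ut.odict/ut.repr2 helpers.
def make_flann_valsig(flann_params):
    shortnames = dict(algorithm='algo', checks='chks',
                      random_seed='seed', trees='t')
    items = sorted(flann_params.items())  # mimics ut.sort_dict
    body = ','.join('%s=%s' % (shortnames.get(key, key), str(val)[0:7])
                    for key, val in items)
    return '_FLANN((%s,))' % (body,)

# make_flann_valsig({'algorithm': 'kdtree', 'random_seed': 42, 'trees': 8})
# -> '_FLANN((algo=kdtree,seed=42,t=8,))'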
def compute_vocab(depc, fid_list, config):
    r"""
    Args:
        depc (dtool.DependencyCache):
        fid_list (list):
        config (dtool.Config):

    CommandLine:
        python -m ibeis.core_annots --exec-compute_neighbor_index --show
        python -m ibeis.control.IBEISControl --test-show_depc_annot_table_input --show --tablename=neighbor_index

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.new_annots import *  # NOQA
        >>> import ibeis
        >>> ibs, aid_list = ibeis.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> fid_list = depc.get_rowids('feat', aid_list)
        >>> config = VocabConfig()
        >>> vocab, train_vecs = ut.exec_func_src(compute_vocab, key_list=['vocab', 'train_vecs'])
        >>> idx2_vec = depc.d.get_feat_vecs(aid_list)[0]
        >>> self = vocab
        >>> ut.quit_if_noshow()
        >>> data = train_vecs
        >>> centroids = vocab.wx2_word
        >>> import plottool as pt
        >>> vt.plot_centroids(data, centroids, num_pca_dims=2)
        >>> ut.show_if_requested()
        >>> #config = ibs.depc_annot['vocab'].configclass()
    """
    print('[IBEIS] COMPUTE_VOCAB:')
    vecs_list = depc.get_native('feat', fid_list, 'vecs')
    train_vecs = np.vstack(vecs_list)
    num_words = config['num_words']
    max_iters = 100
    print('[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors'
          % (num_words, len(fid_list), len(train_vecs)))
    flann_params = vt.get_flann_params(random_seed=42)
    kwds = dict(max_iters=max_iters, flann_params=flann_params)
    words = vt.akmeans(train_vecs, num_words, **kwds)
    vocab = VisualVocab(words)
    vocab.reindex()
    return vocab
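# --- Hedged usage sketch (signature taken from the call above, not verified) ---
# Minimal call pattern for approximate k-means vocabulary training on random
# descriptors, mirroring the compute_vocab body; assumes ibeis-era vtool is
# importable as `vt`.
import numpy as np
import vtool as vt

rng = np.random.RandomState(42)
train_vecs = rng.rand(5000, 128).astype(np.float32)
num_words = 64
flann_params = vt.get_flann_params(random_seed=42)
words = vt.akmeans(train_vecs, num_words, max_iters=100,
                   flann_params=flann_params)
# words.shape -> (64, 128): one centroid per visual word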
def compute_vocab(depc, fid_list, config):
    r"""
    Depcache method for computing a new visual vocab

    CommandLine:
        python -m wbia.core_annots --exec-compute_neighbor_index --show
        python -m wbia show_depc_annot_table_input --show --tablename=neighbor_index
        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:0
        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:1

        # FIXME make util_tests register
        python -m wbia.algo.smk.vocab_indexer compute_vocab:0

    Ignore:
        >>> # Lev Oxford Debug Example
        >>> import wbia
        >>> ibs = wbia.opendb('Oxford')
        >>> depc = ibs.depc
        >>> table = depc['vocab']
        >>> # Check what currently exists in vocab table
        >>> table.print_configs()
        >>> table.print_table()
        >>> table.print_internal_info()
        >>> # Grab aids used to compute vocab
        >>> from wbia.expt.experiment_helpers import get_annotcfg_list
        >>> expanded_aids_list = get_annotcfg_list(ibs, ['oxford'])[1]
        >>> qaids, daids = expanded_aids_list[0]
        >>> vocab_aids = daids
        >>> config = {'num_words': 64000}
        >>> exists = depc.check_rowids('vocab', [vocab_aids], config=config)
        >>> print('exists = %r' % (exists,))
        >>> vocab_rowid = depc.get_rowids('vocab', [vocab_aids], config=config)[0]
        >>> print('vocab_rowid = %r' % (vocab_rowid,))
        >>> vocab = table.get_row_data([vocab_rowid], 'words')[0]
        >>> print('vocab = %r' % (vocab,))

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> # Test depcache access
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> input_tuple = [aid_list]
        >>> rowid_kw = {}
        >>> tablename = 'vocab'
        >>> vocabid_list = depc.get_rowids(tablename, input_tuple, **rowid_kw)
        >>> vocab = depc.get(tablename, input_tuple, 'words')[0]
        >>> assert vocab.wordflann is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is vocab.wx_to_word

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> fid_list = depc.get_rowids('feat', aid_list)
        >>> config = VocabConfig()
        >>> vocab, train_vecs = ut.exec_func_src(compute_vocab, keys=['vocab', 'train_vecs'])
        >>> idx_to_vec = depc.d.get_feat_vecs(aid_list)[0]
        >>> self = vocab
        >>> ut.quit_if_noshow()
        >>> data = train_vecs
        >>> centroids = vocab.wx_to_word
        >>> import wbia.plottool as pt
        >>> vt.plot_centroids(data, centroids, num_pca_dims=2)
        >>> ut.show_if_requested()
        >>> #config = ibs.depc_annot['vocab'].configclass()
    """
    logger.info('[IBEIS] COMPUTE_VOCAB:')
    vecs_list = depc.get_native('feat', fid_list, 'vecs')
    train_vecs = np.vstack(vecs_list).astype(np.float32)
    num_words = config['num_words']
    logger.info(
        '[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors'
        % (num_words, len(fid_list), len(train_vecs)))
    if config['algorithm'] == 'kdtree':
        flann_params = vt.get_flann_params(random_seed=42)
        kwds = dict(max_iters=20, flann_params=flann_params)
        words = vt.akmeans(train_vecs, num_words, **kwds)
    elif config['algorithm'] == 'minibatch':
        logger.info('Using minibatch kmeans')
        import sklearn.cluster
        rng = np.random.RandomState(config['random_seed'])
        n_init = config['n_init']
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            init_size = int(num_words * 4)
            batch_size = 1000
            n_batches = ut.get_num_chunks(train_vecs.shape[0], batch_size)
            minibatch_params = dict(
                n_clusters=num_words,
                init='k-means++',
                init_size=init_size,
                n_init=n_init,
                max_iter=30000 // n_batches,
                batch_size=batch_size,
                tol=0.0,
                max_no_improvement=10,
                reassignment_ratio=0.01,
            )
            logger.info('minibatch_params = %s' % (ut.repr4(minibatch_params),))
            clusterer = sklearn.cluster.MiniBatchKMeans(
                compute_labels=False, random_state=rng, verbose=2,
                **minibatch_params)
            try:
                clusterer.fit(train_vecs)
            except (Exception, KeyboardInterrupt) as ex:
                ut.printex(ex, tb=True)
                if ut.is_developer():
                    ut.embed()
                else:
                    raise
        words = clusterer.cluster_centers_
        logger.info('Finished clustering')
    # if False:
    #     flann_params['checks'] = 64
    #     flann_params['trees'] = 4
    #     num_words = 128
    #     centroids = vt.initialize_centroids(num_words, train_vecs, 'akmeans++')
    #     words, hist = vt.akmeans_iterations(
    #         train_vecs, centroids, max_iters=1000, monitor=True,
    #         flann_params=flann_params)
    logger.info('Constructing vocab')
    vocab = VisualVocab(words)
    logger.info('Building vocab index')
    vocab.build()
    logger.info('Returning vocab')
    return (vocab,)
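
# --- Hedged standalone sketch (sklearn only; parameters copied from above) ---
# The minibatch branch reduces to this self-contained pattern: fit
# MiniBatchKMeans without computing labels, then take cluster_centers_
# as the visual words. Random vectors stand in for real descriptors.
import numpy as np
import sklearn.cluster

rng = np.random.RandomState(42)
train_vecs = rng.rand(20000, 128).astype(np.float32)
num_words = 256
clusterer = sklearn.cluster.MiniBatchKMeans(
    n_clusters=num_words, init='k-means++', init_size=num_words * 4,
    batch_size=1000, max_no_improvement=10, reassignment_ratio=0.01,
    compute_labels=False, random_state=rng)
clusterer.fit(train_vecs)
words = clusterer.cluster_centers_  # shape (256, 128), one row per word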
def __init__(self, words=None):
    self.wx2_word = words
    self.wordflann = pyflann.FLANN()
    self.flann_params = vt.get_flann_params(random_seed=42)
def flann_add_time_experiment():
    """
    builds plot of number of annotations vs indexer build time.

    TODO: time experiment

    CommandLine:
        python -m wbia.algo.hots._neighbor_experiment --test-flann_add_time_experiment --db PZ_MTEST --show
        python -m wbia.algo.hots._neighbor_experiment --test-flann_add_time_experiment --db PZ_Master0 --show
        utprof.py -m wbia.algo.hots._neighbor_experiment --test-flann_add_time_experiment --show

        valgrind --tool=memcheck --suppressions=valgrind-python.supp python -m wbia.algo.hots._neighbor_experiment --test-flann_add_time_experiment --db PZ_MTEST --no-with-reindex

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> import wbia
        >>> #ibs = wbia.opendb('PZ_MTEST')
        >>> result = flann_add_time_experiment()
        >>> # verify results
        >>> print(result)
        >>> ut.show_if_requested()
    """
    import wbia
    import utool as ut
    import numpy as np
    import wbia.plottool as pt

    def make_flann_index(vecs, flann_params):
        flann = pyflann.FLANN()
        flann.build_index(vecs, **flann_params)
        return flann

    db = ut.get_argval('--db')
    ibs = wbia.opendb(db=db)

    # Input
    if ibs.get_dbname() == 'PZ_MTEST':
        initial = 1
        reindex_stride = 16
        addition_stride = 4
        max_ceiling = 120
    elif ibs.get_dbname() == 'PZ_Master0':
        # ibs = wbia.opendb(db='GZ_ALL')
        initial = 32
        reindex_stride = 32
        addition_stride = 16
        max_ceiling = 300001
    else:
        assert False
        # max_ceiling = 32
    all_daids = ibs.get_valid_aids()
    max_num = min(max_ceiling, len(all_daids))
    flann_params = vt.get_flann_params()

    # Output
    count_list, time_list_reindex = [], []
    count_list2, time_list_addition = [], []

    # Setup
    # all_randomize_daids_ = ut.deterministic_shuffle(all_daids[:])
    all_randomize_daids_ = all_daids
    # ensure all features are computed
    ibs.get_annot_vecs(all_randomize_daids_)

    def reindex_step(count, count_list, time_list_reindex):
        daids = all_randomize_daids_[0:count]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        with ut.Timer(verbose=False) as t:
            flann = make_flann_index(vecs, flann_params)  # NOQA
        count_list.append(count)
        time_list_reindex.append(t.ellapsed)

    def addition_step(count, flann, count_list2, time_list_addition):
        daids = all_randomize_daids_[count:count + 1]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        with ut.Timer(verbose=False) as t:
            flann.add_points(vecs)
        count_list2.append(count)
        time_list_addition.append(t.ellapsed)

    def make_initial_index(initial):
        daids = all_randomize_daids_[0:initial + 1]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        flann = make_flann_index(vecs, flann_params)
        return flann

    WITH_REINDEX = not ut.get_argflag('--no-with-reindex')
    if WITH_REINDEX:
        # Reindex Part
        reindex_lbl = 'Reindexing'
        _reindex_iter = range(1, max_num, reindex_stride)
        reindex_iter = ut.ProgressIter(_reindex_iter, lbl=reindex_lbl, freq=1)
        for count in reindex_iter:
            reindex_step(count, count_list, time_list_reindex)

    # Add Part
    flann = make_initial_index(initial)
    addition_lbl = 'Addition'
    _addition_iter = range(initial + 1, max_num, addition_stride)
    addition_iter = ut.ProgressIter(_addition_iter, lbl=addition_lbl)
    for count in addition_iter:
        addition_step(count, flann, count_list2, time_list_addition)

    logger.info('---')
    logger.info('Reindexing took %.2f seconds total' % sum(time_list_reindex))
    logger.info('Addition took %.2f seconds total' % sum(time_list_addition))
    logger.info('---')
    statskw = dict(precision=2, newlines=True)
    logger.info('Reindex stats ' + ut.get_stats_str(time_list_reindex, **statskw))
    logger.info('Addition stats ' + ut.get_stats_str(time_list_addition, **statskw))

    logger.info('Plotting')
    # with pt.FigureContext:
    fnum_iter = iter(range(0, 2))  # PY3: use next(), not .next
    pt.figure(fnum=next(fnum_iter))
    if WITH_REINDEX:
        pt.plot2(
            count_list,
            time_list_reindex,
            marker='-o',
            equal_aspect=False,
            x_label='num_annotations',
            label=reindex_lbl + ' Time',
            dark=False,
        )
        # pt.figure(fnum=next(fnum_iter))
    pt.plot2(
        count_list2,
        time_list_addition,
        marker='-o',
        equal_aspect=False,
        x_label='num_annotations',
        label=addition_lbl + ' Time',
    )
    pt.legend()
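
# --- Hedged standalone sketch (random data; pyflann only) ---
# The core comparison in flann_add_time_experiment, without a database:
# rebuilding an index from scratch vs. appending with flann.add_points
# (which the experiment above relies on; available in FLANN builds that
# expose add_points).
import time
import numpy as np
import pyflann

rng = np.random.RandomState(0)
base = rng.rand(10000, 128).astype(np.float32)
new = rng.rand(100, 128).astype(np.float32)

flann = pyflann.FLANN()
tic = time.time()
flann.build_index(base, algorithm='kdtree', trees=8)
print('full build: %.4fs' % (time.time() - tic))

tic = time.time()
flann.add_points(new)  # incremental addition, no full rebuild
print('add_points: %.4fs' % (time.time() - tic))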