def dump_vectors(qreq_):
    """Dump dense VLAD-style vectors for the database annotations to a .mat file.

    Builds one flattened ``(n_words * n_dims)`` row per database annotation by
    scattering each annotation's aggregated residual vectors into a dense
    per-word grid, then saves the matrix (together with the database name ids)
    via ``scipy.io.savemat`` to ``~/vlad_<n>_d<d>_<dbname>.mat``.

    Args:
        qreq_ (SMKRequest): query request whose inverted-index data is already
            populated (call ``qreq_.ensure_data()`` first).

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.smk_pipeline import *  # NOQA
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids(defaultdb='PZ_MTEST',
        >>>                                    a='default:mingt=2,pername=2')
        >>> qaids = aid_list[0:2]
        >>> daids = aid_list[:]
        >>> config = {'nAssign': 1, 'num_words': 8000,
        >>>           'sv_on': True}
        >>> qreq_ = SMKRequest(ibs, qaids, daids, config)
        >>> qreq_.ensure_data()
    """
    # Hoisted so a missing scipy fails fast, before the expensive loop runs.
    import scipy.io

    inva = qreq_.dinva
    # Probe one annotation to discover the residual-vector dimensionality.
    X = qreq_.dinva.get_annot(qreq_.daids[0])
    # wx_list appears to be sorted word indices, so the last index + 1 is the
    # vocabulary size -- NOTE(review): confirm wx_list is sorted ascending.
    n_words = inva.wx_list[-1] + 1
    n_dims = X.agg_rvecs.shape[1]
    n_annots = len(qreq_.daids)
    vlads = np.zeros((n_annots, n_words, n_dims), dtype=np.float32)
    ids_ = list(zip(qreq_.dnids, qreq_.daids))
    for count, (nid, aid) in enumerate(ut.ProgIter(ids_, label='vlad')):
        X = qreq_.dinva.get_annot(aid)
        # Scatter this annot's aggregated residuals into its dense word slots.
        vlads[count][X.wx_list] = X.agg_rvecs
    # Flatten each annot's (n_words, n_dims) grid into one long row vector.
    vlads.shape = (n_annots, n_words * n_dims)
    ut.print_object_size(vlads)
    fname = 'vlad_%d_d%d_%s' % (n_annots, n_words * n_dims, qreq_.ibs.get_dbname())
    fpath = ut.truepath('~/' + fname + '.mat')
    mdict = {
        'vlads': vlads,
        'nids': qreq_.dnids,
    }
    scipy.io.savemat(fpath, mdict)
def hyrule_vocab_test():
    """Fit a visual vocabulary over the Oxford5k SIFT descriptors.

    Loads the raw uint8 SIFT descriptors from the ICCV-2013 SMK data drop,
    runs MiniBatchKMeans with 2**16 centers on them, and logs the shape of
    the resulting codebook.
    """
    from yael.yutils import load_ext
    from os.path import join
    import sklearn.cluster

    dbdir = ut.truepath('/raid/work/Oxford/')
    datadir = dbdir + '/smk_data_iccv_2013/data/'
    # Files storing descriptors/geometry for Oxford5k dataset
    test_sift_fname = join(datadir, 'oxford_sift.uint8')

    sift_vecs = load_ext(test_sift_fname, ndims=128, verbose=True).astype(np.float32)
    logger.info(ut.print_object_size(sift_vecs))

    with ut.embed_on_exception_context:
        rng = np.random.RandomState(13421421)
        num_words = int(2**16)
        # converged after 26043 iterations
        model = sklearn.cluster.MiniBatchKMeans(
            n_clusters=num_words,
            init='k-means++',
            init_size=num_words * 4,
            n_init=1,
            max_iter=100,
            batch_size=1000,
            tol=0.0,
            max_no_improvement=10,
            reassignment_ratio=0.01,
            compute_labels=False,
            random_state=rng,
            verbose=1,
        )
        model.fit(sift_vecs)
        codebook = model.cluster_centers_
        logger.info(codebook.shape)
# NOTE(review): this chunk begins mid-script; the statements below may belong
# to a function defined above this view -- indentation reconstructed from a
# whitespace-mangled source, verify against the original file.
print('TEST2')
print_ids()
import imp
# Causes utool.write_to to be bad
# because utool doesn't do a rrr itself
imp.reload(utool.util_io)
print_ids()


def docstr_test1():
    # Print object ids before and after a utool dynamic reload (rrr) to see
    # whether the reload changes the identities reported by print_ids().
    print_ids()
    utool.rrr()
    print_ids()


def docstr_test2():
    # Same before/after-reload id check as docstr_test1.
    print_ids()
    utool.rrr()
    print_ids()


if __name__ == '__main__':
    # Measure utool's in-memory size before and after repeated reload tests.
    utool.print_object_size(utool)
    reloading_test1()
    # reloading_test2()
    reloading_test1()
    reloading_test1()
    # reloading_test2()
    utool.print_object_size(utool)
    pass