Example #1
    def __init__(vocab, words=None):
        vocab.wx_to_word = words
        vocab.wordflann = None

        vocab.flann_params = vt.get_flann_params(random_seed=42)
        vocab.flann_params['checks'] = 1024
        vocab.flann_params['trees'] = 8
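
Note: the `vocab` argument above is this codebase's convention for `self`, and `wordflann` starts out as `None`. As a minimal sketch of what the stored parameters are for (assuming `pyflann` and `vtool`; the random word array below is an illustrative stand-in, not from the source), the word index is eventually built roughly like this:

import numpy as np
import pyflann
import vtool as vt  # module aliasing assumed; varies across ibeis/wbia versions

flann_params = vt.get_flann_params(random_seed=42)
flann_params['checks'] = 1024  # search-time parameter
flann_params['trees'] = 8      # build-time parameter
words = np.random.randint(0, 255, (1000, 128), dtype=np.uint8)  # illustrative only
wordflann = pyflann.FLANN()
wordflann.build_index(words, **flann_params)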
Example #2
def subindexer_time_experiment():
    """
    Builds a plot of number of annotations vs. indexer build time.

    TODO: time experiment
    """
    import ibeis
    import utool as ut
    import numpy as np
    import pyflann
    import plottool as pt
    import vtool as vt  # provides get_flann_params
    ibs = ibeis.opendb(db='PZ_Master0')
    daid_list = ibs.get_valid_aids()
    count_list = []
    time_list = []
    flann_params = vt.get_flann_params()
    for count in ut.ProgressIter(range(1, 301)):
        daids_ = daid_list[:]
        np.random.shuffle(daids_)
        daids = daids_[0:count]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        with ut.Timer(verbose=False) as t:
            flann = pyflann.FLANN()
            flann.build_index(vecs, **flann_params)
        count_list.append(count)
        time_list.append(t.ellapsed)
    count_arr = np.array(count_list)
    time_arr = np.array(time_list)
    pt.plot2(count_arr,
             time_arr,
             marker='-',
             equal_aspect=False,
             x_label='num_annotations',
             y_label='FLANN build time')
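
A note on `t.ellapsed`, which recurs in these examples: utool's `ut.Timer` context manager stores the measured wall time in an attribute spelled `ellapsed`, so reading it that way is correct for this library and not a typo. Minimal sketch:

import utool as ut
with ut.Timer(verbose=False) as t:
    sum(range(10 ** 6))  # stand-in workload
print(t.ellapsed)  # utool's spelling of the elapsed-time attribute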
Example #3
def get_buildtime_data(**kwargs):
    # NOTE: itertools, pyflann, ut (utool), vt (vtool), and the test-data
    # provider `pool` are assumed from the enclosing module's scope.
    flann_params = vt.get_flann_params(**kwargs)
    print('flann_params = %r' % (ut.dict_str(flann_params),))
    data_list = []
    num = 1000
    print('-----')
    for count in ut.ProgressIter(itertools.count(),
                                 nTotal=-1,
                                 freq=1,
                                 autoadjust=False):
        num = int(num * 1.2)
        print('num = %r' % (num,))
        #if num > 1E6:
        #    break
        data = pool.get_testdata(num)
        print('object size ' + ut.get_object_size_str(data, 'data'))
        flann = pyflann.FLANN(**flann_params)
        with ut.Timer(verbose=False) as t:
            flann.build_index(data)
        print('t.ellapsed = %r' % (t.ellapsed,))
        if t.ellapsed > 5 or count > 1000:
            break
        data_list.append((count, num, t.ellapsed))
        print('-----')
    return data_list, flann_params
Example #4
    def get_cfgstr(nnindexer, noquery=False):
        r""" returns string which uniquely identified configuration and support data

        Args:
            noquery (bool): if True cfgstr is only relevant to building the
                index. No search params are returned (default = False)

        Returns:
            str: flann_cfgstr

        CommandLine:
            python -m wbia.algo.hots.neighbor_index --test-get_cfgstr

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> import wbia
            >>> cfgdict = dict(fg_on=False)
            >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
            >>> qreq_.load_indexer()
            >>> nnindexer = qreq_.indexer
            >>> noquery = True
            >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
            >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
            >>> print(result)
            flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
        """
        flann_cfgstr_list = []
        use_params_hash = True
        use_data_hash = True
        if use_params_hash:
            flann_defaults = vt.get_flann_params(
                nnindexer.flann_params['algorithm'])
            # flann_params_clean = flann_defaults.copy()
            flann_params_clean = ut.sort_dict(flann_defaults)
            ut.update_existing(flann_params_clean, nnindexer.flann_params)
            if noquery:
                ut.delete_dict_keys(flann_params_clean, ['checks'])
            shortnames = dict(algorithm='algo',
                              checks='chks',
                              random_seed='seed',
                              trees='t')
            short_params = ut.odict([
                (shortnames.get(key, key), str(val)[0:7])
                for key, val in six.iteritems(flann_params_clean)
            ])
            flann_valsig_ = ut.repr2(short_params,
                                     nl=False,
                                     explicit=True,
                                     strvals=True)
            flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
            # flann_valsig_ = str(list(flann_params.values()))
            # flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
            flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
        if use_data_hash:
            vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
            flann_cfgstr_list.append(vecs_hashstr)
        flann_cfgstr = ''.join(flann_cfgstr_list)
        return flann_cfgstr
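
The returned cfgstr is a cache key: any change to the FLANN parameters or the indexed vectors changes the string. A hypothetical sketch of how it might name a cache file (`cachedir` and the filename pattern are assumptions, not from the source):

# Hypothetical: derive a cache filename from the cfgstr so that changed
# params or data invalidate the cached index.
from os.path import join
cfgstr = nnindexer.get_cfgstr(noquery=True)
flann_cachefile = join(cachedir, 'flann_index' + cfgstr + '.flann')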
Example #5
    def get_cfgstr(nnindexer, noquery=False):
        r""" returns string which uniquely identified configuration and support data

        Args:
            noquery (bool): if True cfgstr is only relevant to building the
                index. No search params are returned (default = False)

        Returns:
            str: flann_cfgstr

        CommandLine:
            python -m ibeis.algo.hots.neighbor_index --test-get_cfgstr

        Example:
            >>> # DISABLE_DOCTEST
            >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
            >>> import ibeis
            >>> cfgdict = dict(fg_on=False)
            >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
            >>> qreq_.load_indexer()
            >>> nnindexer = qreq_.indexer
            >>> noquery = True
            >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
            >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
            >>> print(result)
            flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
        """
        flann_cfgstr_list = []
        use_params_hash = True
        use_data_hash = True
        if use_params_hash:
            flann_defaults = vt.get_flann_params(nnindexer.flann_params['algorithm'])
            #flann_params_clean = flann_defaults.copy()
            flann_params_clean = ut.sort_dict(flann_defaults)
            ut.updateif_haskey(flann_params_clean, nnindexer.flann_params)
            if noquery:
                ut.delete_dict_keys(flann_params_clean, ['checks'])
            shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t')
            short_params = ut.odict([(shortnames.get(key, key), str(val)[0:7])
                                     for key, val in six.iteritems(flann_params_clean)])
            flann_valsig_ = ut.dict_str(
                short_params, nl=False, explicit=True, strvals=True)
            flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
            #flann_valsig_ = str(list(flann_params.values()))
            #flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
            flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
        if use_data_hash:
            vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
            flann_cfgstr_list.append(vecs_hashstr)
        flann_cfgstr = ''.join(flann_cfgstr_list)
        return flann_cfgstr
Example #6
def compute_vocab(depc, fid_list, config):
    r"""
    Args:
        depc (dtool.DependencyCache):
        fid_list (list):
        config (dtool.Config):

    CommandLine:
        python -m ibeis.core_annots --exec-compute_neighbor_index --show
        python -m ibeis.control.IBEISControl --test-show_depc_annot_table_input --show --tablename=neighbor_index

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.new_annots import *  # NOQA
        >>> import ibeis
        >>> ibs, aid_list = ibeis.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> fid_list = depc.get_rowids('feat', aid_list)
        >>> config = VocabConfig()
        >>> vocab, train_vecs = ut.exec_func_src(compute_vocab, key_list=['vocab', 'train_vecs'])
        >>> idx2_vec = depc.d.get_feat_vecs(aid_list)[0]
        >>> self = vocab
        >>> ut.quit_if_noshow()
        >>> data = train_vecs
        >>> centroids = vocab.wx2_word
        >>> import plottool as pt
        >>> vt.plot_centroids(data, centroids, num_pca_dims=2)
        >>> ut.show_if_requested()
        >>> #config = ibs.depc_annot['vocab'].configclass()
        >>>
    """
    print('[IBEIS] COMPUTE_VOCAB:')
    vecs_list = depc.get_native('feat', fid_list, 'vecs')
    train_vecs = np.vstack(vecs_list)
    num_words = config['num_words']
    max_iters = 100
    print('[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors' %
          (num_words, len(fid_list), len(train_vecs)))
    flann_params = vt.get_flann_params(random_seed=42)
    kwds = dict(
        max_iters=max_iters,
        flann_params=flann_params
    )
    words = vt.akmeans(train_vecs, num_words, **kwds)
    vocab = VisualVocab(words)
    vocab.reindex()
    return vocab
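
Once the vocab is built, descriptors are assigned to their nearest visual words through the vocab's FLANN index. A minimal sketch, assuming the `wordflann` and `wx2_word` attributes seen in the surrounding examples and an index built by `vocab.reindex()`:

# Assumes vocab.wordflann is a built pyflann index over vocab.wx2_word
vecs = ibs.get_annot_vecs(aid_list[0])
wx_list, dists = vocab.wordflann.nn_index(vecs.astype(vocab.wx2_word.dtype), 1)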
Example #7
def compute_vocab(depc, fid_list, config):
    r"""
    Depcache method for computing a new visual vocab

    CommandLine:
        python -m wbia.core_annots --exec-compute_neighbor_index --show
        python -m wbia show_depc_annot_table_input --show --tablename=neighbor_index

        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:0
        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:1

        # FIXME make util_tests register
        python -m wbia.algo.smk.vocab_indexer compute_vocab:0

    Ignore:
        >>> # Lev Oxford Debug Example
        >>> import wbia
        >>> ibs = wbia.opendb('Oxford')
        >>> depc = ibs.depc
        >>> table = depc['vocab']
        >>> # Check what currently exists in vocab table
        >>> table.print_configs()
        >>> table.print_table()
        >>> table.print_internal_info()
        >>> # Grab aids used to compute vocab
        >>> from wbia.expt.experiment_helpers import get_annotcfg_list
        >>> expanded_aids_list = get_annotcfg_list(ibs, ['oxford'])[1]
        >>> qaids, daids = expanded_aids_list[0]
        >>> vocab_aids = daids
        >>> config = {'num_words': 64000}
        >>> exists = depc.check_rowids('vocab', [vocab_aids], config=config)
        >>> print('exists = %r' % (exists,))
        >>> vocab_rowid = depc.get_rowids('vocab', [vocab_aids], config=config)[0]
        >>> print('vocab_rowid = %r' % (vocab_rowid,))
        >>> vocab = table.get_row_data([vocab_rowid], 'words')[0]
        >>> print('vocab = %r' % (vocab,))

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> # Test depcache access
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> input_tuple = [aid_list]
        >>> rowid_kw = {}
        >>> tablename = 'vocab'
        >>> vocabid_list = depc.get_rowids(tablename, input_tuple, **rowid_kw)
        >>> vocab = depc.get(tablename, input_tuple, 'words')[0]
        >>> assert vocab.wordflann is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is vocab.wx_to_word

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> fid_list = depc.get_rowids('feat', aid_list)
        >>> config = VocabConfig()
        >>> vocab, train_vecs = ut.exec_func_src(compute_vocab, keys=['vocab', 'train_vecs'])
        >>> idx_to_vec = depc.d.get_feat_vecs(aid_list)[0]
        >>> self = vocab
        >>> ut.quit_if_noshow()
        >>> data = train_vecs
        >>> centroids = vocab.wx_to_word
        >>> import wbia.plottool as pt
        >>> vt.plot_centroids(data, centroids, num_pca_dims=2)
        >>> ut.show_if_requested()
        >>> #config = ibs.depc_annot['vocab'].configclass()

    """
    logger.info('[IBEIS] COMPUTE_VOCAB:')
    vecs_list = depc.get_native('feat', fid_list, 'vecs')
    train_vecs = np.vstack(vecs_list).astype(np.float32)
    num_words = config['num_words']
    logger.info(
        '[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors'
        % (num_words, len(fid_list), len(train_vecs)))
    if config['algorithm'] == 'kdtree':
        flann_params = vt.get_flann_params(random_seed=42)
        kwds = dict(max_iters=20, flann_params=flann_params)
        words = vt.akmeans(train_vecs, num_words, **kwds)
    elif config['algorithm'] == 'minibatch':
        logger.info('Using minibatch kmeans')
        import sklearn.cluster

        rng = np.random.RandomState(config['random_seed'])
        n_init = config['n_init']
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            init_size = int(num_words * 4)
            batch_size = 1000
            n_batches = ut.get_num_chunks(train_vecs.shape[0], batch_size)
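            # Budget roughly 30000 minibatch updates in total: max_iter counts
            # full passes over the data, and each pass is n_batches updates.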
            minibatch_params = dict(
                n_clusters=num_words,
                init='k-means++',
                init_size=init_size,
                n_init=n_init,
                max_iter=30000 // n_batches,
                batch_size=batch_size,
                tol=0.0,
                max_no_improvement=10,
                reassignment_ratio=0.01,
            )
            logger.info('minibatch_params = %s' %
                        (ut.repr4(minibatch_params), ))
            clusterer = sklearn.cluster.MiniBatchKMeans(compute_labels=False,
                                                        random_state=rng,
                                                        verbose=2,
                                                        **minibatch_params)
            try:
                clusterer.fit(train_vecs)
            except (Exception, KeyboardInterrupt) as ex:
                ut.printex(ex, tb=True)
                if ut.is_developer():
                    ut.embed()
                else:
                    raise
        words = clusterer.cluster_centers_
        logger.info('Finished clustering')
    # if False:
    #     flann_params['checks'] = 64
    #     flann_params['trees'] = 4
    #     num_words = 128
    #     centroids = vt.initialize_centroids(num_words, train_vecs, 'akmeans++')
    #     words, hist = vt.akmeans_iterations(
    #         train_vecs, centroids, max_iters=1000, monitor=True,
    #         flann_params=flann_params)

    logger.info('Constructing vocab')
    vocab = VisualVocab(words)
    logger.info('Building vocab index')
    vocab.build()
    logger.info('Returning vocab')
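    # The depcache stores one tuple of column values per row, hence the
    # 1-tuple return.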
    return (vocab, )
Example #8
def __init__(self, words=None):
    self.wx2_word = words
    self.wordflann = pyflann.FLANN()
    self.flann_params = vt.get_flann_params(random_seed=42)
def flann_add_time_experiment():
    """
    Builds a plot of number of annotations vs. indexer build time.

    TODO: time experiment

    CommandLine:
        python -m wbia.algo.hots._neighbor_experiment --test-flann_add_time_experiment --db PZ_MTEST --show
        python -m wbia.algo.hots._neighbor_experiment --test-flann_add_time_experiment --db PZ_Master0 --show
        utprof.py -m wbia.algo.hots._neighbor_experiment --test-flann_add_time_experiment --show

        valgrind --tool=memcheck --suppressions=valgrind-python.supp python -m wbia.algo.hots._neighbor_experiment --test-flann_add_time_experiment --db PZ_MTEST --no-with-reindex

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> import wbia
        >>> #ibs = wbia.opendb('PZ_MTEST')
        >>> result = flann_add_time_experiment()
        >>> # verify results
        >>> print(result)
        >>> ut.show_if_requested()

    """
    import wbia
    import utool as ut
    import numpy as np
    import pyflann
    import vtool as vt  # module aliasing assumed; varies across ibeis/wbia versions
    import wbia.plottool as pt

    def make_flann_index(vecs, flann_params):
        flann = pyflann.FLANN()
        flann.build_index(vecs, **flann_params)
        return flann

    db = ut.get_argval('--db')
    ibs = wbia.opendb(db=db)

    # Input
    if ibs.get_dbname() == 'PZ_MTEST':
        initial = 1
        reindex_stride = 16
        addition_stride = 4
        max_ceiling = 120
    elif ibs.get_dbname() == 'PZ_Master0':
        # ibs = wbia.opendb(db='GZ_ALL')
        initial = 32
        reindex_stride = 32
        addition_stride = 16
        max_ceiling = 300001
    else:
        assert False, 'unexpected dbname: %r' % (ibs.get_dbname(),)
    # max_ceiling = 32
    all_daids = ibs.get_valid_aids()
    max_num = min(max_ceiling, len(all_daids))
    flann_params = vt.get_flann_params()

    # Output
    count_list, time_list_reindex = [], []
    count_list2, time_list_addition = [], []

    # Setup
    # all_randomize_daids_ = ut.deterministic_shuffle(all_daids[:])
    all_randomize_daids_ = all_daids
    # ensure all features are computed
    ibs.get_annot_vecs(all_randomize_daids_)

    def reindex_step(count, count_list, time_list_reindex):
        daids = all_randomize_daids_[0:count]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        with ut.Timer(verbose=False) as t:
            flann = make_flann_index(vecs, flann_params)  # NOQA
        count_list.append(count)
        time_list_reindex.append(t.ellapsed)

    def addition_step(count, flann, count_list2, time_list_addition):
        daids = all_randomize_daids_[count:count + 1]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        with ut.Timer(verbose=False) as t:
            flann.add_points(vecs)
        count_list2.append(count)
        time_list_addition.append(t.ellapsed)

    def make_initial_index(initial):
        daids = all_randomize_daids_[0:initial + 1]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        flann = make_flann_index(vecs, flann_params)
        return flann

    WITH_REINDEX = not ut.get_argflag('--no-with-reindex')
    if WITH_REINDEX:
        # Reindex Part
        reindex_lbl = 'Reindexing'
        _reindex_iter = range(1, max_num, reindex_stride)
        reindex_iter = ut.ProgressIter(_reindex_iter, lbl=reindex_lbl, freq=1)
        for count in reindex_iter:
            reindex_step(count, count_list, time_list_reindex)

    # Add Part
    flann = make_initial_index(initial)
    addition_lbl = 'Addition'
    _addition_iter = range(initial + 1, max_num, addition_stride)
    addition_iter = ut.ProgressIter(_addition_iter, lbl=addition_lbl)
    for count in addition_iter:
        addition_step(count, flann, count_list2, time_list_addition)

    logger.info('---')
    logger.info('Reindex took %.2f seconds' % sum(time_list_reindex))
    logger.info('Addition took %.2f seconds' % sum(time_list_addition))
    logger.info('---')
    statskw = dict(precision=2, newlines=True)
    logger.info('Reindex stats ' +
                ut.get_stats_str(time_list_reindex, **statskw))
    logger.info('Addition stats ' +
                ut.get_stats_str(time_list_addition, **statskw))

    logger.info('Plotting')

    # with pt.FigureContext:

    next_fnum = iter(range(0, 2)).__next__  # Python 3: iterators use __next__, not .next
    pt.figure(fnum=next_fnum())
    if WITH_REINDEX:
        pt.plot2(
            count_list,
            time_list_reindex,
            marker='-o',
            equal_aspect=False,
            x_label='num_annotations',
            label=reindex_lbl + ' Time',
            dark=False,
        )

    # pt.figure(fnum=next_fnum())
    pt.plot2(
        count_list2,
        time_list_addition,
        marker='-o',
        equal_aspect=False,
        x_label='num_annotations',
        label=addition_lbl + ' Time',
    )

    pt.legend()