Example #1
def learn_visual_words(annots_df, taids, nWords, use_cache=USE_CACHE_WORDS,
                       with_pandas=WITH_PANDAS):
    """
    Computes visual words
    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, taids, daids, qaids, nWords = smk_debug.testdata_dataframe()
    >>> use_cache = USE_CACHE_WORDS
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> print(words.shape)
    (8000, 128)
    """
    max_iters = 200
    flann_params = {}
    train_vecs_list = [pdh.ensure_values(vecs) for vecs in annots_df['vecs'][taids].values]
    train_vecs = np.vstack(train_vecs_list)
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    kwds = dict(max_iters=max_iters, use_cache=use_cache, appname='smk',
                flann_params=flann_params)
    _words = clustertool.cached_akmeans(train_vecs, nWords, **kwds)
    if with_pandas:
        # Pandasify
        wx_series = pdh.RangeIndex(len(_words), name='wx')
        #words = pd.DataFrame(_words, index=wx_series, columns=VEC_COLUMNS)
        words = pd.DataFrame(_words, index=wx_series)
    else:
        words = _words
    return words
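Note: clustertool.cached_akmeans is ibeis-specific (approximate k-means with disk caching). For readers without that stack, here is a minimal, hypothetical sketch of the same vocabulary-learning step that substitutes scikit-learn's MiniBatchKMeans for cached_akmeans; the function name and defaults are illustrative, not part of ibeis:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

def learn_visual_words_sketch(train_vecs, nWords=8000, max_iters=200, seed=0):
    # Cluster the stacked descriptors; each cluster center is one visual word.
    km = MiniBatchKMeans(n_clusters=nWords, max_iter=max_iters,
                         random_state=seed, n_init=3)
    km.fit(train_vecs.astype(np.float32))
    return km.cluster_centers_  # shape (nWords, 128) for SIFT descriptors

# Usage mirrors the example: stack per-annotation descriptors, then cluster.
# train_vecs = np.vstack(train_vecs_list)
# words = learn_visual_words_sketch(train_vecs, nWords=8000)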
Example #2
def learn_visual_words(annots_df, train_aids, nCentroids):
    vecs_list = annots_df['vecs'][train_aids].values  # .as_matrix() was removed in modern pandas
    train_vecs = np.vstack(vecs_list)
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nCentroids, len(train_aids), len(train_vecs)))
    words = clustertool.cached_akmeans(train_vecs, nCentroids, max_iters=100)
    return words
Example #3
def train_paris_vocab(ibs):
    """
    CommandLine:
        python dev.py --db Paris --cmd
    """
    # UNFINISHED
    aid_list = []
    # use only one annotation per image
    for aids in ibs.get_image_aids(ibs.get_valid_gids()):
        if len(aids) == 1:
            aid_list.append(aids[0])
        else:
            # use the annot with the largest area
            aid_list.append(aids[np.argmax(ibs.get_annot_bbox_area(aids))])

    vecs_list = ibs.get_annot_vecs(aid_list)
    vecs = np.vstack(vecs_list)
    nWords = 8000
    from vtool import clustering2 as clustertool
    print('vecs are: %r' % utool.get_object_size_str(vecs))

    _words = clustertool.cached_akmeans(vecs, nWords, max_iters=500, use_cache=True, appname='smk')  # NOQA

    vec_mean = vecs.mean(axis=0).astype(np.float32)
    vec_mean.shape = (1, vec_mean.shape[0])
    vecs_centered = vecs - vec_mean
    # L2-normalize each centered descriptor row
    norm_ = np.linalg.norm(vecs_centered, axis=1)
    norm_.shape = (norm_.size, 1)
    vecs_norm = np.divide(vecs_centered, norm_)  # , out=out)
    print('vecs_centered are: %r' % utool.get_object_size_str(vecs_centered))
    vecs_post = np.round(128 * np.sqrt(np.abs(vecs_norm)) * np.sign(vecs_norm)).astype(np.int8)  # NOQA
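The tail of this example is a RootSIFT-style post-processing pass: center the descriptors, L2-normalize each row, apply a signed square root, and quantize to int8. A self-contained sketch of that transform (the helper name is illustrative):

import numpy as np

def signed_sqrt_postprocess(vecs):
    # Center descriptors on the dataset mean.
    vecs = vecs.astype(np.float32)
    centered = vecs - vecs.mean(axis=0, keepdims=True)
    # L2-normalize each descriptor row, guarding against zero rows.
    norms = np.linalg.norm(centered, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    unit = centered / norms
    # The signed square root compresses large components; scale by 128 and
    # quantize to int8, mirroring the example above.
    return np.round(128 * np.sqrt(np.abs(unit)) * np.sign(unit)).astype(np.int8)

# Example on random SIFT-like uint8 descriptors:
demo = np.random.default_rng(0).integers(0, 256, size=(10, 128), dtype=np.uint8)
print(signed_sqrt_postprocess(demo).shape)  # (10, 128)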
Example #4
def learn_visual_words(annots_df, taids, nWords, use_cache=USE_CACHE_WORDS):
    """
    Computes visual words
    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, taids, daids, qaids, nWords = smk_debug.testdata_dataframe()
    >>> use_cache = USE_CACHE_WORDS
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> print(words.shape)
    (8000, 128)
    """
    max_iters = 200
    flann_params = {}
    train_vecs_list = [
        pdh.ensure_values(vecs) for vecs in annots_df['vecs'][taids].values
    ]
    train_vecs = np.vstack(train_vecs_list)
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    kwds = dict(max_iters=max_iters,
                use_cache=use_cache,
                appname='smk',
                flann_params=flann_params)
    _words = clustertool.cached_akmeans(train_vecs, nWords, **kwds)
    if WITH_PANDAS:
        # Pandasify
        wx_series = pdh.RangeIndex(len(_words), name='wx')
        #words = pd.DataFrame(_words, index=wx_series, columns=VEC_COLUMNS)
        words = pd.DataFrame(_words, index=wx_series)
    else:
        words = _words
    return words
Example #5
def learn_visual_words(annots_df, taids, nWords, use_cache=True):
    """
    Computes visual words
    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    """
    vecs_list = annots_df['vecs'][taids].values  # .as_matrix() was removed in modern pandas
    train_vecs = np.vstack(vecs_list)
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    cache_dir = utool.get_app_resource_dir('smk')
    words = clustertool.cached_akmeans(train_vecs, nWords, max_iters=100,
                                       use_cache=use_cache, cache_dir=cache_dir)
    return words
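The use_cache/cache_dir arguments imply a hash-keyed disk cache around the clustering call. A hypothetical sketch of that pattern (not ibeis's actual implementation), keyed on the training data and word count:

import hashlib
import os
import numpy as np

def cached_kmeans_sketch(train_vecs, nWords, cache_dir, compute_fn):
    # Key the cache on the exact training bytes and the requested word count.
    key = hashlib.sha1(train_vecs.tobytes()).hexdigest()[:16]
    cache_path = os.path.join(cache_dir, 'words_%s_%d.npy' % (key, nWords))
    if os.path.exists(cache_path):
        return np.load(cache_path)  # cache hit: skip the expensive clustering
    words = compute_fn(train_vecs, nWords)
    os.makedirs(cache_dir, exist_ok=True)
    np.save(cache_path, words)
    return words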
Example #6
File: smk1.py Project: whaozl/ibeis
def learn_visual_words(annots_df, taids, nWords, use_cache=True):
    """
    Computes visual words
    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    """
    vecs_list = annots_df['vecs'][taids].values  # .as_matrix() was removed in modern pandas
    train_vecs = np.vstack(vecs_list)
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    cache_dir = utool.get_app_resource_dir('smk')
    words = clustertool.cached_akmeans(train_vecs,
                                       nWords,
                                       max_iters=100,
                                       use_cache=use_cache,
                                       cache_dir=cache_dir)
    return words
Example #7
def learn_visual_words(ibs, config2_=None, use_cache=USE_CACHE_WORDS, memtrack=None):
    """
    Computes and caches visual words

    Args:
        ibs (IBEISController): ibeis controller object
        config2_ (QueryParams): query parameter object with vocab
            hyper-parameters (default = None)
        use_cache (bool): turns on disk-based caching (default = True)
        memtrack (None): optional memory tracker (default = None)

    Returns:
        ndarray[uint8_t, ndim=2]: words - aggregate descriptor cluster centers

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> use_cache = True
        >>> words = learn_visual_words(ibs, qreq_)
        >>> print(words.shape)
        (8000, 128)

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_Master1')
        >>> config2_ = ibs.new_query_params(cfgdict=dict(nWords=128000))
        >>> use_cache = True
        >>> words = learn_visual_words(ibs, config2_)
        >>> print(words.shape)
        (128000, 128)

    Auto:
        from ibeis.algo.hots.smk import smk_index
        import utool as ut
        argdoc = ut.make_default_docstr(smk_index.learn_visual_words)
        print(argdoc)
    """
    #if memtrack is None:
    #    memtrack = ut.MemoryTracker('[learn_visual_words]')
    #config2_ = qreq_.get_external_data_config2()
    nWords = config2_.nWords
    # TODO: Incorporate taids (vocab training ids) into qreq
    if config2_.vocab_taids == 'all':
        taids = ibs.get_valid_aids(species=ibs.get_primary_database_species())  # exemplar
    else:
        taids = config2_.vocab_taids
    initmethod   = config2_.vocab_init_method
    max_iters    = config2_.vocab_nIters
    flann_params = config2_.vocab_flann_params
    train_vecs_list = ibs.get_annot_vecs(taids, eager=True, config2_=config2_)
    #memtrack.track_obj(train_vecs_list[0], 'train_vecs_list[0]')
    #memtrack.report('loaded trainvecs')
    train_vecs = np.vstack(train_vecs_list)
    #memtrack.track_obj(train_vecs, 'train_vecs')
    #memtrack.report('stacked trainvecs')
    del train_vecs_list
    print('[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    kwds = dict(max_iters=max_iters, use_cache=use_cache,
                initmethod=initmethod, appname='smk',
                flann_params=flann_params)
    words = clustertool.cached_akmeans(train_vecs, nWords, **kwds)
    del train_vecs
    del kwds
    #memtrack.report('returning words')
    #del train_vecs_list
    return words
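After the vocabulary is learned, the SMK pipeline assigns each descriptor to its nearest word (ibeis does this with FLANN, configured via flann_params). A minimal sketch of that assignment step using scikit-learn's exact NearestNeighbors instead; the helper name is illustrative:

import numpy as np
from sklearn.neighbors import NearestNeighbors

def assign_to_words_sketch(words, vecs):
    # Build an exact nearest-neighbor index over the word centroids.
    nn = NearestNeighbors(n_neighbors=1).fit(words.astype(np.float32))
    _, wx = nn.kneighbors(vecs.astype(np.float32))
    return wx.ravel()  # one word index (wx) per descriptor

# Usage: wx = assign_to_words_sketch(words, train_vecs)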
Example #8
def learn_visual_words(ibs, config2_=None, use_cache=USE_CACHE_WORDS, memtrack=None):
    """
    Computes and caches visual words

    Args:
        ibs (IBEISController): ibeis controller object
        config2_ (QueryParams): query parameter object with vocab
            hyper-parameters (default = None)
        use_cache (bool): turns on disk-based caching (default = True)
        memtrack (None): optional memory tracker (default = None)

    Returns:
        ndarray[uint8_t, ndim=2]: words - aggregate descriptor cluster centers

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> use_cache = True
        >>> words = learn_visual_words(ibs, qreq_)
        >>> print(words.shape)
        (8000, 128)

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_Master1')
        >>> config2_ = ibs.new_query_params(cfgdict=dict(nWords=128000))
        >>> use_cache = True
        >>> words = learn_visual_words(ibs, config2_)
        >>> print(words.shape)
        (128000, 128)

    Auto:
        from ibeis.algo.hots.smk import smk_index
        import utool as ut
        argdoc = ut.make_default_docstr(smk_index.learn_visual_words)
        print(argdoc)
    """
    #if memtrack is None:
    #    memtrack = ut.MemoryTracker('[learn_visual_words]')
    #config2_ = qreq_.extern_data_config2
    nWords = config2_.nWords
    # TODO: Incorporate taids (vocab training ids) into qreq
    if config2_.vocab_taids == 'all':
        taids = ibs.get_valid_aids(species=ibs.get_primary_database_species())  # exemplar
    else:
        taids = config2_.vocab_taids
    initmethod   = config2_.vocab_init_method
    max_iters    = config2_.vocab_nIters
    flann_params = config2_.vocab_flann_params
    train_vecs_list = ibs.get_annot_vecs(taids, eager=True, config2_=config2_)
    #memtrack.track_obj(train_vecs_list[0], 'train_vecs_list[0]')
    #memtrack.report('loaded trainvecs')
    train_vecs = np.vstack(train_vecs_list)
    #memtrack.track_obj(train_vecs, 'train_vecs')
    #memtrack.report('stacked trainvecs')
    del train_vecs_list
    print('[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    kwds = dict(max_iters=max_iters, use_cache=use_cache,
                initmethod=initmethod, appname='smk',
                flann_params=flann_params)
    words = clustertool.cached_akmeans(train_vecs, nWords, **kwds)
    del train_vecs
    del kwds
    #memtrack.report('returning words')
    #del train_vecs_list
    return words