Exemplo n.º 1
0
 def store(self):
     """Return the ``Store`` cached on this object, creating it on first use.

     On first use, the ``feature_indexes`` entry of the ``dossier.store``
     global configuration is passed to the new ``Store``.  When looking
     it up raises ``KeyError``, ``None`` is passed instead.
     """
     # Fast path: the store has already been built by an earlier call.
     if self._store is not None:
         return self._store

     try:
         store_conf = yakonfig.get_global_config('dossier.store')
         indexes = store_conf['feature_indexes']
     except KeyError:
         # No usable index configuration; fall back to ``None``.
         indexes = None

     self._store = Store(kvlayer.client(), feature_indexes=indexes)
     return self._store
Exemplo n.º 2
0
 def __init__(self, *args, **kwargs):
     """Set up the feature-collection store and the TF-IDF model.

     All arguments are forwarded unchanged to the parent initializer.
     Afterwards ``self.store`` holds a ``Store`` built on a fresh kvlayer
     client, and ``self.tfidf`` holds the gensim ``TfidfModel`` loaded
     from the ``tfidf_path`` entry of ``self.config``.
     """
     super(to_dossier_store, self).__init__(*args, **kwargs)

     client = kvlayer.client()
     try:
         store_conf = yakonfig.get_global_config('dossier.store')
         indexes = store_conf['feature_indexes']
     except KeyError:
         # No usable index configuration; fall back to ``None``.
         indexes = None
     self.store = Store(client, feature_indexes=indexes)

     # NOTE(review): ``self.config`` is presumably populated by the parent
     # initializer -- confirm.  A missing ``tfidf_path`` yields ``None``
     # here, which is handed to ``load`` as-is.
     model_path = self.config.get('tfidf_path')
     self.tfidf = gensim.models.TfidfModel.load(model_path)
Exemplo n.º 3
0
def test_one_to_many_indexing(kvl):  # noqa
    """Check that a single index name can cover several features."""
    # The `foo` index is configured to automatically index the values of
    # both the `bar` and `baz` features, so a scan on `foo` is expected to
    # match values stored under either of them.
    store = Store(kvl, feature_indexes=[{'foo': ['bar', 'baz']}])

    # `x` only carries a feature that the `foo` index does not cover.
    fcx = FC()
    fcx['unrelated']['a'] = 1

    # `y` has the value `a` under both indexed features, plus `c` in `baz`.
    fcy = FC()
    fcy['bar']['a'] = 1
    fcy['baz']['a'] = 1
    fcy['baz']['c'] = 1

    # `z` only uses the `baz` feature.
    fcz = FC()
    fcz['baz']['a'] = 1
    fcz['baz']['b'] = 1

    store.put([('x', fcx), ('y', fcy), ('z', fcz)])

    expectations = [
        ('a', ['y', 'z']),
        ('b', ['z']),
        ('c', ['y']),
    ]
    for value, content_ids in expectations:
        assert list(store.index_scan('foo', value)) == content_ids
Exemplo n.º 4
0
def fcstore(kvl):
    """Return a ``Store`` wrapping ``kvl``, without passing ``feature_indexes``."""
    plain_store = Store(kvl)
    return plain_store
Exemplo n.º 5
0
def store(kvl):
    """Yield a ``Store`` indexing ``feature``, then wipe it afterwards.

    The store is built on ``kvl`` with ``feature_indexes=[u'feature']``.
    Once the consumer resumes this generator after the ``yield``,
    ``delete_all()`` is called on that same store.
    """
    indexed_store = Store(kvl, feature_indexes=[u'feature'])
    yield indexed_store
    # Cleanup step: runs only when the generator is resumed.
    indexed_store.delete_all()
Exemplo n.º 6
0
# There are more backends available like MySQL, PostgreSQL and Accumulo.
#
# See: https://github.com/diffeo/kvlayer

# !!! IMPORTANT !!!
# Define the features that you want to index. This lets you quickly scan
# the database for feature collections with matching values.
#
# You don't have to index everything, but it's probably a good idea to index
# the most prominent features, e.g., phone or email or website.
#
# Each entry should be the name of the feature it indexes.
feature_indexes = [u'phone', u'email', u'website', u'rate']

# Create a "store," which knows how to store and index feature collections.
store = Store(conn, feature_indexes=feature_indexes)

# Create a fresh feature collection and add a 'rate' feature.
fc = FeatureCollection()
fc['rate'] = StringCounter({
    u'5per30': 5,
    u'5per60': 1,
    u'10per20': 2,
})

# Content ids are the unique identifier for each feature collection.
# It's probably sufficient to use whatever you have for "ad id."
content_id = 'some_unique_value'
store.put([(content_id, fc)])

# Read the collection back and print it. The parenthesized single-argument
# form is valid on Python 3 and prints the same thing on Python 2, where
# the bare `print x` statement was the only form that parsed.
print(store.get(content_id))