def store(self):
    """Return the feature collection store, building it on first access.

    The store is cached on ``self._store``; later calls return the cached
    object without consulting the configuration again.

    Feature indexes are read from the ``feature_indexes`` key of the
    ``dossier.store`` global configuration. If either lookup raises
    ``KeyError``, ``None`` is passed to ``Store`` instead.
    """
    if self._store is not None:
        return self._store

    try:
        store_config = yakonfig.get_global_config('dossier.store')
        indexes = store_config['feature_indexes']
    except KeyError:
        # No usable index configuration: hand `None` to Store.
        indexes = None

    self._store = Store(kvlayer.client(), feature_indexes=indexes)
    return self._store
def __init__(self, *args, **kwargs):
    """Set up the feature collection store and load the TF-IDF model.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initializer.

    Feature indexes are read from the ``feature_indexes`` key of the
    ``dossier.store`` global configuration. If either lookup raises
    ``KeyError``, ``None`` is passed to ``Store`` instead.

    The TF-IDF model is loaded from the path stored under ``tfidf_path``
    in ``self.config``.
    """
    super(to_dossier_store, self).__init__(*args, **kwargs)

    # The storage client is created before the configuration is consulted.
    client = kvlayer.client()
    try:
        store_config = yakonfig.get_global_config('dossier.store')
        indexes = store_config['feature_indexes']
    except KeyError:
        # No usable index configuration: hand `None` to Store.
        indexes = None
    self.store = Store(client, feature_indexes=indexes)

    model_path = self.config.get('tfidf_path')
    self.tfidf = gensim.models.TfidfModel.load(model_path)
def test_one_to_many_indexing(kvl):  # noqa
    """Check that one index name can cover values from several features."""
    # The `foo` index is configured to index the values of both the `bar`
    # and `baz` features, so a scan on `foo` matches a value found in
    # either of those features.
    indexes = [{'foo': ['bar', 'baz']}]
    store = Store(kvl, feature_indexes=indexes)

    # `x` only carries a feature that is not part of the `foo` index.
    fcx = FC()
    fcx['unrelated']['a'] = 1

    # `y` has `a` in both indexed features and `c` in `baz`.
    fcy = FC()
    fcy['bar']['a'] = 1
    fcy['baz']['a'] = 1
    fcy['baz']['c'] = 1

    # `z` has `a` and `b` in `baz` only.
    fcz = FC()
    fcz['baz']['a'] = 1
    fcz['baz']['b'] = 1

    store.put([('x', fcx), ('y', fcy), ('z', fcz)])

    expected_hits = [
        ('a', ['y', 'z']),
        ('b', ['z']),
        ('c', ['y']),
    ]
    for value, content_ids in expected_hits:
        assert list(store.index_scan('foo', value)) == content_ids
def fcstore(kvl):
    """Build a ``Store`` on ``kvl`` without passing any feature indexes.

    :param kvl: storage client handed straight to ``Store``
        (presumably a kvlayer client -- confirm against the caller)
    :returns: the newly constructed ``Store``
    """
    feature_store = Store(kvl)
    return feature_store
def store(kvl):
    """Yield a ``Store`` that indexes ``feature``, then clear it.

    The store is created on ``kvl`` with a single feature index named
    ``feature``. When the generator is resumed after the ``yield``,
    ``delete_all()`` is called on that same store.

    NOTE(review): this looks like a pytest yield-fixture; the decorator
    is not visible here -- confirm.
    """
    indexed_store = Store(kvl, feature_indexes=[u'feature'])
    yield indexed_store
    # Runs once the consumer resumes the generator.
    indexed_store.delete_all()
# There are more backends available like MySQL, PostgreSQL and Accumulo.
#
# See: https://github.com/diffeo/kvlayer

# !!! IMPORTANT !!!
# Define features that you want to index. This will let you quickly scan
# for feature collections in the database with matching values.
#
# You don't have to index everything, but it's probably a good idea to index
# the most prominent features. e.g., phone or email or website.
#
# These should correspond to the names of the corresponding features.
feature_indexes = [u'phone', u'email', u'website', u'rate']

# Create a "store," which knows how to store and index feature collections.
store = Store(conn, feature_indexes=feature_indexes)

# Create a fresh feature collection and add a 'rate' feature.
fc = FeatureCollection()
fc['rate'] = StringCounter({
    u'5per30': 5,
    u'5per60': 1,
    u'10per20': 2,
})

# Content ids are the unique identifier for each feature collection.
# It's probably sufficient to use whatever you have for "ad id."
content_id = 'some_unique_value'
store.put([(content_id, fc)])

# Read the collection back by its content id. The parenthesized,
# single-argument form prints the same thing on Python 2 and is also
# valid syntax on Python 3, where the bare `print x` statement is not.
print(store.get(content_id))