def test_optional_indexing(store, fcs):
    """Putting with ``indexes=False`` stores the FCs but writes no indexes."""
    store.put(fcs)
    foo1 = FC({
        'NAME': {'Foo Bar': 1},
        'boNAME': {'bruce': 1, 'patti': 1, 'foo': 1, 'bar': 1},
    })
    foo2 = FC({
        'NAME': {'Foo Baz': 1},
        'boNAME': {'foo': 1, 'baz': 1},
    })
    store.put([('foo1', foo1), ('foo2', foo2)], indexes=False)

    # The FCs themselves are still retrievable by id.
    assert store.get('foo1') == foo1
    assert store.get('foo2') == foo2

    # Scans see only the originally indexed fixtures; the un-indexed
    # foo1/foo2 never show up as scan hits.
    assert frozenset(store.keyword_scan_ids('foo1')) \
        == frozenset(['boss', 'patti'])
    assert frozenset(store.index_scan_ids('boNAME', 'patti')) \
        == frozenset(['patti'])
def test_index_key_flip(fcstore):
    """A scan on one index must not return hits from another index."""
    fca, fcb = FC(), FC()
    fca[u'a']['foo'] = 1
    fca[u'b']['foo'] = 1
    # Both indexes use the same lowercase/UTF-8 key transform.
    lower_utf8 = lambda s: s.lower().encode('utf-8')
    fcstore.define_index(u'a', feature_index('a'), lower_utf8)
    fcstore.define_index(u'b', feature_index('b'), lower_utf8)
    fcstore.put([('fca', fca), ('fcb', fcb)])
    assert list(fcstore.index_scan(u'a', u'foo')) == ['fca']
def fcs_texts(fcs):
    """Fixture FCs that additionally carry full-text ``body`` features."""
    return [
        ('boss', FC({
            'NAME': {'Bruce Springsteen': 2, 'The Boss': 1},
            'boNAME': {'bruce': 2, 'springsteen': 5, 'the': 1, 'boss': 1},
            'body': {u"The screen door slams, Mary's dress sways": 1},
            'body2': {u"Like a vision she dances across the porch": 1},
        })),
        ('patti', FC({
            'NAME': {'Patti Scialfa': 1},
            'boNAME': {'patti': 10, 'scialfa': 1},
            'body': {u"I come from down in the valley": 1},
        })),
        ('big-man', FC({
            'NAME': {'Clarence Clemons': 8, 'The Big Man': 1},
            'boNAME': {'clarence': 8, 'clemons': 8, 'the': 1, 'big': 1,
                       'man': 1},
            'body': {u"Drinking warm beer in the soft summer rain": 1},
        })),
    ]
def mk_fc_names(*names):
    """Build an FC whose canonical name is ``names[0]`` and whose ``NAME``
    feature counts every given name once."""
    assert names
    fc = FC()
    fc[u'canonical_name'][names[0]] = 1
    for name in names:
        fc[u'NAME'][name] += 1
    return fc
def test_fulltext_scan(store, fcs_texts):
    """Full-text queries against ``body`` return the expected ids."""
    store.put(fcs_texts)

    def hits(term):
        # fulltext_scan_ids yields (score, id) pairs; collect the ids.
        q = FC({u'body': {term: 1}})
        return frozenset(r[1] for r in store.fulltext_scan_ids(query_fc=q))

    assert hits(u'valley') == frozenset(['patti'])
    assert hits(u'in') == frozenset(['patti', 'big-man'])
    assert hits(u"mary's") == frozenset(['boss'])
def test_scan_all_weird(store):
    """scan_ids yields keys in sorted order regardless of insert order."""
    keys = [
        '99bc49e2492a48cb9179d70d3c11ea13',
        'd858432fd50f4cb5a01af290358cb0d1',
        'be75888da4854c15859692b6db590f55',
    ]
    assert list(store.scan_ids()) == []
    store.put([(key, FC()) for key in keys])
    assert list(store.scan_ids()) == sorted(keys)
def test_one_to_many_indexing(kvl):  # noqa
    """One named index may cover several features.

    The config defines an index named ``foo`` that automatically indexes
    values from the ``bar`` and ``baz`` features, so an index scan on
    ``foo`` matches values found in either feature.
    """
    store = Store(kvl, feature_indexes=[{'foo': ['bar', 'baz']}])
    fcx, fcy, fcz = FC(), FC(), FC()
    fcx['unrelated']['a'] = 1
    fcy['bar']['a'] = 1
    fcy['baz']['a'] = 1
    fcy['baz']['c'] = 1
    fcz['baz']['a'] = 1
    fcz['baz']['b'] = 1
    store.put([('x', fcx), ('y', fcy), ('z', fcz)])
    assert list(store.index_scan('foo', 'a')) == ['y', 'z']
    assert list(store.index_scan('foo', 'b')) == ['z']
    assert list(store.index_scan('foo', 'c')) == ['y']
def test_fulltext_scan_indexes(store, fcs_texts):
    """The ``indexes`` whitelist restricts which features are searched."""
    store.put(fcs_texts)

    # A whitelisted feature is searchable.
    query = FC({u'body': {u'valley': 1}})
    got = frozenset(
        r[1] for r in store.fulltext_scan_ids(query_fc=query,
                                              indexes=['body']))
    assert got == frozenset(['patti'])

    # An empty whitelist searches nothing at all.
    query = FC({u'body': {u'in': 1}})
    assert list(store.fulltext_scan_ids(query_fc=query, indexes=[])) == []

    # Asking for a feature with no fulltext index raises ValueError.
    query = FC({u'body': {u'in': 1}})
    with pytest.raises(ValueError):
        list(store.fulltext_scan_ids(query_fc=query,
                                     indexes=['body', 'i-dont-exist']))
def test_keyword_scan_emphemeral(store, fcs):
    """A keyword scan works with an ephemeral query FC that was never put."""
    # NOTE(review): "emphemeral" in the name is a typo for "ephemeral";
    # kept as-is so selecting this test by name keeps working.
    store.put(fcs)
    query_id = 'pattim'
    query_fc = FC({'NAME': {'Patti Mayonnaise': 1}})
    # No boNAME overlap yet, so nothing matches.
    assert frozenset(store.keyword_scan_ids(query_id, query_fc)) \
        == frozenset()
    query_fc['boNAME']['patti'] += 1
    query_fc['boNAME']['mayonnaise'] += 1
    # 'patti' now overlaps the stored 'patti' FC's boNAME feature.
    assert frozenset(store.keyword_scan_ids(query_id, query_fc)) \
        == frozenset(['patti'])
def test_keyword_scan_partial(store, fcs):
    """keyword_scan can restrict returned FCs to a feature whitelist."""
    store.put(fcs)
    assert_set_eq(store.keyword_scan('boss'),
                  [('big-man', fcget(fcs, 'big-man'))])
    # With feature_names=['NAME'], only the NAME feature comes back.
    expected = FC({'NAME': {'Clarence Clemons': 8, 'The Big Man': 1}})
    assert_set_eq(store.keyword_scan('boss', feature_names=['NAME']),
                  [('big-man', expected)])
def test_geotime_filter():
    """The geotime predicate keeps only FCs inside the lat/lon/time box."""
    fname = '!both_co_LOC_1'
    # Each entry is (lat, lon, alt?, time); only fc2 has a point that
    # satisfies the box AND the min_time constraint below.
    coords = [
        GeoCoords({'foo': [(10, 10, 10, None)]}),
        GeoCoords({'foo': [(10, 10, 10, None), (-10, 10, 10, 10)]}),
        GeoCoords({'foo': [(-10, 10, 10, None), (10, 10, 10, 10)]}),
    ]
    fc1, fc2, fc3 = FC(), FC(), FC()
    for fc, gc in zip((fc1, fc2, fc3), coords):
        fc[fname] = gc
    pred = geotime().set_query_params({
        'min_lat': 0,
        'max_lat': 20,
        'min_lon': -20,
        'max_lon': 0,
        'min_time': 0,
    }).create_predicate()
    candidates = [('', fc1), ('', fc2), ('', fc3)]
    results = [pair for pair in candidates if pred(pair)]
    assert len(results) == 1
    assert results[0][1] == fc2
def fcs():
    """Baseline fixture: three FCs with NAME and bag-of-names features."""
    return [
        ('boss', FC({
            'NAME': {'Bruce Springsteen': 2, 'The Boss': 1},
            'boNAME': {'bruce': 2, 'springsteen': 5, 'the': 1, 'boss': 1},
        })),
        ('patti', FC({
            'NAME': {'Patti Scialfa': 1},
            'boNAME': {'patti': 10, 'scialfa': 1},
        })),
        ('big-man', FC({
            'NAME': {'Clarence Clemons': 8, 'The Big Man': 1},
            'boNAME': {'clarence': 8, 'clemons': 8, 'the': 1, 'big': 1,
                       'man': 1},
        })),
    ]
def test_scan_all_random(store):
    """scan_ids returns randomly generated keys in sorted order."""
    import random
    words = []
    for _ in xrange(100):
        word_len = random.randrange(2, 20)
        word = ''.join([
            chr(random.randrange(ord('a'), ord('z') + 1))
            for _ in xrange(word_len)
        ])
        words.append(word)
    # Random words can collide; de-duplicate before comparing.
    keys = list(set(words))
    assert list(store.scan_ids()) == []
    store.put([(key, FC()) for key in keys])
    assert list(store.scan_ids()) == sorted(keys)
def test_fulltext_mapping_keyword(elastic_address, namespace_string,
                                  fcs_texts):
    """A fulltext index mapping ('body' also covering 'body2') is honored."""
    store = ElasticStoreSync(
        hosts=elastic_address,
        namespace=NAMESPACE,
        type=namespace_string,
        fulltext_indexes=[{'body': ['body2']}],
    )
    try:
        store.put(fcs_texts)
        # 'vision' occurs only in body2, but the mapping folds body2
        # into searches against 'body'.
        query = FC({'body': {'vision': 1}})
        got = frozenset(
            r[1] for r in store.fulltext_scan_ids(query_fc=query))
        assert got == frozenset(['boss'])
    finally:
        # Always clean up the shared Elasticsearch namespace.
        store.delete_all()
def test_index_mapping_keyword(elastic_address, namespace_string, fcs):
    """A keyword index mapping ('NAME' also covering 'boNAME') is honored."""
    indexes = [
        {'NAME': {
            'es_index_type': 'string',
            'feature_names': ['NAME', 'boNAME'],
        }},
        {'boNAME': {
            'es_index_type': 'string',
            'feature_names': [],
        }},
    ]
    store = ElasticStoreSync(hosts=elastic_address,
                             namespace=NAMESPACE,
                             type=namespace_string,
                             feature_indexes=indexes)
    try:
        store.put(fcs)
        # 'The Boss' matches via NAME; 'clarence' via the mapped boNAME.
        query = FC({'NAME': {'The Boss': 1, 'clarence': 1}})
        assert frozenset(store.keyword_scan_ids('ephemeral', query)) \
            == frozenset(['boss', 'big-man'])
    finally:
        # Always clean up the shared Elasticsearch namespace.
        store.delete_all()
def test_put_overwrite(store, fcs):
    """A second put under an existing key replaces the stored FC."""
    store.put(fcs)
    replacement = FC({'NAME': {'foo': 1, 'bar': 1}})
    store.put([('boss', replacement)])
    assert store.get('boss') == replacement
def test_scan_ids(store):
    """scan_ids enumerates every stored key exactly once."""
    expected = 'abcdefghijklmno'
    store.put([(ch, FC()) for ch in expected])
    assert ''.join(sorted(store.scan_ids())) == expected
def test_byte_keys(store):
    """Keys containing arbitrary non-text bytes are accepted by put."""
    fc = FC({'NAME': {'Foo Bar': 1}})
    store.put([('\x00\xff\xf4', fc)])
def fc_from_dict(self, fc_dict):
    """Decode a mapping of base64-encoded CBOR feature blobs into an FC."""
    decoded = {
        name: cbor.loads(base64.b64decode(blob))
        for name, blob in fc_dict.iteritems()
    }
    return FC(decoded)