def test_index_document_with_skip_digit_false(config):
    """Index DOC with ``TRIGRAM_SKIP_DIGIT`` set to False and check digit keys.

    The ``w|123``, ``w|234`` and ``w|345`` keys must exist afterwards and the
    index must hold exactly 17 keys.
    """
    from addok.helpers.index import _CACHE

    # Do this in addok.pytest teardown?
    _CACHE.clear()
    config.TRIGRAM_SKIP_DIGIT = False
    index_document(DOC.copy())
    for digit_key in ('w|123', 'w|234', 'w|345'):
        assert DB.exists(digit_key)
    assert len(DB.keys()) == 17
def test_deindex_document_should_deindex_list_values():
    """Index then deindex a document whose ``name`` is a list of strings.

    None of the checked keys may survive and the index must end up empty.
    """
    street = {
        'id': 'xxxx',
        'type': 'street',
        'name': ['Vernou-la-Celle-sur-Seine', 'Vernou'],
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
    }
    index_document(street)
    deindex_document(street['id'])
    for stale_key in ('d|xxxx', 'w|vernou', 'w|celle'):
        assert not DB.exists(stale_key)
    assert len(DB.keys()) == 0
def test_deindex_document_should_deindex_list_values():
    """Index then deindex a document whose ``name`` is a list of strings.

    The ``d|xxxx`` key is looked up through ``ds._DB``; the word keys and the
    final key count are checked through ``DB``.
    """
    street = {
        'id': 'xxxx',
        'type': 'street',
        'name': ['Vernou-la-Celle-sur-Seine', 'Vernou'],
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
    }
    index_document(street)
    deindex_document(street['id'])
    assert not ds._DB.exists('d|xxxx')
    for stale_key in ('w|vernou', 'w|celle'):
        assert not DB.exists(stale_key)
    assert len(DB.keys()) == 0
def test_index_document_without_explicit_id():
    """Index a copy of DOC stripped of ``_id`` and expect it under ``d|jR``."""
    anonymous = DOC.copy()
    del anonymous['_id']
    index_document(anonymous)
    stored_key = 'd|jR'
    assert ds._DB.exists(stored_key)
    assert ds._DB.type(stored_key) == b'string'
    assert DB.exists('w|rue')
def test_should_be_possible_to_define_fields_from_config(config):
    """Only the fields listed in ``config.FIELDS`` must produce word keys."""
    config.FIELDS = [{'key': field_name} for field_name in ('custom', 'special')]
    record = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'custom': 'rue',
        'special': 'Lilas',
        'thisone': 'is not indexed',
    }
    index_document(record)
    for expected_key in ('d|xxxx', 'w|lilas', 'w|rue'):
        assert DB.exists(expected_key)
    assert not DB.exists('w|indexed')
def test_should_be_possible_to_define_fields_from_config(config):
    """Only the fields listed in ``config.FIELDS`` must produce word keys.

    The ``d|xxxx`` key is looked up through ``ds._DB``; word keys through
    ``DB``.
    """
    config.FIELDS = [{'key': field_name} for field_name in ('custom', 'special')]
    record = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'custom': 'rue',
        'special': 'Lilas',
        'thisone': 'is not indexed',
    }
    index_document(record)
    assert ds._DB.exists('d|xxxx')
    for expected_key in ('w|lilas', 'w|rue'):
        assert DB.exists(expected_key)
    assert not DB.exists('w|indexed')
def test_doc_with_null_value_should_not_be_index_if_not_allowed(config):
    """An empty ``name`` with ``'null': False`` must leave ``d|xxxx`` absent."""
    name_field = {'key': 'name', 'null': False}
    city_field = {'key': 'city'}
    config.FIELDS = [name_field, city_field]
    nameless = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '',
        'city': 'Cergy',
    }
    index_document(nameless)
    assert not DB.exists('d|xxxx')
def test_create_edge_ngrams(config):
    """``n|`` keys must appear only once ``create_edge_ngrams()`` has run.

    The document is first indexed with ``update_ngrams=False``; the four
    expected ngram keys must be absent before the explicit call and present
    after it.
    """
    config.MIN_EDGE_NGRAMS = 2
    record = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '28 Lilas',  # 28 should not appear in ngrams
        'city': 'Paris',
    }
    expected_ngrams = ('n|li', 'n|lil', 'n|pa', 'n|par')

    index_document(record, update_ngrams=False)
    for ngram_key in expected_ngrams:
        assert not DB.exists(ngram_key)

    create_edge_ngrams()
    for ngram_key in expected_ngrams:
        assert DB.exists(ngram_key)
    assert not DB.exists('n|28')
    assert len(DB.keys()) == 12
def test_should_be_possible_to_override_boost_with_callable(config):
    """A callable ``boost`` on a field must set that field's word score.

    ``name`` uses a callable returning 5; ``city`` has no boost and is
    expected to score 1.
    """
    boosted_name = {'key': 'name', 'boost': lambda doc: 5}
    plain_city = {'key': 'city'}
    config.FIELDS = [boosted_name, plain_city]
    record = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Lilas',
        'city': 'Cergy',
    }
    index_document(record)
    assert DB.exists('d|xxxx')
    for word_key, expected_score in (('w|lilas', 5), ('w|cergy', 1)):
        assert DB.zscore(word_key, 'd|xxxx') == expected_score
def test_doc_with_null_value_should_not_be_index_if_not_allowed(config):
    """An empty ``name`` with ``'null': False`` must leave ``w|cergy`` absent."""
    name_field = {'key': 'name', 'null': False}
    city_field = {'key': 'city'}
    config.FIELDS = [name_field, city_field]
    nameless = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '',
        'city': 'Cergy',
    }
    index_document(nameless)
    assert not DB.exists('w|cergy')
def test_deindex_document_should_not_affect_other_docs():
    """Deindexing DOC must leave the entries of a second document in place.

    Both documents are indexed, then DOC alone is deindexed: ``d|xxxx`` must
    vanish from every checked key while ``d|xxxx2`` stays referenced.
    """
    survivor = {
        'id': 'xxxx2',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1': {
                'lat': '48.325451',  # Same geohash as DOC.
                'lon': '2.25651',
            },
        },
    }
    index_document(DOC.copy())
    index_document(survivor)
    deindex_document(DOC['id'])

    removed_ref = b'd|xxxx'
    kept_ref = b'd|xxxx2'
    word_keys = (
        'w|rue',
        'w|des',
        'w|lil',
        'w|un',  # Housenumber.
    )

    assert not DB.exists('d|xxxx')
    for wkey in word_keys:
        assert DB.exists(wkey)
    for wkey in word_keys:
        assert removed_ref not in DB.zrange(wkey, 0, -1)
    assert DB.exists('g|u09dgm7')
    assert removed_ref not in DB.smembers('g|u09dgm7')
    for wkey in word_keys:
        assert kept_ref in DB.zrange(wkey, 0, -1)
    for geo_key in ('g|u09dgm7', 'g|u0g08g7'):
        assert kept_ref in DB.smembers(geo_key)
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert kept_ref in DB.smembers(filter_key)
    assert len(DB.keys()) == 14
def test_should_be_possible_to_override_boost_with_callable(config):
    """A callable ``boost`` on a field must set that field's word score.

    ``name`` uses a callable returning 5; ``city`` has no boost and is
    expected to score 1.
    """
    fields = []
    fields.append({'key': 'name', 'boost': lambda doc: 5})
    fields.append({'key': 'city'})
    config.FIELDS = fields
    record = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Lilas',
        'city': 'Cergy',
    }
    index_document(record)
    assert DB.exists('d|xxxx')
    expected_scores = (('w|lilas', 5), ('w|cergy', 1))
    for word_key, score in expected_scores:
        assert DB.zscore(word_key, 'd|xxxx') == score
def test_deindex_document_should_not_affect_other_docs():
    """Deindexing DOC must leave the entries of a second document in place.

    Both documents are indexed, then DOC alone is deindexed: ``d|xxxx`` must
    vanish from every checked key while ``d|xxxx2`` stays referenced. The
    ``d|xxxx`` key itself is looked up through ``ds._DB``.
    """
    survivor = {
        'id': 'xxxx2',
        '_id': 'xxxx2',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1': {
                'lat': '48.325451',  # Same geohash as DOC.
                'lon': '2.25651',
            },
        },
    }
    index_document(DOC.copy())
    index_document(survivor)
    deindex_document(DOC['id'])

    removed_ref = b'd|xxxx'
    kept_ref = b'd|xxxx2'
    shared_words = ('w|rue', 'w|des', 'w|lil')

    assert not ds._DB.exists('d|xxxx')
    for wkey in shared_words:
        assert DB.exists(wkey)
    for wkey in shared_words + ('w|un',):
        assert removed_ref not in DB.zrange(wkey, 0, -1)
    assert DB.exists('g|u09dgm7')
    assert removed_ref not in DB.smembers('g|u09dgm7')
    for wkey in shared_words:
        assert kept_ref in DB.zrange(wkey, 0, -1)
    for geo_key in ('g|u09dgm7', 'g|u0g08g7'):
        assert kept_ref in DB.smembers(geo_key)
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert kept_ref in DB.smembers(filter_key)
    assert len(DB.keys()) == 12
def deindex_token(key, token):
    """Remove ``key`` from the entry stored for ``token``.

    After the ``DB.zrem`` call, if the token's key no longer exists in ``DB``
    the token's edge ngrams are deindexed too.
    """
    index_key = token_key(token)
    DB.zrem(index_key, key)
    if DB.exists(index_key):
        # The token key is still present: leave its edge ngrams alone.
        return
    deindex_edge_ngrams(token)
def test_deindex_document_should_deindex():
    """Index then deindex DOC; every checked key must be gone afterwards."""
    index_document(DOC.copy())
    deindex_document(DOC['id'])

    assert not ds._DB.exists('d|xxxx')
    removed_keys = (
        'w|des',
        'w|lil',
        'w|ila',
        'w|las',
        'w|and',
        'w|ndr',
        'w|dre',
        'w|res',
        'w|esy',
        'g|u09dgm7',
        'f|type|street',
        'f|type|housenumber',
    )
    for removed_key in removed_keys:
        assert not DB.exists(removed_key)
    assert len(DB.keys()) == 0
def deindex(db, key, doc, tokens, **kwargs):
    """Deindex the edge ngrams of tokens whose token key is gone from ``DB``.

    Does nothing unless ``config.INDEX_EDGE_NGRAMS`` is truthy. The ``db``,
    ``key``, ``doc`` and extra keyword arguments are accepted but not used
    here; lookups go through the module-level ``DB``.
    """
    if not config.INDEX_EDGE_NGRAMS:
        return
    for tok in tokens:
        if DB.exists(dbkeys.token_key(tok)):
            # The token key still exists: keep its edge ngrams.
            continue
        deindex_edge_ngrams(tok)
def search(self):
    """Set ``self.db_key`` to ``self.key`` when that key exists in ``DB``."""
    if not DB.exists(self.key):
        return
    self.db_key = self.key
def test_deindex_document_should_deindex():
    """Index then deindex DOC; every checked key must be gone afterwards."""
    index_document(DOC.copy())
    deindex_document(DOC['id'])

    removed_keys = (
        'd|xxxx',
        'w|de',
        'w|lilas',
        'w|un',  # Housenumber.
        'p|rue',
        'p|des',
        'p|lilas',
        'p|un',
        'g|u09dgm7',
        'n|lil',
        'n|and',
        'n|andr',
        'n|andre',
        'n|andres',
        'f|type|street',
    )
    for removed_key in removed_keys:
        assert not DB.exists(removed_key)
    assert len(DB.keys()) == 0
def test_index_document():
    """Index DOC and check the document, word, pair, geohash, ngram and filter keys."""
    index_document(DOC.copy())
    doc_ref = b'd|xxxx'

    # Document itself.
    assert DB.exists('d|xxxx')
    assert DB.type('d|xxxx') == b'hash'

    # Word keys.
    assert DB.exists('w|rue')
    assert doc_ref in DB.zrange('w|rue', 0, -1)
    for word_key in (
        'w|des',
        'w|lilas',
        'w|andresy',
        'w|un',  # Housenumber.
    ):
        assert DB.exists(word_key)

    # Pair keys.
    for pair_key in ('p|rue', 'p|des', 'p|lilas', 'p|andresy'):
        assert DB.exists(pair_key)
    assert b'lilas' in DB.smembers('p|andresy')
    assert b'andresy' in DB.smembers('p|lilas')
    assert DB.exists('p|un')

    # Geohash key.
    assert DB.exists('g|u09dgm7')
    assert doc_ref in DB.smembers('g|u09dgm7')

    # Edge ngram keys.
    assert DB.exists('n|lil')
    assert DB.exists('n|lila')
    for ngram_key in ('n|and', 'n|andr', 'n|andre', 'n|andres'):
        assert DB.exists(ngram_key)
        assert b'andresy' in DB.smembers(ngram_key)
    assert b'lilas' in DB.smembers('n|lil')

    # Filter keys.
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert doc_ref in DB.smembers(filter_key)

    assert len(DB.keys()) == 20
def test_index_document():
    """Index DOC and check the document, word, pair, geohash, ngram and filter keys."""
    index_document(DOC.copy())
    doc_ref = b'd|xxxx'

    # Document itself.
    assert DB.exists('d|xxxx')
    assert DB.type('d|xxxx') == b'hash'

    # Word keys.
    assert DB.exists('w|ru')
    assert doc_ref in DB.zrange('w|ru', 0, -1)
    for word_key in (
        'w|de',
        'w|lila',
        'w|andrezi',
        'w|un',  # Housenumber.
    ):
        assert DB.exists(word_key)

    # Pair keys.
    for pair_key in ('p|ru', 'p|de', 'p|lila', 'p|andrezi'):
        assert DB.exists(pair_key)
    assert b'lila' in DB.smembers('p|andrezi')
    assert b'andrezi' in DB.smembers('p|lila')
    assert DB.exists('p|un')

    # Geohash key.
    assert DB.exists('g|u09dgm7')
    assert doc_ref in DB.smembers('g|u09dgm7')

    # Edge ngram keys.
    assert DB.exists('n|lil')
    for ngram_key in ('n|and', 'n|andr', 'n|andre', 'n|andrez'):
        assert DB.exists(ngram_key)
        assert b'andrezi' in DB.smembers(ngram_key)
    assert b'lila' in DB.smembers('n|lil')

    # Filter keys.
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert doc_ref in DB.smembers(filter_key)

    assert len(DB.keys()) == 19
def edge_ngram_deindexer(db, key, doc, tokens, **kwargs):
    """Deindex the edge ngrams of tokens whose token key is gone from ``DB``.

    Does nothing unless ``config.INDEX_EDGE_NGRAMS`` is truthy. The ``db``,
    ``key``, ``doc`` and extra keyword arguments are accepted but not used
    here; lookups go through the module-level ``DB``.
    """
    if not config.INDEX_EDGE_NGRAMS:
        return
    for tok in tokens:
        token_still_indexed = DB.exists(dbkeys.token_key(tok))
        if not token_still_indexed:
            deindex_edge_ngrams(tok)
def test_index_document():
    """Index DOC and check the keys created in ``ds._DB`` and in ``DB``.

    The document key is looked up through ``ds._DB``; word, pair, geohash,
    ngram and filter keys through ``DB``.
    """
    index_document(DOC.copy())
    doc_ref = b'd|xxxx'

    # Document itself.
    assert ds._DB.exists('d|xxxx')
    assert ds._DB.type('d|xxxx') == b'string'

    # Word keys.
    assert DB.exists('w|rue')
    assert doc_ref in DB.zrange('w|rue', 0, -1)
    for word_key in (
        'w|des',
        'w|lilas',
        'w|andresy',
        'w|1',  # Housenumber.
    ):
        assert DB.exists(word_key)

    # Pair keys.
    for pair_key in ('p|rue', 'p|des', 'p|lilas', 'p|andresy'):
        assert DB.exists(pair_key)
    assert b'lilas' in DB.smembers('p|andresy')
    assert b'andresy' in DB.smembers('p|lilas')
    assert DB.exists('p|1')

    # Geohash key.
    assert DB.exists('g|u09dgm7')
    assert doc_ref in DB.smembers('g|u09dgm7')

    # Edge ngram keys.
    assert DB.exists('n|lil')
    assert DB.exists('n|lila')
    for ngram_key in ('n|and', 'n|andr', 'n|andre', 'n|andres'):
        assert DB.exists(ngram_key)
        assert b'andresy' in DB.smembers(ngram_key)
    assert b'lilas' in DB.smembers('n|lil')

    # Filter keys.
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert doc_ref in DB.smembers(filter_key)

    assert len(DB.keys()) == 19
    assert len(ds._DB.keys()) == 1
def test_deindex_document_should_deindex():
    """Index then deindex DOC; both ``DB`` and ``ds._DB`` must end up empty."""
    index_document(DOC.copy())
    deindex_document(DOC['id'])

    assert not ds._DB.exists('d|xxxx')
    removed_keys = (
        'w|de',
        'w|lilas',
        'w|1',  # Housenumber.
        'p|rue',
        'p|des',
        'p|lilas',
        'p|1',
        'g|u09dgm7',
        'n|lil',
        'n|and',
        'n|andr',
        'n|andre',
        'n|andres',
        'f|type|street',
    )
    for removed_key in removed_keys:
        assert not DB.exists(removed_key)
    assert len(DB.keys()) == 0
    assert len(ds._DB.keys()) == 0
def test_deindex_document_should_not_affect_other_docs():
    """Deindexing one document must leave the other document's entries intact.

    A JSON round-trip copy of DOC (with an extra housenumber) and a second
    document are indexed, then the copy is deindexed by its ``_id``:
    ``d|yyyy`` must vanish from every checked key while ``d|yyyy2`` stays
    referenced.
    """
    survivor = {
        'id': 'xxxx2',
        '_id': 'yyyy2',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1': {
                'lat': '48.325451',  # Same geohash as DOC.
                'lon': '2.25651',
            },
        },
    }
    doomed = json.loads(json.dumps(DOC))  # deepcopy.
    # Add new housenumber so we can check it's deindexed.
    doomed['housenumbers']['2'] = {'lat': '48.325459', 'lon': '2.25659'}
    index_document(doomed)
    index_document(survivor)
    deindex_document(doomed['_id'])

    removed_ref = b'd|yyyy'
    kept_ref = b'd|yyyy2'
    shared_words = ('w|rue', 'w|des', 'w|lilas')

    # The deindexed document is no longer referenced.
    assert not ds._DB.exists('d|yyyy')
    for wkey in shared_words:
        assert removed_ref not in DB.zrange(wkey, 0, -1)
    assert DB.exists('g|u09dgm7')
    assert removed_ref not in DB.smembers('g|u09dgm7')

    # The other document is still referenced.
    for present_key in ('w|des', 'w|lilas', 'p|rue'):
        assert DB.exists(present_key)
    for wkey in shared_words:
        assert kept_ref in DB.zrange(wkey, 0, -1)
    for geo_key in ('g|u09dgm7', 'g|u0g08g7'):
        assert kept_ref in DB.smembers(geo_key)
    for pair_key in ('p|des', 'p|lilas'):
        assert DB.exists(pair_key)

    # Edge ngram keys: the "and…" ones are gone, the others remain.
    for gone_ngram in ('n|and', 'n|andr', 'n|andre', 'n|andres'):
        assert not DB.exists(gone_ngram)
    for kept_ngram in ('n|par', 'n|pari', 'n|lil', 'n|lila'):
        assert DB.exists(kept_ngram)
    for lilas_ngram in ('n|lil', 'n|lila'):
        assert b'lilas' in DB.smembers(lilas_ngram)

    # Filter keys.
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert kept_ref in DB.smembers(filter_key)

    assert len(DB.keys()) == 16
    assert len(ds._DB.keys()) == 1