def run_sanitizer_on(**kwargs):
    """Run the split-name-list sanitizer over the given name tags and
    return the resulting (name, kind, suffix) triples in sorted order.
    """
    proc = PlaceSanitizer([{'step': 'split-name-list'}])
    names, _ = proc.process_names(PlaceInfo({'name': kwargs}))

    return sorted((p.name, p.kind, p.suffix) for p in names)
def index_places(self, worker, places):
    """Send the given batch of places to the worker for indexing.

    For each place the parameter list gets its place_id, its address
    and the token info computed by the analyzer.
    """
    params = []
    for row in places:
        params.append(row['place_id'])
        params.append(row['address'])
        params.append(PlaceInfo(row).analyze(self.analyzer))

    worker.perform(self._index_sql(len(places)), params)
def test_no_name_list():
    """A place without any name tags yields no names but keeps its address."""
    place = PlaceInfo({'address': {'housenumber': '3'}})
    names, address = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place)

    assert not names
    assert len(address) == 1
def index_places(self, worker, places):
    """Send the given batch of places to the worker for indexing.

    Each place contributes its id, name, address and linked place id
    followed by the analyzer's token info.
    """
    params = []
    for row in places:
        params.extend(row[field]
                      for field in ('place_id', 'name', 'address', 'linked_place_id'))
        params.append(PlaceInfo(row).analyze(self.analyzer))

    worker.perform(self._index_sql(len(places)), params)
def test_no_names():
    """A place without any name tags yields no names but keeps its address."""
    place = PlaceInfo({'address': {'housenumber': '3'}})
    names, address = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place)

    assert not names
    assert len(address) == 1
def test_process_place_housenumbers_simple(analyzer, hnr):
    """A single housenumber is passed through and tokenized."""
    payload = PlaceInfo({'address': {'housenumber': hnr}})
    info = analyzer.process_place(payload)

    assert info['hnr'] == hnr
    assert info['hnr_tokens'].startswith("{")
def test_process_place_names(analyzer, make_keywords):
    """Name tags are turned into the token array from the keyword maker."""
    tags = {'name': 'Soft bAr', 'ref': '34'}
    info = analyzer.process_place(PlaceInfo({'name': tags}))

    assert info['names'] == '{1,2,3}'
def sanitize_with_delimiter(delimiter, name):
    """Split the given name with a custom delimiter set and return the
    resulting name strings sorted.
    """
    place = PlaceInfo({'name': {'name': name}})
    proc = PlaceSanitizer([{'step': 'split-name-list',
                            'delimiters': delimiter}])
    results, _ = proc.process_names(place)

    return sorted(p.name for p in results)
def test_sanitizer_empty_list(rules):
    """With an empty rule set the name tags pass through untouched."""
    proc = sanitizer.PlaceSanitizer(rules)
    names, _ = proc.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'}}))

    assert len(names) == 1
    for entry in names:
        assert isinstance(entry, sanitizer.PlaceName)
def test_process_place_street_from_cache(self):
    """A street name must resolve identically when served from the cache."""
    self.analyzer.process_place(PlaceInfo({'name': {'name': 'Grand Road'}}))
    self.process_address(street='Grand Road')

    # Second lookup of the same street hits the cache.
    info = self.process_address(street='Grand Road')

    assert eval(info['street']) == self.name_token_set('#Grand Road')
def test_process_place_housenumbers_duplicates(analyzer):
    """Duplicate housenumbers from different tags are merged."""
    address = {'housenumber': '134',
               'conscriptionnumber': '134',
               'streetnumber': '99a'}
    info = analyzer.process_place(PlaceInfo({'address': address}))

    assert set(info['hnr'].split(';')) == {'134', '99a'}
def run_sanitizer_on(country, **kwargs):
    """Run the tag-analyzer-by-language sanitizer for a place in the given
    country and return sorted (name, kind, suffix, attr) tuples.
    """
    tags = {k.replace('_', ':'): v for k, v in kwargs.items()}
    place = PlaceInfo({'name': tags, 'country_code': country})
    names, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language'}]).process_names(place)

    return sorted((p.name, p.kind, p.suffix, p.attr) for p in names)
def test_country_name(self, word_table):
    """A country boundary gets its name added to the country name table."""
    tags = {'name': {'name': 'Norge'},
            'country_code': 'no',
            'rank_address': 4,
            'class': 'boundary',
            'type': 'administrative'}

    info = self.analyzer.process_place(PlaceInfo(tags))

    self.expect_name_terms(info, '#norge', 'norge')
    assert word_table.get_country() == {('no', ' norge')}
def test_missing_country(self):
    """Without a country code no default analyzer can be assigned."""
    place = PlaceInfo({'name': {'name': 'something'}})
    config = [{'step': 'tag-analyzer-by-language',
               'use-defaults': 'all',
               'mode': 'replace'}]
    names, _ = PlaceSanitizer(config).process_names(place)

    assert len(names) == 1
    only = names[0]
    assert only.name == 'something'
    assert only.suffix is None
    assert 'analyzer' not in only.attr
def test_process_place_multiple_street_tags(self):
    """All street:* variants contribute to the street token set."""
    self.analyzer.process_place(
        PlaceInfo({'name': {'name': 'Grand Road', 'ref': '05989'}}))

    # 'street:sym_ul' contains a colon, so it must be passed via a dict.
    info = self.process_address(**{'street': 'Grand Road',
                                   'street:sym_ul': '05989'})

    assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989')
def add_country_names(self, country_code, names):
    """ Add names for the given country to the search index.
    """
    # Wrap the names in a country-boundary place so that the same
    # sanitizer preprocessing is applied as for regular country names.
    pseudo_place = {
        'name': names,
        'country_code': country_code,
        'rank_address': 4,
        'class': 'boundary',
        'type': 'administrative',
    }
    sanitized = self.sanitizer.process_names(PlaceInfo(pseudo_place))[0]
    self._add_country_full_names(country_code, sanitized)
def test_process_place_address_terms(self):
    """Known address-part names resolve to their partial-word token sets;
    terms without an indexed name ('street', 'full' here) are dropped.
    """
    # Pre-index the names so the address terms can be resolved.
    for name in ('Zwickau', 'Haupstraße', 'Sachsen'):
        self.analyzer.process_place(PlaceInfo({'name': {'name': name}}))

    info = self.process_address(country='de', city='Zwickau', state='Sachsen',
                                suburb='Zwickau', street='Hauptstr',
                                full='right behind the church')

    city = self.name_token_set('ZWICKAU')
    state = self.name_token_set('SACHSEN')

    result = {k: eval(v[0]) for k, v in info['addr'].items()}

    assert result == {'city': city, 'suburb': city, 'state': state}
def run_sanitizer_on(whitelist, **kwargs):
    """Run the tag-analyzer-by-language sanitizer with a whitelist in
    replace mode and return sorted (name, analyzer) pairs.
    """
    tags = {k.replace('_', ':'): v for k, v in kwargs.items()}
    proc = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                            'mode': 'replace',
                            'whitelist': whitelist}])
    names, _ = proc.process_names(PlaceInfo({'name': tags}))

    # Sanity checks on the attr dicts before extracting the analyzer.
    for p in names:
        assert isinstance(p.attr, dict)
        assert len(p.attr) <= 1
        assert not p.attr or ('analyzer' in p.attr and p.attr['analyzer'])

    return sorted((p.name, p.attr.get('analyzer', '')) for p in names)
def run_sanitizer_replace(mode, country, **kwargs):
    """Run the tag-analyzer-by-language sanitizer in replace mode with the
    given default mode and country and return sorted (name, analyzer) pairs.
    """
    tags = {k.replace('_', ':'): v for k, v in kwargs.items()}
    proc = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                            'use-defaults': mode,
                            'mode': 'replace'}])
    names, _ = proc.process_names(PlaceInfo({'name': tags,
                                             'country_code': country}))

    # Sanity checks on the attr dicts before extracting the analyzer.
    for p in names:
        assert isinstance(p.attr, dict)
        assert len(p.attr) <= 1
        assert not p.attr or ('analyzer' in p.attr and p.attr['analyzer'])

    return sorted((p.name, p.attr.get('analyzer', '')) for p in names)
def test_sanitizer_default():
    """The split-name-list step splits names but leaves the address alone."""
    place = PlaceInfo({'name': {'name:de:de': '1;2;3'},
                       'address': {'street': 'Bald'}})
    names, address = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place)

    assert len(names) == 3
    for n in names:
        assert isinstance(n, sanitizer.PlaceName)
        assert n.kind == 'name'
        assert n.suffix == 'de:de'

    assert len(address) == 1
    for n in address:
        assert isinstance(n, sanitizer.PlaceName)
def process_address(self, **kwargs):
    """Run the analyzer over a place consisting only of the given
    address tags and return its token info.
    """
    place = PlaceInfo({'address': kwargs})
    return self.analyzer.process_place(place)
def test_process_place_street(self):
    """A street resolves to its name token set."""
    # The legacy tokenizer only indexes known names, so index it first.
    self.analyzer.process_place(PlaceInfo({'name': {'name': 'Grand Road'}}))

    info = self.process_address(street='Grand Road')

    assert eval(info['street']) == self.name_token_set('#Grand Road')
def process_named_place(self, names):
    """Run the analyzer over a place carrying only the given name tags
    and return its token info.
    """
    place = PlaceInfo({'name': names})
    return self.analyzer.process_place(place)
def test_process_place_housenumbers_lists(analyzer):
    """Semicolon-separated housenumber lists are split into single numbers."""
    payload = PlaceInfo({'address': {'conscriptionnumber': '1; 2;3'}})
    info = analyzer.process_place(payload)

    assert set(info['hnr'].split(';')) == {'1', '2', '3'}
def test_process_place_bad_postcode(analyzer, create_postcode_id, word_table, pcode):
    """Malformed postcodes must not end up in the word table."""
    payload = PlaceInfo({'address': {'postcode': pcode}})
    analyzer.process_place(payload)

    assert not word_table.get_postcodes()