def run_sanitizer_on(**kwargs):
    """ Run the 'split-name-list' sanitizer step over the given names.

        The keyword arguments are used unchanged as the 'name' dictionary
        of the place. Returns the sorted list of (name, kind, suffix)
        tuples of the names that process_names() hands back.
    """
    rules = [{'step': 'split-name-list'}]
    place = PlaceInfo({'name': kwargs})
    names, _ = PlaceSanitizer(rules).process_names(place)

    return sorted((item.name, item.kind, item.suffix) for item in names)
示例#2
0
    def index_places(self, worker, places):
        """ Hand the given places to the worker for indexing.

            For each place three values are collected in order: its
            'place_id', its 'address' and the result of analyzing the
            place with self.analyzer. The flat value list is passed to
            worker.perform() together with the SQL that
            self._index_sql() returns for the number of places.
        """
        params = []
        for row in places:
            params.append(row['place_id'])
            params.append(row['address'])
            params.append(PlaceInfo(row).analyze(self.analyzer))

        sql = self._index_sql(len(places))
        worker.perform(sql, params)
示例#3
0
def test_no_name_list():
    """ Run 'split-name-list' on a place that has an address but no
        names and check that no names and one address item come back.
    """
    rules = [{'step': 'split-name-list'}]
    place = PlaceInfo({'address': {'housenumber': '3'}})
    san = PlaceSanitizer(rules)
    name, address = san.process_names(place)

    assert not name
    assert len(address) == 1
示例#4
0
    def index_places(self, worker, places):
        """ Hand the given places to the worker for indexing.

            For each place the fields 'place_id', 'name', 'address' and
            'linked_place_id' are collected in that order, followed by
            the result of analyzing the place with self.analyzer. The
            flat value list is passed to worker.perform() together with
            the SQL that self._index_sql() returns for the number of
            places.
        """
        columns = ('place_id', 'name', 'address', 'linked_place_id')
        params = []
        for row in places:
            params.extend(row[col] for col in columns)
            params.append(PlaceInfo(row).analyze(self.analyzer))

        sql = self._index_sql(len(places))
        worker.perform(sql, params)
def test_no_names():
    """ Run 'strip-brace-terms' on a place that has an address but no
        names and check that no names and one address item come back.
    """
    rules = [{'step': 'strip-brace-terms'}]
    place = PlaceInfo({'address': {'housenumber': '3'}})
    san = PlaceSanitizer(rules)
    name, address = san.process_names(place)

    assert not name
    assert len(address) == 1
示例#6
0
    def test_process_place_housenumbers_simple(analyzer, hnr):
        """ Process a place whose address consists of a single
            housenumber and check that the number is returned unchanged
            in 'hnr' and that 'hnr_tokens' starts with an opening brace.
        """
        address = {'housenumber': hnr}
        place = PlaceInfo({'address': address})
        info = analyzer.process_place(place)

        assert info['hnr'] == hnr
        assert info['hnr_tokens'].startswith("{")
示例#7
0
def test_process_place_names(analyzer, make_keywords):
    """ Process a place with a 'name' and a 'ref' entry and check the
        'names' token string of the result.

        make_keywords is not referenced in the body; presumably it is a
        fixture requested for its set-up effect - TODO confirm.
    """
    names = {'name': 'Soft bAr', 'ref': '34'}
    place = PlaceInfo({'name': names})
    info = analyzer.process_place(place)

    assert info['names'] == '{1,2,3}'
示例#8
0
def sanitize_with_delimiter(delimiter, name):
    """ Run the 'split-name-list' step with the given 'delimiters'
        setting on a place whose only name is `name`.

        Returns the sorted list of the resulting name strings.
    """
    rules = [{'step': 'split-name-list', 'delimiters': delimiter}]
    place = PlaceInfo({'name': {'name': name}})
    san = PlaceSanitizer(rules)
    # Use a separate local so that the `name` parameter is not rebound.
    names, _ = san.process_names(place)

    return sorted(item.name for item in names)
示例#9
0
def test_sanitizer_empty_list(rules):
    """ Build a sanitizer from the given rules and check that a single
        input name comes back as exactly one PlaceName.
    """
    place = PlaceInfo({'name': {'name:de:de': '1;2;3'}})
    san = sanitizer.PlaceSanitizer(rules)

    # The address part of the result is not checked by this test.
    name, _ = san.process_names(place)

    assert len(name) == 1
    assert all(isinstance(item, sanitizer.PlaceName) for item in name)
示例#10
0
    def test_process_place_street_from_cache(self):
        """ Process a named place, then request the same street address
            twice and check the street tokens of the second request.
        """
        named_place = PlaceInfo({'name': {'name': 'Grand Road'}})
        self.analyzer.process_place(named_place)
        self.process_address(street='Grand Road')

        # The second request for the same address is the one checked.
        info = self.process_address(street='Grand Road')
        street_tokens = eval(info['street'])

        assert street_tokens == self.name_token_set('#Grand Road')
示例#11
0
    def test_process_place_housenumbers_duplicates(analyzer):
        """ Process an address where 'housenumber' and
            'conscriptionnumber' carry the same value and check that
            the ';'-separated 'hnr' result has exactly the values
            '134' and '99a'.
        """
        address = {
            'housenumber': '134',
            'conscriptionnumber': '134',
            'streetnumber': '99a',
        }
        info = analyzer.process_place(PlaceInfo({'address': address}))
        housenumbers = set(info['hnr'].split(';'))

        assert housenumbers == {'134', '99a'}
示例#12
0
    def run_sanitizer_on(country, **kwargs):
        """ Run the 'tag-analyzer-by-language' step on a place with the
            given country code.

            Keyword arguments become the names of the place, with every
            '_' in the key replaced by ':'. Returns the sorted list of
            (name, kind, suffix, attr) tuples of the resulting names.
        """
        names = {}
        for key, value in kwargs.items():
            names[key.replace('_', ':')] = value

        place = PlaceInfo({'name': names, 'country_code': country})
        rules = [{'step': 'tag-analyzer-by-language'}]
        result, _ = PlaceSanitizer(rules).process_names(place)

        return sorted((item.name, item.kind, item.suffix, item.attr)
                      for item in result)
示例#13
0
    def test_country_name(self, word_table):
        """ Process an administrative boundary with address rank 4 and
            country code 'no' and check the name terms of the result
            as well as the country entries of the word table.
        """
        properties = {
            'name': {'name': 'Norge'},
            'country_code': 'no',
            'rank_address': 4,
            'class': 'boundary',
            'type': 'administrative',
        }

        info = self.analyzer.process_place(PlaceInfo(properties))

        self.expect_name_terms(info, '#norge', 'norge')
        countries = word_table.get_country()
        assert countries == {('no', ' norge')}
示例#14
0
    def test_missing_country(self):
        """ Run 'tag-analyzer-by-language' with 'use-defaults' set to
            'all' and 'mode' set to 'replace' on a place without a
            country code and check that the single name comes back
            without a suffix and without an 'analyzer' attribute.
        """
        rules = [{
            'step': 'tag-analyzer-by-language',
            'use-defaults': 'all',
            'mode': 'replace'
        }]
        place = PlaceInfo({'name': {'name': 'something'}})
        name, _ = PlaceSanitizer(rules).process_names(place)

        assert len(name) == 1

        first = name[0]
        assert first.name == 'something'
        assert first.suffix is None
        assert 'analyzer' not in first.attr
示例#15
0
    def test_process_place_multiple_street_tags(self):
        """ Process a place with a name and a ref, then an address with
            two street tags and check that the street tokens of the
            address match the token set of both terms.
        """
        names = {'name': 'Grand Road', 'ref': '05989'}
        self.analyzer.process_place(PlaceInfo({'name': names}))

        # 'street:sym_ul' is not a valid keyword name, so the address
        # goes in as an unpacked dictionary.
        address = {'street': 'Grand Road', 'street:sym_ul': '05989'}
        info = self.process_address(**address)
        street_tokens = eval(info['street'])

        assert street_tokens == self.name_token_set('#Grand Road', '#05989')
示例#16
0
 def add_country_names(self, country_code, names):
     """ Add the given names for a country to the search index.

         The names are first run through self.sanitizer as part of a
         place with address rank 4, class 'boundary' and type
         'administrative'. The sanitized names are then handed to
         self._add_country_full_names().
     """
     # Make sure any name preprocessing for country names applies.
     properties = {
         'name': names,
         'country_code': country_code,
         'rank_address': 4,
         'class': 'boundary',
         'type': 'administrative'
     }
     sanitized = self.sanitizer.process_names(PlaceInfo(properties))

     # Only the first element of the result, the names, is needed.
     self._add_country_full_names(country_code, sanitized[0])
示例#17
0
    def test_process_place_address_terms(self):
        """ Process three named places, then an address with six parts
            and check that info['addr'] holds tokens for 'city',
            'suburb' and 'state' only.
        """
        for known_name in ('Zwickau', 'Haupstraße', 'Sachsen'):
            known_place = PlaceInfo({'name': {'name': known_name}})
            self.analyzer.process_place(known_place)

        address = {
            'country': 'de',
            'city': 'Zwickau',
            'state': 'Sachsen',
            'suburb': 'Zwickau',
            'street': 'Hauptstr',
            'full': 'right behind the church',
        }
        info = self.process_address(**address)

        city = self.name_token_set('ZWICKAU')
        state = self.name_token_set('SACHSEN')

        print(info)
        # Each entry of info['addr'] is a sequence; its first element is
        # the string that evaluates to the token set.
        result = {key: eval(entry[0]) for key, entry in info['addr'].items()}

        expected = {'city': city, 'suburb': city, 'state': state}
        assert result == expected
示例#18
0
    def run_sanitizer_on(whitelist, **kwargs):
        """ Run 'tag-analyzer-by-language' in 'replace' mode with the
            given whitelist.

            Keyword arguments become the names of the place, with every
            '_' in the key replaced by ':'. Every resulting attr must
            be a dictionary that is either empty or holds just a
            non-empty 'analyzer' entry. Returns the sorted list of
            (name, analyzer) tuples, with '' for a missing analyzer.
        """
        names = {}
        for key, value in kwargs.items():
            names[key.replace('_', ':')] = value

        rules = [{
            'step': 'tag-analyzer-by-language',
            'mode': 'replace',
            'whitelist': whitelist
        }]
        place = PlaceInfo({'name': names})
        name, _ = PlaceSanitizer(rules).process_names(place)

        assert all(isinstance(item.attr, dict) for item in name)
        assert all(len(item.attr) <= 1 for item in name)
        assert all(
            not item.attr
            or ('analyzer' in item.attr and item.attr['analyzer'])
            for item in name)

        return sorted((item.name, item.attr.get('analyzer', ''))
                      for item in name)
示例#19
0
    def run_sanitizer_replace(mode, country, **kwargs):
        """ Run 'tag-analyzer-by-language' in 'replace' mode with
            'use-defaults' set to `mode` on a place with the given
            country code.

            Keyword arguments become the names of the place, with every
            '_' in the key replaced by ':'. Every resulting attr must
            be a dictionary that is either empty or holds just a
            non-empty 'analyzer' entry. Returns the sorted list of
            (name, analyzer) tuples, with '' for a missing analyzer.
        """
        names = {}
        for key, value in kwargs.items():
            names[key.replace('_', ':')] = value

        rules = [{
            'step': 'tag-analyzer-by-language',
            'use-defaults': mode,
            'mode': 'replace'
        }]
        place = PlaceInfo({'name': names, 'country_code': country})
        name, _ = PlaceSanitizer(rules).process_names(place)

        assert all(isinstance(item.attr, dict) for item in name)
        assert all(len(item.attr) <= 1 for item in name)
        assert all(
            not item.attr
            or ('analyzer' in item.attr and item.attr['analyzer'])
            for item in name)

        return sorted((item.name, item.attr.get('analyzer', ''))
                      for item in name)
示例#20
0
def test_sanitizer_default():
    """ Run 'split-name-list' on a place with one ';'-separated name
        and one street address.

        Checks that three PlaceName items of kind 'name' with suffix
        'de:de' come back as names and one PlaceName as address.
    """
    properties = {
        'name': {'name:de:de': '1;2;3'},
        'address': {'street': 'Bald'},
    }
    san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}])

    name, address = san.process_names(PlaceInfo(properties))

    assert len(name) == 3
    assert all(isinstance(item, sanitizer.PlaceName) for item in name)
    assert all(item.kind == 'name' for item in name)
    assert all(item.suffix == 'de:de' for item in name)

    assert len(address) == 1
    assert all(isinstance(item, sanitizer.PlaceName) for item in address)
示例#21
0
 def process_address(self, **kwargs):
     """ Run the analyzer's process_place() on a place that has only
         the given keyword arguments as its address and return the
         result.
     """
     place = PlaceInfo({'address': kwargs})
     return self.analyzer.process_place(place)
示例#22
0
    def test_process_place_street(self):
        """ Process a named place, then an address with the same street
            name and check the street tokens of the address.
        """
        # legacy tokenizer only indexes known names
        named_place = PlaceInfo({'name': {'name': 'Grand Road'}})
        self.analyzer.process_place(named_place)

        info = self.process_address(street='Grand Road')
        street_tokens = eval(info['street'])

        assert street_tokens == self.name_token_set('#Grand Road')
示例#23
0
 def process_named_place(self, names):
     """ Run the analyzer's process_place() on a place that has only
         the given names and return the result.
     """
     place = PlaceInfo({'name': names})
     return self.analyzer.process_place(place)
示例#24
0
    def test_process_place_housenumbers_lists(analyzer):
        """ Process an address whose 'conscriptionnumber' is a
            ';'-separated list and check that the ';'-separated 'hnr'
            result has exactly the values '1', '2' and '3'.
        """
        address = {'conscriptionnumber': '1; 2;3'}
        info = analyzer.process_place(PlaceInfo({'address': address}))
        housenumbers = set(info['hnr'].split(';'))

        assert housenumbers == {'1', '2', '3'}
示例#25
0
def test_process_place_bad_postcode(analyzer, create_postcode_id, word_table,
                                    pcode):
    """ Process an address with the postcode `pcode` and check that the
        word table reports no postcodes afterwards.

        create_postcode_id is not referenced in the body; presumably it
        is a fixture requested for its set-up effect - TODO confirm.
    """
    address = {'postcode': pcode}
    analyzer.process_place(PlaceInfo({'address': address}))

    postcodes = word_table.get_postcodes()
    assert not postcodes