def test_load_no_tokenizer_dir(test_config):
    factory.create_tokenizer(test_config)

    test_config.project_dir = test_config.project_dir / 'foo'

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)

def test_load_missing_property(temp_db_cursor, test_config):
    factory.create_tokenizer(test_config)

    temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)

def setup_api_db(self):
    """ Set up a test against the API test database.
    """
    self.write_nominatim_config(self.api_test_db)

    if not self.api_db_done:
        self.api_db_done = True

        if not self._reuse_or_drop_db(self.api_test_db):
            # Note: '__file__' is a string literal here, so the path is
            # resolved relative to the test runner's working directory.
            testdata = Path('__file__') / '..' / '..' / 'testdb'
            self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())

            try:
                self.run_nominatim('import', '--osm-file', str(self.api_test_file))
                self.run_nominatim('add-data', '--tiger-data',
                                   str((testdata / 'tiger').resolve()))
                self.run_nominatim('freeze')

                if self.tokenizer != 'icu':
                    phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
                    run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
                else:
                    csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
                    self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
            except:
                self.db_drop_database(self.api_test_db)
                raise

    tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)

def test_load_tokenizer(test_config):
    factory.create_tokenizer(test_config)

    tokenizer = factory.get_tokenizer_for_db(test_config)

    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "loaded"

def setup_api_db(self):
    """ Set up a test against the API test database.
    """
    self.write_nominatim_config(self.api_test_db)

    if not self.api_db_done:
        self.api_db_done = True

        if not self._reuse_or_drop_db(self.api_test_db):
            # Note: '__file__' is a string literal here, so the path is
            # resolved relative to the test runner's working directory.
            testdata = Path('__file__') / '..' / '..' / 'testdb'
            self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())

            try:
                self.run_nominatim('import', '--osm-file', str(self.api_test_file))
                if self.tokenizer != 'legacy_icu':
                    self.run_nominatim('add-data', '--tiger-data',
                                       str((testdata / 'tiger').resolve()))
                self.run_nominatim('freeze')

                if self.tokenizer != 'legacy_icu':
                    phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
                    run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
                else:
                    # XXX Temporarily use the wiki while there is no CSV
                    # import available.
                    self.test_env['NOMINATIM_LANGUAGES'] = 'en'
                    self.run_nominatim('special-phrases', '--import-from-wiki')
                    del self.test_env['NOMINATIM_LANGUAGES']
            except:
                self.db_drop_database(self.api_test_db)
                raise

    tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)

def test_load_tokenizer(temp_db_conn, test_config, tokenizer_mock, property_table):
    factory.create_tokenizer(test_config)

    tokenizer = factory.get_tokenizer_for_db(test_config)

    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "loaded"

def setup_unknown_db(self):
    """ Set up a test against a non-existing database.
    """
    # The tokenizer needs an existing database to function,
    # so start with the usual test database.
    class _Context:
        db = None

    context = _Context()
    self.setup_db(context)
    tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)

    # Then drop the database again.
    self.teardown_db(context, force_drop=True)

def import_and_index_data_from_place_table(context):
    """ Import data previously set up in the place table.
    """
    nctx = context.nominatim

    tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
    context.nominatim.copy_from_place(context.db)

    # XXX use tool function as soon as it is ported
    with context.db.cursor() as cur:
        with (context.nominatim.src_dir / 'lib-sql' / 'postcode_tables.sql').open('r') as fd:
            cur.execute(fd.read())
        cur.execute("""
            INSERT INTO location_postcode
             (place_id, indexed_status, country_code, postcode, geometry)
            SELECT nextval('seq_place'), 1, country_code,
                   upper(trim (both ' ' from address->'postcode')) as pc,
                   ST_Centroid(ST_Collect(ST_Centroid(geometry)))
              FROM placex
             WHERE address ? 'postcode'
                   AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
                   AND geometry IS NOT null
             GROUP BY country_code, pc""")

    # Call directly as the refresh function does not include postcodes.
    indexer.LOG.setLevel(logging.ERROR)
    indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)

    check_database_integrity(context)

def test_setup_tokenizer_dir_exists(test_config):
    (test_config.project_dir / 'tokenizer').mkdir()

    tokenizer = factory.create_tokenizer(test_config)

    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "new"

def test_setup_dummy_tokenizer(temp_db_conn, test_config):
    tokenizer = factory.create_tokenizer(test_config)

    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "new"
    assert (test_config.project_dir / 'tokenizer').is_dir()

    assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'

def install_legacy_tokenizer(conn, config, **_):
    """ Set up the legacy tokenizer.

        If no other tokenizer has been configured yet, then create the
        configuration for the backwards-compatible legacy tokenizer.
    """
    if properties.get_property(conn, 'tokenizer') is None:
        with conn.cursor() as cur:
            for table in ('placex', 'location_property_osmline'):
                has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
                                           WHERE table_name = %s
                                                 and column_name = 'token_info'""",
                                        (table, ))
                if has_column == 0:
                    cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))

        tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                       module_name='legacy')

        tokenizer.migrate_database(config)

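# Hypothetical usage sketch, not the project's actual migration machinery:
# migration functions like install_legacy_tokenizer take (conn, config, **_)
# so that a runner can invoke every migration with one uniform argument set
# and individual migrations ignore whatever they don't need. The guard on
# the 'tokenizer' property also makes the function a no-op when a tokenizer
# is already configured, so repeated calls are safe.

def run_migrations(conn, config, **kwargs):
    # The migration list and extra keyword arguments are illustrative
    # assumptions only.
    for migrate in (install_legacy_tokenizer,):
        migrate(conn, config, **kwargs)
    conn.commit()
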
def test_setup_bad_tokenizer_name(def_config, tmp_path, monkeypatch):
    def_config.project_dir = tmp_path
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    with pytest.raises(UsageError):
        factory.create_tokenizer(def_config)

def test_setup_tokenizer_dir_failure(test_config):
    (test_config.project_dir / 'tokenizer').write_text("foo")

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)

def test_tokenizer(tokenizer_mock, project_env):
    return factory.create_tokenizer(project_env)

def test_tokenizer(tokenizer_mock, def_config, tmp_path):
    def_config.project_dir = tmp_path
    return factory.create_tokenizer(def_config)

def test_setup_bad_tokenizer_name(test_config, monkeypatch):
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)

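# --- Sketch of the test double the factory tests depend on ----------------
# The tests above reference a 'tokenizer_mock' fixture and a DummyTokenizer
# class defined elsewhere in the test suite. The reconstruction below is a
# minimal sketch of what such a double could look like. It assumes the
# factory resolves tokenizer modules through a patchable import hook
# (called '_import_tokenizer' here); that hook name, and the 'factory'
# module being importable in this scope, are assumptions for illustration,
# not the project's confirmed API.

import pytest


class DummyTokenizer:
    """ Test double that records which initialisation path was taken. """

    def __init__(self, dsn, data_dir):
        self.dsn = dsn
        self.data_dir = data_dir
        self.init_state = None

    def init_new_db(self, config, **kwargs):
        # reached via factory.create_tokenizer() on a fresh project
        self.init_state = "new"

    def init_from_project(self, config):
        # reached via factory.get_tokenizer_for_db() on an existing database
        self.init_state = "loaded"


@pytest.fixture
def tokenizer_mock(monkeypatch):
    """ Redirect the factory to the dummy tokenizer by patching its
        (assumed) module-import hook.
    """
    class _DummyModule:
        @staticmethod
        def create(dsn, data_dir):
            return DummyTokenizer(dsn, data_dir)

    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
    monkeypatch.setattr(factory, '_import_tokenizer',
                        lambda *args, **kwargs: _DummyModule)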