def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table):
    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
    states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
              nominatim.tools.replication.UpdateState.UP_TO_DATE]
    monkeypatch.setattr(nominatim.tools.replication, 'update',
                        lambda *args, **kwargs: states.pop())

    index_mock = MockParamCapture()
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)

    sleep_mock = MockParamCapture()
    monkeypatch.setattr(time, 'sleep', sleep_mock)

    with pytest.raises(IndexError):
        call_nominatim('replication')

    assert index_mock.called == 2
    assert sleep_mock.called == 1
    assert sleep_mock.last_args[0] == 60
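# The tests here rely on a MockParamCapture helper defined elsewhere in the
# test suite (conftest). A minimal sketch consistent with how it is used in
# these tests (call counting, last_args capture, configurable return value):
class MockParamCapture:
    """ Test double that records call count and arguments and returns
        a fixed value. (Sketch only; attribute names follow the usage above.)
    """
    def __init__(self, retval=0):
        self.called = 0
        self.retval = retval
        self.last_args = None
        self.last_kwargs = None

    def __call__(self, *args, **kwargs):
        self.called += 1
        self.last_args = args
        self.last_kwargs = kwargs
        return self.retval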
def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn):
    status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=345)

    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
                        "get_state_info", lambda self: None)

    assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 253
def test_update_already_indexed(temp_db_conn):
    status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=34, indexed=False)

    assert nominatim.tools.replication.update(temp_db_conn, dict(indexed_only=True)) \
             == nominatim.tools.replication.UpdateState.MORE_PENDING
# NOTE: parametrization reconstructed so the 'state' fixture parameter
# resolves; the exact set of states is an assumption.
@pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE,
                                   nominatim.tools.replication.UpdateState.NO_CHANGES])
def test_replication_update_once_no_index(mock_func_factory, temp_db, temp_db_conn,
                                          status_table, state):
    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
    func_mock = mock_func_factory(nominatim.tools.replication, 'update')

    assert 0 == call_nominatim('replication', '--once', '--no-index')
# NOTE: parametrization reconstructed so 'state' and 'retval' resolve;
# the exact state/exit-code pairs are assumptions.
@pytest.mark.parametrize("state,retval",
                         [(nominatim.tools.replication.UpdateState.UP_TO_DATE, 0),
                          (nominatim.tools.replication.UpdateState.NO_CHANGES, 3)])
def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn,
                                          status_table, state, retval):
    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
    func_mock = MockParamCapture(retval=state)
    monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)

    assert retval == call_nominatim('replication', '--once', '--no-index')
# NOTE: parametrization reconstructed; the sequence/result pairs are
# assumptions consistent with check_for_updates returning 0 when the server
# sequence is ahead of the local one and 2 when there is nothing new.
@pytest.mark.parametrize("server_sequence,result", [(344, 2), (345, 2), (346, 0)])
def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn,
                                       server_sequence, result):
    date = dt.datetime.now(dt.timezone.utc)
    status.set_status(temp_db_conn, date, seq=345)

    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
                        "get_state_info",
                        lambda self: OsmosisState(server_sequence, date))

    assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == result
def _set_database_date(dsn):
    """ Determine the database date and set the status accordingly.
    """
    with connect(dsn) as conn:
        try:
            dbdate = status.compute_database_date(conn)
            status.set_status(conn, dbdate)
            LOG.info('Database is at %s.', dbdate)
        except Exception as exc: # pylint: disable=broad-except
            LOG.error('Cannot determine date of database: %s', exc)

        properties.set_property(conn, 'database_version',
                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
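# For reference, NOMINATIM_VERSION is a four-element tuple, so the format
# string above expands as in this sketch (the concrete numbers are only an
# example, not the real constant):
EXAMPLE_VERSION = (3, 6, 0, 0)
assert '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(EXAMPLE_VERSION) == '3.6.0-0'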
def test_update_no_data_no_sleep(monkeypatch, temp_db_conn, update_options):
    date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1)
    status.set_status(temp_db_conn, date, seq=34)

    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
                        "apply_diffs", lambda *args, **kwargs: None)

    sleeptime = []
    monkeypatch.setattr(time, 'sleep', sleeptime.append)

    assert nominatim.tools.replication.update(temp_db_conn, update_options) \
             == nominatim.tools.replication.UpdateState.NO_CHANGES

    assert not sleeptime
def update(conn, options):
    """ Update database from the next batch of data. Returns the state of
        updates according to `UpdateState`.
    """
    startdate, startseq, indexed = status.get_status(conn)

    if startseq is None:
        LOG.error("Replication not set up. "
                  "Please run 'nominatim replication --init' first.")
        raise UsageError("Replication not set up.")

    if not indexed and options['indexed_only']:
        LOG.info("Skipping update. There is data that needs indexing.")
        return UpdateState.MORE_PENDING

    last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
    update_interval = dt.timedelta(seconds=options['update_interval'])
    if last_since_update < update_interval:
        duration = (update_interval - last_since_update).seconds
        LOG.warning("Sleeping for %s sec before next update.", duration)
        time.sleep(duration)

    if options['import_file'].exists():
        options['import_file'].unlink()

    # Read updates into file.
    repl = ReplicationServer(options['base_url'])

    outhandler = WriteHandler(str(options['import_file']))
    endseq = repl.apply_diffs(outhandler, startseq + 1,
                              max_size=options['max_diff_size'] * 1024)
    outhandler.close()

    if endseq is None:
        return UpdateState.NO_CHANGES

    # Consume updates with osm2pgsql.
    options['append'] = True
    options['disable_jit'] = conn.server_version_tuple() >= (11, 0)
    run_osm2pgsql(options)

    # Write the current status to the database.
    endstate = repl.get_state_info(endseq)
    status.set_status(conn, endstate.timestamp if endstate else None,
                      seq=endseq, indexed=False)

    return UpdateState.UP_TO_DATE
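# A sketch of how update() might be driven; connect, update and UpdateState
# are the names used in this module, while the concrete values, the DSN and
# the replication URL are assumptions. The dict keys are exactly the ones the
# function body reads (run_osm2pgsql additionally expects its osm2pgsql
# settings in the same dict):
def _update_once_sketch():
    from pathlib import Path
    options = {
        'base_url': 'https://planet.openstreetmap.org/replication/minute',
        'update_interval': 75,                          # seconds between update runs
        'import_file': Path('/tmp/osm-update.osc.gz'),  # scratch file for the diff
        'max_diff_size': 30,                            # in MB, scaled by 1024 above
        'indexed_only': False,                          # update even if indexing pending
    }
    with connect('dbname=nominatim') as conn:           # DSN is an assumption
        state = update(conn, options)
        if state == UpdateState.UP_TO_DATE:
            pass  # next: run the indexer, then mark the status as indexed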
def test_update_no_data_sleep(monkeypatch, status_table, temp_db_conn, update_options):
    date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=30)
    status.set_status(temp_db_conn, date, seq=34)

    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
                        "apply_diffs", lambda *args, **kwargs: None)

    sleeptime = []
    monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))

    assert nominatim.tools.replication.update(temp_db_conn, update_options) \
             == nominatim.tools.replication.UpdateState.NO_CHANGES

    assert len(sleeptime) == 1
    assert 0 < sleeptime[0] < 3600
def init_replication(conn, base_url):
    """ Set up replication for the server at the given base URL.
    """
    LOG.info("Using replication source: %s", base_url)
    date = status.compute_database_date(conn)

    # margin of error to make sure we get all data
    date -= dt.timedelta(hours=3)

    repl = ReplicationServer(base_url)

    seq = repl.timestamp_to_sequence(date)

    if seq is None:
        LOG.fatal("Cannot reach the configured replication service '%s'.\n"
                  "Does the URL point to a directory containing OSM update data?",
                  base_url)
        raise UsageError("Failed to reach replication service")

    status.set_status(conn, date=date, seq=seq)

    LOG.warning("Updates initialised at sequence %s (%s)", seq, date)
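# Together with check_for_updates() (exercised by the tests in this section:
# 0 means new data is available, 2 means up to date, 253/254 signal missing
# replication state), a typical bootstrap might look like this sketch; the
# DSN and replication URL are assumptions:
def _bootstrap_sketch():
    base_url = 'https://planet.openstreetmap.org/replication/minute'
    with connect('dbname=nominatim') as conn:
        init_replication(conn, base_url)
        ret = check_for_updates(conn, base_url)  # 0: updates pending, 2: up to date
        return ret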
def test_check_for_updates_seq_not_set(temp_db_conn):
    status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc))

    assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
def run(args): # pylint: disable=too-many-statements
    from ..tools import database_import
    from ..tools import refresh
    from ..indexer.indexer import Indexer
    from ..tools import postcodes

    if args.osm_file and not Path(args.osm_file).is_file():
        LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
        raise UsageError('Cannot access file.')

    if args.continue_at is None:
        database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                args.data_dir,
                                                args.no_partitions,
                                                rouser=args.config.DATABASE_WEBUSER)

        LOG.warning('Installing database module')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.install_module(args.module_dir, args.project_dir,
                                           args.config.DATABASE_MODULE_PATH,
                                           conn=conn)

        LOG.warning('Importing OSM data file')
        database_import.import_osm_data(Path(args.osm_file),
                                        args.osm2pgsql_options(0, 1),
                                        drop=args.no_updates,
                                        ignore_errors=args.ignore_errors)

        with connect(args.config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, args.config, args.sqllib_dir, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, args.config, args.sqllib_dir,
                                          reverse_only=args.reverse_only)
            refresh.load_address_levels_from_file(conn, Path(args.config.ADDRESS_LEVEL_CONFIG))
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, args.config, args.sqllib_dir, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, args.config, args.sqllib_dir)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, args.config, args.sqllib_dir)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, args.config, args.sqllib_dir, False, False)

        LOG.warning('Importing wikipedia importance data')
        data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
        if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                             data_path) > 0:
            LOG.error('Wikipedia importance dump file not found. '
                      'Will be using default importances.')

    if args.continue_at is None or args.continue_at == 'load-data':
        LOG.warning('Initialise tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)

        LOG.warning('Load data into placex table')
        database_import.load_data(args.config.get_libpq_dsn(),
                                  args.data_dir,
                                  args.threads or psutil.cpu_count() or 1)

        LOG.warning('Calculate postcodes')
        postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)

    if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
        if args.continue_at is not None and args.continue_at != 'load-data':
            with connect(args.config.get_libpq_dsn()) as conn:
                SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
        LOG.warning('Indexing places')
        indexer = Indexer(args.config.get_libpq_dsn(),
                          args.threads or psutil.cpu_count() or 1)
        indexer.index_full(analyse=not args.index_noanalyse)

    LOG.warning('Post-process tables')
    with connect(args.config.get_libpq_dsn()) as conn:
        database_import.create_search_indices(conn, args.config,
                                              args.sqllib_dir,
                                              drop=args.no_updates)
        LOG.warning('Create search index for default country names.')
        database_import.create_country_names(conn, args.config)

    webdir = args.project_dir / 'website'
    LOG.warning('Setup website at %s', webdir)
    refresh.setup_website(webdir, args.phplib_dir, args.config)

    with connect(args.config.get_libpq_dsn()) as conn:
        try:
            dbdate = status.compute_database_date(conn)
            status.set_status(conn, dbdate)
            LOG.info('Database is at %s.', dbdate)
        except Exception as exc: # pylint: disable=broad-except
            LOG.error('Cannot determine date of database: %s', exc)

        properties.set_property(conn, 'database_version',
                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))

    return 0
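# The continue_at checks above define the resumable stages of the import; a
# failed run can be picked up at 'load-data' or 'indexing'. Sketch, with args
# assumed to be the namespace produced by the CLI parser:
def _resume_at_indexing_sketch(args):
    args.continue_at = 'indexing'   # skip OSM import and data loading
    return run(args)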
@pytest.fixture
def init_status(temp_db_conn, status_table):
    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
    return 1
def run(args): # pylint: disable=too-many-statements
    from ..tools import database_import, tiger_data
    from ..tools import refresh

    if args.create_db:
        LOG.warning('Create DB')
        database_import.create_db(args.config.get_libpq_dsn())

    if args.setup_db:
        LOG.warning('Setup DB')

        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.setup_extensions(conn)
            database_import.install_module(args.module_dir, args.project_dir,
                                           args.config.DATABASE_MODULE_PATH,
                                           conn=conn)

        database_import.import_base_data(args.config.get_libpq_dsn(),
                                         args.data_dir, args.no_partitions)

    if args.import_data:
        LOG.warning('Import data')
        if not args.osm_file:
            raise UsageError('Missing required --osm-file argument')
        database_import.import_osm_data(Path(args.osm_file),
                                        args.osm2pgsql_options(0, 1),
                                        drop=args.drop,
                                        ignore_errors=args.ignore_errors)

    if args.create_tables:
        LOG.warning('Create Tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_tables(conn, args.config, args.sqllib_dir, args.reverse_only)
            refresh.load_address_levels_from_file(conn, Path(args.config.ADDRESS_LEVEL_CONFIG))
            refresh.create_functions(conn, args.config, args.sqllib_dir,
                                     enable_diff_updates=False)
            database_import.create_table_triggers(conn, args.config, args.sqllib_dir)

    if args.create_partition_tables:
        LOG.warning('Create Partition Tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_partition_tables(conn, args.config, args.sqllib_dir)

    if args.load_data:
        LOG.warning('Load data')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
        database_import.load_data(args.config.get_libpq_dsn(),
                                  args.data_dir,
                                  args.threads or 1)

        with connect(args.config.get_libpq_dsn()) as conn:
            try:
                status.set_status(conn, status.compute_database_date(conn))
            except Exception as exc: # pylint: disable=broad-except
                LOG.error('Cannot determine date of database: %s', exc)

    if args.index:
        LOG.warning('Indexing')
        from ..indexer.indexer import Indexer
        indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1)
        indexer.index_full()

    if args.create_search_indices:
        LOG.warning('Create Search indices')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop)

    if args.tiger_data:
        LOG.warning('Tiger data')
        tiger_data.add_tiger_data(args.config.get_libpq_dsn(),
                                  args.tiger_data, args.threads or 1,
                                  args.config, args.sqllib_dir)

    if args.create_country_names:
        LOG.warning('Create search index for default country names.')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_country_names(conn, args.config)