Exemplo n.º 1
0
def update(conn, options):
    """ Update database from the next batch of data. Returns the state of
        updates according to `UpdateState`.

        `conn` is the database connection handed to the `status` helpers
        and queried for the server version.

        `options` is a dictionary. The keys read here are `indexed_only`,
        `update_interval` (seconds), `import_file` (a path-like object with
        `exists()` and `unlink()`), `base_url` and `max_diff_size`. The keys
        `append` and `disable_jit` are written into the dictionary before it
        is handed on to `run_osm2pgsql()`.

        Returns `UpdateState.MORE_PENDING` when the update is skipped
        because data still needs indexing, `UpdateState.NO_CHANGES` when
        applying the diffs yields no end sequence and
        `UpdateState.UP_TO_DATE` after a batch was imported.

        Raises a `UsageError` when no replication sequence is stored.
    """
    startdate, startseq, indexed = status.get_status(conn)

    if startseq is None:
        LOG.error("Replication not set up. "
                  "Please run 'nominatim replication --init' first.")
        raise UsageError("Replication not set up.")

    if not indexed and options['indexed_only']:
        LOG.info("Skipping update. There is data that needs indexing.")
        return UpdateState.MORE_PENDING

    # Honour the minimum interval between two updates.
    last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
    update_interval = dt.timedelta(seconds=options['update_interval'])
    if last_since_update < update_interval:
        # Use total_seconds(): the `seconds` attribute leaves out the days
        # component and would cut waits of a day or more short.
        duration = int((update_interval - last_since_update).total_seconds())
        LOG.warning("Sleeping for %s sec before next update.", duration)
        time.sleep(duration)

    # Start from a clean slate, a previous run may have left a file behind.
    if options['import_file'].exists():
        options['import_file'].unlink()

    # Read updates into file.
    repl = ReplicationServer(options['base_url'])

    outhandler = WriteHandler(str(options['import_file']))
    try:
        endseq = repl.apply_diffs(outhandler,
                                  startseq + 1,
                                  max_size=options['max_diff_size'] * 1024)
    finally:
        # Close the output handler even when downloading the diffs fails.
        outhandler.close()

    if endseq is None:
        return UpdateState.NO_CHANGES

    # Consume updates with osm2pgsql.
    options['append'] = True
    # JIT gets disabled for servers reporting version 11.0 or later.
    options['disable_jit'] = conn.server_version_tuple() >= (11, 0)
    run_osm2pgsql(options)

    # Save the new replication state. The update is marked as not indexed;
    # the date is left empty when no state info exists for the sequence.
    endstate = repl.get_state_info(endseq)
    status.set_status(conn,
                      endstate.timestamp if endstate else None,
                      seq=endseq,
                      indexed=False)

    return UpdateState.UP_TO_DATE
Exemplo n.º 2
0
def init_replication(conn, base_url):
    """ Initialise the replication state for the server at `base_url`.

        The date computed for the database is moved back by three hours
        and translated into a sequence number of the replication service.
        Date and sequence number are then saved via `status.set_status()`.

        Raises a `UsageError` when the service yields no sequence number.
    """
    LOG.info("Using replication source: %s", base_url)

    # Go back a bit in time as a margin of error, so that no data is missed.
    start_date = status.compute_database_date(conn) - dt.timedelta(hours=3)

    start_seq = ReplicationServer(base_url).timestamp_to_sequence(start_date)

    if start_seq is not None:
        status.set_status(conn, date=start_date, seq=start_seq)
        LOG.warning("Updates intialised at sequence %s (%s)",
                    start_seq, start_date)
        return

    # No sequence number: the service could not be queried.
    LOG.fatal("Cannot reach the configured replication service '%s'.\n"
              "Does the URL point to a directory containing OSM update data?",
              base_url)
    raise UsageError("Failed to reach replication service")
Exemplo n.º 3
0
def check_for_updates(conn, base_url):
    """ Compare the sequence number saved in the database with the current
        state of the replication service at `base_url`.

        Returns 0 when the service has newer data, 2 when the database is
        up to date, 253 when the state of the service cannot be retrieved
        and 254 when no sequence number is saved in the database.
    """
    _date, current_seq, _indexed = status.get_status(conn)

    if current_seq is None:
        LOG.error("Replication not set up. "
                  "Please run 'nominatim replication --init' first.")
        return 254

    server_state = ReplicationServer(base_url).get_state_info()

    if server_state is None:
        LOG.error("Cannot get state for URL %s.", base_url)
        return 253

    latest_seq = server_state.sequence

    if latest_seq <= current_seq:
        LOG.warning("Database is up to date.")
        return 2

    LOG.warning("New data available (%i => %i).", current_seq, latest_seq)
    return 0
Exemplo n.º 4
0
    def run(self):
        """Download and apply replication diffs in an endless loop.

        The starting sequence id is `self.options.seqid` when that is set,
        otherwise the value returned by `self.get_osm_schema_ver()`. Each
        iteration downloads at most one diff block, applies it with
        `self.apply_buffer()` and calls `self.flush()` with its sequence
        id. When no diff was applied in an iteration, the loop sleeps for
        60 seconds. A progress line is logged once more than 60 seconds
        have passed since the previous one.

        This method does not return normally.

        Raises:
            Exception: if no starting sequence id can be determined.
        """
        repserv = ReplicationServer(self.options.osm_updater_url)
        # A monotonic clock is enough for measuring elapsed time and is
        # not affected by adjustments of the system clock.
        last_time = time.monotonic()
        if self.options.seqid:
            seqid = self.options.seqid
        else:
            seqid = self.get_osm_schema_ver(repserv)
            if seqid is None:
                raise Exception('Unable to determine sequence ID')

        log.info(f'Initial sequence id: {seqid}')
        state = None
        # NOTE(review): starting at seqid (rather than seqid - 1) makes the
        # first progress report come out one lower than the number of diffs
        # applied -- confirm whether that is intended.
        last_seqid = seqid

        while True:

            # must not read data newer than the published sequence id
            # or we might end up reading partial data

            sleep = True
            if state is None:
                state = repserv.get_state_info()
                if state is not None and seqid + 2 < state.sequence:
                    log.info(
                        f'Replication server has data up to #{state.sequence}')

            if state is not None and seqid <= state.sequence:
                try:
                    diffdata = repserv.get_diff_block(seqid)
                except Exception as err:
                    # Best effort: treat a failed download like an empty
                    # diff so the same id is retried after the sleep. Only
                    # Exception is caught, so KeyboardInterrupt and
                    # SystemExit still stop the loop.
                    log.warning('Failed to download change %s: %s',
                                seqid, err)
                    diffdata = ''

                # We assume there are no empty diff files
                if diffdata:
                    log.debug("Downloaded change %d. (size=%d)",
                              seqid, len(diffdata))

                    if self.options.addWayLoc:
                        self.apply_buffer(diffdata,
                                          repserv.diff_type,
                                          locations=True,
                                          idx=self.get_index_string())
                    else:
                        self.apply_buffer(diffdata, repserv.diff_type)

                    self.flush(seqid)

                    seqid += 1
                    # More data may be waiting, continue right away.
                    sleep = False

            seconds_since_last = time.monotonic() - last_time
            if seconds_since_last > 60:
                log.info(
                    f'Processed {seqid - last_seqid - 1}, ' +
                    f'todo {(state.sequence - seqid + 1 if state else "???")};  {self.format_stats()}'
                )
                last_seqid = seqid - 1
                last_time = time.monotonic()

            if state is not None and seqid > state.sequence:
                state = None  # Refresh state

            if sleep:
                time.sleep(60)