Пример #1
0
def iter_changeset_stream(
        start_sqn=None,
        base_url='https://planet.openstreetmap.org/replication/changesets',
        expected_interval=60,
        parse_timestamps=True,
        state_dir=None):
    """Follow an OSM changeset replication stream and yield its changesets.

    Each replication file is downloaded, gunzipped and parsed.  Every
    ``<changeset>`` element is yielded as a ``model.Changeset``; once a file
    is exhausted a ``model.Finished(sequence_number, None)`` marker is
    yielded and the next sequence number is fetched.  A file that is not
    published yet (HTTP 404) is polled with a capped exponential backoff, so
    this generator never finishes on its own.

    Args:
        start_sqn: Sequence number of the first file to fetch.  When falsy,
            the current sequence is read from ``<base_url>/state.yaml``.
            A sequence stored in ``state_dir`` takes precedence over it.
        base_url: Root URL of the changeset replication directory.
        expected_interval: Kept for interface compatibility; this function
            does not read it.
        parse_timestamps: When true, ``created_at`` / ``closed_at`` are
            passed through ``isoToDatetime``; otherwise the raw attribute
            values are used.
        state_dir: Optional existing directory.  If it contains a
            ``state.yaml`` the starting sequence is read from it, and the
            next sequence number to fetch is written back after each file.

    Raises:
        Exception: If ``state_dir`` is given but does not exist.
        requests.HTTPError: If the remote state file, or a replication file,
            answers with an HTTP error status other than 404.
    """

    # This is a lot like the other osm_stream except there's no
    # state file for each of the diffs, so just push ahead until
    # we run into a 404.

    # If the user specifies a state_dir, read the state from the statefile there
    state_path = None
    if state_dir:
        if not os.path.exists(state_dir):
            raise Exception('Specified state_dir "%s" doesn\'t exist.' %
                            state_dir)

        state_path = '%s/state.yaml' % state_dir
        if os.path.exists(state_path):
            with open(state_path, 'r') as f:
                state = readState(f, ': ')
                start_sqn = state['sequence']

    # If no start_sqn, assume to start from the most recent changeset file
    if not start_sqn:
        u = requests.get('%s/state.yaml' % base_url)
        u.raise_for_status()
        state = readState(u.text, ': ')
        sequence_number = int(state['sequence'])
    else:
        sequence_number = int(start_sqn)

    while True:
        # Files live at <base>/AAA/BBB/CCC.osm.gz, where AAABBBCCC is the
        # zero-padded sequence number.
        sqn_str = str(sequence_number).zfill(9)
        url = '%s/%s/%s/%s.osm.gz' % (base_url, sqn_str[0:3], sqn_str[3:6],
                                      sqn_str[6:9])

        # A 404 means the file has not been published yet: wait and retry,
        # doubling the pause each time up to a 13 second ceiling.
        delay = 1.0
        while True:
            response = requests.get(url)
            if response.status_code != 404:
                break
            time.sleep(delay)
            delay = min(delay * 2, 13)

        # Any other error status would otherwise be handed to gzip and fail
        # with a misleading "not a gzipped file" error.
        response.raise_for_status()
        gzipper = gzip.GzipFile(fileobj=io.BytesIO(response.content))

        obj = None
        for event, elem in etree.iterparse(gzipper, events=('start', 'end')):
            if event == 'start':
                if elem.tag == 'changeset':
                    obj = model.Changeset(
                        int(elem.attrib['id']),
                        isoToDatetime(elem.attrib.get('created_at'))
                        if parse_timestamps else elem.attrib.get('created_at'),
                        isoToDatetime(elem.attrib.get('closed_at'))
                        if parse_timestamps else elem.attrib.get('closed_at'),
                        maybeBool(elem.attrib['open']),
                        maybeFloat(elem.get('min_lat')),
                        maybeFloat(elem.get('max_lat')),
                        maybeFloat(elem.get('min_lon')),
                        maybeFloat(elem.get('max_lon')),
                        elem.attrib.get('user'),
                        maybeInt(elem.attrib.get('uid')), [])
                elif elem.tag == 'tag':
                    obj.tags.append(
                        model.Tag(elem.attrib['k'], elem.attrib['v']))
            elif event == 'end':
                if elem.tag == 'changeset':
                    yield obj
                    obj = None
                    # Everything needed was copied into the model object, so
                    # release the parsed subtree to keep memory bounded.
                    elem.clear()

        yield model.Finished(sequence_number, None)

        sequence_number += 1

        # Persist the NEXT sequence to fetch.  This runs only once the
        # consumer resumes the generator after the Finished marker.
        if state_path:
            with open(state_path, 'w') as f:
                f.write('sequence: %d' % sequence_number)
Пример #2
0
def iter_osm_file(f, parse_timestamps=True):
    """Stream OSM primitives out of a file-like object holding OSM XML.

    Nodes, ways, relations and changesets are each yielded once their closing
    tag has been read.  Nested ``tag``, ``nd`` and ``member`` elements are
    folded into whichever primitive is currently open.  When
    ``parse_timestamps`` is false, timestamp attributes are handed over as
    found in the XML instead of being passed through ``isoToDatetime``.
    """

    def to_time(raw):
        # Either convert the timestamp or pass the raw attribute value along.
        return isoToDatetime(raw) if parse_timestamps else raw

    def shared_fields(attrs):
        # Leading constructor arguments common to Node, Way and Relation.
        return (
            int(attrs['id']),
            maybeInt(attrs.get('version')),
            maybeInt(attrs.get('changeset')),
            attrs.get('user'),
            maybeInt(attrs.get('uid')),
            maybeBool(attrs.get('visible')),
            to_time(attrs.get('timestamp')),
        )

    # Elements whose closing tag completes a primitive.
    primitive_tags = ('node', 'way', 'relation', 'changeset')
    current = None

    for event, elem in etree.iterparse(f, events=('start', 'end')):
        tag = elem.tag
        attrs = elem.attrib

        if event == 'end':
            if tag in primitive_tags:
                yield current
                current = None
        elif event == 'start':
            if tag == 'tag':
                current.tags.append(model.Tag(attrs['k'], attrs['v']))
            elif tag == 'nd':
                current.nds.append(int(attrs['ref']))
            elif tag == 'member':
                current.members.append(
                    model.Member(attrs['type'], int(attrs['ref']),
                                 attrs['role']))
            elif tag == 'node':
                node_args = shared_fields(attrs) + (
                    maybeFloat(attrs.get('lat')),
                    maybeFloat(attrs.get('lon')),
                    [],
                )
                current = model.Node(*node_args)
            elif tag == 'way':
                way_args = shared_fields(attrs) + ([], [])
                current = model.Way(*way_args)
            elif tag == 'relation':
                relation_args = shared_fields(attrs) + ([], [])
                current = model.Relation(*relation_args)
            elif tag == 'changeset':
                current = model.Changeset(
                    int(attrs['id']),
                    to_time(attrs.get('created_at')),
                    to_time(attrs.get('closed_at')),
                    maybeBool(attrs['open']),
                    maybeFloat(attrs.get('min_lat')),
                    maybeFloat(attrs.get('max_lat')),
                    maybeFloat(attrs.get('min_lon')),
                    maybeFloat(attrs.get('max_lon')),
                    attrs.get('user'),
                    maybeInt(attrs.get('uid')),
                    [])

        # All needed attributes have been copied out by now; drop the
        # element's content and prune earlier siblings from the parent so
        # the parsed tree does not keep growing.
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]