Exemplo n.º 1
0
  def importWOFGeometries(self):
    """Store the geometry of every item in self.items as a row of the entities table.

    Items whose geometry is None are skipped. For each remaining item, rows
    with the same country/region/county/locality/neighbourhood/street ids
    (and a name_id that is NULL or equal to the item's name id) are deleted
    first, then one new row holding the delta-encoded feature is inserted
    with quadindex 0 and the rank returned by self.calculateItemRank.

    The cursor is closed even when encoding or a statement fails.
    """
    hierarchyFields = ('country', 'region', 'county', 'locality', 'neighbourhood', 'street')
    cursor = self.db.cursor()
    try:
      # Progress is reported per item group (one step per key of self.items), not per item.
      for _groupName, groupItems in self.progress(self.items.items(), total=len(self.items)):
        for itemId, item in groupItems.items():
          if item.geometry is None:
            continue

          encodeStream = encodingstream.DeltaEncodingStream()
          encodeStream.encodeFeature({ 'id': itemId, 'geometry': item.geometry, 'properties': item.properties })

          # nameId stays 0 when the item's name has no entry in self.items['name'].
          knownNames = self.items['name']
          nameId = knownNames[item.name].dbid if item.name in knownNames else 0

          # The same six hierarchy ids identify the stale rows and fill the new row.
          parentIds = tuple(item.dbids.get(field, None) for field in hierarchyFields)
          cursor.execute('DELETE FROM entities WHERE country_id IS ? AND region_id IS ? AND county_id IS ? AND locality_id IS ? AND neighbourhood_id IS ? AND street_id IS ? AND (name_id IS NULL OR name_id=?)', parentIds + (nameId,))
          # postcode_id, housenumbers and name_id are stored as NULL for these rows.
          cursor.execute('INSERT INTO entities(country_id, region_id, county_id, locality_id, neighbourhood_id, street_id, postcode_id, housenumbers, name_id, features, quadindex, rank) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', parentIds + (None, None, None, sqlite3.Binary(encodeStream.getData()), 0, self.calculateItemRank(item)))
    finally:
      cursor.close()
Exemplo n.º 2
0
  def convertDatabase(self):
    """Migrate the database schema in place, committing only if every step succeeds.

    Steps, in order:
      1. Add a 'type' column to entities and fill it from the first non-empty
         column among name_id, housenumbers, street_id, neighbourhood_id,
         locality_id, county_id, region_id, country_id.
      2. Copy every non-NULL per-field id of entities into a new entitynames table.
      3. Add entitycount to names and namecount to tokens.
      4. Rebuild entities with only id/type/features/housenumbers/quadindex/rank.
         Each '|'-separated housenumber becomes a row in names (type 9) and the
         housenumbers column becomes a delta-encoded list of those name ids.
         rank is multiplied by RANK_SCALE and truncated to int.
      5. Add a lang column to nametokens and re-create the token links of all
         names that have a language.
      6. Drop the nametokens_token_id index and create the new entities indices.

    If any step raises, the cursors are closed and the exception propagates
    without a commit.
    """
    # (result column index, CLASS_TABLE key) pairs for the type query below,
    # checked in this order; the first truthy column decides the type.
    typeColumns = ((9, 'name'), (1, 'housenumber'), (7, 'street'), (6, 'neighbourhood'), (5, 'locality'), (4, 'county'), (3, 'region'), (2, 'country'))

    self.db.execute("BEGIN")

    cursor = self.db.cursor()
    cursor1 = self.db.cursor()
    cursor2 = self.db.cursor()
    try:
      # Set type
      cursor.execute("ALTER TABLE entities ADD type INTEGER NOT NULL DEFAULT 0")

      cursor1.execute("SELECT COUNT(*) FROM entities")
      rowCount = cursor1.fetchone()[0]
      cursor1.execute("SELECT rowid, housenumbers, country_id, region_id, county_id, locality_id, neighbourhood_id, street_id, NULL, name_id FROM entities")
      for row in self.progress(cursor1, total=rowCount):
        className = next((name for idx, name in typeColumns if row[idx]), None)
        if className is None:
          # No classifying column is set: keep the column default (0) instead of
          # reusing the previous row's type (or failing on the very first row).
          continue
        cursor.execute("UPDATE entities SET type=? WHERE rowid=?", (CLASS_TABLE[className], row[0]))

      # Store name info in separate entitynames table
      cursor.execute("CREATE TABLE entitynames (entity_id INTEGER NOT NULL, name_id INTEGER NOT NULL)")
      for field in ['country', 'region', 'county', 'locality', 'neighbourhood', 'street', 'postcode', 'name']:
        # field comes from the fixed list above, so building the column name by formatting is safe.
        cursor.execute("INSERT INTO entitynames(entity_id, name_id) SELECT id, %s_id FROM entities WHERE %s_id IS NOT NULL" % (field, field))

      # Add entitycount field to names
      cursor.execute("CREATE INDEX entitynames_entity_name_id ON entitynames(entity_id, name_id)")
      cursor.execute("CREATE INDEX entitynames_name_id ON entitynames(name_id)")
      cursor.execute("ALTER TABLE names ADD entitycount INTEGER NOT NULL DEFAULT 0")
      cursor.execute("UPDATE names SET entitycount=(SELECT COUNT(*) FROM entitynames WHERE entitynames.name_id=names.id)")

      # Add namecount field to tokens
      cursor.execute("CREATE INDEX nametokens_token_name_id ON nametokens (token_id, name_id)")
      cursor.execute("ALTER TABLE tokens ADD namecount INTEGER NOT NULL DEFAULT 0")
      cursor.execute("UPDATE tokens SET namecount=(SELECT COUNT(*) FROM nametokens WHERE nametokens.token_id=tokens.id)")

      # Update housenumbers
      houseNumNameIds = {}  # housenumber text -> id of the names row created for it
      cursor1.execute("SELECT MAX(id) FROM names")
      nameId = cursor1.fetchone()[0] or 0  # new name ids continue after the current maximum
      cursor.execute("ALTER TABLE entities RENAME TO old_entities")
      cursor.execute("CREATE TABLE entities (id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, type INTEGER NOT NULL, features BLOB NOT NULL, housenumbers BLOB NULL, quadindex INTEGER NOT NULL, rank INTEGER NOT NULL)")
      cursor.execute("INSERT INTO metadata(name, value) VALUES('rank_scale', ?)", (str(RANK_SCALE),))

      cursor1.execute("SELECT COUNT(*) FROM old_entities")
      rowCount = cursor1.fetchone()[0]
      cursor1.execute("SELECT id, type, quadindex, rank, housenumbers, features FROM old_entities")
      for row1 in self.progress(cursor1, total=rowCount):
        encodeStream = encodingstream.DeltaEncodingStream()
        if row1[4]:
          for houseNum in row1[4].split("|"):
            if houseNum not in houseNumNameIds:
              # First occurrence: register the housenumber as a name and link its known tokens.
              nameId += 1
              houseNumNameIds[houseNum] = nameId
              cursor.execute("INSERT INTO names(id, lang, name, type) VALUES(?, ?, ?, ?)", (nameId, None, houseNum, 9))
              for token in self.tokenizeName(houseNum):
                cursor2.execute("SELECT id FROM tokens WHERE token=?", (token,))
                for row2 in cursor2:
                  cursor.execute("INSERT INTO nametokens(name_id, token_id) VALUES(?, ?)", (nameId, row2[0]))
            encodeStream.encodeNumber(houseNumNameIds[houseNum])
        # Rows without housenumbers produce no encoded data and store NULL.
        encodedHouseNums = encodeStream.getData()
        cursor.execute("INSERT INTO entities(id, type, quadindex, rank, features, housenumbers) VALUES(?, ?, ?, ?, ?, ?)", (row1[0], row1[1], row1[2], int(row1[3] * RANK_SCALE), row1[5], sqlite3.Binary(encodedHouseNums) if encodedHouseNums else None))

      cursor.execute("DROP TABLE old_entities")

      # Add lang to nametokens
      cursor.execute("ALTER TABLE nametokens ADD lang TEXT NULL")
      cursor1.execute("SELECT id, name, lang FROM names WHERE lang IS NOT NULL")
      # Fetch everything up front: the links of these names are deleted and rebuilt below.
      langNames = cursor1.fetchall()
      cursor.execute("DELETE FROM nametokens WHERE name_id IN (SELECT id FROM names WHERE lang IS NOT NULL)")
      for langNameId, langName, lang in langNames:
        for token in self.tokenizeName(langName):
          cursor.execute("INSERT INTO nametokens(name_id, token_id, lang) SELECT ?, id, ? FROM tokens WHERE token=?", (langNameId, lang, token))

      # Indices
      cursor.execute("DROP INDEX nametokens_token_id")

      cursor.execute("CREATE INDEX entities_id ON entities(id)")
      cursor.execute("CREATE INDEX entities_type ON entities(type)")
      cursor.execute("CREATE INDEX entities_quadindex ON entities(quadindex)")
    finally:
      # Done (or failed): release the cursors either way.
      cursor2.close()
      cursor1.close()
      cursor.close()

    self.db.commit()
Exemplo n.º 3
0
  def importPeliasAddress(self, peliasData, buildingsLocator):
    """Import one Pelias record into the entities table, merging with an existing row if possible.

    The record is dropped (with a warning where the code emits one) when it
    cannot be extracted, has no 'center_point', resolves to no country, has a
    purely numeric default name, is an unnamed non-street record, or lies
    outside self.clipBounds.

    Otherwise the record becomes one feature. If a row with the same
    hierarchy ids, name id, housenumber presence and category set already
    exists, the feature (and housenumber) is appended to that row; if not, a
    new row is inserted and its categories are stored.

    peliasData: raw record, passed to self.extractPeliasData.
    buildingsLocator: object whose findGeometry(geometry) returns a sequence
      of (street dbid, housenumber) pairs; used to attach an address to
      records that carry no street of their own.
    """
    peliasId, data = self.extractPeliasData(peliasData)
    if peliasId is None or data is None:
      return

    # Find parent info from gazetteer
    entity = self.Entity()
    if 'center_point' not in data:
      self.warning('No coordinates for entity: %d' % peliasId)
      return
    centerPoint = (data['center_point']['lon'], data['center_point']['lat'])
    hierarchy = self.wofLocator.findHierarchy(centerPoint)
    if hierarchy:
      for parentField, parentId in hierarchy[0].items():
        # Drop the last three characters of the field name (presumably an '_id' suffix - confirm against findHierarchy).
        parentType = parentField[:-3]
        entity.dbids[parentType] = self.mapEntityParent(parentId, parentType)
    entity.geometry = { 'type': 'Point', 'coordinates': centerPoint }

    # Check country
    if entity.dbids.get('country', None) is None:
      self.warning('No country for entity: %d' % peliasId)
      return

    # Store address info
    if 'address_parts' in data:
      addressParts = data['address_parts']
      if addressParts.get('street', None):
        entity.dbids['street'] = self.mapEntityName(addressParts['street'], 'street')
      if addressParts.get('number', None):
        # A housenumber is only kept when a street was mapped for it.
        if entity.dbids.get('street', None) is not None:
          self.importTokens(addressParts['number'], 'housenumber', None)
          entity.housenumber = addressParts['number']
        else:
          self.warning('Ignoring housenumber, as street info is missing: %d' % peliasId)
      if addressParts.get('zip', None):
        # Postcodes are only attached to records that have a housenumber.
        if entity.housenumber is not None and self.importPostcodes:
          entity.dbids['postcode'] = self.mapEntityName(addressParts['zip'], 'postcode')

    # Extract optional name and geometry
    name = data.get('name', {}).get('default', '')
    if name.isnumeric():
      self.warning("Numeric name '%s' for entity: %d" % (name, peliasId))
      return
    # Non-default, non-empty, non-numeric name variants as (key, value) pairs.
    extraNames = [(key, val) for key, val in data.get('name', {}).items() if key != 'default' and val and not val.isnumeric()]

    if entity.dbids.get('street', None) is not None:
      # Street info is present. Check if we have a building
      if entity.housenumber is not None and peliasId in self.buildingsGeometry:
        entity.geometry = { 'type': 'Polygon', 'coordinates': [self.unpackCoordinates(self.buildingsGeometry[peliasId])] }
      # Import names only if the name is not 'trivial' (streetname + housenumber)
      if name != '':
        streetName = data.get('address_parts', {}).get('street', '')
        if entity.housenumber is not None:
          trivialNames = ['%s %s' % (entity.housenumber, streetName), '%s %s' % (streetName, entity.housenumber)]
        else:
          trivialNames = [streetName]
        if name not in trivialNames:
          entity.dbids['name'] = self.mapEntityName(name, 'name', extraNames)
    elif entity.housenumber is None and peliasId in self.streetsGeometry:
      # No street from the address parts, but the id matches a known street geometry.
      entity.geometry = { 'type': 'LineString', 'coordinates': self.unpackCoordinates(self.streetsGeometry[peliasId]) }
      entity.dbids['street'] = self.mapEntityName(name, 'street', extraNames)
    else:
      # Not a street, likely a POI.
      if name == '':
        self.warning('No name for entity: %d' % peliasId)
        return
      # In case of POIs, we will try to locate the building (and thus get full address of the POI).
      if entity.geometry is not None and entity.geometry['type'] == 'Point':
        buildings = buildingsLocator.findGeometry(entity.geometry)
        if len(buildings) > 0:
          firstStreetDbid, firstHousenumber = buildings[0]
          # Adopt the street only if all matches agree on it, and the
          # housenumber only if they additionally agree on that too.
          if all(firstStreetDbid == streetDbid for streetDbid, _housenumber in buildings):
            entity.dbids['street'] = firstStreetDbid
            if all(firstHousenumber == housenumber for _streetDbid, housenumber in buildings):
              entity.housenumber = firstHousenumber
      entity.dbids['name'] = self.mapEntityName(name, 'name', extraNames)

    # Check entity validity
    if entity.geometry is None:
      self.warning('Failed to import geometry: %d' % peliasId)
      return
    if self.clipBounds is not None:
      if not testClipBounds(calculateGeometryBounds(entity.geometry), self.clipBounds):
        self.warning('Geometry entity geometry outside of clip bounds: %d' % peliasId)
        return

    # Hierarchy ids shared by the merge lookup and the insert.
    parentIds = tuple(entity.dbids.get(field, None) for field in ('country', 'region', 'county', 'locality', 'neighbourhood', 'street'))
    nameDbid = entity.dbids.get('name', None)
    categories = data.get('category', [])

    cursor = self.db.cursor()
    try:
      # Try to merge data
      cursor.execute('SELECT id, features, housenumbers, postcode_id FROM entities WHERE country_id IS ? AND region_id IS ? AND county_id IS ? AND locality_id IS ? AND neighbourhood_id IS ? AND street_id IS ? AND name_id IS ? AND %s' % ('housenumbers IS NOT NULL' if entity.housenumber else 'housenumbers IS NULL'), parentIds + (nameDbid,))
      for row in cursor:
        # Only rows with exactly the same category set are merge candidates.
        if set(self.loadCategories(row[0])) != set(categories):
          continue
        # Append the new feature to the features already stored in the row.
        encodeStream = encodingstream.DeltaEncodingStream(row[1])
        encodeStream.encodeFeature({ 'id': peliasId, 'geometry': entity.geometry, 'properties': {} })
        features = sqlite3.Binary(encodeStream.getData())
        # '|' separates housenumbers in the column, so it is replaced inside a housenumber.
        housenumbers = row[2] + '|' + entity.housenumber.replace('|', ' ') if entity.housenumber else None
        # The cursor is reused for the update; this is safe only because iteration stops right after.
        cursor.execute('UPDATE entities SET features=?, housenumbers=?, postcode_id=? WHERE id=?', (features, housenumbers, row[3] or entity.dbids.get('postcode', None), row[0]))
        return

      # Merging not possible, store
      encodeStream = encodingstream.DeltaEncodingStream()
      encodeStream.encodeFeature({ 'id': peliasId, 'geometry': entity.geometry, 'properties': {} })
      features = sqlite3.Binary(encodeStream.getData())
      housenumbers = entity.housenumber.replace('|', ' ') if entity.housenumber else None
      cursor.execute('INSERT INTO entities(country_id, region_id, county_id, locality_id, neighbourhood_id, street_id, postcode_id, name_id, housenumbers, features, quadindex, rank) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', parentIds + (entity.dbids.get('postcode', None), nameDbid, housenumbers, features, 0, self.calculateEntityRank(entity)))
      self.storeCategories(cursor.lastrowid, categories)
    finally:
      # Runs on the merge return, the insert path and on any exception.
      cursor.close()
Exemplo n.º 4
0
  def postProcessFeatures(self):
    """Re-encode all entity features relative to a common origin and fill in quadindex values.

    Pass 1 decodes every stored feature and computes the mean of the feature
    bounds centres; features whose bounds cannot be calculated are ignored.
    Pass 2 walks the entities in id order and, for each row:
      - groups features by normalized housenumber (one group per feature if
        the row has no housenumbers);
      - merges features of a group that share id and properties, simplifying
        geometries only for rows without housenumbers;
      - re-encodes the groups relative to the origin from pass 1;
      - calculates the quadindex of the combined geometry, deleting the row
        if that fails;
      - merges the row bounds into self.geomBounds and into the geomBoundsList
        of every item referenced by one of the row's '*_id' columns.
    Finally bounds, origin, encoding precision and quadindex level are written
    to the metadata table. Both cursors are closed even if processing fails.
    """
    cursor1 = self.db.cursor()
    cursor2 = self.db.cursor()
    try:
      # Calculate origin for geometry
      cursor1.execute('SELECT features FROM entities')
      geomOrigin = (0, 0)
      originSamples = 0
      for row in cursor1:
        encodeStream = encodingstream.DeltaEncodingStream(row[0])
        while not encodeStream.eof():
          # Reset the delta state before each feature (each one was encoded with a fresh stream).
          encodeStream.prevCoord = [0, 0]
          encodeStream.prevNumber = 0
          feature = encodeStream.decodeFeature()
          try:
            bounds = calculateGeometryBounds(feature['geometry'])
          except Exception:
            continue
          # Count only features that contribute, otherwise the running mean is skewed.
          originSamples += 1
          origin = ((bounds[0] + bounds[2]) * 0.5, (bounds[1] + bounds[3]) * 0.5)
          geomOrigin = (geomOrigin[0] + (origin[0] - geomOrigin[0]) / originSamples, geomOrigin[1] + (origin[1] - geomOrigin[1]) / originSamples)

      # Create inverse mapping lists (dbid -> OSM id)
      itemOsmIds = {}
      for itemType, items in self.items.items():
        itemOsmIds[itemType] = {}
        for osmId, item in items.items():
          itemOsmIds[itemType][item.dbid] = osmId

      # Process geometries
      cursor1.execute('SELECT COUNT(*) FROM entities')
      rowCount = cursor1.fetchone()[0]
      cursor1.execute('SELECT id, features, housenumbers, country_id, region_id, county_id, locality_id, neighbourhood_id, street_id, postcode_id, housenumbers, name_id FROM entities ORDER BY id')
      # (column index, column name) of every '*_id' column of the query; the same for all rows.
      idFields = [(idx, description[0]) for idx, description in enumerate(cursor1.description) if description[0].endswith('_id')]
      for row in self.progress(cursor1, total=rowCount):
        entityId = row[0]

        # Read features and housenumbers into interleaved lists
        featureCollections = []
        housenumbers = []
        rawHousenumbers = row[2].split('|') if row[2] else None
        featureCounter = 0
        encodeStream = encodingstream.DeltaEncodingStream(row[1])
        while not encodeStream.eof():
          encodeStream.prevCoord = [0, 0]
          encodeStream.prevNumber = 0
          feature = encodeStream.decodeFeature()
          if not validateGeometry(feature['geometry']):
            self.warning('Geometry not valid: %d' % feature['id'])

          if rawHousenumbers is not None:
            # The n-th feature belongs to the n-th housenumber of the row.
            housenumber = self.normalizeHouseNumber(rawHousenumbers[featureCounter])
            if housenumber not in housenumbers:
              housenumbers.append(housenumber)
              featureCollections.append([feature])
            else:
              featureCollections[housenumbers.index(housenumber)] += [feature]
          else:
            featureCollections.append([feature])
          featureCounter += 1

        # Try to simplify and merge features
        for featureCollection in featureCollections:
          if not self.importIds:
            # Without ids all features compare equal on id, so they merge on properties alone.
            for feature in featureCollection:
              feature['id'] = 0

          i = 0
          while i < len(featureCollection):
            currentFeature = featureCollection[i]
            # Indices (from i onwards, i included) of features with the same id and properties.
            featureIndices = [i + j for j, other in enumerate(featureCollection[i:]) if other['id'] == currentFeature['id'] and other['properties'] == currentFeature['properties']]
            geometry = mergeGeometries([featureCollection[j]['geometry'] for j in featureIndices]) if len(featureIndices) != 1 else featureCollection[i]['geometry']
            geometry = simplifyGeometry(geometry, SIMPLIFICATION_FACTOR) if not housenumbers else geometry
            featureCollection[i] = { 'id': currentFeature['id'], 'geometry': geometry, 'properties': currentFeature['properties'] }
            # Remove the merged duplicates back to front so the remaining indices stay valid.
            for j in reversed(featureIndices[1:]):
              featureCollection.pop(j)
            i += 1

        # Encode features
        encodeStream = encodingstream.DeltaEncodingStream(None, geomOrigin)
        for featureCollection in featureCollections:
          encodeStream.encodeFeatureCollection(featureCollection)

        # Calculate quadindex of all the geometries
        geometries = []
        for featureCollection in featureCollections:
          geometries += [feature['geometry'] for feature in featureCollection]
        try:
          bounds = calculateGeometryBounds({ 'type': 'GeometryCollection', 'geometries': geometries })
          quadIndex = quadindex.calculateGeometryQuadIndex(bounds)
        except Exception:
          cursor2.execute("DELETE FROM entities WHERE id=?", (entityId,))
          self.warning('Removing entity %d due to illegal geometry' % entityId)
          continue

        # Extend the bounds of every item this row references through an id column.
        for idx, field in idFields:
          itemType = field[:-3]
          osmId = itemOsmIds.get(itemType, {}).get(row[idx], None)
          if osmId is not None:
            referencedItem = self.items[itemType][osmId]
            referencedItem.geomBoundsList = mergeBoundsLists(referencedItem.geomBoundsList, [bounds])

        # Update database
        self.geomBounds = mergeBounds(self.geomBounds, bounds)
        cursor2.execute('UPDATE entities SET features=?, housenumbers=?, quadindex=? WHERE id=?', (sqlite3.Binary(encodeStream.getData()), '|'.join(housenumbers) if housenumbers else None, quadIndex, entityId))

      if self.geomBounds is not None:
        cursor1.execute("INSERT INTO metadata(name, value) VALUES('bounds', '%.16g,%.16g,%.16g,%.16g')" % self.geomBounds)
      cursor1.execute("INSERT INTO metadata(name, value) VALUES('origin', '%.16g,%.16g')" % geomOrigin)
      cursor1.execute("INSERT INTO metadata(name, value) VALUES('encoding_precision', '%.16g')" % encodingstream.PRECISION)
      cursor1.execute("INSERT INTO metadata(name, value) VALUES('quadindex_level', '%d')" % quadindex.MAX_LEVEL)
    finally:
      cursor2.close()
      cursor1.close()