def extract_geonames(data_path, dump_path):
    """Merge-join the sorted GeoNames dumps into place records and dump them.

    Streams ``allCountries.sorted.txt`` alongside ``alternateNames.sorted.txt``
    in a single pass, pairing each geoname with its alternate-name rows.
    Both files are assumed to be sorted by geoname_id -- TODO confirm; the
    merge join silently drops alternate names if they are not.

    Parameters:
        data_path: directory containing the sorted GeoNames ``.txt`` dumps.
        dump_path: directory under which ``geonames/geonames.NNNN.json.gz``
            chunks are written via ``Dump``.
    """
    alt_names = tab_file(data_path + "/alternateNames.sorted.txt",
                         columns["alternate"])
    geonames = tab_file(data_path + "/allCountries.sorted.txt",
                        columns["geoname"])
    dump = Dump(dump_path + "/geonames/geonames.%04d.json.gz")
    # One-row lookahead: the alt-name row that terminated the previous inner
    # loop belongs to a later geoname and must not be lost.
    extra_alt_name = {}
    for geoname in geonames:
        alt_name_list = []
        if extra_alt_name.get("geoname_id") == geoname["geoname_id"]:
            alt_name_list.append(extra_alt_name)
            extra_alt_name = {}
        for alt_name in alt_names:
            if alt_name["geoname_id"] == geoname["geoname_id"]:
                alt_name_list.append(alt_name)
            else:
                # First row of the next geoname: stash it and stop reading.
                extra_alt_name = alt_name
                break
        geoname["alternate_names"] = alt_name_list
        try:
            for col in ("latitude", "longitude"):
                geoname[col] = float(geoname[col])
        except ValueError:
            ### busted coordinates
            continue
        centroid = [geoname["longitude"], geoname["latitude"]]
        population = None
        try:
            population = int(geoname["population"])
        except ValueError:
            pass  # population column is sometimes blank; best-effort
        uri = "http://geonames.org/" + geoname["geoname_id"]
        names = []
        # BUGFIX: build the list to iterate WITHOUT appending the synthetic
        # preferred entry to alt_name_list -- that list is already aliased as
        # geoname["alternate_names"], so appending to it polluted the
        # "source" record written below.
        all_names = alt_name_list + [{
            "name": geoname["name"],
            "lang": "",
            "type": "preferred"
        }]
        for alt_name in all_names:
            # BUGFIX: default to any pre-set "type" instead of "" so the
            # synthetic preferred entry keeps its "preferred" label (the
            # original loop unconditionally reset it; GeoNames alternate-name
            # rows carry only the is_* flags, not a "type" column).
            name_type = alt_name.get("type", "")
            if alt_name.get("is_colloquial"):
                name_type = "colloquial"
            if alt_name.get("is_historic"):
                name_type = "historic"
            if alt_name.get("is_preferred"):
                name_type = "preferred"
            if alt_name.get("is_short"):
                name_type = "short"
            alt_name = {
                "lang": alt_name["lang"],
                "type": name_type,
                "name": alt_name["name"]
            }
            names.append(alt_name)
            ascii_name = transliterate(alt_name)
            if ascii_name:
                names.append(ascii_name)
        place = {
            "name": geoname["name"],
            "centroid": centroid,
            "feature_code": geoname["feature_code"],
            "geometry": {"type": "Point", "coordinates": centroid},
            "is_primary": True,
            "source": geoname,
            "alternate": names,
            "updated": geoname["changed_at"],
            "population": population,
            "uris": [uri],
            "relationships": [],
            "timeframe": {},
            "admin": []
        }
        dump.write(uri, place)
    dump.close()
def extract_geonames(data_path, dump_path):
    """Pair every GeoNames record with its alternate names via a streaming
    merge join over the two id-sorted dump files, then write one JSON place
    document per record into gzip chunks under dump_path/geonames/.
    """
    alt_iter = tab_file(data_path + "/alternateNames.sorted.txt",
                        columns["alternate"])
    geoname_iter = tab_file(data_path + "/allCountries.sorted.txt",
                            columns["geoname"])
    out = Dump(dump_path + "/geonames/geonames.%04d.json.gz")
    # Holds the single alt-name row read past the end of the current group.
    pending = {}
    for record in geoname_iter:
        gid = record["geoname_id"]
        matched = []
        if pending.get("geoname_id") == gid:
            matched.append(pending)
            pending = {}
        for candidate in alt_iter:
            if candidate["geoname_id"] != gid:
                # Belongs to a later geoname; keep it for the next round.
                pending = candidate
                break
            matched.append(candidate)
        record["alternate_names"] = matched
        try:
            record["latitude"] = float(record["latitude"])
            record["longitude"] = float(record["longitude"])
        except ValueError:
            ### busted coordinates
            continue
        centroid = [record["longitude"], record["latitude"]]
        try:
            population = int(record["population"])
        except ValueError:
            population = None
        uri = "http://geonames.org/" + gid
        # Add the record's own name as a synthetic preferred entry.
        matched.append({
            "name": record["name"],
            "lang": "",
            "type": "preferred"
        })
        names = []
        for raw in matched:
            kind = ""
            if raw.get("is_colloquial"):
                kind = "colloquial"
            if raw.get("is_historic"):
                kind = "historic"
            if raw.get("is_preferred"):
                kind = "preferred"
            if raw.get("is_short"):
                kind = "short"
            entry = {"lang": raw["lang"], "type": kind, "name": raw["name"]}
            names.append(entry)
            romanized = transliterate(entry)
            if romanized:
                names.append(romanized)
        place = {
            "name": record["name"],
            "centroid": centroid,
            "feature_code": record["feature_code"],
            "geometry": {"type": "Point", "coordinates": centroid},
            "is_primary": True,
            "source": record,
            "alternate": names,
            "updated": record["changed_at"],
            "population": population,
            "uris": [uri],
            "relationships": [],
            "timeframe": {},
            "admin": []
        }
        out.write(uri, place)
    out.close()
def extract_osm(database, table, osm_type, dump):
    """Stream rows from an osm2pgsql PostGIS table and emit place records.

    Parameters:
        database: PostgreSQL database name, opened via psycopg2.
        table: table to read. Interpolated directly into the SQL because
            identifiers cannot be bound as query parameters -- must come
            from trusted configuration, never user input.
        osm_type: URI fragment ("node"/"way"/"relation" presumably --
            verify against callers) used to build the osm.org browse URI.
        dump: object with a write(uri, place) method; caller owns/closes it.
    """
    geom_col = "way"
    conn = psycopg2.connect("dbname=" + database)
    # Named cursor => server-side cursor, so rows stream instead of being
    # fetched into memory all at once.
    cursor = conn.cursor(table + "_get_items")
    cursor.execute("""SELECT *, X(st_centroid(%s)) AS centroid_x, Y(st_centroid(%s)) AS centroid_y, ST_AsGeoJSON(%s) AS geojson FROM %s WHERE ST_GeometryType(ST_Centroid(way)) = 'ST_Point'""" % (geom_col, geom_col, geom_col, table))
    # BUGFIX: close the cursor and connection when done (the original leaked
    # both).
    try:
        for row in Result(cursor):
            preferred_name = row.get("name:en", row.get("name"))
            feature_code = admin_level_map.get(str(row["admin_level"]))
            if not feature_code:
                for tag in key_tags:
                    if row.get(tag) and feature_code_map.get((tag, row[tag])):
                        feature_code = feature_code_map[tag, row[tag]]
                        break
            # Unnamed or unclassifiable features are skipped entirely.
            if not preferred_name or not feature_code:
                continue
            centroid = [row["centroid_x"], row["centroid_y"]]
            geometry = json.loads(row["geojson"])
            uri = "http://osm.org/browse/%s/%s" % (osm_type, row["osm_id"])
            if "way/-" in uri:
                # Negative way ids are osm2pgsql pseudo-ways for relations;
                # rewrite the URI and disambiguate with a geometry CRC.
                uri = uri.replace("way/-", "relation/") + (
                    "#%d" % (binascii.crc32(row["geojson"]) & 0xffffffff))
            names = []
            if row["name"]:
                names.append({"name": row["name"], "lang": "",
                              "type": "preferred"})
            if row["name:"]:
                # Crude hstore->JSON conversion of the localized-names
                # column; can fail on embedded quoting, hence the except.
                try:
                    alt_names = json.loads(
                        "{" + row["name:"].replace("=>", ":") + "}")
                    for lang, name in alt_names.items():
                        names.append({"name": name, "lang": lang,
                                      "type": "preferred"})
                except (ValueError, UnicodeDecodeError):
                    pass
            # BUGFIX: iterate over a snapshot so freshly appended
            # transliterations are not themselves re-visited (the original
            # appended to the very list it was iterating).
            for alt_name in list(names):
                ascii_name = transliterate(alt_name)
                if ascii_name and ascii_name["name"] != alt_name["name"]:
                    names.append(ascii_name)
            source = dict(row)
            # BUGFIX: deleting while iterating dict.keys() raises
            # RuntimeError on Python 3; iterate a snapshot of the keys.
            for key in list(source.keys()):
                if not source[key] or key in ("geojson", "centroid_x",
                                              "centroid_y", "way"):
                    del source[key]
            place = {
                "name": preferred_name,
                "centroid": centroid,
                "feature_code": feature_code,
                "geometry": geometry,
                "is_primary": True,
                "source": source,
                "alternate": names,
                "updated": row["timestamp"],
                "uris": [uri],
                "relationships": [],
                "timeframe": {},
                "admin": []
            }
            dump.write(uri, place)
    finally:
        cursor.close()
        conn.close()
def extract_osm(database, table, osm_type, dump):
    """Walk every point-centroid row of an osm2pgsql table, build a place
    document for each named, classifiable feature, and write it to *dump*.
    """
    geom_col = "way"
    connection = psycopg2.connect("dbname=" + database)
    # Named (server-side) cursor so the table streams row by row.
    server_cursor = connection.cursor(table + "_get_items")
    server_cursor.execute("""SELECT *, X(st_centroid(%s)) AS centroid_x, Y(st_centroid(%s)) AS centroid_y, ST_AsGeoJSON(%s) AS geojson FROM %s WHERE ST_GeometryType(ST_Centroid(way)) = 'ST_Point'""" % (geom_col, geom_col, geom_col, table))
    for record in Result(server_cursor):
        label = record.get("name:en", record.get("name"))
        code = admin_level_map.get(str(record["admin_level"]))
        if not code:
            # Fall back to tag-based classification.
            for tag in key_tags:
                if record.get(tag) and feature_code_map.get((tag, record[tag])):
                    code = feature_code_map[tag, record[tag]]
                    break
        if not (label and code):
            continue
        point = [record["centroid_x"], record["centroid_y"]]
        shape = json.loads(record["geojson"])
        uri = "http://osm.org/browse/%s/%s" % (osm_type, record["osm_id"])
        if "way/-" in uri:
            # Negative way id: actually a relation; add a geometry CRC tag.
            suffix = "#%d" % (binascii.crc32(record["geojson"]) & 0xffffffff)
            uri = uri.replace("way/-", "relation/") + suffix
        name_entries = []
        if record["name"]:
            name_entries.append(
                {"name": record["name"], "lang": "", "type": "preferred"})
        if record["name:"]:
            # Ad-hoc hstore-to-JSON conversion of the localized names.
            try:
                localized = json.loads(
                    "{" + record["name:"].replace("=>", ":") + "}")
                for lang, name in localized.items():
                    name_entries.append(
                        {"name": name, "lang": lang, "type": "preferred"})
            except (ValueError, UnicodeDecodeError):
                pass
        for entry in name_entries:
            romanized = transliterate(entry)
            if romanized and romanized["name"] != entry["name"]:
                name_entries.append(romanized)
        cleaned = dict(record)
        for key in cleaned.keys():
            if not cleaned[key] or key in ("geojson", "centroid_x",
                                           "centroid_y", "way"):
                del cleaned[key]
        place = {
            "name": label,
            "centroid": point,
            "feature_code": code,
            "geometry": shape,
            "is_primary": True,
            "source": cleaned,
            "alternate": name_entries,
            "updated": record["timestamp"],
            "uris": [uri],
            "relationships": [],
            "timeframe": {},
            "admin": []
        }
        dump.write(uri, place)