Пример #1
0
def regulate_table_name(table_name, subregion_name_as_table_name=True):
    """
    Normalise a string so that it can be used as a table name.

    :param table_name: [str] proposed table name, e.g. name of a subregion
    :param subregion_name_as_table_name: [bool] (default: True) whether to pass 'table_name' through
                                                regulate_input_subregion_name() first
    :return: [str] a name of at most 62 characters in which every single quote is replaced by an underscore
    """
    name = table_name
    if subregion_name_as_table_name:
        name = regulate_input_subregion_name(name)

    # Names of 63 or more characters are cut down to their first 60 characters followed by '..'
    if len(name) >= 63:
        name = name[:60] + '..'

    return name.replace("'", "_")
Пример #2
0
    def dump_osm_pbf_data(self,
                          subregion_data,
                          table_name,
                          parsed=True,
                          if_exists='replace',
                          chunk_size=None,
                          subregion_name_as_table_name=True,
                          verbose=True):
        """
        Dump the data of a subregion into the database, one layer at a time.

        Each (geom_type, layer_data) item of 'subregion_data' is handed to dump_osm_pbf_data_by_layer(), unless the
        layer is empty and subregion_table_exists() reports that its table is already there. A failure on one layer is
        printed and does not stop the remaining layers from being processed.

        :param subregion_data: [dict-like] maps a geometry type to the [pd.DataFrame] data of that layer
        :param table_name: [str] name of a table; e.g. name of the subregion (recommended)
        :param parsed: [bool] (default: True) whether 'subregion_data' has been parsed
        :param if_exists: [str] 'fail', 'replace' (default), or 'append'
        :param chunk_size: [int; None (default)]
        :param subregion_name_as_table_name: [bool] (default: True) whether to use subregion name as table name
        :param verbose: [bool] (default: True) whether to print progress; failures are printed regardless
        """
        if subregion_name_as_table_name:
            table_name = regulate_input_subregion_name(table_name)

        if verbose:
            print("Dumping \"{}\" to {}@{}:{} ... ".format(
                table_name, self.database_name, self.host, self.port))

        for layer_name, layer_frame in subregion_data.items():
            if verbose:
                print("         {} ... ".format(layer_name), end="")

            # The existence check is only made for empty layers (short-circuit on 'empty')
            nothing_to_dump = layer_frame.empty and self.subregion_table_exists(
                layer_name, table_name, subregion_name_as_table_name)

            if nothing_to_dump:
                if verbose:
                    print(
                        "The layer is empty. An empty table already exists in the database."
                    )
            else:
                try:
                    self.dump_osm_pbf_data_by_layer(
                        layer_frame, layer_name, table_name,
                        subregion_name_as_table_name, parsed, if_exists,
                        chunk_size)
                    if verbose:
                        print("Done. Total amount of features: {}".format(
                            len(layer_frame)))
                except Exception as e:
                    print("Failed. CAUSE: \"{}\"".format(e))

            # Drop the local reference to the layer and collect garbage before the next one
            del layer_frame
            gc.collect()
Пример #3
0
def merge_multi_shp(subregion_names,
                    layer,
                    update_shp_zip=False,
                    download_confirmation_required=True,
                    data_dir=None,
                    prefix="gis_osm",
                    rm_zip_extracts=False,
                    rm_shp_parts=False,
                    merged_shp_dir=None,
                    verbose=False):
    """
    Merge the shape files of one OSM layer for several subregions into a single shape file.

    The .shp.zip file of every subregion is downloaded (if needed) and extracted, the files of the requested layer are
    gathered in a "merged_<layer>" folder, and all gathered .shp files are then written into one
    "merged_<prefix>_<layer>" shape file.

    :param subregion_names: [list] a list of subregion names, e.g. ['rutland', 'essex']
    :param layer: [str] name of a OSM layer, e.g. 'railways'
    :param update_shp_zip: [bool] (default: False) indicates whether to update the relevant file/information
    :param download_confirmation_required: [bool] (default: True)
    :param data_dir: [str; None] directory of the .shp.zip files; if None or "", the default paths are used
    :param prefix: [str] (default: "gis_osm") used in the name of the merged shape file
    :param rm_zip_extracts: [bool] (default: False) whether to move (rather than copy) the layer files out of the
                                                extract folders and delete those folders afterwards
    :param rm_shp_parts: [bool] (default: False) whether to move the merged files into their own folder and delete the
                                                "merged_<layer>" working folder (including the per-subregion copies)
    :param merged_shp_dir: [str; None (default)] if None, use the layer name as the name of the folder where the merged
                                                shp files will be saved
    :param verbose: [bool] (default: False)

    Layers include 'buildings', 'landuse', 'natural', 'places', 'points', 'railways', 'roads' and 'waterways'

    Note that this function does not create projection (.prj) for the merged map.
    Reference: http://geospatialpython.com/2011/02/create-prj-projection-file-for.html for creating a .prj file.

    Example:
        subregion_names                = ['Rutland', 'Herefordshire']
        layer                          = 'railways'
        update_shp_zip                 = False
        download_confirmation_required = True
        data_dir                       = cd("test_read_GeoFabrik")
        prefix                         = "gis_osm"
        rm_zip_extracts                = False
        rm_shp_parts                   = False
        merged_shp_dir                 = None
        verbose                        = True
        merge_multi_shp(subregion_names, layer, update_shp_zip, download_confirmation_required, data_dir)
    """
    # Make sure all the required shape files are ready
    file_format = ".shp.zip"
    subregion_names_ = [
        regulate_input_subregion_name(x) for x in subregion_names
    ]
    download_subregion_osm_file(
        *subregion_names_,
        osm_file_format=file_format,
        download_dir=data_dir,
        update=update_shp_zip,
        download_confirmation_required=download_confirmation_required,
        verbose=verbose)

    # Locate the .zip files
    if not data_dir:  # data_dir is None or data_dir == ""
        regulated_data_dir = None
        file_paths = [
            get_default_path_to_osm_file(x, file_format, mkdir=False)[1]
            for x in subregion_names_
        ]
    else:
        regulated_data_dir = regulate_input_data_dir(data_dir)
        file_paths = [
            cd(regulated_data_dir,
               get_default_path_to_osm_file(x, file_format, mkdir=False)[0])
            for x in subregion_names_
        ]

    # Extract the requested layer from every .zip into a folder named after the .zip (minus its last extension)
    extract_dirs = []
    for file_path in file_paths:
        extract_dir = os.path.splitext(file_path)[0]
        extract_shp_zip(file_path, extract_dir, layer=layer, verbose=verbose)
        extract_dirs.append(extract_dir)

    # Specify a directory that stores files for the specific layer
    if regulated_data_dir is None:
        # The common path of a .zip file and its extract folder is the folder containing both
        path_to_merged = cd(
            os.path.commonpath((file_paths[0], extract_dirs[0])),
            "merged_" + layer)
    else:
        path_to_merged = cd(regulated_data_dir, "merged_" + layer)

    os.makedirs(path_to_merged, exist_ok=True)

    # Copy .shp files (e.g. gis_osm_***_free_1.shp) into the output directory
    transfer = shutil.move if rm_zip_extracts else shutil.copyfile
    for subregion, extract_dir in zip(subregion_names, extract_dirs):
        name_prefix = subregion.lower().replace(' ', '-')
        for original_filename in glob.glob1(extract_dir,
                                            "*{}*".format(layer)):
            dest = os.path.join(
                path_to_merged,
                "{}_{}".format(name_prefix, original_filename))
            transfer(os.path.join(extract_dir, original_filename), dest)
        # Remove the extract folder only after ALL of its layer files have been moved; removing it inside the
        # loop above would make the move of the second file (.dbf, .shx, ...) fail on a missing source.
        if rm_zip_extracts and os.path.isdir(extract_dir):
            shutil.rmtree(extract_dir)

    # Resource: https://github.com/GeospatialPython/pyshp
    # Results of earlier merges ("merged_*") are left out of the input
    shp_file_paths = [
        x for x in glob.glob(os.path.join(path_to_merged, "*.shp"))
        if not os.path.basename(x).startswith("merged_")
    ]

    path_to_merged_shp_file = cd(path_to_merged,
                                 "merged_" + prefix + "_" + layer)
    w = shapefile.Writer(path_to_merged_shp_file)
    if verbose:
        print("\nMerging the following shape files:\n    {}".format(
            "\n    ".join(os.path.basename(f) for f in shp_file_paths)))
        print("In progress ... ", end="")
    try:
        for f in shp_file_paths:
            r = shapefile.Reader(f)
            try:
                w.fields = r.fields[1:]  # skip first deletion field
                w.shapeType = r.shapeType
                for shaperec in r.iterShapeRecords():
                    w.record(*shaperec.record)
                    w.shape(shaperec.shape)
            finally:
                # Release the reader's file handles even if a record cannot be written
                r.close()
        w.close()
        # Re-save the merged file through geopandas with a WGS84 longitude/latitude CRS attached
        merged_shp_data = gpd.read_file(path_to_merged_shp_file + ".shp")
        merged_shp_data.crs = {
            'no_defs': True,
            'ellps': 'WGS84',
            'datum': 'WGS84',
            'proj': 'longlat'
        }
        merged_shp_data.to_file(filename=path_to_merged_shp_file,
                                driver="ESRI Shapefile")
        if verbose:
            print("Successfully.")
    except Exception as e:
        if verbose:
            print("Failed. {}".format(e))
    if verbose:
        print("The output .shp file is saved in \"{}\".".format(
            path_to_merged))

    if rm_shp_parts:
        if merged_shp_dir:
            new_shp_dir = cd(regulate_input_data_dir(merged_shp_dir),
                             mkdir=True)
        elif data_dir:
            new_shp_dir = cd(data_dir, layer, mkdir=True)
        else:
            # NOTE(review): cd() presumably cannot take None as a path component, so when no data_dir is given
            # the layer folder is created next to the "merged_<layer>" working folder instead - confirm.
            new_shp_dir = cd(os.path.dirname(path_to_merged),
                             layer,
                             mkdir=True)
        # Move the merged files out (dropping the leading "merged_") and discard the working folder
        for x in glob.glob(cd(path_to_merged, "merged_*")):
            shutil.move(
                x,
                cd(new_shp_dir,
                   os.path.basename(x).replace("merged_", "", 1)))
        shutil.rmtree(path_to_merged)
Пример #4
0
def merge_multi_shp(subregion_names,
                    layer,
                    update_shp_zip=False,
                    download_confirmation_required=True,
                    output_dir=None):
    """
    Merge the shape files of one OSM layer for several subregions into a single "merged_<layer>" shape file.

    :param subregion_names: [list] a list of subregion names, e.g. ['london', 'essex']
    :param layer: [str] name of a OSM layer, e.g. 'railways'
    :param update_shp_zip: [bool] (default: False) indicates whether to update the relevant file/information
    :param download_confirmation_required: [bool] (default: True)
    :param output_dir: [str; None] absolute path of the folder for the merged files; if None, a "merged_<layer>"
                                   folder next to the first subregion's .shp.zip file is used

    Layers include 'buildings', 'landuse', 'natural', 'places', 'points', 'railways', 'roads' and 'waterways'

    Note that this function does not create projection (.prj) for the merged map.
    Reference: http://geospatialpython.com/2011/02/create-prj-projection-file-for.html for creating a .prj file.

    Testing e.g.
        subregion_names                = ['london', 'essex']
        layer                          = 'railways'
        update_shp_zip                 = False
        download_confirmation_required = True
        output_dir                     = None
    """
    # Make sure all the required shape files are ready
    file_format = ".shp.zip"
    subregion_names_ = [
        regulate_input_subregion_name(x) for x in subregion_names
    ]
    download_subregion_osm_file(
        *subregion_names_,
        osm_file_format=file_format,
        download_dir=output_dir,
        update=update_shp_zip,
        download_confirmation_required=download_confirmation_required)

    # Extract all files from .zip
    # NOTE(review): 'output_dir' is passed as the download directory above, but the archives are looked up at
    # their default paths here - confirm that the two agree when 'output_dir' is given.
    file_paths = [
        get_default_path_to_osm_file(x, file_format, mkdir=False)[1]
        for x in subregion_names_
    ]
    extract_dirs = []
    for file_path in file_paths:
        extract_dir = os.path.splitext(file_path)[0]
        extract_shp_zip(file_path, extract_dir)
        extract_dirs.append(extract_dir)

    # Specify a directory that stores files for the specific layer
    if output_dir:
        # Kept as an assertion so that callers still see an AssertionError for a relative path
        assert os.path.isabs(output_dir), "'output_dir' must be an absolute path"
        path_to_merged = output_dir
    else:
        # The common path of a .zip file and its extract folder is the folder containing both
        path_to_merged = os.path.join(
            os.path.commonpath((file_paths[0], extract_dirs[0])),
            "merged_" + layer)

    os.makedirs(path_to_merged, exist_ok=True)

    # Copy .shp files (e.g. gis_osm_***_free_1.shp) into the output directory
    for subregion, extract_dir in zip(subregion_names, extract_dirs):
        name_prefix = subregion.lower().replace(' ', '-')
        for original_filename in glob.glob1(extract_dir,
                                            "*{}*".format(layer)):
            dest = os.path.join(
                path_to_merged,
                "{}_{}".format(name_prefix, original_filename))
            shutil.copyfile(os.path.join(extract_dir, original_filename),
                            dest)

    # Resource: https://github.com/GeospatialPython/pyshp
    merged_name = "merged_" + layer
    # Leave out the output of an earlier run: it is the very file the writer below is about to overwrite,
    # so reading it as an input would either fail or duplicate features.
    shp_file_paths = [
        x for x in glob.glob(os.path.join(path_to_merged, '*.shp'))
        if os.path.basename(x) != merged_name + ".shp"
    ]

    w = shapefile.Writer(os.path.join(path_to_merged, merged_name))
    print("\nMerging the following shape files:\n    {}".format("\n    ".join(
        os.path.basename(f) for f in shp_file_paths)))
    print("In progress ... ", end="")
    try:
        for f in shp_file_paths:
            r = shapefile.Reader(f)
            try:
                w.fields = r.fields[1:]  # skip first deletion field
                w.shapeType = r.shapeType
                for shaperec in r.iterShapeRecords():
                    w.record(*shaperec.record)
                    w.shape(shaperec.shape)
            finally:
                # Release the reader's file handles even if a record cannot be written
                r.close()
        w.close()
        print("Successfully.")
    except Exception as e:
        print("Failed. {}".format(e))
    print("\nCheck out \"{}\".\n".format(path_to_merged))