Example #1
def validate_bbbike_download_info(subregion_name,
                                  osm_file_format,
                                  download_dir=None):
    """
    :param subregion_name: [str]
    :param osm_file_format: [str]
    :param download_dir: [str; None (default)]
    :return: [tuple] of length 4 ([str], [str], [str], [str]): subregion name, filename, download URL and file path

    Example:
        subregion_name  = 'leeds'
        osm_file_format = 'pbf'
        download_dir    = None
        validate_bbbike_download_info(subregion_name, osm_file_format, download_dir)
    """
    subregion_name_, download_url = get_bbbike_subregion_download_url(
        subregion_name, osm_file_format)
    osm_filename = os.path.basename(download_url)
    if not download_dir:
        # Download the requested OSM file to default directory
        path_to_file = cd_dat_bbbike(subregion_name_, osm_filename)
    else:
        path_to_file = os.path.join(regulate_input_data_dir(download_dir),
                                    osm_filename)
    return subregion_name_, osm_filename, download_url, path_to_file
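# Usage sketch (illustrative, not part of the original module); it assumes the helpers called above,
# e.g. get_bbbike_subregion_download_url and cd_dat_bbbike, are importable from the same package.
subregion_name_, osm_filename, download_url, path_to_file = validate_bbbike_download_info(
    'leeds', 'pbf', download_dir=None)
print(osm_filename)    # e.g. "Leeds.osm.pbf"
print(path_to_file)    # a path under the package's default data directory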
Example #2
def download_bbbike_subregion_osm_all_files(subregion_name, download_dir=None, download_confirmation_required=True):
    """
    :param subregion_name: [str]
    :param download_dir: [str or None]
    :param download_confirmation_required: [bool]
    """
    subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)
    bbbike_download_dictionary = fetch_bbbike_download_catalogue("BBBike-download-catalogue")
    sub_download_catalogue = bbbike_download_dictionary[subregion_name_]

    data_dir = cd_dat_bbbike(subregion_name_) if not download_dir else regulate_input_data_dir(download_dir)

    if confirmed("Confirm to download all available BBBike data for \"{}\"?".format(subregion_name_),
                 confirmation_required=download_confirmation_required):
        print("\nStart to download all available OSM data for \"{}\" ... \n".format(subregion_name_))
        for download_url, osm_filename in zip(sub_download_catalogue.URL, sub_download_catalogue.Filename):
            print("\n\n\"{}\" (below): ".format(osm_filename))
            try:
                path_to_file = os.path.join(data_dir, subregion_name_, osm_filename)
                download(download_url, path_to_file)
                # if os.path.getsize(path_to_file) / (1024 ** 2) <= 5:
                #     time.sleep(5)
            except Exception as e:
                print("\nFailed to download \"{}\". {}.".format(osm_filename, e))
        print("\nCheck out the downloaded OSM data for \"{}\" at \"{}\".".format(
            subregion_name_, os.path.join(data_dir, subregion_name_)))
    else:
        print("The downloading process was not activated.")
Example #3
def download_subregion_osm_file(*subregion_name,
                                osm_file_format,
                                download_dir=None,
                                update=False,
                                download_confirmation_required=True,
                                verbose=True):
    """
    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param osm_file_format: [str] ".osm.pbf", ".shp.zip", or ".osm.bz2"
    :param download_dir: [str] directory to save the downloaded file(s), or None (using default directory)
    :param update: [bool] whether to update (i.e. re-download) data
    :param download_confirmation_required: [bool] whether to confirm before downloading
    :param verbose: [bool]
    """
    for sub_reg_name in subregion_name:

        # Get download URL
        subregion_name_, download_url = get_subregion_download_url(
            sub_reg_name, osm_file_format, update=False)

        if not download_dir:
            # Download the requested OSM file to default directory
            osm_filename, path_to_file = get_default_path_to_osm_file(
                subregion_name_, osm_file_format, mkdir=True)
        else:
            regulated_dir = regulate_input_data_dir(download_dir)
            osm_filename = get_default_osm_filename(
                subregion_name_, osm_file_format=osm_file_format)
            path_to_file = os.path.join(regulated_dir, osm_filename)

        if os.path.isfile(path_to_file) and not update:
            if verbose:
                print(
                    "\n\"{}\" is already available for \"{}\" at: \n\"{}\".\n".
                    format(osm_filename, subregion_name_, path_to_file))
        else:
            if confirmed("\nTo download {} data for {}".format(
                    osm_file_format, subregion_name_),
                         confirmation_required=download_confirmation_required):

                op = "Updating" if os.path.isfile(
                    path_to_file) else "Downloading"
                try:
                    download(download_url, path_to_file)
                    print("\n{} \"{}\" for \"{}\" ... Done.".format(
                        op, osm_filename, subregion_name_))
                    print("Check out: \"{}\".".format(path_to_file))
                except Exception as e:
                    print("\nFailed to download \"{}\". {}.".format(
                        osm_filename, e))
            else:
                print("The downloading process was not activated.")
Example #4
def find_osm_pbf_file(subregion_name, data_dir=None):
    """
    :param subregion_name: [str]
    :param data_dir: [str or None]
    :return: [str; None] path to the .osm.pbf file if it exists locally; otherwise None
    """
    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)
    if not data_dir:  # Go to default file path
        path_to_osm_pbf_ = path_to_osm_pbf
    else:
        osm_pbf_dir = regulate_input_data_dir(data_dir)
        path_to_osm_pbf_ = os.path.join(osm_pbf_dir, osm_pbf_filename)
    return path_to_osm_pbf_ if os.path.isfile(path_to_osm_pbf_) else None
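# Usage sketch (illustrative): the function returns the local path only if the file already exists.
path_to_rutland_pbf = find_osm_pbf_file('rutland', data_dir=None)
if path_to_rutland_pbf is None:
    print("The .osm.pbf file for \"rutland\" is not available locally; download it first.")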
Example #5
def find_osm_shp_file(subregion_name,
                      layer=None,
                      feature=None,
                      data_dir=None,
                      file_ext=".shp"):
    """
    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param layer: [str; None (default)] name of a .shp layer, e.g. 'railways'
    :param feature: [str; None (default)] feature name, e.g. 'rail'; if None, all available features are included
    :param data_dir: [str; None (default)] directory to search in; if None, use the default directory
    :param file_ext: [str] (default: ".shp") file extension
    :return: [list] a list of paths
                find_osm_shp_file('england', 'railways', feature=None, file_ext=".shp") should return
                ['...\\Europe\\Great Britain\\england-latest-free.shp\\gis.osm_railways_free_1.shp'],
                if such a file exists, and [] otherwise.

    Testing e.g.
        subregion_name = 'london'
        layer          = None
        feature        = None
        data_dir       = None
        file_ext       = ".shp"
        find_osm_shp_file(subregion_name, layer, feature, data_dir, file_ext)
    """
    if not data_dir:  # Go to default file path
        _, path_to_shp_zip = get_default_path_to_osm_file(
            subregion_name, osm_file_format=".shp.zip", mkdir=False)
        shp_dir = os.path.splitext(path_to_shp_zip)[0]
    else:
        shp_dir = regulate_input_data_dir(data_dir)

    if not layer:
        osm_file_paths = glob.glob(os.path.join(shp_dir, "*" + file_ext))
    else:
        pat = re.compile(
            "{}(_a)?_free".format(layer)) if not feature else re.compile(
                "{}_{}".format(layer, feature))
        osm_file_paths = [
            f for f in glob.glob(os.path.join(shp_dir, "*" + file_ext))
            if re.search(pat, f)
        ]

    # if not osm_file_paths: print("The required file may not exist.")
    return osm_file_paths
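# Usage sketch (illustrative): look for extracted railway shapefiles for London in the default
# directory; an empty list means nothing has been extracted yet.
shp_paths = find_osm_shp_file('london', layer='railways', feature=None,
                              data_dir=None, file_ext=".shp")
print(shp_paths)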
Example #6
def read_osm_pbf(subregion_name,
                 data_dir=None,
                 parsed=True,
                 file_size_limit=50,
                 fmt_other_tags=True,
                 fmt_single_geom=True,
                 fmt_multi_geom=True,
                 update=False,
                 download_confirmation_required=True,
                 pickle_it=False,
                 rm_osm_pbf=False,
                 verbose=False):
    """
    :param subregion_name: [str] e.g. 'rutland'
    :param data_dir: [str; None (default)] customised path of a .osm.pbf file
    :param parsed: [bool] (default: True)
    :param file_size_limit: [numbers.Number] (default: 50) limit of file size (in MB), e.g. 50 or 100
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param update: [bool] (default: False)
    :param download_confirmation_required: [bool] (default: True)
    :param pickle_it: [bool] (default: False)
    :param rm_osm_pbf: [bool] (default: False)
    :param verbose: [bool] (default: False)
    :return: [dict; None]

    If 'data_dir' is not specified, the default file path for the subregion will be used.

    Example:
        subregion_name                 = 'Rutland'
        data_dir                       = None
        parsed                         = True
        file_size_limit                = 50
        fmt_other_tags                 = True
        fmt_single_geom                = True
        fmt_multi_geom                 = True
        update                         = False
        download_confirmation_required = True
        pickle_it                      = False
        rm_osm_pbf                     = True
        verbose                        = False
        read_osm_pbf(subregion_name, data_dir, parsed, file_size_limit, fmt_other_tags, fmt_single_geom, fmt_multi_geom,
                     update, download_confirmation_required, pickle_it, rm_osm_pbf, verbose)
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)
    if osm_pbf_filename and path_to_osm_pbf:
        if not data_dir:  # Go to default file path
            path_to_osm_pbf = path_to_osm_pbf
        else:
            osm_pbf_dir = regulate_input_data_dir(data_dir)
            path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)

        subregion_filename = os.path.basename(path_to_osm_pbf)

        path_to_pickle = path_to_osm_pbf.replace(
            ".osm.pbf", ".pickle" if parsed else "-raw.pickle")
        if os.path.isfile(path_to_pickle) and not update:
            osm_pbf_data = load_pickle(path_to_pickle, verbose=verbose)
        else:
            # If the target file is not available, try downloading it first.
            if not os.path.isfile(path_to_osm_pbf) or update:
                try:
                    download_subregion_osm_file(subregion_name,
                                                osm_file_format=".osm.pbf",
                                                download_dir=data_dir,
                                                download_confirmation_required=
                                                download_confirmation_required,
                                                update=update,
                                                verbose=False)
                except Exception as e:
                    print("Cancelled reading data. CAUSE: {}".format(e))
                osm_pbf_data = None

            else:
                file_size_in_mb = round(
                    os.path.getsize(path_to_osm_pbf) / (1024**2), 1)

                if file_size_limit and file_size_in_mb > file_size_limit:
                    # Parsing the '.osm.pbf' file in a chunk-wise way
                    chunks_no = math.ceil(file_size_in_mb / file_size_limit)
                else:
                    chunks_no = None

                print("\nParsing \"{}\" ... ".format(subregion_filename),
                      end="") if verbose else ""
                try:
                    osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no,
                                                 parsed, fmt_other_tags,
                                                 fmt_single_geom,
                                                 fmt_multi_geom)
                    print("Successfully.\n") if verbose else ""
                    if pickle_it:
                        save_pickle(osm_pbf_data,
                                    path_to_pickle,
                                    verbose=verbose)
                except Exception as e:
                    print("Failed. CAUSE: \"{}\"\n".format(e))
                    osm_pbf_data = None

                if rm_osm_pbf:
                    remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

        return osm_pbf_data

    else:
        print(
            "Errors occur. Maybe check with the input \"subregion_name\" first."
        )
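# Usage sketch (illustrative): parse Rutland's .osm.pbf into a dict of layer data and cache it as a
# pickle; the layer names shown in the comment are the typical OGR layers of a .osm.pbf file.
rutland_pbf = read_osm_pbf('Rutland', data_dir=None, parsed=True,
                           download_confirmation_required=True,
                           pickle_it=True, verbose=True)
if rutland_pbf is not None:
    print(list(rutland_pbf.keys()))  # e.g. ['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']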
Example #7
def read_shp_zip(subregion_name,
                 layer,
                 feature=None,
                 data_dir=None,
                 update=False,
                 download_confirmation_required=True,
                 pickle_it=False,
                 rm_extracts=False,
                 rm_shp_zip=False,
                 verbose=False):
    """
    :param subregion_name: [str] e.g. 'england', 'oxfordshire', or 'europe'; case-insensitive
    :param layer: [str] e.g. 'railways'
    :param feature: [str; None (default)] e.g. 'rail'; if None, all available features included
    :param data_dir: [str; None (default)]
    :param update: [bool] (default: False) whether to update the relevant file/information
    :param download_confirmation_required: [bool] (default: True)
    :param pickle_it: [bool] (default: False)
    :param rm_extracts: [bool] (default: False) whether to delete extracted files from the .shp.zip file
    :param rm_shp_zip: [bool] (default: False) whether to delete the downloaded .shp.zip file
    :param verbose: [bool] (default: False)
    :return: [gpd.GeoDataFrame; None]

    Example:
        subregion_name                 = 'Rutland'
        layer                          = 'railways'
        feature                        = None
        data_dir                       = cd("test_read_GeoFabrik")
        update                         = False
        download_confirmation_required = True
        pickle_it                      = False
        rm_extracts                    = True
        rm_shp_zip                     = False
        verbose                        = True
        read_shp_zip(subregion_name, layer, feature, data_dir, update, download_confirmation_required, pickle_it,
                     rm_extracts, rm_shp_zip, verbose)
    """
    shp_zip_filename, path_to_shp_zip = get_default_path_to_osm_file(
        subregion_name, ".shp.zip", mkdir=False)
    if shp_zip_filename and path_to_shp_zip:
        extract_dir = os.path.splitext(path_to_shp_zip)[0]
        if data_dir:
            shp_zip_dir = regulate_input_data_dir(data_dir)
            path_to_shp_zip = os.path.join(shp_zip_dir, shp_zip_filename)
            extract_dir = os.path.join(shp_zip_dir,
                                       os.path.basename(extract_dir))

        # Make a local path for saving a pickle file for .shp data
        sub_name = "-".join(x
                            for x in [
                                shp_zip_filename.replace(
                                    "-latest-free.shp.zip", ""), layer, feature
                            ] if x)
        path_to_shp_pickle = os.path.join(extract_dir,
                                          sub_name + ".shp.pickle")

        if os.path.isfile(path_to_shp_pickle) and not update:
            shp_data = load_pickle(path_to_shp_pickle, verbose=verbose)
        else:
            # Download the requested .shp.zip file if it has not been downloaded and extracted yet
            if not os.path.exists(extract_dir):
                download_subregion_osm_file(shp_zip_filename,
                                            osm_file_format=".shp.zip",
                                            download_dir=data_dir,
                                            update=update,
                                            verbose=verbose,
                                            download_confirmation_required=
                                            download_confirmation_required)

            if os.path.isfile(path_to_shp_zip):
                extract_shp_zip(path_to_shp_zip,
                                extract_dir,
                                layer=layer,
                                verbose=verbose)

            path_to_shp = glob.glob(
                os.path.join(extract_dir, "*{}*.shp".format(layer)))
            if len(path_to_shp) == 0:
                shp_data = None
            elif len(path_to_shp) == 1:
                shp_data = gpd.read_file(
                    path_to_shp[0]
                )  # gpd.GeoDataFrame(read_shp_file(path_to_shp))
                if feature:
                    path_to_shp_feat = path_to_shp[0].replace(
                        layer, layer + "_" + feature)
                    shp_data = gpd.GeoDataFrame(
                        shp_data[shp_data.fclass == feature])
                    shp_data.crs = {
                        'no_defs': True,
                        'ellps': 'WGS84',
                        'datum': 'WGS84',
                        'proj': 'longlat'
                    }
                    shp_data.to_file(path_to_shp_feat, driver='ESRI Shapefile')
            else:  # len(path_to_shp) > 1:
                if not feature:
                    path_to_orig_shp = [
                        p for p in path_to_shp
                        if layer + '_a' in p or layer + '_free' in p
                    ]
                    if len(path_to_orig_shp
                           ) == 1:  # "_a*.shp" is not available
                        shp_data = gpd.read_file(path_to_orig_shp[0])
                    else:
                        shp_data = [gpd.read_file(p) for p in path_to_shp]
                        shp_data = pd.concat(shp_data,
                                             axis=0,
                                             ignore_index=True)
                else:  # feature is not None
                    path_to_shp_feat = [
                        p for p in path_to_shp
                        if layer + "_" + feature not in p
                    ]
                    if len(path_to_shp_feat) == 1:  # "_a*.shp" does not exist
                        shp_data = gpd.read_file(path_to_shp_feat[0])
                        shp_data = shp_data[shp_data.fclass == feature]
                    else:  # both "_a*" and "_free*" .shp files for the feature are available
                        shp_data = [
                            dat[dat.fclass == feature]
                            for dat in (gpd.read_file(p)
                                        for p in path_to_shp_feat)
                        ]
                        shp_data = pd.concat(shp_data,
                                             axis=0,
                                             ignore_index=True)
                    shp_data.crs = {
                        'no_defs': True,
                        'ellps': 'WGS84',
                        'datum': 'WGS84',
                        'proj': 'longlat'
                    }
                    shp_data.to_file(path_to_shp_feat[0].replace(
                        layer, layer + "_" + feature),
                                     driver='ESRI Shapefile')

            if pickle_it:
                save_pickle(shp_data, path_to_shp_pickle, verbose=verbose)

            if os.path.exists(extract_dir) and rm_extracts:
                # import shutil; shutil.rmtree(extract_dir)
                for f in glob.glob(os.path.join(extract_dir, "gis_osm*")):
                    # if layer not in f:
                    os.remove(f)

            if os.path.isfile(path_to_shp_zip) and rm_shp_zip:
                remove_subregion_osm_file(path_to_shp_zip, verbose=verbose)

        return shp_data
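# Usage sketch (illustrative): read the 'railways' layer for Rutland and keep only 'rail' features;
# the data directory below is an arbitrary example.
rutland_rail = read_shp_zip('Rutland', layer='railways', feature='rail',
                            data_dir="tests_geofabrik", verbose=True)
if rutland_rail is not None:
    print(rutland_rail.head())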
Example #8
def merge_multi_shp(subregion_names,
                    layer,
                    update_shp_zip=False,
                    download_confirmation_required=True,
                    data_dir=None,
                    prefix="gis_osm",
                    rm_zip_extracts=False,
                    rm_shp_parts=False,
                    merged_shp_dir=None,
                    verbose=False):
    """
    :param subregion_names: [list] a list of subregion names, e.g. ['rutland', 'essex']
    :param layer: [str] name of an OSM layer, e.g. 'railways'
    :param update_shp_zip: [bool] (default: False) indicates whether to update the relevant file/information
    :param download_confirmation_required: [bool] (default: True)
    :param data_dir: [str; None]
    :param prefix: [str] (default: "gis_osm")
    :param rm_zip_extracts: [bool] (default: False)
    :param rm_shp_parts: [bool] (default: False)
    :param merged_shp_dir: [str; None (default)] if None, use the layer name as the name of the folder where the merged
                                                shp files will be saved
    :param verbose: [bool] (default: False)

    Layers include 'buildings', 'landuse', 'natural', 'places', 'points', 'railways', 'roads' and 'waterways'

    Note that this function does not create a projection (.prj) file for the merged map.
    Reference: http://geospatialpython.com/2011/02/create-prj-projection-file-for.html for creating a .prj file.

    Example:
        subregion_names                = ['Rutland', 'Herefordshire']
        layer                          = 'railways'
        update_shp_zip                 = False
        download_confirmation_required = True
        data_dir                       = cd("test_read_GeoFabrik")
        prefix                         = "gis_osm"
        rm_zip_extracts                = False
        rm_shp_parts                   = False
        merged_shp_dir                 = None
        verbose                        = True
        merge_multi_shp(subregion_names, layer, update_shp_zip, download_confirmation_required, data_dir)
    """
    # Make sure all the required shape files are ready
    subregion_names_, file_format = [
        regulate_input_subregion_name(x) for x in subregion_names
    ], ".shp.zip"
    download_subregion_osm_file(
        *subregion_names_,
        osm_file_format=file_format,
        download_dir=data_dir,
        update=update_shp_zip,
        download_confirmation_required=download_confirmation_required,
        verbose=verbose)

    # Extract all files from .zip
    if not data_dir:  # output_dir is None or output_dir == ""
        file_paths = (get_default_path_to_osm_file(x, file_format,
                                                   mkdir=False)[1]
                      for x in subregion_names_)
    else:
        default_filenames = (get_default_path_to_osm_file(x,
                                                          file_format,
                                                          mkdir=False)[0]
                             for x in subregion_names_)
        file_paths = [
            cd(regulate_input_data_dir(data_dir), f) for f in default_filenames
        ]

    extract_info = [(p, os.path.splitext(p)[0]) for p in file_paths]
    extract_dirs = []
    for file_path, extract_dir in extract_info:
        extract_shp_zip(file_path, extract_dir, layer=layer, verbose=verbose)
        extract_dirs.append(extract_dir)

    # Specify a directory that stores files for the specific layer
    if not data_dir:
        path_to_merged = cd(os.path.commonpath(extract_info[0]),
                            "merged_" + layer)
    else:
        path_to_merged = cd(regulate_input_data_dir(data_dir),
                            "merged_" + layer)

    if not os.path.exists(path_to_merged):
        os.mkdir(path_to_merged)

    # Copy .shp files (e.g. gis_osm_***_free_1.shp) into the output directory
    for subregion, p in zip(subregion_names, extract_dirs):
        for original_filename in glob.glob1(p, "*{}*".format(layer)):
            dest = os.path.join(
                path_to_merged,
                "{}_{}".format(subregion.lower().replace(' ', '-'),
                               original_filename))
            if rm_zip_extracts:
                shutil.move(os.path.join(p, original_filename), dest)
                shutil.rmtree(p)
            else:
                shutil.copyfile(os.path.join(p, original_filename), dest)

    # Resource: https://github.com/GeospatialPython/pyshp
    shp_file_paths = [
        x for x in glob.glob(os.path.join(path_to_merged, "*.shp"))
        if not os.path.basename(x).startswith("merged_")
    ]

    path_to_merged_shp_file = cd(path_to_merged,
                                 "merged_" + prefix + "_" + layer)
    w = shapefile.Writer(path_to_merged_shp_file)
    if verbose:
        print("\nMerging the following shape files:\n    {}".format(
            "\n    ".join(os.path.basename(f) for f in shp_file_paths)))
        print("In progress ... ", end="")
    try:
        for f in shp_file_paths:
            r = shapefile.Reader(f)
            w.fields = r.fields[1:]  # skip first deletion field
            w.shapeType = r.shapeType
            for shaperec in r.iterShapeRecords():
                w.record(*shaperec.record)
                w.shape(shaperec.shape)
            r.close()
        w.close()
        merged_shp_data = gpd.read_file(path_to_merged_shp_file + ".shp")
        merged_shp_data.crs = {
            'no_defs': True,
            'ellps': 'WGS84',
            'datum': 'WGS84',
            'proj': 'longlat'
        }
        merged_shp_data.to_file(filename=path_to_merged_shp_file,
                                driver="ESRI Shapefile")
        print("Successfully.") if verbose else ""
    except Exception as e:
        print("Failed. {}".format(e)) if verbose else ""
    print("The output .shp file is saved in \"{}\".".format(
        path_to_merged)) if verbose else ""

    if rm_shp_parts:
        if merged_shp_dir:
            new_shp_dir = cd(regulate_input_data_dir(merged_shp_dir),
                             mkdir=True)
        else:
            new_shp_dir = cd(data_dir, layer, mkdir=True)
        for x in glob.glob(cd(path_to_merged, "merged_*")):
            shutil.move(
                x,
                cd(new_shp_dir,
                   os.path.basename(x).replace("merged_", "", 1)))
        shutil.rmtree(path_to_merged)
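# Usage sketch (illustrative): merge the 'railways' layer of two neighbouring subregions into one
# shapefile under an arbitrary example directory; a .prj file still has to be created separately.
merge_multi_shp(['Rutland', 'Leicestershire'], layer='railways',
                data_dir="tests_geofabrik", verbose=True)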
Example #9
def psql_osm_pbf_data_extracts(*subregion_name, database_name='OSM_Geofabrik', data_dir=None,
                               update_osm_pbf=False, if_table_exists='replace', file_size_limit=50, parsed=True,
                               fmt_other_tags=True, fmt_single_geom=True, fmt_multi_geom=True, rm_raw_file=False,
                               verbose=False):
    """
    Import data of selected or all (sub)regions, which do not have (sub-)subregions, into a PostgreSQL server.

    :param subregion_name: [str]
    :param database_name: [str] (default: 'OSM_Geofabrik')
    :param data_dir: [str; None (default)]
    :param update_osm_pbf: [bool] (default: False)
    :param if_table_exists: [str] 'replace' (default); 'append'; or 'fail'
    :param file_size_limit: [int] (default: 50)
    :param parsed: [bool] (default: True)
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param rm_raw_file: [bool] (default: False)
    :param verbose: [bool] (default: False)
    """
    if not subregion_name:
        subregion_names = fetch_region_subregion_tier("GeoFabrik-non-subregion-list")
        confirm_msg = "To dump GeoFabrik OSM data extracts of all subregions to PostgreSQL? "
    else:
        subregion_names = retrieve_names_of_subregions_of(*subregion_name)
        confirm_msg = "To dump GeoFabrik OSM data extracts of the following subregions to PostgreSQL? \n{}?\n".format(
            ", ".join(subregion_names))

    if confirmed(confirm_msg):

        # Connect to PostgreSQL server
        osmdb = OSM()
        osmdb.connect_db(database_name=database_name)

        err_subregion_names = []
        for subregion_name_ in subregion_names:
            default_pbf_filename, default_path_to_pbf = get_default_path_to_osm_file(subregion_name_, ".osm.pbf")
            if not data_dir:  # Go to default file path
                path_to_osm_pbf = default_path_to_pbf
            else:
                osm_pbf_dir = regulate_input_data_dir(data_dir)
                path_to_osm_pbf = os.path.join(osm_pbf_dir, default_pbf_filename)

            download_subregion_osm_file(subregion_name_, osm_file_format=".osm.pbf", download_dir=data_dir,
                                        update=update_osm_pbf, download_confirmation_required=False, verbose=verbose)

            file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)

            try:
                if file_size_in_mb <= file_size_limit:

                    subregion_osm_pbf = read_osm_pbf(subregion_name_, data_dir, parsed, file_size_limit,
                                                     fmt_other_tags, fmt_single_geom, fmt_multi_geom,
                                                     update=False, download_confirmation_required=False,
                                                     pickle_it=False, rm_osm_pbf=rm_raw_file)

                    if subregion_osm_pbf is not None:
                        osmdb.dump_osm_pbf_data(subregion_osm_pbf, table_name=subregion_name_,
                                                if_exists=if_table_exists)
                        del subregion_osm_pbf
                        gc.collect()

                else:
                    print("\nParsing and importing \"{}\" feature-wisely to PostgreSQL ... ".format(subregion_name_))
                    # Reference: https://gdal.org/python/osgeo.ogr.Feature-class.html
                    raw_osm_pbf = ogr.Open(path_to_osm_pbf)
                    layer_count = raw_osm_pbf.GetLayerCount()
                    for i in range(layer_count):
                        lyr = raw_osm_pbf.GetLayerByIndex(i)  # Hold the i-th layer
                        lyr_name = lyr.GetName()
                        print("                       {} ... ".format(lyr_name), end="")
                        try:
                            lyr_feats = [feat for _, feat in enumerate(lyr)]
                            feats_no, chunks_no = len(lyr_feats), math.ceil(file_size_in_mb / file_size_limit)
                            chunked_lyr_feats = split_list(lyr_feats, chunks_no)

                            del lyr_feats
                            gc.collect()

                            if osmdb.subregion_table_exists(lyr_name, subregion_name_) and if_table_exists == 'replace':
                                osmdb.drop_subregion_data_by_layer(subregion_name_, lyr_name)

                            # Loop through all available features
                            for lyr_chunk in chunked_lyr_feats:
                                lyr_chunk_dat = pd.DataFrame(rapidjson.loads(f.ExportToJson()) for f in lyr_chunk)
                                lyr_chunk_dat = parse_layer_data(lyr_chunk_dat, lyr_name,
                                                                 fmt_other_tags, fmt_single_geom, fmt_multi_geom)
                                if_exists_ = if_table_exists if if_table_exists == 'fail' else 'append'
                                osmdb.dump_osm_pbf_data_by_layer(lyr_chunk_dat, if_exists=if_exists_,
                                                                 schema_name=lyr_name, table_name=subregion_name_)
                                del lyr_chunk_dat
                                gc.collect()

                            print("Done. Total amount of features: {}".format(feats_no))

                        except Exception as e:
                            print("Failed. {}".format(e))

                    raw_osm_pbf.Release()
                    del raw_osm_pbf
                    gc.collect()

                if rm_raw_file:
                    remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

            except Exception as e:
                print(e)
                err_subregion_names.append(subregion_name_)

            if subregion_name_ != subregion_names[-1]:
                time.sleep(60)

        if len(err_subregion_names) == 0:
            print("\nMission accomplished.\n")
        else:
            print("\nErrors occurred when parsing data of the following subregion(s):")
            print(*err_subregion_names, sep=", ")

        osmdb.disconnect()
        del osmdb
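# Usage sketch (illustrative): import the parsed .osm.pbf data of two small subregions into the
# PostgreSQL database; it assumes the OSM() class used above holds valid connection details.
psql_osm_pbf_data_extracts('Rutland', 'Isle of Wight',
                           database_name='OSM_Geofabrik',
                           data_dir=None,
                           if_table_exists='replace',
                           verbose=True)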
Example #10
def read_osm_pbf(subregion_name,
                 data_dir=None,
                 parsed=True,
                 file_size_limit=50,
                 fmt_other_tags=True,
                 fmt_single_geom=True,
                 fmt_multi_geom=True,
                 update=False,
                 download_confirmation_required=True,
                 pickle_it=False,
                 rm_osm_pbf=True):
    """
    :param subregion_name: [str] e.g. 'london'
    :param data_dir: [str or None] customised path of a .osm.pbf file
    :param parsed: [bool]
    :param file_size_limit: [numbers.Number] (default: 50) limit of file size (in MB), e.g. 50 or 100
    :param fmt_other_tags: [bool]
    :param fmt_single_geom: [bool]
    :param fmt_multi_geom: [bool]
    :param update: [bool]
    :param download_confirmation_required: [bool]
    :param pickle_it: [bool]
    :param rm_osm_pbf: [bool]
    :return: [dict] or None

    If 'data_dir' is not specified, the default file path for the subregion will be used.
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)
    if not data_dir:  # Go to default file path
        path_to_osm_pbf = path_to_osm_pbf
    else:
        osm_pbf_dir = regulate_input_data_dir(data_dir)
        path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)

    subregion_filename = os.path.basename(path_to_osm_pbf)

    path_to_pickle = path_to_osm_pbf.replace(
        ".osm.pbf", ".pickle" if parsed else "-raw.pickle")
    if os.path.isfile(path_to_pickle) and not update:
        osm_pbf_data = load_pickle(path_to_pickle)
    else:
        # If the target file is not available, try downloading it first.
        download_subregion_osm_file(
            subregion_name,
            osm_file_format=".osm.pbf",
            download_dir=data_dir,
            update=update,
            download_confirmation_required=download_confirmation_required,
            verbose=False)

        file_size_in_mb = round(
            os.path.getsize(path_to_osm_pbf) / (1024**2), 1)

        if file_size_limit and file_size_in_mb > file_size_limit:
            chunks_no = math.ceil(
                file_size_in_mb / file_size_limit
            )  # Parsing the '.osm.pbf' file in a chunk-wise way
        else:
            chunks_no = None

        print("\nParsing \"{}\" ... ".format(subregion_filename), end="")
        try:
            osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                         fmt_other_tags, fmt_single_geom,
                                         fmt_multi_geom)
            print("Successfully.\n")
        except Exception as e:
            print("Failed. {}\n".format(e))
            osm_pbf_data = None

        if pickle_it:
            save_pickle(osm_pbf_data, path_to_pickle)
        if rm_osm_pbf:
            remove_subregion_osm_file(path_to_osm_pbf)

    return osm_pbf_data
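# Usage sketch (illustrative) for this earlier variant: note that rm_osm_pbf defaults to True here,
# so pass rm_osm_pbf=False explicitly to keep the raw .osm.pbf file after parsing.
london_pbf = read_osm_pbf('london', data_dir=None, rm_osm_pbf=False)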
Example #11
def download_subregion_osm_file(*subregion_name,
                                osm_file_format,
                                download_dir=None,
                                update=False,
                                download_confirmation_required=True,
                                deep_retry=False,
                                verbose=False):
    """
    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param osm_file_format: [str] ".osm.pbf", ".shp.zip", or ".osm.bz2"
    :param download_dir: [str; None (default)] directory to save the downloaded file(s); None (using default directory)
    :param update: [bool] (default: False) whether to update (i.e. re-download) data
    :param download_confirmation_required: [bool] (default: True) whether to confirm before downloading
    :param deep_retry: [bool] (default: False)
    :param verbose: [bool] (default: False)

    Example:
        subregion_name                 = 'london'
        osm_file_format                = ".osm.pbf"
        download_dir                   = None
        update                         = False
        download_confirmation_required = True
        verbose                        = True
        download_subregion_osm_file(subregion_name, osm_file_format=osm_file_format, download_dir=download_dir,
                                    update=update, download_confirmation_required=download_confirmation_required,
                                    verbose=verbose)
    """
    for sub_reg_name in subregion_name:

        # Get download URL
        subregion_name_, download_url = get_subregion_download_url(
            sub_reg_name, osm_file_format, update=False)

        if pd.isna(download_url):
            if verbose:
                print(
                    "\"{}\" data is not available for \"{}\" from the server. "
                    "Try to download the data of its subregions instead. ".
                    format(osm_file_format, subregion_name_))
            sub_subregions = retrieve_names_of_subregions_of(subregion_name_,
                                                             deep=deep_retry)
            download_dir_ = cd(
                download_dir,
                subregion_name_.replace(" ", "-").lower() +
                os.path.splitext(osm_file_format)[0])
            download_subregion_osm_file(
                *sub_subregions,
                osm_file_format=osm_file_format,
                download_dir=download_dir_,
                update=update,
                download_confirmation_required=download_confirmation_required,
                verbose=verbose)
        else:
            if not download_dir:
                # Download the requested OSM file to default directory
                osm_filename, path_to_file = get_default_path_to_osm_file(
                    subregion_name_, osm_file_format, mkdir=True)
            else:
                regulated_dir = regulate_input_data_dir(download_dir)
                osm_filename = get_default_osm_filename(
                    subregion_name_, osm_file_format=osm_file_format)
                path_to_file = os.path.join(regulated_dir, osm_filename)

            if os.path.isfile(path_to_file) and not update:
                print(
                    "\n\"{}\" for \"{}\" is already available: \"{}\".".format(
                        osm_filename, subregion_name_,
                        path_to_file)) if verbose else ""
            else:
                op = "Updating" if os.path.isfile(
                    path_to_file) else "Downloading"
                if confirmed(
                        "To download the {} data of \"{}\", saved as \"{}\"\n".
                        format(osm_file_format, subregion_name_, path_to_file),
                        confirmation_required=download_confirmation_required):
                    try:
                        from pyhelpers.download import download
                        download(download_url, path_to_file)
                        if verbose:
                            print("{} \"{}\" for \"{}\" ... Done.".format(
                                op, osm_filename, subregion_name_))
                    except Exception as e:
                        print("Failed to download \"{}\". {}.\n".format(
                            osm_filename, e)) if verbose else ""
                else:
                    print("The {} of \"{}\" was cancelled.\n".format(
                        op.lower(), osm_filename)) if verbose else ""