示例#1
0
 def drop_layer_data_by_subregion(self,
                                  schema_name,
                                  *table_names,
                                  subregion_name_as_table_name=True,
                                  confirmation_required=True,
                                  verbose=False):
     """
     Drop the tables of one or more subregions from a schema (layer).

     A single ``DROP TABLE IF EXISTS ... CASCADE`` statement is issued for all
     the tables. Errors raised while executing it are printed, not re-raised.

     :param schema_name: [str] name of the schema (layer) holding the tables
     :param table_names: [str] one or multiple names of subregions
     :param subregion_name_as_table_name: [bool] (default: True) passed on to regulate_table_name()
     :param confirmation_required: [bool] (default: True) whether to ask before dropping
     :param verbose: [bool] (default: False) whether to print progress messages
     """
     if not table_names:
         # Nothing to drop; avoids prompting for (and failing on) an empty statement.
         return

     _, tbls_msg = self.multi_names_msg(*table_names, desc='table')
     prompt = "Confirmed to drop the {} from the database \"{}\"".format(
         tbls_msg, self.database_name)
     if not confirmed(prompt, confirmation_required=confirmation_required):
         return

     # Fully-qualified, quoted table identifiers: schema."table"
     tables = tuple(
         '{}."{}"'.format(
             schema_name,
             regulate_table_name(table_name, subregion_name_as_table_name))
         for table_name in table_names)

     if verbose:
         # "a", "a and b" or "a, b and c" -- valid for any number of tables
         # (the previous %-template needed at least two names).
         if len(tables) > 1:
             listed = ", ".join(tables[:-1]) + " and " + tables[-1]
         else:
             listed = tables[0]
         print("Dropping " + listed + " ... ", end="")

     try:
         self.engine.execute(
             'DROP TABLE IF EXISTS ' + ', '.join(tables) + ' CASCADE;')
         if verbose:
             print("Done.")
     except Exception as e:
         print("Failed. CAUSE: \"{}\"".format(e))
示例#2
0
 def drop_schema(self,
                 *schema_names,
                 confirmation_required=True,
                 verbose=False):
     """
     Drop one or more schemas (with CASCADE) after an optional confirmation.

     Errors raised while building or executing the statement are printed,
     not re-raised.

     :param schema_names: [str] name of one schema, or names of multiple schemas
     :param confirmation_required: [bool] (default: True) whether to ask before dropping
     :param verbose: [bool] (default: False) whether to print progress messages
     """
     schemas, schemas_msg = self.multi_names_msg(*schema_names,
                                                 desc='schema')
     question = "Confirmed to drop the {} from the database \"{}\"".format(
         schemas_msg, self.database_name)
     if not confirmed(question,
                      confirmation_required=confirmation_required):
         return

     try:
         if verbose:
             print("Dropping the {} ... ".format(schemas_msg), end="")
         # One '%s' slot per schema name, comma-separated.
         name_slots = '%s, ' * (len(schemas) - 1) + '%s'
         statement = ('DROP SCHEMA IF EXISTS ' + name_slots % schemas +
                      ' CASCADE;')
         self.engine.execute(statement)
         if verbose:
             print("Done.")
     except Exception as e:
         print("Failed. CAUSE: \"{}\"".format(e))
示例#3
0
def download_bbbike_subregion_osm_all_files(
        subregion_name,
        download_dir=None,
        download_confirmation_required=True):
    """
    Download every file listed in the BBBike download catalogue for a subregion.

    Files go to cd_dat_bbbike(<subregion>) when `download_dir` is not given,
    otherwise to <download_dir>/<subregion>. A failed download is reported and
    the remaining files are still attempted.

    :param subregion_name: [str] name of a BBBike subregion
    :param download_dir: [str or None] directory to save the files; None (default) for the default directory
    :param download_confirmation_required: [bool] (default: True) whether to ask before downloading

    Example:
        subregion_name                 = 'leeds'
        download_dir                   = None
        download_confirmation_required = True
        download_bbbike_subregion_osm_all_files(subregion_name, download_dir, download_confirmation_required)
    """
    subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)
    bbbike_download_dictionary = fetch_bbbike_download_catalogue(
        "BBBike-download-catalogue")
    sub_download_catalogue = bbbike_download_dictionary[subregion_name_]

    # Resolve the one directory the files are written to, so that the download
    # paths and the final message always agree.
    if not download_dir:
        save_dir = cd_dat_bbbike(subregion_name_)
    else:
        save_dir = os.path.join(regulate_input_data_dir(download_dir),
                                subregion_name_)

    if not confirmed(
            "Confirm to download all available BBBike data for \"{}\"?".format(
                subregion_name_),
            confirmation_required=download_confirmation_required):
        print("The downloading process was not activated.")
        return

    print("\nStart to download all available OSM data for \"{}\" ... \n".
          format(subregion_name_))
    for download_url, osm_filename in zip(sub_download_catalogue.URL,
                                          sub_download_catalogue.Filename):
        print("\n\n\"{}\" (below): ".format(osm_filename))
        try:
            download(download_url, os.path.join(save_dir, osm_filename))
        except Exception as e:
            # Best effort: report and carry on with the next file.
            print("\nFailed to download \"{}\". {}.".format(osm_filename, e))
    print("\nCheck out the downloaded OSM data for \"{}\" at \"{}\".".format(
        subregion_name_, save_dir))
示例#4
0
def download_bbbike_subregion_osm(*subregion_name,
                                  osm_file_format,
                                  download_dir=None,
                                  update=False,
                                  download_confirmation_required=True):
    """
    Download one BBBike data file, in the given format, for each named subregion.

    A file that already exists is left alone unless `update` is True.

    :param subregion_name: [str] one or multiple subregion names
    :param osm_file_format: [str] file format to download
    :param download_dir: [str; None (default)] passed on to validate_bbbike_download_info()
    :param update: [bool] (default: False) whether to download again an existing file
    :param download_confirmation_required: [bool] (default: True) whether to ask before each download

    Example:
        subregion_name                 = 'leeds'
        osm_file_format                = 'pbf'
        download_dir                   = None
        update                         = False
        download_confirmation_required = True
        download_bbbike_subregion_osm(subregion_name, osm_file_format=osm_file_format, download_dir=download_dir,
                                      update=update, download_confirmation_required=download_confirmation_required)
    """
    for requested_name in subregion_name:
        download_info = validate_bbbike_download_info(
            requested_name, osm_file_format, download_dir)
        subregion_name_, osm_filename, download_url, path_to_file = download_info

        # Already on disk and no refresh requested: just say where it is.
        if os.path.isfile(path_to_file) and not update:
            print("\"{}\" is already available for \"{}\" at: \n\"{}\".\n".
                  format(osm_filename, subregion_name_, path_to_file))
            continue

        question = "\nTo download {} data for {}".format(
            osm_file_format, subregion_name_)
        if not confirmed(
                question,
                confirmation_required=download_confirmation_required):
            print("The downloading process was not activated.")
            continue

        try:
            download(download_url, path_to_file)
            print(
                "\n\"{}\" has been downloaded for \"{}\", which is now available at \n\"{}\".\n"
                .format(osm_filename, subregion_name_, path_to_file))

            # Pause after a small file (5 MB or less).
            size_in_mb = os.path.getsize(path_to_file) / (1024**2)
            if size_in_mb <= 5:
                time.sleep(5)

        except Exception as e:
            print("\nFailed to download \"{}\". {}.".format(osm_filename, e))
示例#5
0
def download_sub_subregion_osm_file(*subregion_name,
                                    osm_file_format,
                                    download_dir=None,
                                    update=False,
                                    download_confirmation_required=True,
                                    interval_sec=5,
                                    verbose=False):
    """
    Download the OSM files of all subregions of the given region(s).

    The subregion names come from retrieve_names_of_subregions_of(). After one
    overall confirmation, they are handed to download_subregion_osm_file() in
    a single call, with per-file confirmation switched off.

    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param osm_file_format: [str] ".osm.pbf", ".shp.zip", or ".osm.bz2"
    :param download_dir: [str; None (default)] directory to save the downloaded file(s); None (using default directory)
    :param update: [bool] (default: False) whether to update (i.e. re-download) data
    :param download_confirmation_required: [bool] (default: True) whether to confirm before downloading
    :param interval_sec: [int; None] (default: 5) pause (in sec) made once, after the whole batch has been processed
    :param verbose: [bool] (default: False)

    Example:
        subregion_name_1               = 'bedfordshire'
        subregion_name_2               = 'rutland'
        osm_file_format                = ".osm.pbf"
        download_dir                   = None
        update                         = False
        download_confirmation_required = True
        interval_sec                   = 5
        verbose                        = False
        download_sub_subregion_osm_file(subregion_name_1, subregion_name_2, osm_file_format=osm_file_format,
                                        download_dir=download_dir, update=update,
                                        download_confirmation_required=download_confirmation_required,
                                        interval_sec=interval_sec, verbose=verbose)
    """
    subregions = retrieve_names_of_subregions_of(*subregion_name)

    question = "\nTo download {} data for all the following subregions: \n{}?\n".format(
        osm_file_format, ", ".join(subregions))
    if not confirmed(question,
                     confirmation_required=download_confirmation_required):
        return

    # The user has agreed to the whole batch, so do not ask again per file.
    download_subregion_osm_file(*subregions,
                                osm_file_format=osm_file_format,
                                download_dir=download_dir,
                                update=update,
                                download_confirmation_required=False,
                                verbose=verbose)
    if interval_sec:
        time.sleep(interval_sec)
示例#6
0
def collect_continents_subregion_tables(confirmation_required=True,
                                        verbose=False):
    """
    Scrape the Geofabrik home page for continents and pickle their subregion tables.

    Each continent link found on the page is passed to get_subregion_table().
    The resulting {continent name: table} dictionary is saved as
    "GeoFabrik-continents-subregion-tables.pickle". Any error is printed,
    not re-raised.

    :param confirmation_required: [bool] (default: True) whether to confirm before starting to collect the information
    :param verbose: [bool] (default: False)

    Example:
        confirmation_required = True
        verbose               = True
        collect_continents_subregion_tables(confirmation_required, verbose)
    """
    if not confirmed(
            "To collect information about subregions of each continent? ",
            confirmation_required=confirmation_required):
        print(
            "The information collection process was not activated. The existing local copy will be loaded instead."
        )
        return

    try:
        home_link = 'https://download.geofabrik.de/'
        response = requests.get(home_link)
        page = bs4.BeautifulSoup(response.text, 'lxml')
        continent_cells = page.find_all('td', {'class': 'subregion'})
        response.close()

        continent_names = [cell.a.text for cell in continent_cells]
        continent_links = [
            urllib.parse.urljoin(home_link, cell.a['href'])
            for cell in continent_cells
        ]

        subregion_tbls = {
            name: get_subregion_table(link, verbose)
            for name, link in zip(continent_names, continent_links)
        }
        save_pickle(subregion_tbls,
                    cd_dat("GeoFabrik-continents-subregion-tables.pickle"),
                    verbose=verbose)
    except Exception as e:
        print("Failed to collect the required information ... {}.".format(e))
示例#7
0
 def drop(self,
          database_name=None,
          confirmation_required=True,
          verbose=False):
     """
     Disconnect from a database and drop it, after an optional confirmation.

     Errors raised by the DROP statement are printed, not re-raised.

     :param database_name: [str; None (default)] database to be dropped; if None, the current one
     :param confirmation_required: [bool] (default: True) whether to ask before dropping
     :param verbose: [bool] (default: False) whether to print progress messages
     """
     if database_name is None:
         db_name = self.database_name
     else:
         db_name = database_name

     question = "Confirmed to drop the database \"{}\" for {}@{}?".format(
         db_name, self.user, self.host)
     if not confirmed(question,
                      confirmation_required=confirmation_required):
         return

     self.disconnect(db_name)
     try:
         if verbose:
             print("Dropping the database \"{}\" ... ".format(db_name),
                   end="")
         statement = 'DROP DATABASE IF EXISTS "{}"'.format(db_name)
         self.engine.execute(statement)
         if verbose:
             print("Done.")
     except Exception as e:
         print("Failed. CAUSE: \"{}\"".format(e))
示例#8
0
def update_pkg_metadata(confirmation_required=True, verbose=True, interval_sec=10):
    """
    Refresh all locally stored package metadata by running every collector in turn.

    :param confirmation_required: [bool] (default: True) whether to ask for confirmation, both for the update as a
        whole and in each collector that is called
    :param verbose: [bool] (default: True) passed on to each collector
    :param interval_sec: [int; None] (default: 10) pause (in sec) between two consecutive collectors
    """
    # Honour `confirmation_required` for the overall prompt as well; previously
    # this prompt was issued without it.
    if not confirmed("Updating package metadata may take a few minutes. Continue?",
                     confirmation_required=confirmation_required):
        return

    def pause():
        # Wait between two collectors; skipped when no interval is set.
        if interval_sec:
            time.sleep(interval_sec)

    collect_subregion_info_catalogue(confirmation_required=confirmation_required, verbose=verbose)
    pause()

    collect_continents_subregion_tables(confirmation_required=confirmation_required, verbose=verbose)
    pause()

    collect_region_subregion_tier(confirmation_required=confirmation_required, update=False, verbose=verbose)
    pause()

    collect_bbbike_subregion_catalogue(confirmation_required=confirmation_required, verbose=verbose)
    pause()

    collect_bbbike_download_catalogue(confirmation_required=confirmation_required, verbose=verbose)

    if verbose:
        print("\nUpdate finished.")
示例#9
0
def collect_bbbike_subregion_catalogue(confirmation_required=True,
                                       verbose=False):
    """
    Read the BBBike subregion listing from its web page and pickle it.

    Two files are saved: the catalogue table
    ("BBBike-subregion-catalogue.pickle") and the list of subregion names
    ("BBBike-subregion-name-list.pickle"). Any error is printed, not re-raised.

    :param confirmation_required: [bool] (default: True) whether to ask before collecting
    :param verbose: [bool] (default: False)

    Example:
        confirmation_required = True
        verbose               = True
        collect_bbbike_subregion_catalogue(confirmation_required, verbose)
    """
    if not confirmed("To collect BBBike subregion catalogue? ",
                     confirmation_required=confirmation_required):
        print(
            "The information collection process was not activated. The existing local copy will be loaded instead."
        )
        return

    try:
        home_url = 'http://download.bbbike.org/osm/bbbike/'
        html_tables = pd.read_html(home_url,
                                   header=0,
                                   parse_dates=['Last Modified'])
        # First table on the page, without its row labelled 0.
        catalogue = html_tables[0].drop(0)
        # Names are listed with slashes around them; strip these.
        catalogue.Name = catalogue.Name.map(lambda name: name.strip('/'))

        save_pickle(catalogue,
                    cd_dat("BBBike-subregion-catalogue.pickle"),
                    verbose=verbose)

        subregion_names = catalogue.Name.tolist()
        save_pickle(subregion_names,
                    cd_dat("BBBike-subregion-name-list.pickle"),
                    verbose=verbose)

    except Exception as e:
        print("Failed to get the required information ... {}.".format(e))
示例#10
0
def collect_bbbike_download_catalogue(confirmation_required=True,
                                      verbose=False):
    """
    Collect the download catalogue of every BBBike subregion and pickle the results.

    Three files are saved: the available file formats and data types (both
    derived from the first subregion's catalogue), and the dictionary
    {subregion name: download catalogue}.

    :param confirmation_required: [bool] (default: True) whether to ask before collecting
    :param verbose: [bool] (default: False) whether to print progress and failure messages

    Example:
        confirmation_required = True
        verbose               = True
        collect_bbbike_download_catalogue(confirmation_required, verbose)
    """

    def collect_bbbike_subregion_download_catalogue(subregion_name):
        """
        Build the download catalogue of a single BBBike subregion.

        :param subregion_name: [str]
        :return: [pd.DataFrame; None] one row per download link, with columns
            'Filename', 'URL', 'DataType', 'Size' and 'LastUpdate'; None on failure

        Example:
            subregion_name = 'leeds'
            collect_bbbike_subregion_download_catalogue(subregion_name)
        """
        def parse_dlc(dlc):
            # Turn one download-link tag into
            # [filename, URL, file format, file size, last update].
            dlc_href = dlc.get('href')  # URL
            filename, download_url = dlc_href.strip(
                './'), urllib.parse.urljoin(url, dlc_href)
            if not dlc.has_attr('title'):
                file_format, file_size, last_update = 'Poly', None, None
            else:
                if len(dlc.contents) < 3:
                    file_format, file_size = 'Txt', None
                else:
                    file_format, file_size, _ = dlc.contents  # File type and size
                    file_format, file_size = file_format.strip(
                    ), file_size.text
                last_update = pd.to_datetime(dlc.get('title'))  # Date and time
            return [filename, download_url, file_format, file_size, last_update]

        subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)

        try:
            if verbose:
                print("  \"{}\" ... ".format(subregion_name_), end="")
            url = 'https://download.bbbike.org/osm/bbbike/{}/'.format(
                subregion_name_)

            import bs4
            # Parse inside the `with` block so the HTTP response is always closed.
            with urllib.request.urlopen(url) as source:
                source_soup = bs4.BeautifulSoup(source, 'lxml')
            download_links_class = source_soup.find_all(
                name='a', attrs={'class': ['download_link', 'small']})

            subregion_downloads_catalogue = pd.DataFrame(
                parse_dlc(x) for x in download_links_class)
            subregion_downloads_catalogue.columns = [
                'Filename', 'URL', 'DataType', 'Size', 'LastUpdate'
            ]

            if verbose:
                print("Done. ")

        except Exception as e_:
            subregion_downloads_catalogue = None
            if verbose:
                # The subregion name was already printed above; show the cause.
                print("Failed. {}".format(e_))

        return subregion_downloads_catalogue

    if confirmed("To collect BBBike download dictionary? ",
                 confirmation_required=confirmation_required):
        try:
            bbbike_subregion_names = fetch_bbbike_subregion_catalogue(
                "BBBike-subregion-name-list", verbose=verbose)
            if verbose:
                print("Collecting BBBike download catalogue for: ")
            download_catalogue = [
                collect_bbbike_subregion_download_catalogue(subregion_name)
                for subregion_name in bbbike_subregion_names
            ]

            # The first subregion serves as the sample for the available
            # formats/types. If its collection failed (None), the attribute
            # access below raises and is reported by the except clause.
            sr_name, sr_download_catalogue = bbbike_subregion_names[
                0], download_catalogue[0]

            # Available file formats
            file_fmt = [
                re.sub('{}|CHECKSUM'.format(sr_name), '', f)
                for f in sr_download_catalogue.Filename
            ]
            save_pickle(file_fmt[:-2],
                        cd_dat("BBBike-osm-file-formats.pickle"),
                        verbose=verbose)

            # Available data types
            data_typ = sr_download_catalogue.DataType.tolist()
            save_pickle(data_typ[:-2],
                        cd_dat("BBBike-osm-data-types.pickle"),
                        verbose=verbose)

            downloads_dictionary = dict(
                zip(bbbike_subregion_names, download_catalogue))
            save_pickle(downloads_dictionary,
                        cd_dat("BBBike-download-catalogue.pickle"),
                        verbose=verbose)

        except Exception as e:
            if verbose:
                print("Failed to collect BBBike download dictionary. {}".
                      format(e))
示例#11
0
def psql_osm_pbf_data_extracts(*subregion_name,
                               username='******',
                               password=None,
                               host='localhost',
                               port=5432,
                               database_name='OSM_Geofabrik_PBF',
                               data_dir=None,
                               update_osm_pbf=False,
                               if_table_exists='replace',
                               file_size_limit=50,
                               parsed=True,
                               fmt_other_tags=True,
                               fmt_single_geom=True,
                               fmt_multi_geom=True,
                               pickle_raw_file=False,
                               rm_raw_file=False,
                               confirmation_required=True,
                               verbose=False):
    """
    Download GeoFabrik .osm.pbf extracts and dump them into a PostgreSQL database.

    For each subregion the .osm.pbf file is downloaded (if needed). A file no
    larger than `file_size_limit` is read in one go with read_osm_pbf();
    a larger one is parsed layer by layer and imported in chunks.
    A failure for one subregion is printed and recorded, and the remaining
    subregions are still processed. The database connection is always closed.

    :param subregion_name: [str] one or multiple (sub)region names; if none is given, all subregions listed in
        "GeoFabrik-non-subregion-list" are processed
    :param username: [str] database user name
    :param password: [None (default); anything as input]
    :param host: [str] (default: 'localhost')
    :param port: [int] (default: 5432)
    :param database_name: [str] (default: 'OSM_Geofabrik_PBF')
    :param data_dir: [str; None (default)] directory of the .osm.pbf files; None for the default file path
    :param update_osm_pbf: [bool] (default: False) whether to re-download existing .osm.pbf files
    :param if_table_exists: [str] 'replace' (default); 'append'; or 'fail'
    :param file_size_limit: [int] (default: 50) size (in MB) above which a file is imported chunk-wise
    :param parsed: [bool] (default: True)
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param pickle_raw_file: [bool] (default: False)
    :param rm_raw_file: [bool] (default: False) whether to remove the .osm.pbf file after importing it
    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)

    Example:
        subregions = retrieve_names_of_subregions_of('England')
        psql_osm_pbf_data_extracts(*subregions, database_name='OSM_Geofabrik_PBF', data_dir=None,
                                   update_osm_pbf=False, if_table_exists='replace', file_size_limit=50, parsed=True,
                                   fmt_other_tags=True, fmt_single_geom=True, fmt_multi_geom=True,
                                   rm_raw_file=False, verbose=False)
    """
    if not subregion_name:
        subregion_names = fetch_region_subregion_tier(
            "GeoFabrik-non-subregion-list")
        confirm_msg = "To dump GeoFabrik OSM data extracts of all subregions to PostgreSQL? "
    else:
        subregion_names = retrieve_names_of_subregions_of(*subregion_name)
        confirm_msg = "To dump GeoFabrik OSM data extracts of the following subregions to PostgreSQL? \n{}?\n".format(
            ", ".join(subregion_names))

    if not confirmed(confirm_msg, confirmation_required=confirmation_required):
        return

    # Connect to PostgreSQL server
    osmdb = OSM(username,
                password,
                host,
                port,
                database_name=database_name)

    err_subregion_names = []
    try:
        for subregion_name_ in subregion_names:
            default_pbf_filename, default_path_to_pbf = get_default_path_to_osm_file(
                subregion_name_, ".osm.pbf")
            if not data_dir:  # Go to default file path
                path_to_osm_pbf = default_path_to_pbf
            else:
                osm_pbf_dir = regulate_input_data_dir(data_dir)
                path_to_osm_pbf = os.path.join(osm_pbf_dir,
                                               default_pbf_filename)

            try:
                # Download and size check are inside the try so that a missing
                # file marks this subregion as failed instead of aborting the run.
                download_subregion_osm_file(
                    subregion_name_,
                    osm_file_format=".osm.pbf",
                    download_dir=data_dir,
                    update=update_osm_pbf,
                    download_confirmation_required=False,
                    verbose=verbose)

                file_size_in_mb = round(
                    os.path.getsize(path_to_osm_pbf) / (1024**2), 1)

                if file_size_in_mb <= file_size_limit:
                    subregion_osm_pbf = read_osm_pbf(
                        subregion_name_,
                        data_dir,
                        parsed,
                        file_size_limit,
                        fmt_other_tags,
                        fmt_single_geom,
                        fmt_multi_geom,
                        update=False,
                        download_confirmation_required=False,
                        pickle_it=pickle_raw_file,
                        rm_osm_pbf=False,
                        verbose=verbose)

                    if subregion_osm_pbf is not None:
                        osmdb.dump_osm_pbf_data(subregion_osm_pbf,
                                                table_name=subregion_name_,
                                                if_exists=if_table_exists,
                                                verbose=verbose)
                        del subregion_osm_pbf
                        gc.collect()

                else:
                    _psql_dump_osm_pbf_by_layer(
                        osmdb, path_to_osm_pbf, subregion_name_,
                        file_size_in_mb, file_size_limit, if_table_exists,
                        fmt_other_tags, fmt_single_geom, fmt_multi_geom,
                        verbose)

                if rm_raw_file:
                    remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

            except Exception as e:
                print(e)
                err_subregion_names.append(subregion_name_)

            # Pause between subregions, but not after the last one.
            if subregion_name_ != subregion_names[-1]:
                time.sleep(60)

        if not err_subregion_names:
            if verbose:
                print("Mission accomplished.\n")
        else:
            print(
                "Errors occurred when parsing data of the following subregion(s):"
            )
            print(*err_subregion_names, sep=", ")

    finally:
        # Close the connection even if something unexpected escaped the loop.
        osmdb.disconnect()


def _psql_dump_osm_pbf_by_layer(osmdb, path_to_osm_pbf, subregion_name_,
                                file_size_in_mb, file_size_limit,
                                if_table_exists, fmt_other_tags,
                                fmt_single_geom, fmt_multi_geom, verbose):
    """Parse a large .osm.pbf file layer by layer and import each layer in chunks."""
    if verbose:
        print("\nParsing and importing \"{}\" feature-wisely to PostgreSQL ... ".format(subregion_name_))

    # Reference: https://gdal.org/python/osgeo.ogr.Feature-class.html
    raw_osm_pbf = ogr.Open(path_to_osm_pbf)
    layer_count = raw_osm_pbf.GetLayerCount()
    for i in range(layer_count):
        layer = raw_osm_pbf.GetLayerByIndex(i)  # Hold the i-th layer
        layer_name = layer.GetName()
        if verbose:
            print("                       {} ... ".format(layer_name), end="")
        try:
            features = [feature for feature in layer]
            feats_no = len(features)
            # Number of chunks grows with how far the file exceeds the limit.
            chunks_no = math.ceil(file_size_in_mb / file_size_limit)
            feats = split_list(features, chunks_no)

            del features
            gc.collect()

            if osmdb.subregion_table_exists(layer_name, subregion_name_) and \
                    if_table_exists == 'replace':
                osmdb.drop_subregion_data_by_layer(subregion_name_,
                                                   layer_name)

            # Chunks of one layer go to the same table: after any 'replace'
            # drop above, every chunk is appended (unless 'fail' was requested).
            if_exists_ = if_table_exists if if_table_exists == 'fail' else 'append'

            # Loop through all available features
            for feat in feats:
                lyr_dat = pd.DataFrame(
                    rapidjson.loads(f.ExportToJson()) for f in feat)
                lyr_dat = parse_osm_pbf_layer_data(lyr_dat, layer_name,
                                                   fmt_other_tags,
                                                   fmt_single_geom,
                                                   fmt_multi_geom)
                osmdb.dump_osm_pbf_data_by_layer(lyr_dat,
                                                 layer_name,
                                                 subregion_name_,
                                                 if_exists=if_exists_)
                del lyr_dat
                gc.collect()

            if verbose:
                print("Done. Total amount of features: {}".format(feats_no))

        except Exception as e:
            print("Failed. {}".format(e))

    raw_osm_pbf.Release()
    del raw_osm_pbf
    gc.collect()
示例#12
0
def download_subregion_osm_file(*subregion_name,
                                osm_file_format,
                                download_dir=None,
                                update=False,
                                download_confirmation_required=True,
                                deep_retry=False,
                                verbose=False):
    """
    Download the OSM data file, in the given format, for each named subregion.

    When no download URL is available for a subregion, the files of its own
    subregions are downloaded instead (recursively), into a sub-directory of
    `download_dir` named after the subregion and the file format.

    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param osm_file_format: [str] ".osm.pbf", ".shp.zip", or ".osm.bz2"
    :param download_dir: [str; None (default)] directory to save the downloaded file(s); None (using default directory)
    :param update: [bool] (default: False) whether to update (i.e. re-download) data
    :param download_confirmation_required: [bool] (default: True) whether to confirm before downloading
    :param deep_retry: [bool] (default: False) passed as `deep` to retrieve_names_of_subregions_of() when falling
        back to the subregions
    :param verbose: [bool] (default: False)

    Example:
        subregion_name                 = 'london'
        osm_file_format                = ".osm.pbf"
        download_dir                   = None
        update                         = False
        download_confirmation_required = True
        verbose                        = True
        download_subregion_osm_file(subregion_name, osm_file_format=osm_file_format, download_dir=download_dir,
                                    update=update, download_confirmation_required=download_confirmation_required,
                                    verbose=verbose)
    """
    for sub_reg_name in subregion_name:

        # Get download URL
        subregion_name_, download_url = get_subregion_download_url(
            sub_reg_name, osm_file_format, update=False)

        if pd.isna(download_url):
            # No file for this (sub)region itself: fall back to its subregions.
            if verbose:
                print(
                    "\"{}\" data is not available for \"{}\" from the server. "
                    "Try to download the data of its subregions instead. ".
                    format(osm_file_format, subregion_name_))
            sub_subregions = retrieve_names_of_subregions_of(subregion_name_,
                                                             deep=deep_retry)
            download_dir_ = cd(
                download_dir,
                subregion_name_.replace(" ", "-").lower() +
                os.path.splitext(osm_file_format)[0])
            # Keep the caller's `deep_retry` setting at every level of the
            # fallback (it used to be dropped here).
            download_subregion_osm_file(
                *sub_subregions,
                osm_file_format=osm_file_format,
                download_dir=download_dir_,
                update=update,
                download_confirmation_required=download_confirmation_required,
                deep_retry=deep_retry,
                verbose=verbose)
            continue

        if not download_dir:
            # Download the requested OSM file to default directory
            osm_filename, path_to_file = get_default_path_to_osm_file(
                subregion_name_, osm_file_format, mkdir=True)
        else:
            regulated_dir = regulate_input_data_dir(download_dir)
            osm_filename = get_default_osm_filename(
                subregion_name_, osm_file_format=osm_file_format)
            path_to_file = os.path.join(regulated_dir, osm_filename)

        if os.path.isfile(path_to_file) and not update:
            if verbose:
                print(
                    "\n\"{}\" for \"{}\" is already available: \"{}\".".format(
                        osm_filename, subregion_name_, path_to_file))
            continue

        # Decide the wording before downloading, while the old file (if any)
        # can still be told apart from a fresh download.
        op = "Updating" if os.path.isfile(path_to_file) else "Downloading"
        if confirmed(
                "To download the {} data of \"{}\", saved as \"{}\"\n".format(
                    osm_file_format, subregion_name_, path_to_file),
                confirmation_required=download_confirmation_required):
            try:
                from pyhelpers.download import download
                download(download_url, path_to_file)
                if verbose:
                    print("{} \"{}\" for \"{}\" ... Done.".format(
                        op, osm_filename, subregion_name_))
            except Exception as e:
                if verbose:
                    print("Failed to download \"{}\". {}.\n".format(
                        osm_filename, e))
        elif verbose:
            print("The {} of \"{}\" was cancelled.\n".format(
                op.lower(), osm_filename))
示例#13
0
def collect_region_subregion_tier(confirmation_required=True,
                                  update=False,
                                  verbose=False):
    """
    Compile the region-subregion tier and the list of (sub)regions that have no subregions,
    then save them as "GeoFabrik-region-subregion-tier" (.pickle and .json) and
    "GeoFabrik-non-subregion-list.pickle" under the path given by cd_dat().

    :param confirmation_required: [bool] (default: True) whether to confirm before collecting region-subregion tier
    :param update: [bool] (default: False) passed on to fetch_continents_subregion_tables()
    :param verbose: [bool] (default: False) whether to print progress (and failure) messages

    Example:
        confirmation_required = True
        update                = False
        verbose               = True
        collect_region_subregion_tier(confirmation_required, update, verbose)
    """

    def compile_region_subregion_tier(sub_reg_tbls):
        """
        Recursively walk the subregion tables to build the tier.

        :param sub_reg_tbls: [dict] {name: pd.DataFrame or anything else}, e.g. the result of
            fetch_continents_subregion_tables(); an entry whose value is not a DataFrame is treated
            as having no subregions
        :return: ([dict], [list]) a dictionary of region-subregion, and a list of (sub)regions without subregions
        """
        # Both copies are taken before 'sub_reg_tbls' is handed to update_nested_dict() below
        with_children = copy.deepcopy(sub_reg_tbls)
        tiers = copy.deepcopy(sub_reg_tbls)

        leaves = []
        for name, table in sub_reg_tbls.items():
            if not isinstance(table, pd.DataFrame):
                leaves.append(name)
                continue
            tiers = update_nested_dict(sub_reg_tbls,
                                       {name: set(table.Subregion)})

        # Keep only the entries that do have a subregion table
        for leaf in leaves:
            del with_children[leaf]

        pending = copy.deepcopy(with_children)

        while pending:

            for region_name, subregion_table in with_children.items():
                names = subregion_table.Subregion
                links = subregion_table.SubregionURL
                child_tables = {
                    child: get_subregion_table(link)
                    for child, link in zip(names, links)
                }

                # Descend one level; collect the leaves found further down
                child_tiers, child_leaves = compile_region_subregion_tier(
                    child_tables)
                leaves.extend(child_leaves)

                tiers.update({region_name: child_tiers})

                del pending[region_name]

        # Russian Federation in both pages of Asia and Europe, so that there are duplicates in the list of leaves
        import more_itertools
        leaves = list(more_itertools.unique_everseen(leaves))
        return tiers, leaves

    if not confirmed(
            "To compile a region-subregion tier? (Note that it may take a few minutes.) ",
            confirmation_required=confirmation_required):
        return

    if verbose:
        print("Compiling a region-subregion tier ... ", end="")

    try:
        subregion_tables = fetch_continents_subregion_tables(update=update)
        region_subregion_tier, non_subregions = compile_region_subregion_tier(
            subregion_tables)
        if verbose:
            print("Done. ")

        save_pickle(
            region_subregion_tier,
            cd_dat("GeoFabrik-region-subregion-tier.pickle"),
            verbose=verbose)
        save_json(
            region_subregion_tier,
            cd_dat("GeoFabrik-region-subregion-tier.json"),
            verbose=verbose)
        save_pickle(
            non_subregions,
            cd_dat("GeoFabrik-non-subregion-list.pickle"),
            verbose=verbose)

    except Exception as e:
        # The failure is only reported when 'verbose' is set
        if verbose:
            print("Failed to get the required information ... {}.".format(e))
示例#14
0
def collect_subregion_info_catalogue(confirmation_required=True,
                                     verbose=False):
    """
    Crawl the GeoFabrik download pages (starting from the home page and following every
    subregion link level by level), build a catalogue of all subregion download tables and
    save the following files under the path given by cd_dat():

        - "GeoFabrik-subregion-downloads-catalogue" (.pickle and .json)
        - "GeoFabrik-subregion-name-list.pickle"
        - "GeoFabrik-subregion-name-url-dictionary" (.pickle and .json)

    :param confirmation_required: [bool] (default: True) whether to confirm before starting to collect information
    :param verbose: [bool] (default: False)

    Any exception raised while collecting is caught and reported with a printed message.

    Example:
        confirmation_required = True
        verbose               = True
        collect_subregion_info_catalogue(confirmation_required, verbose)
    """
    if confirmed(
            "To collect all available subregion links? (Note that it may take a few minutes.) ",
            confirmation_required=confirmation_required):

        home_url = 'http://download.geofabrik.de/'

        try:
            source = requests.get(home_url)
            try:
                soup = bs4.BeautifulSoup(source.text, 'lxml')
            finally:
                # Release the connection even if parsing fails
                source.close()

            subregion_href = soup.find_all('td', {'class': 'subregion'})
            avail_subregion_urls = (urllib.parse.urljoin(
                home_url, td.a['href']) for td in subregion_href)
            avail_subregion_url_tables_0 = (get_subregion_table(
                sub_url, verbose) for sub_url in avail_subregion_urls)
            avail_subregion_url_tables = [
                tbl for tbl in avail_subregion_url_tables_0 if tbl is not None
            ]

            # Breadth-first walk: each pass fetches the tables one level further down
            subregion_url_tables = list(avail_subregion_url_tables)

            while subregion_url_tables:

                subregion_url_tables_ = []

                for subregion_url_table in subregion_url_tables:
                    for sr_url in subregion_url_table.SubregionURL:
                        tbl = get_subregion_table(sr_url, verbose)
                        if tbl is not None:
                            subregion_url_tables_.append(tbl)

                # Add the tables of this level exactly once (after the whole level has been
                # visited), rather than re-adding the growing list on every iteration.
                # (Note that 'Russian Federation' data is available in both 'Asia' and 'Europe';
                # such exact duplicates are removed by drop_duplicates() below.)
                avail_subregion_url_tables += subregion_url_tables_

                subregion_url_tables = list(subregion_url_tables_)

            # All available URLs for downloading
            home_subregion_url_table = get_subregion_table(home_url)
            avail_subregion_url_tables.append(home_subregion_url_table)
            subregion_downloads_index = pd.DataFrame(
                pd.concat(avail_subregion_url_tables, ignore_index=True))
            subregion_downloads_index.drop_duplicates(inplace=True)

            # Different rows may still share the same subregion name
            duplicated = subregion_downloads_index[
                subregion_downloads_index.Subregion.duplicated(keep=False)]
            if not duplicated.empty:
                import humanfriendly

                def parse_pbf_size(size_text):
                    # e.g. '(1.2\xa0GB)' -> number of bytes
                    return humanfriendly.parse_size(
                        size_text.strip('(').strip(')').replace('\xa0', ' '))

                # For every duplicated name keep only the row with the largest .osm.pbf file.
                # Grouping by name handles any number of duplicated names, wherever their rows
                # are located in the table.
                for _, same_name_rows in duplicated.groupby('Subregion'):
                    sizes = same_name_rows['.osm.pbf_Size'].map(parse_pbf_size)
                    smaller_rows = same_name_rows.index.drop(sizes.idxmax())
                    subregion_downloads_index.drop(smaller_rows, inplace=True)

                subregion_downloads_index.index = range(
                    len(subregion_downloads_index))

            subregion_downloads_index_json = subregion_downloads_index.set_index(
                'Subregion').to_json()

            # Save subregion_index_downloads to local disk
            save_pickle(
                subregion_downloads_index,
                cd_dat("GeoFabrik-subregion-downloads-catalogue.pickle"),
                verbose=verbose)
            save_json(subregion_downloads_index_json,
                      cd_dat("GeoFabrik-subregion-downloads-catalogue.json"),
                      verbose=verbose)

            avail_subregions = list(subregion_downloads_index.Subregion)
            avail_subregion_urls = list(subregion_downloads_index.SubregionURL)

            # Subregion index - {Subregion: URL}
            subregion_url_index = dict(
                zip(avail_subregions, avail_subregion_urls))

            # Save a list of available subregions locally
            save_pickle(avail_subregions,
                        cd_dat("GeoFabrik-subregion-name-list.pickle"),
                        verbose=verbose)
            # Save subregion_index to local disk
            save_pickle(
                subregion_url_index,
                cd_dat("GeoFabrik-subregion-name-url-dictionary.pickle"),
                verbose=verbose)
            save_json(subregion_url_index,
                      cd_dat("GeoFabrik-subregion-name-url-dictionary.json"),
                      verbose=verbose)

        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))

    else:
        print("The information collection process was not activated.")
示例#15
0
def update_package_data(confirmation_required=True,
                        interval_sec=2,
                        verbose=True):
    """
    Update package data.

    After an initial prompt (issued without passing ``confirmation_required`` on), every data
    resource of :py:class:`GeofabrikDownloader` and then of :py:class:`BBBikeDownloader` is
    re-collected with ``update=True``, pausing between consecutive requests.

    :param confirmation_required: whether asking for confirmation to proceed
        (passed to each individual update step), defaults to ``True``
    :type confirmation_required: bool
    :param interval_sec: time gap (in seconds) between the updating of different resources, defaults to ``2``
    :type interval_sec: int
    :param verbose: whether to print relevant information in console, defaults to ``True``
    :type verbose: bool, int

    **Example**::

        >>> from pydriosm.updater import update_package_data

        >>> update_package_data(confirmation_required=True, verbose=True)

    |

    (**THE END OF** :ref:`Modules<modules>`.)
    """

    if not confirmed("To update resources (which may take a few minutes)\n?"):
        return

    shared_kwargs = {
        'update': True,
        'confirmation_required': confirmation_required,
        'verbose': verbose,
    }

    def refresh(downloader, method_names, pause_after_last):
        # Call each named method in turn, sleeping between calls
        # (and after the final one only if more work follows).
        final_position = len(method_names) - 1
        for position, method_name in enumerate(method_names):
            _ = getattr(downloader, method_name)(**shared_kwargs)
            if position < final_position or pause_after_last:
                time.sleep(interval_sec)

    refresh(
        GeofabrikDownloader(),
        (
            'get_download_index',
            'get_continents_subregion_tables',
            'get_region_subregion_tier',
            'get_download_catalogue',
            'get_list_of_subregion_names',
        ),
        pause_after_last=True,
    )

    refresh(
        BBBikeDownloader(),
        (
            'get_list_of_cities',
            'get_coordinates_of_cities',
            'get_subregion_catalogue',
            'get_list_of_subregion_names',
            'get_download_index',
        ),
        pause_after_last=False,
    )

    if verbose:
        print("\nUpdate finished.")