def collect_continents_subregion_tables(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to ask for a confirmation before
        starting to collect the information
    """
    if confirmed("To collect information about subregions of each continent? ",
                 confirmation_required=confirmation_required):
        try:
            home_link = 'https://download.geofabrik.de/'
            source = requests.get(home_link)
            soup = bs4.BeautifulSoup(source.text, 'lxml').find_all(
                'td', {'class': 'subregion'})
            source.close()

            continent_names = [td.a.text for td in soup]
            continent_links = [urllib.parse.urljoin(home_link, td.a['href'])
                               for td in soup]
            subregion_tables = dict(zip(
                continent_names,
                [get_subregion_table(url) for url in continent_links]))

            save_pickle(subregion_tables,
                        cd_dat("GeoFabrik-continents-subregion-tables.pickle"))
        except Exception as e:
            print("Failed to collect the required information ... {}.".format(e))
    else:
        print("The information collection process was not activated. "
              "The existing local copy will be loaded instead.")
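# A minimal usage sketch (illustrative, not part of the module's API). It assumes the
# helpers used above (confirmed, get_subregion_table, save_pickle, cd_dat) plus a
# matching load_pickle(), and network access to download.geofabrik.de:
#
#   collect_continents_subregion_tables(confirmation_required=False)
#   tables = load_pickle(cd_dat("GeoFabrik-continents-subregion-tables.pickle"))
#   print(list(tables.keys()))  # continent names, e.g. 'Africa', 'Asia', ...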
def collect_bbbike_subregion_catalogue(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to ask for a confirmation before
        starting to collect the catalogue
    """
    if confirmed("To collect BBBike subregion catalogue? ",
                 confirmation_required=confirmation_required):
        try:
            home_url = 'http://download.bbbike.org/osm/bbbike/'
            bbbike_subregion_catalogue = pd.read_html(
                home_url, header=0, parse_dates=['Last Modified'])[0].drop(0)
            bbbike_subregion_catalogue.Name = bbbike_subregion_catalogue.Name.map(
                lambda x: x.strip('/'))

            save_pickle(bbbike_subregion_catalogue,
                        cd_dat("BBBike-subregion-catalogue.pickle"))

            bbbike_subregion_names = bbbike_subregion_catalogue.Name.tolist()
            save_pickle(bbbike_subregion_names,
                        cd_dat("BBBike-subregion-name-list.pickle"))
        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))
    else:
        print("The information collection process was not activated. "
              "The existing local copy will be loaded instead.")
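# A hedged usage sketch: assuming network access to download.bbbike.org, this would
# refresh both pickles; the load_pickle() inspection step below is illustrative only.
#
#   collect_bbbike_subregion_catalogue(confirmation_required=False)
#   names = load_pickle(cd_dat("BBBike-subregion-name-list.pickle"))
#   print(names[:5])  # the first few BBBike city/region names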
def collect_bbbike_download_catalogue(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to ask for a confirmation before
        starting to collect the download catalogue
    """
    if confirmed("To collect BBBike download dictionary? ",
                 confirmation_required=confirmation_required):
        try:
            bbbike_subregion_names = fetch_bbbike_subregion_catalogue(
                "BBBike-subregion-name-list", update=True)
            download_catalogue = [
                fetch_bbbike_subregion_download_catalogue(
                    subregion_name, update=True, confirmation_required=False)
                for subregion_name in bbbike_subregion_names]

            subregion_name, subregion_download_catalogue = \
                bbbike_subregion_names[0], download_catalogue[0]

            # Available file formats
            file_fmt = [re.sub('{}|CHECKSUM'.format(subregion_name), '', f)
                        for f in subregion_download_catalogue.Filename]
            save_pickle(file_fmt[:-2], cd_dat("BBBike-osm-file-formats.pickle"))

            # Available data types
            data_typ = subregion_download_catalogue.DataType.tolist()
            save_pickle(data_typ[:-2], cd_dat("BBBike-osm-data-types.pickle"))

            # available_file_formats = dict(zip(file_fmt, file_ext))

            downloads_dictionary = dict(zip(bbbike_subregion_names, download_catalogue))
            save_pickle(downloads_dictionary, cd_dat("BBBike-download-catalogue.pickle"))
        except Exception as e:
            print("Failed to collect BBBike download dictionary. {}".format(e))
    else:
        print("The information collection process was not activated. "
              "The existing local copy will be loaded instead.")
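# Usage sketch (illustrative; assumes the fetch_* helpers used above are importable
# and the BBBike server is reachable):
#
#   collect_bbbike_download_catalogue(confirmation_required=False)
#   downloads = load_pickle(cd_dat("BBBike-download-catalogue.pickle"))
#   # each value is a per-subregion download table, keyed by subregion name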
def read_osm_pbf(subregion_name, data_dir=None, parsed=True, file_size_limit=50,
                 fmt_other_tags=True, fmt_single_geom=True, fmt_multi_geom=True,
                 update=False, download_confirmation_required=True, pickle_it=False,
                 rm_osm_pbf=True):
    """
    :param subregion_name: [str] e.g. 'london'; if it is the name of a subregion,
        the default file path will be used
    :param data_dir: [str or None] customised directory of the .osm.pbf file
    :param parsed: [bool] whether to parse the data
    :param file_size_limit: [int or None] limit of file size (in MB) above which
        the file is parsed chunk-wise, defaults to 50
    :param fmt_other_tags: [bool]
    :param fmt_single_geom: [bool]
    :param fmt_multi_geom: [bool]
    :param update: [bool] whether to update the relevant file/information
    :param download_confirmation_required: [bool]
    :param pickle_it: [bool] whether to save the parsed data as a pickle file
    :param rm_osm_pbf: [bool] whether to delete the downloaded .osm.pbf file afterwards
    :return: [dict or None]
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)
    if data_dir:  # Use a customised directory instead of the default file path
        osm_pbf_dir = regulate_input_data_dir(data_dir)
        path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)

    subregion_filename = os.path.basename(path_to_osm_pbf)

    path_to_pickle = path_to_osm_pbf.replace(
        ".osm.pbf", ".pickle" if parsed else "-raw.pickle")
    if os.path.isfile(path_to_pickle) and not update:
        osm_pbf_data = load_pickle(path_to_pickle)
    else:
        # If the target file is not available, try downloading it first
        download_subregion_osm_file(
            subregion_name, osm_file_format=".osm.pbf", download_dir=data_dir,
            update=update,
            download_confirmation_required=download_confirmation_required,
            verbose=False)

        file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)
        if file_size_limit and file_size_in_mb > file_size_limit:
            # Parse the '.osm.pbf' file in a chunk-wise way
            chunks_no = math.ceil(file_size_in_mb / file_size_limit)
        else:
            chunks_no = None

        print("\nParsing \"{}\" ... ".format(subregion_filename), end="")
        try:
            osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                         fmt_other_tags, fmt_single_geom,
                                         fmt_multi_geom)
            print("Successfully.\n")
        except Exception as e:
            print("Failed. {}\n".format(e))
            osm_pbf_data = None

        if pickle_it:
            save_pickle(osm_pbf_data, path_to_pickle)
        if rm_osm_pbf:
            remove_subregion_osm_file(path_to_osm_pbf)

    return osm_pbf_data
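# A hedged example of reading a subregion, using 'london' from the docstring. With
# the default file_size_limit=50, a file larger than 50 MB would be parsed in
# math.ceil(file_size_in_mb / 50) chunks:
#
#   greater_london = read_osm_pbf('london', parsed=True, pickle_it=True,
#                                 rm_osm_pbf=False)
#   print(list(greater_london.keys()))  # parsed layer names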
def read_shp_zip(subregion_name, layer, feature=None, data_dir=None, update=False,
                 download_confirmation_required=True, pickle_it=False,
                 rm_extracts=False, rm_shp_zip=False):
    """
    :param subregion_name: [str] e.g. 'england', 'oxfordshire', or 'europe';
        case-insensitive
    :param layer: [str] e.g. 'railways'
    :param feature: [str or None] e.g. 'rail'; if None (default), all available
        features are included
    :param data_dir: [str or None]
    :param update: [bool] whether to update the relevant file/information;
        default False
    :param download_confirmation_required: [bool]
    :param pickle_it: [bool] default False
    :param rm_extracts: [bool] whether to delete the files extracted from the
        .shp.zip file; default False
    :param rm_shp_zip: [bool] whether to delete the downloaded .shp.zip file;
        default False
    :return: [geopandas.GeoDataFrame]
    """
    shp_zip_filename, path_to_shp_zip = get_default_path_to_osm_file(
        subregion_name, ".shp.zip", mkdir=False)
    extract_dir = os.path.splitext(path_to_shp_zip)[0]
    if data_dir:
        shp_zip_dir = regulate_input_data_dir(data_dir)
        path_to_shp_zip = os.path.join(shp_zip_dir, shp_zip_filename)
        extract_dir = os.path.join(shp_zip_dir, os.path.basename(extract_dir))

    # Make a local path for saving a pickle file for the .shp data
    sub_name = "-".join(
        x for x in [shp_zip_filename.replace("-latest-free.shp.zip", ""),
                    layer, feature] if x)
    path_to_shp_pickle = os.path.join(extract_dir, sub_name + ".shp.pickle")

    if os.path.isfile(path_to_shp_pickle) and not update:
        shp_data = load_pickle(path_to_shp_pickle)
    else:
        if not os.path.exists(extract_dir):
            # Download the requested OSM file
            download_subregion_osm_file(
                shp_zip_filename, osm_file_format=".shp.zip", download_dir=data_dir,
                update=update,
                download_confirmation_required=download_confirmation_required,
                verbose=False)
        if os.path.isfile(path_to_shp_zip):
            extract_shp_zip(path_to_shp_zip, extract_dir, layer=layer)

        path_to_shp = glob.glob(os.path.join(extract_dir, "*{}*.shp".format(layer)))
        if len(path_to_shp) == 0:
            shp_data = None
        elif len(path_to_shp) == 1:
            # gpd.GeoDataFrame(read_shp_file(path_to_shp))
            shp_data = gpd.read_file(path_to_shp[0])
            if feature:
                path_to_shp_feat = path_to_shp[0].replace(layer, layer + "_" + feature)
                shp_data = gpd.GeoDataFrame(shp_data[shp_data.fclass == feature])
                shp_data.crs = {'no_defs': True, 'ellps': 'WGS84',
                                'datum': 'WGS84', 'proj': 'longlat'}
                shp_data.to_file(path_to_shp_feat, driver='ESRI Shapefile')
        else:  # len(path_to_shp) > 1
            if not feature:
                path_to_orig_shp = [p for p in path_to_shp
                                    if layer + '_a' in p or layer + '_free' in p]
                if len(path_to_orig_shp) == 1:  # "_a*.shp" is not available
                    shp_data = gpd.read_file(path_to_orig_shp[0])
                else:
                    shp_data = [gpd.read_file(p) for p in path_to_shp]
                    shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
            else:  # a specific feature is requested
                path_to_shp_feat = [p for p in path_to_shp
                                    if layer + "_" + feature not in p]
                if len(path_to_shp_feat) == 1:  # "_a*.shp" does not exist
                    shp_data = gpd.read_file(path_to_shp_feat[0])
                    shp_data = shp_data[shp_data.fclass == feature]
                else:  # both "_a*" and "_free*" .shp files for the feature are available
                    shp_data = [dat[dat.fclass == feature]
                                for dat in (gpd.read_file(p) for p in path_to_shp_feat)]
                    shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
                shp_data.crs = {'no_defs': True, 'ellps': 'WGS84',
                                'datum': 'WGS84', 'proj': 'longlat'}
                shp_data.to_file(
                    path_to_shp_feat[0].replace(layer, layer + "_" + feature),
                    driver='ESRI Shapefile')

        if pickle_it:
            save_pickle(shp_data, path_to_shp_pickle)

        if os.path.exists(extract_dir) and rm_extracts:
            # import shutil; shutil.rmtree(extract_dir)
            for f in glob.glob(os.path.join(extract_dir, "gis_osm*")):
                # if layer not in f:
                os.remove(f)

        if os.path.isfile(path_to_shp_zip) and rm_shp_zip:
            remove_subregion_osm_file(path_to_shp_zip)

    return shp_data
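# A usage sketch, assuming 'oxfordshire' (from the docstring example) has a
# 'railways' layer in its GeoFabrik .shp.zip:
#
#   railways = read_shp_zip('oxfordshire', layer='railways', feature='rail',
#                           pickle_it=True, rm_extracts=True)
#   print(railways.head())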
def collect_region_subregion_tier(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to confirm before starting to
        collect the region-subregion tier
    """

    # Find out all the regions and their subregions
    def compile_region_subregion_tier(sub_reg_tbls):
        """
        :param sub_reg_tbls: [dict] obtained from fetch_continents_subregion_tables()
        :return: ([dict], [list]) a dictionary of region-subregion, and a list of
            (sub)regions without subregions
        """
        having_subregions = copy.deepcopy(sub_reg_tbls)
        region_subregion_tiers = copy.deepcopy(sub_reg_tbls)

        non_subregions_list = []
        for k, v in sub_reg_tbls.items():
            if v is not None and isinstance(v, pd.DataFrame):
                region_subregion_tiers = update_nested_dict(
                    region_subregion_tiers, {k: set(v.Subregion)})
            else:
                non_subregions_list.append(k)

        for x in non_subregions_list:
            having_subregions.pop(x)

        having_subregions_temp = copy.deepcopy(having_subregions)

        while having_subregions_temp:
            for region_name, subregion_table in having_subregions.items():
                subregion_names = subregion_table.Subregion
                subregion_links = subregion_table.SubregionURL
                sub_subregion_tables = dict(zip(
                    subregion_names,
                    [get_subregion_table(link) for link in subregion_links]))

                subregion_index, without_subregion_ = \
                    compile_region_subregion_tier(sub_subregion_tables)
                non_subregions_list += without_subregion_

                region_subregion_tiers.update({region_name: subregion_index})

                having_subregions_temp.pop(region_name)

        # 'Russian Federation' appears on both the Asia and the Europe pages,
        # so there are duplicates in non_subregions_list
        non_subregions_list = list(
            more_itertools.unique_everseen(non_subregions_list))

        return region_subregion_tiers, non_subregions_list

    if confirmed("To compile a region-subregion tier? "
                 "(Note that it may take a few minutes.) ",
                 confirmation_required=confirmation_required):
        try:
            subregion_tables = fetch_continents_subregion_tables(update=True)
            region_subregion_tier, non_subregions = compile_region_subregion_tier(
                subregion_tables)
            save_pickle(region_subregion_tier,
                        cd_dat("GeoFabrik-region-subregion-tier.pickle"))
            save_json(region_subregion_tier,
                      cd_dat("GeoFabrik-region-subregion-tier.json"))
            save_pickle(non_subregions,
                        cd_dat("GeoFabrik-non-subregion-list.pickle"))
        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))
    else:
        print("The information collection process was not activated. "
              "The existing local copy will be loaded instead.")
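# Illustrative call; compiling the full tier crawls every subregion page recursively,
# so it may take a few minutes (as the confirmation prompt warns):
#
#   collect_region_subregion_tier(confirmation_required=False)
#   tier = load_pickle(cd_dat("GeoFabrik-region-subregion-tier.pickle"))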
def get_incident_location_furlongs(route_name=None, weather_category=None,
                                   shift_yards_same_elr=220, shift_yards_diff_elr=220,
                                   update=False, verbose=False):
    """
    Get data of furlongs for incident locations.

    :param route_name: name of a Route; if ``None`` (default), all available Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None`` (default), all available
        weather categories
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for
        adjustment, given that ``StartELR == EndELR``, defaults to ``220``
    :type shift_yards_same_elr: int or float
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for
        adjustment, given that ``StartELR != EndELR``, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function
        runs, defaults to ``False``
    :type verbose: bool or int
    :return: data of furlongs for incident locations
    :rtype: pandas.DataFrame or None

    **Test**::

        >>> from coordinator.furlong import get_incident_location_furlongs

        >>> il_furlongs = get_incident_location_furlongs(update=True, verbose=True)
        >>> il_furlongs.tail()

        >>> il_furlongs = get_incident_location_furlongs(route_name='Anglia',
        ...                                              update=True, verbose=True)
        >>> il_furlongs.tail()
    """
    filename = "incident-location-furlongs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_same_elr, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        incident_location_furlongs = load_pickle(path_to_pickle)
    else:
        try:
            use_col_names = ['Section_Length_Adj', 'Critical_FurlongIDs']

            adjusted_mileages_same_start_end_elrs = \
                get_adjusted_mileages_same_start_end_elrs(
                    route_name, weather_category, shift_yards_same_elr,
                    verbose=verbose)
            ilf_same = adjusted_mileages_same_start_end_elrs[use_col_names]

            adjusted_mileages_diff_start_end_elrs = \
                get_adjusted_mileages_diff_start_end_elrs(
                    route_name, weather_category, shift_yards_diff_elr,
                    verbose=verbose)
            ilf_diff = adjusted_mileages_diff_start_end_elrs[use_col_names]

            furlongs_dat = pd.concat([ilf_same, ilf_diff])

            incident_locations = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, verbose=verbose)

            # Merge the above data sets
            incident_location_furlongs = incident_locations.join(furlongs_dat,
                                                                 how='right')
            incident_location_furlongs.drop(['StartMileage_num', 'EndMileage_num'],
                                            axis=1, inplace=True)
            incident_location_furlongs.index = range(len(incident_location_furlongs))

            save_pickle(incident_location_furlongs, path_to_pickle, verbose=verbose)
        except Exception as e:
            print("Failed to get \"{}\". {}.".format(
                os.path.splitext(pickle_filename)[0], e))
            incident_location_furlongs = None

    return incident_location_furlongs
def collect_subregion_info_catalogue(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to ask for a confirmation before
        starting to collect information
    """
    if confirmed("To collect all available subregion links? "
                 "(Note that it may take a few minutes.) ",
                 confirmation_required=confirmation_required):
        home_url = 'http://download.geofabrik.de/'
        try:
            source = requests.get(home_url)
            soup = bs4.BeautifulSoup(source.text, 'lxml')
            source.close()

            avail_subregions = [
                td.a.text for td in soup.find_all('td', {'class': 'subregion'})]
            avail_subregion_urls = [
                urllib.parse.urljoin(home_url, td.a['href'])
                for td in soup.find_all('td', {'class': 'subregion'})]
            avail_subregion_url_tables = [
                get_subregion_table(sub_url) for sub_url in avail_subregion_urls]
            avail_subregion_url_tables = [
                tbl for tbl in avail_subregion_url_tables if tbl is not None]

            subregion_url_tables = list(avail_subregion_url_tables)
            while subregion_url_tables:
                subregion_url_tables_ = []
                for subregion_url_table in subregion_url_tables:
                    subregions = list(subregion_url_table.Subregion)
                    subregion_urls = list(subregion_url_table.SubregionURL)
                    subregion_url_tables_0 = [
                        get_subregion_table(subregion_url)
                        for subregion_url in subregion_urls]
                    subregion_url_tables_ += [
                        tbl for tbl in subregion_url_tables_0 if tbl is not None]

                    # (Note that 'Russian Federation' data is available in both
                    # 'Asia' and 'Europe')
                    avail_subregions += subregions
                    avail_subregion_urls += subregion_urls
                    avail_subregion_url_tables += subregion_url_tables_

                subregion_url_tables = list(subregion_url_tables_)

            # Save a list of available subregions locally
            save_pickle(avail_subregions,
                        cd_dat("GeoFabrik-subregion-name-list.pickle"))

            # Subregion index - {Subregion: URL}
            subregion_url_index = dict(zip(avail_subregions, avail_subregion_urls))

            # Save subregion_url_index to local disk
            save_pickle(subregion_url_index,
                        cd_dat("GeoFabrik-subregion-name-url-dictionary.pickle"))
            save_json(subregion_url_index,
                      cd_dat("GeoFabrik-subregion-name-url-dictionary.json"))

            # All available URLs for downloading
            home_subregion_url_table = get_subregion_table(home_url)
            avail_subregion_url_tables.append(home_subregion_url_table)
            subregion_downloads_index = pd.DataFrame(
                pd.concat(avail_subregion_url_tables, ignore_index=True))
            subregion_downloads_index.drop_duplicates(inplace=True)
            subregion_downloads_index_json = \
                subregion_downloads_index.set_index('Subregion').to_json()

            # Save subregion_downloads_index to local disk
            save_pickle(subregion_downloads_index,
                        cd_dat("GeoFabrik-subregion-downloads-catalogue.pickle"))
            save_json(subregion_downloads_index_json,
                      cd_dat("GeoFabrik-subregion-downloads-catalogue.json"))
        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))
    else:
        print("The information collection process was not activated.")
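# Illustrative call; the crawl visits every subregion page level by level before
# writing the name list, the {Subregion: URL} index, and the downloads catalogue:
#
#   collect_subregion_info_catalogue(confirmation_required=False)
#   url_index = load_pickle(cd_dat("GeoFabrik-subregion-name-url-dictionary.pickle"))
#   print(url_index.get('Great Britain'))  # key shown for illustration only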
def get_furlongs_data(route_name=None, weather_category=None, shift_yards_same_elr=220,
                      shift_yards_diff_elr=220, update=False,
                      verbose=False) -> pd.DataFrame:
    """
    Get furlongs data.

    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param weather_category: weather category, defaults to ``None``
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR == EndELR, defaults to ``220``
    :type shift_yards_same_elr: int or float
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR != EndELR, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function
        runs, defaults to ``False``
    :type verbose: bool or int
    :return: furlongs data
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_furlongs_data

        weather_category = None
        shift_yards_same_elr = 220
        shift_yards_diff_elr = 220
        update = True
        verbose = True

        route_name = None
        furlongs_data = get_furlongs_data(route_name, weather_category,
                                          shift_yards_same_elr, shift_yards_diff_elr,
                                          update, verbose)
        print(furlongs_data)

        route_name = 'Anglia'
        furlongs_data = get_furlongs_data(route_name, weather_category,
                                          shift_yards_same_elr, shift_yards_diff_elr,
                                          update, verbose)
        print(furlongs_data)
    """
    filename = "furlongs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_same_elr, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        furlongs_data = load_pickle(path_to_pickle)
    else:
        try:
            # Data of incident furlongs: both start and end identified by the same ELR
            furlongs_data_same_elr = get_furlongs_same_start_end_elrs(
                route_name=route_name, weather_category=weather_category,
                shift_yards_same_elr=shift_yards_same_elr, verbose=verbose)

            # Data of incident furlongs: start and end identified by different ELRs
            furlongs_data_diff_elr = get_furlongs_diff_start_end_elrs(
                route_name=route_name, weather_category=weather_category,
                shift_yards_diff_elr=shift_yards_diff_elr, verbose=verbose)

            # Merge the above two data sets
            furlongs_data = pd.concat([furlongs_data_same_elr, furlongs_data_diff_elr])
            furlongs_data.drop_duplicates(['AssetNumber', 'StructuredPlantNumber'],
                                          inplace=True)
            furlongs_data.sort_index(inplace=True)

            save_pickle(furlongs_data, path_to_pickle, verbose=verbose)
        except Exception as e:
            print("Failed to get \"{}\". {}.".format(
                os.path.splitext(pickle_filename)[0], e))
            furlongs_data = None

    return furlongs_data
def get_furlongs_diff_start_end_elrs(route_name=None, weather_category=None,
                                     shift_yards_diff_elr=220, update=False,
                                     verbose=False):
    """
    Get furlongs data for incident locations each identified by different start and
    end ELRs, i.e. StartELR != EndELR.

    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None`` (default), all weather
        categories
    :type weather_category: str or None
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR != EndELR, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function
        runs, defaults to ``False``
    :type verbose: bool or int
    :return: furlongs data of incident locations each identified by different start
        and end ELRs
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_furlongs_diff_start_end_elrs

        route_name = None
        weather_category = None
        shift_yards_diff_elr = 220
        update = True
        verbose = True

        furlongs_diff_start_end_elr = get_furlongs_diff_start_end_elrs(
            route_name, weather_category, shift_yards_diff_elr, update, verbose)
        print(furlongs_diff_start_end_elr)
    """
    filename = "furlongs-diff-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        furlongs_diff_start_end_elr = load_pickle(path_to_pickle)
        return furlongs_diff_start_end_elr

    else:
        adj_mileages = get_adjusted_mileages_diff_start_end_elrs(
            route_name, weather_category, shift_yards_diff_elr, verbose=verbose)

        try:
            # Get furlong information
            nr_furlong_data = vegetation.view_nr_vegetation_furlong_data(
                verbose=verbose)

            # Form a list containing all the furlong IDs
            furlong_ids = list(set(itertools.chain(*adj_mileages.Critical_FurlongIDs)))

            # Select critical (i.e. incident) furlongs
            furlongs_diff_start_end_elr = nr_furlong_data.loc[furlong_ids]

            # Save 'furlongs_diff_start_end_elr'
            save_pickle(furlongs_diff_start_end_elr, path_to_pickle, verbose=verbose)

            return furlongs_diff_start_end_elr

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(
                os.path.splitext(pickle_filename)[0], e))
def get_adjusted_mileages_diff_start_end_elrs(route_name, weather_category,
                                              shift_yards_diff_elr, update=False,
                                              verbose=False):
    """
    Get adjusted mileages for each incident location where StartELR != EndELR.

    :param route_name: name of a Route; if ``None``, all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None``, all weather categories
    :type weather_category: str or None
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR != EndELR
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function
        runs, defaults to ``False``
    :type verbose: bool or int
    :return: adjusted mileages for each incident location where StartELR != EndELR
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_adjusted_mileages_diff_start_end_elrs

        route_name = None
        weather_category = None
        shift_yards_diff_elr = 220
        update = True
        verbose = True

        adj_mileages = get_adjusted_mileages_diff_start_end_elrs(
            route_name, weather_category, shift_yards_diff_elr, update, verbose)
        print(adj_mileages)
    """
    filename = "adjusted-mileages-diff-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle)

    else:
        try:
            # Get data for which the 'StartELR' and 'EndELR' are DIFFERENT
            incident_locations_diff_start_end_elr = \
                metex.view_metex_schedule8_incident_locations(
                    route_name, weather_category, start_and_end_elr='diff',
                    verbose=verbose)

            # Get connecting points for different (ELRs, mileages)
            connecting_nodes = get_connecting_nodes(
                incident_locations_diff_start_end_elr, route_name,
                update=False, verbose=False)

            # Find the End Mileage of each StartELR and the Start Mileage of
            # each EndELR
            locations_conn = incident_locations_diff_start_end_elr.join(
                connecting_nodes.set_index(['StanoxSection'], append=True),
                on=list(connecting_nodes.index.names) + ['StanoxSection'],
                rsuffix='_conn').dropna()
            locations_conn.drop(
                columns=[x for x in locations_conn.columns if '_conn' in x],
                inplace=True)

            # Remove the data records where connecting nodes are unknown
            locations_conn = locations_conn[
                ~((locations_conn.StartELR_EndMileage == '') |
                  (locations_conn.EndELR_StartMileage == ''))]

            # Convert str mileages to num
            num_conn_colnames = ['StartELR_EndMileage_num', 'EndELR_StartMileage_num',
                                 'ConnELR_StartMileage_num', 'ConnELR_EndMileage_num']
            str_conn_colnames = ['StartELR_EndMileage', 'EndELR_StartMileage',
                                 'ConnELR_StartMileage', 'ConnELR_EndMileage']
            locations_conn[num_conn_colnames] = \
                locations_conn[str_conn_colnames].applymap(nr_mileage_str_to_num)

            # Get furlong information
            nr_furlong_data = vegetation.view_nr_vegetation_furlong_data(
                verbose=verbose)

            # Process connecting locations
            adjusted_conn_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.ConnELR, x.ConnELR_StartMileage_num,
                    x.ConnELR_EndMileage_num, 0)
                if x.ConnELR != '' else tuple(['', '', np.nan, np.nan, 0.0, []]),
                axis=1)

            adjusted_conn_mileages = pd.DataFrame(
                adjusted_conn_elr_mileages.tolist(), index=locations_conn.index,
                columns=['Conn_StartMileage_Adj', 'ConnELR_EndMileage_Adj',
                         'Conn_StartMileage_num_Adj', 'ConnELR_EndMileage_num_Adj',
                         'ConnELR_Length_Adj',  # yards
                         'ConnELR_Critical_FurlongIDs'])

            # Process Start locations
            adjusted_start_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.StartELR, x.StartMileage_num,
                    x.StartELR_EndMileage_num, shift_yards_diff_elr),
                axis=1)

            # Create a dataframe of adjusted mileage data of the Start ELRs
            adjusted_start_mileages = pd.DataFrame(
                adjusted_start_elr_mileages.tolist(), index=locations_conn.index,
                columns=['StartMileage_Adj', 'StartELR_EndMileage_Adj',
                         'StartMileage_num_Adj', 'StartELR_EndMileage_num_Adj',
                         'StartELR_Length_Adj',  # yards
                         'StartELR_Critical_FurlongIDs'])

            # Process End locations
            adjusted_end_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.EndELR, x.EndELR_StartMileage_num,
                    x.EndMileage_num, shift_yards_diff_elr),
                axis=1)

            # Create a dataframe of adjusted mileage data of the End ELRs
            adjusted_end_mileages = pd.DataFrame(
                adjusted_end_elr_mileages.tolist(), index=locations_conn.index,
                columns=['EndELR_StartMileage_Adj', 'EndMileage_Adj',
                         'EndELR_StartMileage_num_Adj', 'EndMileage_num_Adj',
                         'EndELR_Length_Adj',  # yards
                         'EndELR_Critical_FurlongIDs'])

            # Combine 'adjusted_start_mileages', 'adjusted_conn_mileages' and
            # 'adjusted_end_mileages'
            adj_mileages = adjusted_start_mileages.join(adjusted_conn_mileages).join(
                adjusted_end_mileages)
            adj_mileages.dropna(subset=['StartMileage_num_Adj', 'EndMileage_num_Adj'],
                                inplace=True)

            adj_mileages['Section_Length_Adj'] = list(zip(
                adj_mileages.StartELR_Length_Adj, adj_mileages.ConnELR_Length_Adj,
                adj_mileages.EndELR_Length_Adj))
            adj_mileages['Critical_FurlongIDs'] = \
                adj_mileages.StartELR_Critical_FurlongIDs + \
                adj_mileages.EndELR_Critical_FurlongIDs + \
                adj_mileages.ConnELR_Critical_FurlongIDs
            adj_mileages.Critical_FurlongIDs = adj_mileages.Critical_FurlongIDs.map(
                lambda x: list(set(x)))

            save_pickle(adj_mileages, path_to_pickle, verbose=verbose)

            return adj_mileages

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(
                os.path.splitext(pickle_filename)[0], e))
def get_adjusted_mileages_same_start_end_elrs(route_name, weather_category,
                                              shift_yards_same_elr, update=False,
                                              verbose=False):
    """
    Get adjusted mileages for each incident location where StartELR == EndELR.

    :param route_name: name of a Route; if ``None``, all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None``, all weather categories
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR == EndELR
    :type shift_yards_same_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function
        runs, defaults to ``False``
    :type verbose: bool or int
    :return: adjusted mileages for each incident location where StartELR == EndELR
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_adjusted_mileages_same_start_end_elrs

        route_name = None
        weather_category = None
        shift_yards_same_elr = 220
        update = True
        verbose = True

        adj_mileages = get_adjusted_mileages_same_start_end_elrs(
            route_name, weather_category, shift_yards_same_elr, update, verbose)
        print(adj_mileages)
    """
    filename = "adjusted-mileages-same-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_same_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        adj_mileages = load_pickle(path_to_pickle)
        return adj_mileages

    else:
        try:
            # Get data of incident locations where 'StartELR' and 'EndELR' are
            # THE SAME
            incident_locations = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, start_and_end_elr='same',
                verbose=verbose)

            # Get furlong information as reference
            ref_furlongs = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)

            # Calculate adjusted furlong locations for each incident
            # (for vegetation conditions)
            # noinspection PyTypeChecker
            adjusted_mileages = incident_locations.apply(
                lambda x: adjust_incident_mileages(
                    ref_furlongs, x.StartELR, x.StartMileage_num, x.EndMileage_num,
                    shift_yards_same_elr),
                axis=1)

            # Get adjusted mileage data
            adj_mileages = pd.DataFrame(
                list(adjusted_mileages), index=incident_locations.index,
                columns=['StartMileage_Adj', 'EndMileage_Adj',
                         'StartMileage_num_Adj', 'EndMileage_num_Adj',
                         'Section_Length_Adj',  # yards
                         'Critical_FurlongIDs'])

            save_pickle(adj_mileages, path_to_pickle, verbose=verbose)

            return adj_mileages

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(
                os.path.splitext(pickle_filename)[0], e))
def get_connecting_nodes(diff_start_end_elr_dat, route_name=None, update=False,
                         verbose=False):
    """
    Get data of connecting points for different ELRs.

    :param diff_start_end_elr_dat: data frame where StartELR != EndELR
    :type diff_start_end_elr_dat: pandas.DataFrame
    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function
        runs, defaults to ``False``
    :type verbose: bool or int
    :return: data of connecting points for different ELRs
    :rtype: pandas.DataFrame

    **Test**::

        from mssqlserver.metex import view_metex_schedule8_incident_locations
        from models.prototype.furlong import get_connecting_nodes

        update = False
        verbose = True

        route_name = None
        diff_start_end_elr_dat = view_metex_schedule8_incident_locations(
            route_name=route_name, start_and_end_elr='diff', verbose=verbose)
        connecting_nodes = get_connecting_nodes(diff_start_end_elr_dat, route_name,
                                                update, verbose)
        print(connecting_nodes)

        route_name = 'Anglia'
        diff_start_end_elr_dat = view_metex_schedule8_incident_locations(
            route_name=route_name, start_and_end_elr='diff', verbose=verbose)
        connecting_nodes = get_connecting_nodes(diff_start_end_elr_dat, route_name,
                                                update, verbose)
        print(connecting_nodes)
    """
    filename = "connections-between-different-ELRs"
    pickle_filename = make_filename(filename, route_name)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle, verbose=verbose)

    else:
        try:
            pickle_filename_temp = make_filename(filename)
            path_to_pickle_temp = cdd_geodata(pickle_filename_temp)

            if os.path.isfile(path_to_pickle_temp) and not update:
                connecting_nodes_all = load_pickle(path_to_pickle_temp)
                connecting_nodes = get_subset(connecting_nodes_all, route_name)

            else:
                diff_elr_mileages = diff_start_end_elr_dat.drop_duplicates()

                em = ELRMileages()
                print("Searching for connecting ELRs ... ", end="") if verbose else ""
                mileage_file_dir = cdd_railway_codes(
                    "line data\\elrs-and-mileages\\mileages")
                # noinspection PyTypeChecker
                conn_mileages = diff_elr_mileages.apply(
                    lambda x: em.get_conn_mileages(
                        x.StartELR, x.EndELR, update,
                        pickle_mileage_file=True, data_dir=mileage_file_dir),
                    axis=1)
                print("\nFinished.") if verbose else ""

                conn_mileages_data = pd.DataFrame(
                    conn_mileages.to_list(), index=diff_elr_mileages.index,
                    columns=['StartELR_EndMileage', 'ConnELR', 'ConnELR_StartMileage',
                             'ConnELR_EndMileage', 'EndELR_StartMileage'])

                connecting_nodes = diff_elr_mileages.join(conn_mileages_data)
                connecting_nodes.set_index(
                    ['StartELR', 'StartMileage', 'EndELR', 'EndMileage'],
                    inplace=True)

            save_pickle(connecting_nodes, path_to_pickle, verbose=verbose)

            return connecting_nodes

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(
                os.path.splitext(pickle_filename)[0], e))