def download_race_data(overwrite=True):
    """
    Download ACS race data for every year in YEARS, one CSV per year.

    The download is best-effort: a failure for one year is reported and the
    loop continues with the next year.

    Args:
        overwrite (bool): forwarded to `check_overwrite_path` for each output
            file just before it is written

    Inputs:
        - RAW//CENSUS (root census folder)
    Outputs:
        - RAW//CENSUS//ACS_{year}_race.csv

    Returns:
        None
    """
    census = validate_directory(make_path(RAW, "CENSUS"))
    print("RACE:")
    for year in YEARS:
        race_out = make_path(census, f"ACS_{year}_race.csv")
        print(f"...Fetching race data ({race_out})")
        try:
            race = helper.download_race_vars(
                year,
                acs_dataset="acs5",
                state="12",
                county="086",
                table=dl_conf.ACS_RACE_TABLE,
                columns=dl_conf.ACS_RACE_COLUMNS,
            )
            # the overwrite check runs only after a successful download so a
            # failed request never touches an existing output file
            check_overwrite_path(output=race_out, overwrite=overwrite)
            race.to_csv(race_out, index=False)
        except Exception as err:
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate; the cause is reported instead of hidden
            print(f"..ERROR DOWNLOADING RACE DATA ({year}): {err}")
def download_census_geo(overwrite=True):
    """
    Download and extract the census geographies listed in
    `dl_conf.CENSUS_GEO_TYPES` for `SNAPSHOT_YEAR`.

    Files are downloaded to a temporary folder, extracted to the census
    folder, and the temporary folder is removed afterwards (also when a
    download fails).

    Args:
        overwrite (bool): forwarded to `check_overwrite_path` for both the
            temporary download folder and the extract folder

    Inputs:
        - RAW//temp_downloads (folder path)
        - RAW//CENSUS (extract path)
        - CENSUS_GEO_TYPES (list of geographies)
    Outputs:
        - RAW//CENSUS//BG (block group geographies)
        - RAW//CENSUS//TABBLOCK (block geographies)

    Returns:
        None
    """
    print("\nFetching CENSUS Geographies...")
    dl_dir = make_path(RAW, "temp_downloads")
    ext_dir = make_path(RAW, "CENSUS")
    for path in [dl_dir, ext_dir]:
        check_overwrite_path(output=path, overwrite=overwrite)
        validate_directory(path)
    try:
        for geo_type in dl_conf.CENSUS_GEO_TYPES:
            census_geo.get_one_geo_type(
                geo_type=geo_type,
                download_dir=dl_dir,
                extract_dir=ext_dir,
                state=dl_conf.CENSUS_STATE,
                year=str(SNAPSHOT_YEAR),
            )
    finally:
        # always remove the temporary download folder, even if one of the
        # geography downloads raised
        rmtree(dl_dir)
def download_commute_data(overwrite=True):
    """
    Download ACS commute data for every year in YEARS, one CSV per year.

    The download is best-effort: a failure for one year is reported and the
    loop continues with the next year.

    Args:
        overwrite (bool): forwarded to `check_overwrite_path` for each output
            file just before it is written

    Inputs:
        - RAW//CENSUS (root census folder)
    Outputs:
        - RAW//CENSUS//ACS_{year}_commute.csv

    Returns:
        None
    """
    census = validate_directory(make_path(RAW, "CENSUS"))
    print("COMMUTE:")
    for year in YEARS:
        commute_out = make_path(census, f"ACS_{year}_commute.csv")
        print(f"...Fetching commute data ({commute_out})")
        try:
            commute = helper.download_commute_vars(
                year,
                acs_dataset="acs5",
                state="12",
                county="086",
                table=dl_conf.ACS_MODE_TABLE,
                columns=dl_conf.ACS_MODE_COLUMNS,
            )
            # the overwrite check runs only after a successful download so a
            # failed request never touches an existing output file
            check_overwrite_path(output=commute_out, overwrite=overwrite)
            commute.to_csv(commute_out, index=False)
        except Exception as err:
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate; the cause is reported instead of hidden
            print(f"..ERROR DOWNLOADING COMMUTE DATA ({year}): {err}")
def download_urls(overwrite=True):
    """
    Download raw data that are directly accessible at a url endpoint.

    Only urls ending in `.zip` or `.geojson` are handled; any other entry is
    reported and skipped.

    Args:
        overwrite (bool): forwarded to `check_overwrite_path` for each output
            file before it is downloaded

    Inputs:
        - DOWNLOAD_URL_DICT (dictionary of output_name: url found in
          config.download_config)
    Outputs: (11 files)
        - RAW//{output_name} --> ['Imperviousness', 'MD_Urban_Growth_Boundary',
          'Miami-Dade_County_Boundary', 'Municipal_Parks', 'County_Parks',
          'Federal_State_Parks', 'Park_Facilities', 'Bike_Lanes', 'Paved_Path',
          'Paved_Shoulder', 'Wide_Curb_Lane']

    Returns:
        None
    """
    for file, url in dl_conf.DOWNLOAD_URL_DICT.items():
        _, ext = os.path.splitext(url)
        if ext not in (".zip", ".geojson"):
            print("downloader doesnt handle that extension")
            # skip this entry: without an output path there is nothing to
            # download to (falling through would reuse the previous
            # iteration's path or hit an undefined name)
            continue
        out_file = make_path(RAW, f"{file}{ext}")
        print(f"Downloading {out_file}")
        check_overwrite_path(output=out_file, overwrite=overwrite)
        helper.download_file_from_url(url=url, save_path=out_file)
def download_aggregate_lodes(
    output_dir,
    file_type,
    state,
    segment,
    part,
    job_type,
    year,
    agg_geog=None,
    overwrite=False,
):
    """
    Helper function to fetch LODES data and aggregate it to another census
    geography if one is provided.

    The function is best-effort: any error raised while downloading or
    aggregating is reported with a printed message and not re-raised.

    Args:
        output_dir (str): path to location downloaded files should end up
        file_type (str): one of three LODES groupings ['od', 'rac', 'wac']
            - OD: Origin-Destination data, totals are associated with both a
              home Census Block and a work Census Block
            - RAC: Residence Area Characteristic data, jobs are totaled by
              home Census Block
            - WAC: Workplace Area Characteristic data, jobs are totaled by
              work Census Block
        state (str): The two-character postal abbreviation for the state;
            it is lowercased when building file names and urls
        segment (str): Segment of the workforce, can have the values of
            ["S000", "SA01", "SA02", "SA03", "SE01", "SE02", "SE03", "SI01",
            "SI02", "SI03", ""]
        part (str): Part of the state file, can have a value of either "main"
            or "aux". Complementary parts of the state file, the main part
            includes jobs with both workplace and residence in the state and
            the aux part includes jobs with the workplace in the state and the
            residence outside of the state.
        job_type (str): LODES job types ("JT00" for All Jobs, "JT01" for
            Primary Jobs, "JT02" for All Private Jobs, "JT03" for Private
            Primary Jobs, "JT04" for All Federal Jobs, or "JT05" for Federal
            Primary Jobs).
        year (int): year of LODES data to download
        agg_geog (str or list of str): census geographies to aggregate lodes
            data to
        overwrite (bool): forwarded to `check_overwrite_path` for the
            downloaded and the aggregated files

    Returns:
        None: writes csv tables of aggregated lodes data in `output_dir`
    """
    st = state.lower()
    try:
        out_dir = validate_directory(directory=output_dir)
        if validate_lodes_download(
            file_type, state, segment, part, job_type, year, agg_geog
        ):
            if file_type == "od":
                # kept for now as it will still download but not aggregate OD
                lodes_fname = f"{st}_{file_type}_{part}_{job_type}_{year}.csv.gz"
            else:
                lodes_fname = f"{st}_{file_type}_{segment}_{job_type}_{year}.csv.gz"
            lodes_download_url = f"{LODES_URL}/{st}/{file_type}/{lodes_fname}"
            # the raw download is tagged "_blk" so aggregated copies can sit
            # next to it with their own geography tag
            lodes_out = make_path(out_dir, lodes_fname)
            lodes_out = lodes_out.replace(".csv.gz", "_blk.csv.gz")
            print(f"...downloading {lodes_fname} to {lodes_out}")
            check_overwrite_path(output=lodes_out, overwrite=overwrite)
            download_file_from_url(url=lodes_download_url, save_path=lodes_out)
        else:
            # empty string marks "nothing was downloaded"
            lodes_out = ""

        if agg_geog and lodes_out != "":
            if validate_aggregate_geo_inputs(values=agg_geog, valid=LODES_AGG_GEOS):
                if isinstance(agg_geog, string_types):
                    agg_geog = [agg_geog]
                # the crosswalk uses the same lowercased state code as the
                # LODES file itself, and is fetched once for all geographies
                cross_fname = f"{st}_xwalk.csv.gz"
                cross_out = make_path(out_dir, cross_fname)
                crosswalk_url = f"{LODES_URL}/{st}/{cross_fname}"
                if not os.path.exists(cross_out):
                    print(f"...downloading {cross_fname} to {cross_out}")
                    download_file_from_url(url=crosswalk_url, save_path=cross_out)
                for geog in agg_geog:
                    agged_out = lodes_out.replace("_blk.csv.gz", f"_{geog}.csv.gz")
                    print(f"...aggregating block group level data to {geog}")
                    agged = aggregate_lodes_data(
                        geo_crosswalk_path=cross_out,
                        lodes_path=lodes_out,
                        file_type=file_type,
                        agg_geo=geog,
                    )
                    check_overwrite_path(output=agged_out, overwrite=overwrite)
                    agged.to_csv(agged_out, compression="gzip", index=False)
        else:
            print(
                "No aggregation requested or there is no LODES data for this request"
            )
    except Exception as err:
        # stay best-effort, but report what went wrong; `Exception` lets
        # KeyboardInterrupt/SystemExit propagate
        print(f"something failed: {err}")
def download_osm_buildings(
    output_dir,
    polygon=None,
    bbox=None,
    data_crs=None,
    keep_fields=["osmid", "building", "name", "geometry"],
    suffix="",
    overwrite=False
):
    """
    Fetch the OSM building polygons within a bounding box and save them as a
    shapefile.

    The bounding box is produced by `validate_inputs` from `polygon`, `bbox`
    and `data_crs`.

    Args:
        output_dir (str): Path to output directory; results are written to a
            `buildings_{suffix}` subfolder
        polygon (str): path to a shapefile or geojson object readable by
            geopandas
        bbox (dict): default=None; A dictionary with keys 'south', 'west',
            'north', and 'east' of EPSG:4326-style coordinates, defining a
            bounding box for the area from which to fetch OSM features.
            Only required when `polygon` is not provided.
        data_crs (int): integer value representing an EPSG code
        keep_fields (list): list of fields to keep in output dataset
        suffix (str): string value to be added to the end of the output folder
        overwrite (bool): forwarded to `check_overwrite_path` for the output
            shapefile

    Returns:
        buildings_gdf (gpd.GeoDataFrame): A gdf of OSM building features
            restricted to polygon geometries and `keep_fields` columns.

    Notes:
        OSM building polygon features are saved in the `buildings_{suffix}`
        folder as `OSM_Buildings_{YYYYMMDD}.shp`, where `YYYYMMDD` is the date
        on which the function was run.
    """
    # Validation of inputs
    # TODO: separate polygon and bbox validation
    bounding_box = validate_inputs(
        study_area_poly=polygon, bbox=bbox, data_crs=data_crs
    )
    # - Output location
    output_dir = validate_directory(make_path(output_dir, f"buildings_{suffix}"))

    # Data read in and setup -------------------------------------------------
    print("...Pulling building data from Overpass API...")
    buildings_gdf = ox.geometries_from_bbox(
        north=bounding_box["north"],
        south=bounding_box["south"],
        east=bounding_box["east"],
        west=bounding_box["west"],
        tags={"building": True},
    )

    # drop non-polygon features and subset fields
    print("...Dropping non-polygon features and unneeded fields")
    buildings_gdf = buildings_gdf[
        buildings_gdf.geom_type.isin(["MultiPolygon", "Polygon"])
    ]
    drop_cols = [col for col in buildings_gdf.columns if col not in keep_fields]
    # non-in-place drop: the frame above is a filtered selection, and
    # mutating it in place can trigger pandas chained-assignment warnings
    buildings_gdf = buildings_gdf.drop(columns=drop_cols)
    # NOTE(review): the original called `buildings_gdf.reset_index()` here and
    # discarded the result, so the index was never reset. The call is removed
    # to keep the returned frame unchanged; assign the result if the index
    # levels are meant to become columns.

    # Saving -----------------------------------------------------------------
    print("...Saving...")
    dt = datetime.now().strftime("%Y%m%d")
    file_name = "OSM_Buildings_{}.shp".format(dt)
    save_path = make_path(output_dir, file_name)
    check_overwrite_path(output=save_path, overwrite=overwrite)
    buildings_gdf.to_file(save_path)
    print("-- saved to: " + save_path)

    return buildings_gdf
def download_osm_networks(
    output_dir,
    polygon=None,
    bbox=None,
    data_crs=None,
    net_types=["drive", "walk", "bike"],
    pickle_save=False,
    suffix="",
    overwrite=False
):
    """
    Download an OpenStreetMap network for each requested mode within a
    bounding box and save each one as a shapefile.

    The bounding box is produced by `validate_inputs` from `polygon`, `bbox`
    and `data_crs`.

    Args:
        output_dir (str): Path to output directory. Each modal network
            (specified by `net_types`) is saved to this directory within a
            `{net_type}_{suffix}` folder as a shape file. If `pickle_save` is
            True, the pickled graph object is stored in the same folder.
        polygon (str): default=None; Path to study area polygon(s) shapefile.
        bbox (dict): default=None; A dictionary with keys 'south', 'west',
            'north', and 'east' of EPSG:4326-style coordinates, defining a
            bounding box for the area from which to fetch OSM features.
            Only required when `polygon` is not provided.
        data_crs (int): integer value representing an EPSG code
        net_types (list): [String,...], default=["drive", "walk", "bike"]
            A list containing any or all of "drive", "walk", or "bike",
            specifying the desired OSM network features to be downloaded.
        pickle_save (bool): default=False; If True, the downloaded OSM
            networks are also saved with the `pickle` module.
        suffix (str): default=""; Downloaded datasets may optionally be stored
            in folders with a suffix appended, differentiating networks by
            date, for example.
        overwrite (bool): forwarded to `check_overwrite_path` for each
            network output folder

    Returns:
        mode_nets (dict): A dictionary of graph objects. Keys are mode names
            based on `net_types`; values are the graphs returned by osmnx.
    """
    # Validation of inputs
    # TODO: separate polygon and bbox validation
    bounding_box = validate_inputs(
        study_area_poly=polygon, bbox=bbox, data_crs=data_crs
    )
    # - ensure Network types are valid and formatted correctly
    net_types = validate_network_types(network_types=net_types)
    output_dir = validate_directory(output_dir)

    # Fetch network features
    mode_nets = {}
    for net_type in net_types:
        print("")
        net_folder = f"{net_type}_{suffix}"
        print(f"OSMnx '{net_type.upper()}' network extraction")
        print("-- extracting a composed network by bounding box...")
        g = ox.graph_from_bbox(
            north=bounding_box["north"],
            south=bounding_box["south"],
            east=bounding_box["east"],
            west=bounding_box["west"],
            network_type=net_type,
            retain_all=True,
        )
        if net_type in ["walk", "bike"]:
            g = dl_help.trim_components(graph=g)

        # 1. Resolve the output folder and apply the overwrite policy BEFORE
        #    anything is written into it, so the pickle is neither written to
        #    a missing folder nor affected by the later overwrite check
        net_dir = os.path.join(output_dir, net_folder)
        check_overwrite_path(output=net_dir, overwrite=overwrite)

        # Pickle if requested
        if pickle_save:
            print("-- saving the composed network as pickle")
            # the folder must exist before the pickle file can be opened
            os.makedirs(net_dir, exist_ok=True)
            pickle_path = os.path.join(net_dir, "osmnx_composed_net.p")
            with open(pickle_path, "wb") as pickle_file:
                pickle.dump(g, pickle_file)
            print("---- saved to: {}".format(pickle_path))

        # 2. Saving as shapefile
        print("-- saving network shapefile...")
        ox.save_graph_shapefile(G=g, filepath=net_dir)
        # need to change this directory
        print("---- saved to: " + net_dir)

        # 3. Add the final graph to the dictionary of networks
        mode_nets[net_type] = g
    return mode_nets