def get_fm_file(file_name, download_if_missing=False):
    """Read a facilitymatcher file, obtaining it first if not found locally.

    When the file cannot be loaded, it is either downloaded from the remote
    server (``download_if_missing=True``) or generated by the matching writer
    for ``file_name``, and then loaded again.

    :param file_name: str, can be 'FacilityMatchList_forStEWI' or
        'FRS_NAICSforStEWI'
    :param download_if_missing: bool, if True will attempt to load from
        remote server instead of generating when the file is not found
        locally
    :return: DataFrame with the 'FRS_ID', 'FacilityID' and 'NAICS' columns
        (those that are present) cast to str, or None if the file could not
        be loaded after the download/generation attempt
    """
    file_meta = set_facilitymatcher_meta(file_name, category='')
    df = load_preprocessed_output(file_meta, paths)
    if df is None:
        log.info(f'{file_name} not found in {output_dir}, '
                 'writing facility matches to file')
        if download_if_missing:
            download_from_remote(file_meta, paths)
        elif file_name == 'FacilityMatchList_forStEWI':
            write_fm.write_facility_matches()
        elif file_name == 'FRS_NAICSforStEWI':
            write_naics.write_NAICS_matches()
        df = load_preprocessed_output(file_meta, paths)

    if df is None:
        # Download/generation did not produce a loadable file (or file_name
        # was not recognised); previously this fell through to `k in df`
        # and raised TypeError on None.
        log.error(f'{file_name} could not be loaded from {output_dir}')
        return None

    col_dict = {"FRS_ID": "str",
                "FacilityID": "str",
                "NAICS": "str"}
    for k, v in col_dict.items():
        if k in df:
            df[k] = df[k].astype(v)
    return df
def standardize_output(year, source='Point'):
    """Read and parse the NEI files for a year into a single DataFrame.

    Each file listed under ``_config[year]['file_name']`` is read with
    ``read_data`` (after downloading it when it is not present in
    ``OUTPUT_PATH``), the results are concatenated, 'FlowAmount' is scaled
    by ``USton_kg`` and data-quality columns are added.

    :param year: str, Year of NEI dataset
    :param source: str, value written to the 'Source' column. For 'Point'
        the 'ReliabilityScore' codes are mapped to 'DataReliability' via the
        NEI reliability table and 'Compartment' is set to 'air'; for any
        other value 'DataReliability' is set to 3.
    :returns nei: DataFrame of parsed NEI data.
    """
    # Collect the per-file frames and concatenate once; concatenating inside
    # the loop re-copies all previously read rows on every iteration.
    frames = []
    record_count = 0
    for file in _config[year]['file_name']:
        filename = OUTPUT_PATH.joinpath(file)
        if not filename.is_file():
            log.info(f'{file} not found in {OUTPUT_PATH}, '
                     'downloading source data')
            # download source file and metadata
            file_meta = set_stewi_meta(strip_file_extension(file))
            file_meta.category = EXT_DIR
            # NOTE(review): presumably lower-cased for remote access - confirm
            file_meta.tool = file_meta.tool.lower()
            download_from_remote(file_meta, paths)
        log.info(f'reading NEI data from {filename}')
        frame = read_data(year, filename)
        frames.append(frame)
        # running total, matching the cumulative count logged previously
        record_count += len(frame)
        log.debug(f'{str(record_count)} records')
    # An empty file list yields an empty frame, as before.
    nei = pd.concat(frames) if frames else pd.DataFrame()

    # convert TON to KG
    nei['FlowAmount'] = nei['FlowAmount'] * USton_kg

    log.info('adding Data Quality information')
    if source == 'Point':
        nei_reliability_table = get_reliability_table_for_source('NEI')
        # cast both merge keys to float so the codes compare equal
        nei_reliability_table['Code'] = \
            nei_reliability_table['Code'].astype(float)
        nei['ReliabilityScore'] = nei['ReliabilityScore'].astype(float)
        nei = nei.merge(nei_reliability_table, left_on='ReliabilityScore',
                        right_on='Code', how='left')
        nei['DataReliability'] = nei['DQI Reliability Score']
        # drop Code and DQI Reliability Score columns
        nei = nei.drop(columns=['Code', 'DQI Reliability Score',
                                'ReliabilityScore'])

        nei['Compartment'] = 'air'
        # Disabled: modify compartment based on stack height (ft)
        # nei.loc[nei['StackHeight'] < 32, 'Compartment'] = 'air/ground'
        # nei.loc[(nei['StackHeight'] >= 32) & (nei['StackHeight'] < 164),
        #         'Compartment'] = 'air/low'
        # nei.loc[(nei['StackHeight'] >= 164) & (nei['StackHeight'] < 492),
        #         'Compartment'] = 'air/high'
        # nei.loc[nei['StackHeight'] >= 492, 'Compartment'] = 'air/very high'
    else:
        nei['DataReliability'] = 3
    # add Source column
    nei['Source'] = source
    nei.reset_index(drop=True, inplace=True)
    return nei
def getFlowByActivity(datasource, year, flowclass=None, geographic_level=None,
                      download_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Retrieves stored data in the FlowByActivity format

    The local copy is tried first; if it is missing and
    ``download_if_missing`` is set, a remote download is attempted; if the
    data is still missing it is generated with
    ``flowsa.flowbyactivity.main``.

    :param datasource: str, the code of the datasource.
    :param year: int, a year, e.g. 2012
    :param flowclass: str, a 'Class' of the flow. Optional. E.g. 'Water'
    :param geographic_level: str, a geographic level of the data.
        Optional. E.g. 'national', 'state', 'county'.
    :param download_if_missing: bool, if True will attempt to load from
        remote server prior to generating if file not found locally
    :return: a pandas DataFrame in FlowByActivity format, or None if the
        FBA could not be loaded, downloaded or generated
    """
    from esupy.processed_data_mgmt import download_from_remote

    # Set fba metadata
    name = flowsa.flowbyactivity.set_fba_name(datasource, year)
    fba_meta = set_fb_meta(name, "FlowByActivity")

    # Try to load a local version of fba
    fba = load_preprocessed_output(fba_meta, paths)

    # Remote download
    if fba is None and download_if_missing:
        log.info('%s %s not found in %s, downloading from remote source',
                 datasource, str(year), fbaoutputpath)
        download_from_remote(fba_meta, paths)
        fba = load_preprocessed_output(fba_meta, paths)

    # Still missing (download disabled or unsuccessful): generate it
    if fba is None:
        log.info('%s %s not found in %s, running functions to generate FBA',
                 datasource, str(year), fbaoutputpath)
        flowsa.flowbyactivity.main(year=year, source=datasource)
        fba = load_preprocessed_output(fba_meta, paths)

    if fba is None:
        # Nothing to filter; returning here avoids subscripting None when
        # flowclass or geographic_level is supplied.
        log.error('getFlowByActivity failed, FBA not found')
        return None

    log.info('Loaded %s %s from %s', datasource, str(year), fbaoutputpath)

    # Address optional parameters
    if flowclass is not None:
        fba = fba[fba['Class'] == flowclass]
    # if geographic level specified, only load rows in geo level
    if geographic_level is not None:
        fba = filter_by_geoscale(fba, geographic_level)
    return fba
def getFlowBySector(methodname,
                    download_FBAs_if_missing=DEFAULT_DOWNLOAD_IF_MISSING,
                    download_FBS_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Return the FlowBySector output for a method, trying in order: the
    locally stored file, a remote download (when allowed), and finally
    generation via ``flowsa.flowbysector.main``.

    :param methodname: string, Name of an available method for the given
        class
    :param download_FBAs_if_missing: bool, passed through to the FBS
        generation step; if True will attempt to load FBAs used in
        generating the FBS from remote server prior to generating if file
        not found locally
    :param download_FBS_if_missing: bool, if True will attempt to load the
        FBS from remote server prior to generating if file not found locally
    :return: dataframe in flow by sector format, or None when every
        attempt to load it came back empty
    """
    meta = set_fb_meta(methodname, "FlowBySector")

    # First choice: whatever is already stored locally
    sector_flows = load_preprocessed_output(meta, paths)

    # Second choice: fetch the stored FBS from the remote server
    if download_FBS_if_missing and sector_flows is None:
        log.info('%s not found in %s, downloading from remote source',
                 methodname, fbsoutputpath)
        download_from_remote(meta, paths,
                             subdirectory_dict={'.log': 'Log'})
        sector_flows = load_preprocessed_output(meta, paths)

    # Last resort: build the FBS, optionally downloading the FBAs it needs
    if sector_flows is None:
        log.info('%s not found in %s, running functions to generate FBS',
                 methodname, fbsoutputpath)
        flowsa.flowbysector.main(
            method=methodname,
            download_FBAs_if_missing=download_FBAs_if_missing,
        )
        sector_flows = load_preprocessed_output(meta, paths)

    # Report the outcome of whichever route was taken
    if sector_flows is not None:
        log.info('Loaded %s from %s', methodname, fbsoutputpath)
    else:
        log.error('getFlowBySector failed, FBS not found')
    return sector_flows
def read_inventory(inventory_acronym, year, f, download_if_missing=False):
    """Return the inventory from local directory, obtaining it if not found.

    When the inventory is not stored locally it is either downloaded from
    the remote server together with its metadata file
    (``download_if_missing=True``) or generated, and then loaded again.

    :param inventory_acronym: like 'TRI'
    :param year: year as number like 2010
    :param f: object of class StewiFormat
    :param download_if_missing: bool, if True will attempt to load from
        remote server instead of generating when the file is not found
        locally
    :return: dataframe of stored inventory with columns cast to the types
        given by ``f.field_types()``; if not present returns None
    """
    file_name = f'{inventory_acronym}_{year}'
    meta = set_stewi_meta(file_name, str(f))
    inventory = load_preprocessed_output(meta, paths)
    # Only used in log messages. Formatting instead of `+` keeps the same
    # text for a str path and also works if local_path is a pathlib.Path.
    method_path = f'{paths.local_path}/{meta.category}'
    if inventory is None:
        log.info(f'{meta.name_data} not found in {method_path}')
        if download_if_missing:
            meta.tool = meta.tool.lower()  # lower case for remote access
            download_from_remote(meta, paths)
            # download metadata file
            metadata_meta = copy.copy(meta)
            metadata_meta.category = ''
            metadata_meta.ext = 'json'
            download_from_remote(metadata_meta, paths)
        else:
            log.info('requested inventory does not exist in local directory, '
                     'it will be generated...')
            generate_inventory(inventory_acronym, year)
        inventory = load_preprocessed_output(meta, paths)
        if inventory is None:
            log.error('error generating inventory')
    if inventory is not None:
        log.info(f'loaded {meta.name_data} from {method_path}')
        # ensure dtypes, restricted to the columns actually present
        present = inventory.columns
        fields = {key: value for key, value in f.field_types().items()
                  if key in present}
        inventory = inventory.astype(fields)
    return inventory
def getFlowBySector(methodname,
                    download_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Return the FlowBySector output for a method: load the stored file,
    otherwise download it (when allowed), otherwise generate it with
    ``flowsa.flowbysector.main`` and load the result.

    :param methodname: string, Name of an available method for the given
        class
    :param download_if_missing: bool, if True will attempt to load from
        remote server prior to generating if file not found locally
    :return: dataframe in flow by sector format, or None when the FBS could
        not be loaded by any of the routes
    """
    from esupy.processed_data_mgmt import download_from_remote

    meta = set_fb_meta(methodname, "FlowBySector")
    result = load_preprocessed_output(meta, paths)

    # Optional remote download of the stored FBS
    if download_if_missing and result is None:
        log.info('%s not found in %s, downloading from remote source',
                 methodname, fbsoutputpath)
        download_from_remote(meta, paths,
                             subdirectory_dict={'.log': 'Log'})
        result = load_preprocessed_output(meta, paths)

    # No FBS yet (download not requested or unsuccessful): generate the FBS
    if result is None:
        log.info('%s not found in %s, running functions to generate FBS',
                 methodname, fbsoutputpath)
        flowsa.flowbysector.main(method=methodname)
        # Now load the freshly generated FBS
        result = load_preprocessed_output(meta, paths)

    # Single reporting point for every route above
    if result is not None:
        log.info('Loaded %s from %s', methodname, fbsoutputpath)
    else:
        log.error('getFlowBySector failed, FBS not found')
    return result
def download_stewicombo_from_remote(name):
    """Build stewicombo metadata for ``name`` and request its download.

    :param name: name passed to ``set_stewicombo_meta`` (with an empty
        category) to build the metadata handed to ``download_from_remote``
    """
    file_meta = set_stewicombo_meta(name, category='')
    # Announce the attempt before handing off to esupy
    log.info(f'attempting download of {name} from {paths.remote_path}')
    download_from_remote(file_meta, paths)