def get_reported_releases(CASlist):
    """
    Retrieves release info from stewi for the chemicals of interest.

    For every inventory/year pair in the module-level
    ``inventories_of_interest`` the inventory is loaded, reduced to the flows
    listed for that inventory in the module-level ``chems_stewi_matches``
    (compared case-insensitively), tagged with its source and year, and
    joined to the facility records of that inventory.

    NOTE(review): ``CASlist`` is accepted but never read in this function;
    the chemical filter comes entirely from ``chems_stewi_matches`` -
    confirm that table is built from the same CAS list by the caller.

    :param CASlist: list, a list of CAS in standard CAS format
    :return: a pandas DataFrame with records for each release with context
        and facility information; empty if there are no inventories of
        interest
    """
    releases = []
    for k, v in inventories_of_interest.items():
        inv = stewi.getInventory(k, v)
        # filter by chems of interest, comparing lower-cased flow names
        inv['FlowName'] = inv['FlowName'].apply(lambda x: x.lower())
        # falsy entries (None, '') mean "no match in this inventory"
        inv_fl_of_interest = [
            x.lower() for x in chems_stewi_matches[k].values if x]
        # .copy() so the metadata columns below are written to an
        # independent frame rather than to a slice of the inventory
        inv = inv[inv["FlowName"].isin(inv_fl_of_interest)].copy()
        inv["Source"] = k
        inv["Year"] = v
        # Join with facility data to get location, keeping only the
        # facilities that actually report one of the flows of interest
        fac = stewi.getInventoryFacilities(k, v)
        uniq_facs = pd.unique(inv['FacilityID'])
        fac = fac[fac["FacilityID"].isin(uniq_facs)]
        releases.append(pd.merge(inv, fac, on=['FacilityID']))
    if not releases:
        return pd.DataFrame()
    # single concat instead of growing a frame inside the loop
    return pd.concat(releases, sort=False)
def stewi_to_sector(yaml_load):
    """
    Returns emissions from stewi in fbs format, requires stewi >= 0.9.5
    :param yaml_load: which may contain the following elements:
        inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
        NAICS_level: desired NAICS aggregation level, using
                sector_level_key, should match target_sector_level
        geo_scale: desired geographic aggregation level ('national',
                'state', 'county'), should match target_geoscale
        compartments: list of compartments to include (e.g., 'water',
                'air', 'soil'), use None (or omit) to include all
                compartments
        functions: list of functions (str) to call for additional
                processing; each name is looked up in this module and
                called with the fbs dataframe
    :return: df, FBS format
    """
    import stewi

    # determine if fxns specified in FBS method yaml
    functions = yaml_load.get('functions') or []

    NAICS_level_value = sector_level_key[yaml_load['NAICS_level']]

    # run stewi to generate inventory and filter for LCI
    frames = []
    for database, year in yaml_load['inventory_dict'].items():
        inv = stewi.getInventory(database, year, filter_for_LCI=True,
                                 US_States_Only=True)
        inv['Year'] = year
        inv['MetaSources'] = database
        frames.append(inv)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    df = pd.concat(frames) if frames else pd.DataFrame()

    compartments = yaml_load.get('compartments')
    if compartments is not None:
        df = df[df['Compartment'].isin(compartments)]

    facility_mapping = extract_facility_data(yaml_load['inventory_dict'])
    # Convert NAICS to string (first to int to avoid decimals)
    facility_mapping['NAICS'] = \
        facility_mapping['NAICS'].astype(int).astype(str)
    facility_mapping = naics_expansion(facility_mapping)

    # merge dataframes to assign facility information based on facility IDs
    df = pd.merge(df, facility_mapping, how='left', on='FacilityID')

    # add levelized NAICS code prior to aggregation
    df['NAICS_lvl'] = df['NAICS'].str[0:NAICS_level_value]

    fbs = prepare_stewi_fbs(df, yaml_load['inventory_dict'],
                            yaml_load['NAICS_level'], yaml_load['geo_scale'])

    # apply the optional post-processing functions in the order listed
    for function in functions:
        fbs = getattr(sys.modules[__name__], function)(fbs)

    return fbs
def getInventoriesforFacilityMatches(inventory_dict, facilitymatches,
                                     filter_for_LCI, base_inventory=None):
    """Retrieve stored flowbyfacility datasets based on passed dictionary.

    Filters them if necessary. Returns only those facilities with an FRS_ID
    except for those in the base_inventory where all are returned.
    Inventories for which stewi returns None are skipped.

    :param inventory_dict: dictionary of inventory acronym -> year
    :param facilitymatches: dataframe matching FacilityMatches format
    :param filter_for_LCI: if truthy, the 'filter_for_LCI' filter is passed
        to stewi.getInventory
    :param base_inventory: optional inventory acronym; when given, records of
        every other inventory are limited to FRS_IDs found for this inventory
        in ``facilitymatches``
    :return: dataframe of the stacked inventories, limited to the
        flowbyfacility fields plus 'Source', 'Year' and 'FRS_ID' where present
    """
    if base_inventory is not None:
        # Identify the FRS in the base inventory and keep only those
        base_FRS_list = list(pd.unique(facilitymatches[
            facilitymatches['Source'] == base_inventory]['FRS_ID']))

    columns_to_keep = StewiFormat.FLOWBYFACILITY.fields() + ['Source', 'Year',
                                                             'FRS_ID']
    filters = ['filter_for_LCI'] if filter_for_LCI else None

    frames = []
    for k, year in inventory_dict.items():
        inventory = stewi.getInventory(k, year, 'flowbyfacility', filters)
        if inventory is None:
            continue
        inventory["Source"] = k
        # Merge in FRS_ID, ensure only single FRS added per facility ID,
        # keeping first listed
        facmatches = facilitymatches[facilitymatches['Source'] == k]
        facmatches = facmatches.drop_duplicates(
            subset=['FacilityID', 'Source'], keep='first')
        inventory = pd.merge(inventory, facmatches,
                             on=['FacilityID', 'Source'], how='left')
        if inventory['FRS_ID'].isna().sum() > 0:
            log.debug('Some facilities missing FRS_ID')

        # If this isn't the base inventory, filter records for facilities not
        # found in the base inventory. Compare by value: identity (`is`) on
        # strings depends on interning and is not reliable.
        if base_inventory is not None and k != base_inventory:
            inventory = inventory[
                inventory['FRS_ID'].isin(base_FRS_list)].copy()

        # Add metadata
        inventory["Year"] = year
        cols_to_keep = [c for c in columns_to_keep if c in inventory]
        frames.append(inventory[cols_to_keep])

    if not frames:
        return pd.DataFrame()
    # single concat instead of growing a frame inside the loop
    return pd.concat(frames, ignore_index=True)
def reassign_airplane_emissions(df, year, NAICS_level_value):
    """
    Reassigns emissions from airplanes to NAICS associated with air
    transportation instead of the NAICS assigned to airports

    The passed dataframe is not modified; a new dataframe is returned.

    :param df: a dataframe of emissions and mapped facilities from
        stewicombo; must carry 'FacilityID', 'NAICS', 'FlowName' and
        'FlowAmount' columns
    :param year: year as str
    :param NAICS_level_value: desired NAICS aggregation level, using
        sector_level_key, should match target_sector_level
    :return: df with airplane emissions subtracted from the airport
        facilities and appended as separate rows under the air
        transportation NAICS
    """
    import stewi
    from stewicombo.overlaphandler import remove_default_flow_overlaps
    from stewicombo.globals import addChemicalMatches

    # subtract emissions for air transportation from airports in NEI
    airport_NAICS = '4881'
    air_transportation_SCC = '2275020000'
    air_transportation_naics = '481111'
    log.info('Reassigning emissions from air transportation from airports')

    # obtain and prepare SCC dataset
    df_airplanes = stewi.getInventory('NEI', year,
                                      stewiformat='flowbyprocess')
    # .copy() so the column added next is not written to a slice
    df_airplanes = df_airplanes[
        df_airplanes['Process'] == air_transportation_SCC].copy()
    df_airplanes['Source'] = 'NEI'
    df_airplanes = addChemicalMatches(df_airplanes)
    df_airplanes = remove_default_flow_overlaps(df_airplanes, SCC=True)
    df_airplanes = df_airplanes.drop(columns=['Process'])

    # one NAICS per facility, taken from the mapped emissions
    facility_mapping_air = df[['FacilityID', 'NAICS']].drop_duplicates(
        keep='first')
    df_airplanes = df_airplanes.merge(facility_mapping_air, how='left',
                                      on='FacilityID')
    df_airplanes['Year'] = year
    # keep only airplane emissions reported at airport facilities
    df_airplanes = df_airplanes[
        df_airplanes['NAICS'].str[0:len(airport_NAICS)] == airport_NAICS]

    # subtract airplane emissions from airport NAICS at individual facilities
    df_planeemissions = df_airplanes[
        ['FacilityID', 'FlowName', 'FlowAmount']].rename(
        columns={'FlowAmount': 'PlaneEmissions'})
    df = df.merge(df_planeemissions, how='left',
                  on=['FacilityID', 'FlowName'])
    # rows without airplane emissions have nothing to subtract
    df['PlaneEmissions'] = df['PlaneEmissions'].fillna(value=0)
    df['FlowAmount'] = df['FlowAmount'] - df['PlaneEmissions']
    df = df.drop(columns=['PlaneEmissions'])

    # add airplane emissions under air transport NAICS
    df_airplanes = df_airplanes.assign(
        NAICS_lvl=air_transportation_naics[0:NAICS_level_value])
    df = pd.concat([df, df_airplanes], ignore_index=True)

    return df
def test_all_inventory_generation():
    """Generate every configured inventory and fail if any comes back empty.

    Inventories that need a browser download are skipped when
    SKIP_BROWSER_DOWNLOAD is set. An inventory counts as failed when
    stewi.getInventory returns None or a zero-length result; all failures
    are reported together in the assertion message.
    """
    failed = []
    for inventory in config()['databases']:
        skip = SKIP_BROWSER_DOWNLOAD and inventory in requires_browser_download
        if not skip:
            df = stewi.getInventory(inventory, year)
            if df is None or len(df) == 0:
                failed.append(inventory)
    assert len(failed) == 0, f"Generation of {','.join(failed)} unsuccessful"
def reassign_airplane_emissions(df, year, NAICS_level_value):
    """
    Reassigns emissions from airplanes to the air transportation NAICS
    instead of the NAICS assigned to airports, using the NEI flowbySCC data.

    The passed dataframe is not modified; a new dataframe is returned.

    :param df: a dataframe of emissions with mapped facilities; must carry
        'FacilityID', 'NAICS', 'FlowName' and 'FlowAmount' columns
    :param year: NEI inventory year passed to stewi.getInventory
    :param NAICS_level_value: number of leading NAICS digits to keep for
        the 'NAICS_lvl' assigned to the airplane emissions
    :return: df with airplane emissions subtracted from the airport
        facilities and appended as separate rows under the air
        transportation NAICS
    """
    import stewi
    from stewicombo.overlaphandler import remove_default_flow_overlaps
    from stewicombo.globals import addChemicalMatches

    # subtract emissions for air transportation from airports in NEI
    airport_NAICS = '4881'
    air_transportation_SCC = '2275020000'
    air_transportation_naics = '481111'
    log.info('Reassigning emissions from air transportation from airports')

    # obtain and prepare SCC dataset
    df_airplanes = stewi.getInventory('NEI', year, stewiformat='flowbySCC')
    # .copy() so the column added next is not written to a slice
    df_airplanes = df_airplanes[
        df_airplanes['SCC'] == air_transportation_SCC].copy()
    df_airplanes['Source'] = 'NEI'
    df_airplanes = addChemicalMatches(df_airplanes)
    df_airplanes = remove_default_flow_overlaps(df_airplanes, SCC=True)
    df_airplanes = df_airplanes.drop(columns=['SCC'])

    # one NAICS per facility, taken from the mapped emissions
    facility_mapping_air = df[['FacilityID', 'NAICS']].drop_duplicates(
        keep='first')
    df_airplanes = df_airplanes.merge(facility_mapping_air, how='left',
                                      on='FacilityID')
    df_airplanes['Year'] = year
    # keep only airplane emissions reported at airport facilities
    df_airplanes = df_airplanes[
        df_airplanes['NAICS'].str[0:len(airport_NAICS)] == airport_NAICS]

    # subtract airplane emissions from airport NAICS at individual facilities
    df_planeemissions = df_airplanes[
        ['FacilityID', 'FlowName', 'FlowAmount']].rename(
        columns={'FlowAmount': 'PlaneEmissions'})
    df = df.merge(df_planeemissions, how='left',
                  on=['FacilityID', 'FlowName'])
    # rows without airplane emissions have nothing to subtract
    df['PlaneEmissions'] = df['PlaneEmissions'].fillna(value=0)
    df['FlowAmount'] = df['FlowAmount'] - df['PlaneEmissions']
    df = df.drop(columns=['PlaneEmissions'])

    # add airplane emissions under air transport NAICS
    df_airplanes = df_airplanes.assign(
        NAICS_lvl=air_transportation_naics[0:NAICS_level_value])
    df = pd.concat([df, df_airplanes], ignore_index=True)

    return df
def stewi_to_sector(inventory_dict, NAICS_level, geo_scale, compartments=None):
    """
    Returns emissions from stewi in fbs format, requires stewi >= 0.9.5
    :param inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
    :param NAICS_level: desired NAICS aggregation level, using
                sector_level_key, should match target_sector_level
    :param geo_scale: desired geographic aggregation level ('national',
                'state', 'county'), should match target_geoscale
    :param compartments: list of compartments to include (e.g., 'water',
                'air', 'soil'), use None to include all compartments
    :return: dataframe produced by prepare_stewi_fbs
    """
    import stewi

    NAICS_level_value = sector_level_key[NAICS_level]

    # load each inventory from stewi, filtered for LCI and US states only
    frames = []
    for database, year in inventory_dict.items():
        inv = stewi.getInventory(database, year, filter_for_LCI=True,
                                 US_States_Only=True)
        inv['Year'] = year
        inv['MetaSources'] = database
        frames.append(inv)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    df = pd.concat(frames) if frames else pd.DataFrame()

    if compartments is not None:
        df = df[df['Compartment'].isin(compartments)]

    facility_mapping = extract_facility_data(inventory_dict)
    # Convert NAICS to string (first to int to avoid decimals)
    facility_mapping['NAICS'] = \
        facility_mapping['NAICS'].astype(int).astype(str)
    facility_mapping = naics_expansion(facility_mapping)

    # merge dataframes to assign facility information based on facility IDs
    df = pd.merge(df, facility_mapping, how='left', on='FacilityID')

    # add levelized NAICS code prior to aggregation
    df['NAICS_lvl'] = df['NAICS'].str[0:NAICS_level_value]

    fbs = prepare_stewi_fbs(df, inventory_dict, NAICS_level, geo_scale)

    return fbs
def getInventoriesforFacilityMatches(inventory_dict, facilitymatches,
                                     filter_for_LCI, base_inventory=None):
    """Retrieve flowbyfacility inventories and attach facility matches.

    Each inventory in ``inventory_dict`` is loaded from stewi, left-merged
    with the rows of ``facilitymatches`` for that source, and tagged with its
    source and year. When ``base_inventory`` is given, every other inventory
    is limited to records whose FRS_ID occurs for the base inventory in
    ``facilitymatches``; when it is None no such filtering is applied.

    Relies on ``columns_to_keep`` being defined outside this function
    (presumably at module level - verify).

    :param inventory_dict: dictionary of inventory acronym -> year
    :param facilitymatches: dataframe with at least 'Source', 'FacilityID'
        and 'FRS_ID' columns
    :param filter_for_LCI: passed through to stewi.getInventory
    :param base_inventory: optional inventory acronym used as the reference
        set of FRS_IDs
    :return: dataframe of the stacked inventories with duplicate rows
        removed; empty if ``inventory_dict`` is empty
    """
    # None means "no base inventory, keep everything"; previously the list
    # was left undefined in that case and the loop raised NameError
    base_inventory_FRS_list = None
    if base_inventory is not None:
        base_inventory_FRS = facilitymatches[
            facilitymatches['Source'] == base_inventory]
        base_inventory_FRS_list = list(
            pd.unique(base_inventory_FRS['FRS_ID']))

    frames = []
    for k, year in inventory_dict.items():
        inventory = stewi.getInventory(k, year, 'flowbyfacility',
                                       filter_for_LCI)
        # Get facilities from that matching table to filter this with
        inventory_facilitymatches = facilitymatches[
            facilitymatches['Source'] == k]
        inventory["Source"] = k
        # Merge inventories based on facility matches
        inventory = pd.merge(inventory, inventory_facilitymatches,
                             on=['FacilityID', 'Source'], how='left')
        # If this isn't the base inventory, remove records not for the
        # FRS_IDs of interest. Compare by value: identity (`is`) on strings
        # depends on interning and is not reliable.
        if base_inventory_FRS_list is not None and k != base_inventory:
            inventory = inventory[
                inventory['FRS_ID'].isin(base_inventory_FRS_list)].copy()
        # Add metadata
        inventory["Year"] = year
        frames.append(inventory[columns_to_keep])

    if not frames:
        return pd.DataFrame()
    # single concat instead of growing a frame inside the loop, then drop
    # any fully duplicated records
    return pd.concat(frames).drop_duplicates()
import stewi
from electricitylci.model_config import model_specs

# Get inventory data to get net generation per facility; the eGRID year is
# taken from the electricitylci model configuration
egrid_flowbyfacility = stewi.getInventory("eGRID", model_specs.egrid_year)
# Peek at the first 50 rows (the value is only displayed when this runs
# interactively, e.g. as the last expression of a notebook cell)
egrid_flowbyfacility.head(50)
def stewicombo_to_sector(inventory_dict, NAICS_level, geo_scale, compartments):
    """
    Returns emissions from stewicombo in fbs format
    :param inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
    :param NAICS_level: desired NAICS aggregation level, using
                sector_level_key, should match target_sector_level
    :param geo_scale: desired geographic aggregation level ('national',
                'state', 'county'), should match target_geoscale
    :param compartments: list of compartments to include (e.g., 'water',
                'air', 'soil'), use None to include all compartments
    :return: dataframe in flow-by-sector format, sorted by the
                flow_by_sector_fields columns
    """
    from stewi.globals import output_dir as stw_output_dir
    from stewi.globals import weighted_average
    import stewi
    import stewicombo
    import facilitymatcher
    from stewicombo.overlaphandler import remove_default_flow_overlaps
    from stewicombo.globals import addChemicalMatches

    NAICS_level_value = sector_level_key[NAICS_level]

    # run stewicombo to combine inventories, filter for LCI, remove overlap
    df = stewicombo.combineFullInventories(inventory_dict,
                                           filter_for_LCI=True,
                                           remove_overlap=True,
                                           compartments=compartments)
    df = df.drop(columns=['SRS_CAS', 'SRS_ID', 'FacilityIDs_Combined'])

    # load facility data from stewi output directory, keeping only the
    # facility IDs, and geographic information
    inventory_list = list(inventory_dict.keys())
    facility_frames = []
    for inventory, year in inventory_dict.items():
        # define inventory name as inventory type + inventory year
        # (e.g., NEI_2017)
        inventory_name = f'{inventory}_{year}'
        facilities = pd.read_csv(
            stw_output_dir + 'facility/' + inventory_name + '.csv',
            usecols=['FacilityID', 'State', 'County'],
            dtype={'FacilityID': str})
        if facilities.duplicated(subset='FacilityID').any():
            log.info('Duplicate facilities in ' + inventory_name +
                     ' - keeping first listed')
            facilities = facilities.drop_duplicates(subset='FacilityID',
                                                    keep='first')
        facility_frames.append(facilities)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    facility_mapping = (pd.concat(facility_frames) if facility_frames
                        else pd.DataFrame())

    # Apply FIPS to facility locations
    facility_mapping = apply_county_FIPS(facility_mapping)

    # merge dataframes to assign facility information based on facility IDs
    df = pd.merge(df, facility_mapping, how='left', on='FacilityID')

    # Access NAICS From facility matcher and assign based on FRS_ID
    all_NAICS = facilitymatcher.get_FRS_NAICSInfo_for_facility_list(
        frs_id_list=None, inventories_of_interest_list=inventory_list)
    all_NAICS = all_NAICS.loc[all_NAICS['PRIMARY_INDICATOR'] == 'PRIMARY']
    all_NAICS = all_NAICS.drop(columns=['PRIMARY_INDICATOR'])
    all_NAICS = naics_expansion(all_NAICS)
    if all_NAICS.duplicated(subset=['FRS_ID', 'Source']).any():
        log.info('Duplicate primary NAICS reported - keeping first')
        all_NAICS = all_NAICS.drop_duplicates(subset=['FRS_ID', 'Source'],
                                              keep='first')
    df = pd.merge(df, all_NAICS, how='left', on=['FRS_ID', 'Source'])

    # add levelized NAICS code prior to aggregation
    df['NAICS_lvl'] = df['NAICS'].str[0:NAICS_level_value]

    # subtract emissions for air transportation from airports in NEI
    airport_NAICS = '4881'
    air_transportation_SCC = '2275020000'
    air_transportation_naics = '481111'
    if 'NEI' in inventory_dict:
        log.info('Reassigning emissions from air transportation from airports')

        # obtain and prepare SCC dataset
        df_airplanes = stewi.getInventory('NEI', inventory_dict['NEI'],
                                          stewiformat='flowbySCC')
        # .copy() so the column added next is not written to a slice
        df_airplanes = df_airplanes[
            df_airplanes['SCC'] == air_transportation_SCC].copy()
        df_airplanes['Source'] = 'NEI'
        df_airplanes = addChemicalMatches(df_airplanes)
        df_airplanes = remove_default_flow_overlaps(df_airplanes, SCC=True)
        df_airplanes = df_airplanes.drop(columns=['SCC'])

        # one NAICS per facility, taken from the combined inventory
        facility_mapping_air = df[['FacilityID', 'NAICS']].drop_duplicates(
            keep='first')
        df_airplanes = df_airplanes.merge(facility_mapping_air, how='left',
                                          on='FacilityID')
        df_airplanes['Year'] = inventory_dict['NEI']
        # keep only airplane emissions reported at airport facilities
        df_airplanes = df_airplanes[
            df_airplanes['NAICS'].str[0:len(airport_NAICS)] == airport_NAICS]

        # subtract airplane emissions from airport NAICS at individual
        # facilities
        df_planeemissions = df_airplanes[
            ['FacilityID', 'FlowName', 'FlowAmount']].rename(
            columns={'FlowAmount': 'PlaneEmissions'})
        df = df.merge(df_planeemissions, how='left',
                      on=['FacilityID', 'FlowName'])
        # rows without airplane emissions have nothing to subtract
        df['PlaneEmissions'] = df['PlaneEmissions'].fillna(value=0)
        df['FlowAmount'] = df['FlowAmount'] - df['PlaneEmissions']
        df = df.drop(columns=['PlaneEmissions'])

        # add airplane emissions under air transport NAICS
        df_airplanes = df_airplanes.assign(
            NAICS_lvl=air_transportation_naics[0:NAICS_level_value])
        df = pd.concat([df, df_airplanes], ignore_index=True)

    # update location to appropriate geoscale prior to aggregating
    df = df.dropna(subset=['Location'])
    df['Location'] = df['Location'].astype(str)
    df = update_geoscale(df, geo_scale)

    # assign grouping variables based on desired geographic aggregation level
    grouping_vars = ['NAICS_lvl', 'FlowName', 'Compartment', 'Location']

    # aggregate by NAICS code, FlowName, compartment, and geographic level
    fbs = df.groupby(grouping_vars).agg({'FlowAmount': 'sum',
                                         'Year': 'first',
                                         'Unit': 'first'})

    # add reliability score
    fbs['DataReliability'] = weighted_average(df, 'ReliabilityScore',
                                              'FlowAmount', grouping_vars)
    fbs = fbs.reset_index()

    # apply flow mapping
    fbs = map_elementary_flows(fbs, inventory_list)

    # rename columns to match flowbysector format
    fbs = fbs.rename(columns={"NAICS_lvl": "SectorProducedBy"})

    # add hardcoded data, depending on the source data, some of these
    # fields may need to change
    fbs['Class'] = 'Chemicals'
    fbs['SectorConsumedBy'] = 'None'
    fbs['SectorSourceName'] = 'NAICS_2012_Code'
    fbs['FlowType'] = 'ELEMENTARY_FLOW'

    # the location system is assigned from the year of the first inventory
    fbs = assign_fips_location_system(fbs, list(inventory_dict.values())[0])

    # add missing flow by sector fields
    fbs = add_missing_flow_by_fields(fbs, flow_by_sector_fields)

    # sort dataframe and reset index
    fbs = fbs.sort_values(
        list(flow_by_sector_fields.keys())).reset_index(drop=True)

    return fbs
def test_RCRAInfo_generation():
    """stewi must return something (not None) for RCRAInfo in ``year``."""
    inventory = stewi.getInventory('RCRAInfo', year)
    assert inventory is not None
def test_DMR_generation():
    """stewi must return something (not None) for DMR in ``year``."""
    inventory = stewi.getInventory('DMR', year)
    assert inventory is not None
def test_GHGRP_generation():
    """stewi must return something (not None) for GHGRP in ``year``."""
    inventory = stewi.getInventory('GHGRP', year)
    assert inventory is not None
def test_eGRID_generation():
    """stewi must return something (not None) for eGRID in ``year``."""
    inventory = stewi.getInventory('eGRID', year)
    assert inventory is not None
def test_TRI_generation():
    """stewi must return something (not None) for TRI in ``year``."""
    inventory = stewi.getInventory('TRI', year)
    assert inventory is not None
import stewi
from electricitylci.model_config import egrid_year

# Get inventory data to get net generation per facility; the eGRID year is
# taken from the electricitylci model configuration
egrid_flowbyfacility = stewi.getInventory("eGRID", egrid_year)
# Peek at the first 50 rows (the value is only displayed when this runs
# interactively, e.g. as the last expression of a notebook cell)
egrid_flowbyfacility.head(50)
import stewi
import stewicombo

# Example walk-through: inspect one inventory, then combine several
# inventories around the facilities of that inventory.
stewi.seeAvailableInventoriesandYears()

inventory='TRI'
year = '2016'

# Get one of these inventories
tri2016 = stewi.getInventory(inventory,year)
# See first 50
tri2016.head(50)

# Look at all the unique flows in this inventory
tri2016flows = stewi.getInventoryFlows(inventory,year)
# See first 50
tri2016flows.head(50)

# Look at all the unique facilities in this inventory
tri2016facilities = stewi.getInventoryFacilities(inventory,year)
# See first 50
tri2016facilities.head(50)

# Now combine with records from other inventories based on facilities.
# Enter the inventories that you would like to combine in the
# "Inventory_acronym":"year" format enclosed in "{}"
inventories_to_get = {"TRI":"2016","NEI":"2016","RCRAInfo":"2015","eGRID":"2016"}
# The inventory inspected above is used as the base inventory
base_inventory = inventory
combinedinventories = stewicombo.combineInventoriesforFacilitiesinOneInventory(base_inventory, inventories_to_get)
# See first 50
combinedinventories.head(50)