Пример #1
0
def get_reported_releases(CASlist):
    """
    Retrieve release records from stewi for the chemicals of interest.

    For every inventory/year pair in the module-level
    ``inventories_of_interest`` mapping, the inventory is filtered to the flow
    names listed for that inventory in ``chems_stewi_matches`` (compared in
    lower case), tagged with its source and year, and joined with the
    inventory's facility table on ``FacilityID``.

    :param CASlist: list, a list of CAS in standard CAS format.
        NOTE(review): not referenced in the body; the chemical filter comes
        from ``chems_stewi_matches`` - confirm whether this is intended.
    :return: a pandas DataFrame with records for each release with context
        and facility information (empty when no inventories are configured)
    """
    releases_by_inventory = []
    for k, v in inventories_of_interest.items():
        inv = stewi.getInventory(k, v)
        # Filter by chems of interest, matching flow names in lower case
        inv['FlowName'] = inv['FlowName'].apply(lambda x: x.lower())
        inv_fl_of_interest = [
            x.lower() for x in chems_stewi_matches[k].values if x]
        # Copy the filtered rows so the columns added below are written to an
        # independent frame rather than to a slice of the full inventory
        inv = inv[inv["FlowName"].isin(inv_fl_of_interest)].copy()
        inv["Source"] = k
        inv["Year"] = v

        # Join with facility data to get location, keeping only facilities
        # that appear in the filtered releases
        fac = stewi.getInventoryFacilities(k, v)
        uniq_facs = pd.unique(inv['FacilityID'])
        fac = fac[fac["FacilityID"].isin(uniq_facs)]
        releases_by_inventory.append(pd.merge(inv, fac, on=['FacilityID']))

    if not releases_by_inventory:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame each pass
    return pd.concat(releases_by_inventory, sort=False)
Пример #2
0
def stewi_to_sector(yaml_load):
    """
    Returns emissions from stewi in fbs format, requires stewi >= 0.9.5
    :param yaml_load: which may contain the following elements:
        inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
        NAICS_level: desired NAICS aggregation level, using sector_level_key,
                should match target_sector_level
        geo_scale: desired geographic aggregation level ('national', 'state',
                'county'), should match target_geoscale
        compartments: list of compartments to include (e.g., 'water', 'air',
                'soil'), use None (or omit) to include all compartments
        functions: list of functions (str) to call for additional processing;
                each name is looked up on this module and called with the fbs
    :return: df, FBS format
    """
    import stewi

    # functions are optional in the FBS method yaml
    functions = yaml_load.get('functions') or []
    compartments = yaml_load.get('compartments')

    NAICS_level_value = sector_level_key[yaml_load['NAICS_level']]
    # run stewi to generate inventory and filter for LCI
    inventories = []
    for database, year in yaml_load['inventory_dict'].items():
        inv = stewi.getInventory(database,
                                 year,
                                 filter_for_LCI=True,
                                 US_States_Only=True)
        inv['Year'] = year
        inv['MetaSources'] = database
        inventories.append(inv)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    df = pd.concat(inventories) if inventories else pd.DataFrame()
    if compartments is not None:
        df = df[df['Compartment'].isin(compartments)]
    facility_mapping = extract_facility_data(yaml_load['inventory_dict'])
    # Convert NAICS to string (first to int to avoid decimals)
    facility_mapping['NAICS'] = \
        facility_mapping['NAICS'].astype(int).astype(str)
    facility_mapping = naics_expansion(facility_mapping)

    # merge dataframes to assign facility information based on facility IDs
    df = pd.merge(df, facility_mapping, how='left', on='FacilityID')

    # add levelized NAICS code prior to aggregation
    df['NAICS_lvl'] = df['NAICS'].str[0:NAICS_level_value]

    fbs = prepare_stewi_fbs(df, yaml_load['inventory_dict'],
                            yaml_load['NAICS_level'], yaml_load['geo_scale'])

    # apply any additional processing functions named in the method yaml
    for function in functions:
        fbs = getattr(sys.modules[__name__], function)(fbs)

    return fbs
Пример #3
0
def getInventoriesforFacilityMatches(inventory_dict, facilitymatches,
                                     filter_for_LCI, base_inventory=None):
    """Retrieve stored flowbyfacility datasets based on passed dictionary.

    Each inventory is tagged with its source and year and merged with a
    single FRS_ID per facility. When a base inventory is given, records from
    every other inventory are kept only if their FRS_ID also occurs in the
    base inventory; records of the base inventory itself are all returned.
    Inventories that stewi returns as None are skipped.

    :param inventory_dict: dictionary of inventory acronyms and years
    :param facilitymatches: dataframe matching FacilityMatches format
    :param filter_for_LCI: if truthy, 'filter_for_LCI' is passed to stewi as
        the filter list
    :param base_inventory: acronym of the base inventory, or None to skip
        the FRS_ID filtering
    :return: DataFrame of the concatenated inventories limited to the
        flowbyfacility fields plus 'Source', 'Year' and 'FRS_ID' (empty if
        no inventory could be retrieved)
    """
    base_FRS_list = None
    if base_inventory is not None:
        # Identify the FRS in the base inventory and keep only those
        base_FRS_list = list(pd.unique(facilitymatches[
            facilitymatches['Source'] == base_inventory]['FRS_ID']))

    columns_to_keep = StewiFormat.FLOWBYFACILITY.fields() + ['Source',
                                                             'Year', 'FRS_ID']
    filters = ['filter_for_LCI'] if filter_for_LCI else None
    inventory_frames = []
    for k, inventory_year in inventory_dict.items():
        inventory = stewi.getInventory(k, inventory_year,
                                       'flowbyfacility',
                                       filters)
        if inventory is None:
            continue
        inventory["Source"] = k
        # Merge in FRS_ID, ensure only single FRS added per facility ID, keeping
        # first listed
        facmatches = facilitymatches[facilitymatches['Source'] == k]
        facmatches = facmatches.drop_duplicates(subset=['FacilityID', 'Source'],
                                                keep='first')
        inventory = pd.merge(inventory,
                             facmatches,
                             on=['FacilityID', 'Source'], how='left')
        if inventory['FRS_ID'].isna().any():
            log.debug('Some facilities missing FRS_ID')

        # If this isn't the base inventory, filter records for facilities not
        # found in the base inventory. Compare by value: identity (`is`) of
        # equal strings is not guaranteed.
        if base_inventory is not None and k != base_inventory:
            inventory = inventory[inventory['FRS_ID'].isin(
                base_FRS_list)]

        # Add metadata
        inventory["Year"] = inventory_year
        cols_to_keep = [c for c in columns_to_keep if c in inventory]
        inventory_frames.append(inventory[cols_to_keep])

    if not inventory_frames:
        return pd.DataFrame()
    return pd.concat(inventory_frames, ignore_index=True)
Пример #4
0
def reassign_airplane_emissions(df, year, NAICS_level_value):
    """
    Reassigns emissions from airplanes to NAICS associated with air
    transportation instead of the NAICS assigned to airports

    The NEI flowbyprocess inventory is filtered to the air transportation
    process code, matched to facilities in df whose NAICS starts with the
    airport code, subtracted from those facilities' FlowAmount and then
    appended as new rows carrying the air transportation NAICS.

    :param df: a dataframe of emissions and mapped faciliites from stewicombo
    :param year: year as str
    :param NAICS_level_value: desired NAICS aggregation level,
        using sector_level_key, should match target_sector_level
    :return: df
    """
    import stewi
    from stewicombo.overlaphandler import remove_default_flow_overlaps
    from stewicombo.globals import addChemicalMatches

    # subtract emissions for air transportation from airports in NEI
    airport_NAICS = '4881'
    air_transportation_SCC = '2275020000'
    air_transportation_naics = '481111'
    log.info('Reassigning emissions from air transportation from airports')

    # obtain and prepare SCC dataset; copy the filtered rows so the columns
    # added below are not written to a slice of the full inventory
    df_airplanes = stewi.getInventory('NEI', year, stewiformat='flowbyprocess')
    df_airplanes = df_airplanes[
        df_airplanes['Process'] == air_transportation_SCC].copy()
    df_airplanes['Source'] = 'NEI'
    df_airplanes = addChemicalMatches(df_airplanes)
    df_airplanes = remove_default_flow_overlaps(df_airplanes, SCC=True)
    df_airplanes = df_airplanes.drop(columns=['Process'])

    # one NAICS per facility, taken from the combined inventory
    facility_mapping_air = df[['FacilityID', 'NAICS']].drop_duplicates(
        keep='first')
    df_airplanes = df_airplanes.merge(facility_mapping_air,
                                      how='left',
                                      on='FacilityID')

    df_airplanes['Year'] = year
    # keep only airplane emissions reported at airport facilities
    is_airport = (df_airplanes['NAICS'].str[0:len(airport_NAICS)]
                  == airport_NAICS)
    df_airplanes = df_airplanes[is_airport].copy()

    # subtract airplane emissions from airport NAICS at individual facilities
    df_planeemissions = df_airplanes[
        ['FacilityID', 'FlowName', 'FlowAmount']].rename(
            columns={'FlowAmount': 'PlaneEmissions'})
    df = df.merge(df_planeemissions, how='left', on=['FacilityID', 'FlowName'])
    # rows without a matching airplane record have nothing to subtract
    df['PlaneEmissions'] = df['PlaneEmissions'].fillna(value=0)
    df['FlowAmount'] = df['FlowAmount'] - df['PlaneEmissions']
    df = df.drop(columns=['PlaneEmissions'])

    # add airplane emissions under air transport NAICS
    df_airplanes['NAICS_lvl'] = air_transportation_naics[0:NAICS_level_value]
    df = pd.concat([df, df_airplanes], ignore_index=True)

    return df
def test_all_inventory_generation():
    """Generate every configured inventory and fail if any comes back empty.

    Iterates over the 'databases' entry of ``config()``. Inventories listed
    in ``requires_browser_download`` are skipped when
    ``SKIP_BROWSER_DOWNLOAD`` is set. An inventory counts as failed when
    stewi returns None or a result of length zero; all failures are reported
    together in the assertion message.
    """
    failed = []
    for name in config()['databases']:
        if SKIP_BROWSER_DOWNLOAD and name in requires_browser_download:
            continue
        result = stewi.getInventory(name, year)
        if result is None or len(result) == 0:
            failed.append(name)
    assert not failed, f"Generation of {','.join(failed)} unsuccessful"
Пример #6
0
def reassign_airplane_emissions(df, year, NAICS_level_value):
    """
    Reassigns emissions from airplanes to the air transportation NAICS
    instead of the NAICS assigned to airports

    The NEI flowbySCC inventory is filtered to the air transportation SCC,
    matched to facilities in df whose NAICS starts with the airport code,
    subtracted from those facilities' FlowAmount and then appended as new
    rows carrying the air transportation NAICS.

    :param df: dataframe of emissions with 'FacilityID', 'NAICS', 'FlowName'
        and 'FlowAmount' columns
    :param year: NEI year passed to stewi.getInventory
    :param NAICS_level_value: number of leading NAICS digits used for the
        'NAICS_lvl' value of the added airplane rows
    :return: df with airport emissions reduced and airplane rows appended
    """
    import stewi
    from stewicombo.overlaphandler import remove_default_flow_overlaps
    from stewicombo.globals import addChemicalMatches

    # subtract emissions for air transportation from airports in NEI
    airport_NAICS = '4881'
    air_transportation_SCC = '2275020000'
    air_transportation_naics = '481111'
    log.info('Reassigning emissions from air transportation from airports')

    # obtain and prepare SCC dataset; copy the filtered rows so the columns
    # added below are not written to a slice of the full inventory
    df_airplanes = stewi.getInventory('NEI', year, stewiformat='flowbySCC')
    df_airplanes = df_airplanes[
        df_airplanes['SCC'] == air_transportation_SCC].copy()
    df_airplanes['Source'] = 'NEI'
    df_airplanes = addChemicalMatches(df_airplanes)
    df_airplanes = remove_default_flow_overlaps(df_airplanes, SCC=True)
    df_airplanes = df_airplanes.drop(columns=['SCC'])

    # one NAICS per facility, taken from the combined inventory
    facility_mapping_air = df[['FacilityID', 'NAICS']].drop_duplicates(
        keep='first')
    df_airplanes = df_airplanes.merge(facility_mapping_air,
                                      how='left',
                                      on='FacilityID')

    df_airplanes['Year'] = year
    # keep only airplane emissions reported at airport facilities
    is_airport = (df_airplanes['NAICS'].str[0:len(airport_NAICS)]
                  == airport_NAICS)
    df_airplanes = df_airplanes[is_airport].copy()

    # subtract airplane emissions from airport NAICS at individual facilities
    df_planeemissions = df_airplanes[
        ['FacilityID', 'FlowName', 'FlowAmount']].rename(
            columns={'FlowAmount': 'PlaneEmissions'})
    df = df.merge(df_planeemissions, how='left', on=['FacilityID', 'FlowName'])
    # rows without a matching airplane record have nothing to subtract
    df['PlaneEmissions'] = df['PlaneEmissions'].fillna(value=0)
    df['FlowAmount'] = df['FlowAmount'] - df['PlaneEmissions']
    df = df.drop(columns=['PlaneEmissions'])

    # add airplane emissions under air transport NAICS
    df_airplanes['NAICS_lvl'] = air_transportation_naics[0:NAICS_level_value]
    df = pd.concat([df, df_airplanes], ignore_index=True)

    return df
Пример #7
0
def stewi_to_sector(inventory_dict, NAICS_level, geo_scale, compartments=None):
    """
    Returns emissions from stewi in fbs format, requires stewi >= 0.9.5
    :param inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
    :param NAICS_level: desired NAICS aggregation level, using sector_level_key,
                should match target_sector_level
    :param geo_scale: desired geographic aggregation level ('national', 'state',
                'county'), should match target_geoscale
    :param compartments: list of compartments to include (e.g., 'water', 'air',
                'soil'), use None to include all compartments
    :return: df, the result of prepare_stewi_fbs for the merged inventories
    """
    import stewi

    NAICS_level_value = sector_level_key[NAICS_level]
    # run stewi to generate each inventory, filtered for LCI
    inventories = []
    for database, year in inventory_dict.items():
        inv = stewi.getInventory(database,
                                 year,
                                 filter_for_LCI=True,
                                 US_States_Only=True)
        inv['Year'] = year
        inv['MetaSources'] = database
        inventories.append(inv)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    df = pd.concat(inventories) if inventories else pd.DataFrame()
    if compartments is not None:
        df = df[df['Compartment'].isin(compartments)]
    facility_mapping = extract_facility_data(inventory_dict)
    # Convert NAICS to string (first to int to avoid decimals)
    facility_mapping['NAICS'] = facility_mapping['NAICS'].astype(int).astype(
        str)
    facility_mapping = naics_expansion(facility_mapping)

    # merge dataframes to assign facility information based on facility IDs
    df = pd.merge(df, facility_mapping, how='left', on='FacilityID')

    # add levelized NAICS code prior to aggregation
    df['NAICS_lvl'] = df['NAICS'].str[0:NAICS_level_value]

    fbs = prepare_stewi_fbs(df, inventory_dict, NAICS_level, geo_scale)

    return fbs
Пример #8
0
def getInventoriesforFacilityMatches(inventory_dict,
                                     facilitymatches,
                                     filter_for_LCI,
                                     base_inventory=None):
    """Retrieve flowbyfacility inventories and attach facility matches.

    Each inventory in inventory_dict is loaded from stewi, tagged with its
    source and year and left-merged with the rows of facilitymatches for
    that source. When a base inventory is given, records from every other
    inventory are kept only if their FRS_ID also occurs in the base
    inventory. Inventories that stewi returns as None are skipped.

    :param inventory_dict: dictionary of inventory acronyms and years
    :param facilitymatches: dataframe with 'Source', 'FacilityID' and
        'FRS_ID' columns
    :param filter_for_LCI: passed through to stewi.getInventory
    :param base_inventory: acronym of the base inventory, or None to skip
        the FRS_ID filtering
    :return: DataFrame of the concatenated inventories, limited to the
        module-level columns_to_keep, with duplicate rows dropped (empty if
        no inventory could be retrieved)
    """
    base_inventory_FRS_list = None
    if base_inventory is not None:
        base_inventory_FRS = facilitymatches[facilitymatches['Source'] ==
                                             base_inventory]
        base_inventory_FRS_list = list(pd.unique(base_inventory_FRS['FRS_ID']))

    inventory_frames = []
    for k, inventory_year in inventory_dict.items():
        inventory = stewi.getInventory(k, inventory_year, 'flowbyfacility',
                                       filter_for_LCI)
        if inventory is None:
            continue
        # Get facilities from the matching table to merge into this inventory
        inventory_facilitymatches = facilitymatches[facilitymatches['Source']
                                                    == k]
        inventory["Source"] = k

        # Merge inventories based on facility matches
        inventory = pd.merge(inventory,
                             inventory_facilitymatches,
                             on=['FacilityID', 'Source'],
                             how='left')

        # If this isn't the base inventory, remove records not for the FRS_IDs
        # of interest. Only applies when a base inventory was given (its FRS
        # list is otherwise undefined); compare by value, since identity
        # (`is`) of equal strings is not guaranteed.
        if base_inventory is not None and k != base_inventory:
            inventory = inventory[inventory['FRS_ID'].isin(
                base_inventory_FRS_list)]

        # Add metadata
        inventory["Year"] = inventory_year
        inventory_frames.append(inventory[columns_to_keep])

    if not inventory_frames:
        return pd.DataFrame()
    # drop duplicates - not sure why there are duplicates - none found in
    # recent attempts
    return pd.concat(inventory_frames).drop_duplicates()
import stewi
from electricitylci.model_config import model_specs

# Load the eGRID inventory from stewi for the year configured in
# model_specs.egrid_year; used to get net generation per facility.
egrid_flowbyfacility = stewi.getInventory("eGRID", model_specs.egrid_year)

# Peek at the first 50 rows (only displayed in an interactive session; as a
# plain script the returned frame is discarded)
egrid_flowbyfacility.head(50)
Пример #10
0
def stewicombo_to_sector(inventory_dict, NAICS_level, geo_scale, compartments):
    """
    Returns emissions from stewicombo in fbs format

    Steps: combine the inventories with stewicombo, attach facility location
    from the stewi facility files, attach primary NAICS from facilitymatcher,
    reassign NEI airplane emissions from airports to air transportation,
    aggregate by NAICS level / flow / compartment / location and fill in the
    flow-by-sector fields.

    :param inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
    :param NAICS_level: desired NAICS aggregation level, using sector_level_key,
                should match target_sector_level
    :param geo_scale: desired geographic aggregation level ('national', 'state',
                'county'), should match target_geoscale
    :param compartments: list of compartments to include (e.g., 'water', 'air',
                'soil'), use None to include all compartments
    :return: df with the flow_by_sector_fields columns, sorted by those fields
    """

    from stewi.globals import output_dir as stw_output_dir
    from stewi.globals import weighted_average
    import stewi
    import stewicombo
    import facilitymatcher
    from stewicombo.overlaphandler import remove_default_flow_overlaps
    from stewicombo.globals import addChemicalMatches

    NAICS_level_value = sector_level_key[NAICS_level]
    # run stewicombo to combine inventories, filter for LCI, remove overlap
    df = stewicombo.combineFullInventories(inventory_dict,
                                           filter_for_LCI=True,
                                           remove_overlap=True,
                                           compartments=compartments)
    df.drop(columns=['SRS_CAS', 'SRS_ID', 'FacilityIDs_Combined'],
            inplace=True)

    # load facility data from stewi output directory, keeping only the
    # facility IDs, and geographic information
    inventory_list = list(inventory_dict.keys())
    facility_frames = []
    for inventory, inventory_year in inventory_dict.items():
        # define inventory name as inventory type + inventory year
        # (e.g., NEI_2017)
        inventory_name = f'{inventory}_{inventory_year}'
        facilities = pd.read_csv(stw_output_dir + 'facility/' +
                                 inventory_name + '.csv',
                                 usecols=['FacilityID', 'State', 'County'],
                                 dtype={'FacilityID': str})
        if facilities.duplicated(subset='FacilityID', keep=False).any():
            log.info('Duplicate facilities in ' + inventory_name +
                     ' - keeping first listed')
            facilities = facilities.drop_duplicates(subset='FacilityID',
                                                    keep='first')
        facility_frames.append(facilities)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    facility_mapping = (pd.concat(facility_frames) if facility_frames
                        else pd.DataFrame())

    # Apply FIPS to facility locations
    facility_mapping = apply_county_FIPS(facility_mapping)

    # merge dataframes to assign facility information based on facility IDs
    df = pd.merge(df, facility_mapping, how='left', on='FacilityID')

    # Access NAICS From facility matcher and assign based on FRS_ID
    all_NAICS = facilitymatcher.get_FRS_NAICSInfo_for_facility_list(
        frs_id_list=None, inventories_of_interest_list=inventory_list)
    all_NAICS = all_NAICS.loc[
        all_NAICS['PRIMARY_INDICATOR'] == 'PRIMARY'].drop(
            columns=['PRIMARY_INDICATOR'])
    all_NAICS = naics_expansion(all_NAICS)
    if all_NAICS.duplicated(subset=['FRS_ID', 'Source'], keep=False).any():
        log.info('Duplicate primary NAICS reported - keeping first')
        all_NAICS = all_NAICS.drop_duplicates(subset=['FRS_ID', 'Source'],
                                              keep='first')
    df = pd.merge(df, all_NAICS, how='left', on=['FRS_ID', 'Source'])

    # add levelized NAICS code prior to aggregation
    df['NAICS_lvl'] = df['NAICS'].str[0:NAICS_level_value]

    # subtract emissions for air transportation from airports in NEI
    airport_NAICS = '4881'
    air_transportation_SCC = '2275020000'
    air_transportation_naics = '481111'
    if 'NEI' in inventory_list:
        log.info('Reassigning emissions from air transportation from airports')

        # obtain and prepare SCC dataset; copy the filtered rows so the
        # columns added below are not written to a slice of the inventory
        df_airplanes = stewi.getInventory('NEI',
                                          inventory_dict['NEI'],
                                          stewiformat='flowbySCC')
        df_airplanes = df_airplanes[
            df_airplanes['SCC'] == air_transportation_SCC].copy()
        df_airplanes['Source'] = 'NEI'
        df_airplanes = addChemicalMatches(df_airplanes)
        df_airplanes = remove_default_flow_overlaps(df_airplanes, SCC=True)
        df_airplanes = df_airplanes.drop(columns=['SCC'])

        # one NAICS per facility, taken from the combined inventory
        facility_mapping_air = df[['FacilityID', 'NAICS']].drop_duplicates(
            keep='first')
        df_airplanes = df_airplanes.merge(facility_mapping_air,
                                          how='left',
                                          on='FacilityID')

        df_airplanes['Year'] = inventory_dict['NEI']
        # keep only airplane emissions reported at airport facilities
        is_airport = (df_airplanes['NAICS'].str[0:len(airport_NAICS)]
                      == airport_NAICS)
        df_airplanes = df_airplanes[is_airport].copy()

        # subtract airplane emissions from airport NAICS at individual
        # facilities
        df_planeemissions = df_airplanes[
            ['FacilityID', 'FlowName', 'FlowAmount']].rename(
                columns={'FlowAmount': 'PlaneEmissions'})
        df = df.merge(df_planeemissions,
                      how='left',
                      on=['FacilityID', 'FlowName'])
        # rows without a matching airplane record have nothing to subtract
        df['PlaneEmissions'] = df['PlaneEmissions'].fillna(value=0)
        df['FlowAmount'] = df['FlowAmount'] - df['PlaneEmissions']
        df = df.drop(columns=['PlaneEmissions'])

        # add airplane emissions under air transport NAICS
        df_airplanes['NAICS_lvl'] = air_transportation_naics[
            0:NAICS_level_value]
        df = pd.concat([df, df_airplanes], ignore_index=True)

    # update location to appropriate geoscale prior to aggregating
    df.dropna(subset=['Location'], inplace=True)
    df['Location'] = df['Location'].astype(str)
    df = update_geoscale(df, geo_scale)

    # assign grouping variables based on desired geographic aggregation level
    grouping_vars = ['NAICS_lvl', 'FlowName', 'Compartment', 'Location']

    # aggregate by NAICS code, FlowName, compartment, and geographic level
    fbs = df.groupby(grouping_vars).agg({
        'FlowAmount': 'sum',
        'Year': 'first',
        'Unit': 'first'
    })

    # add reliability score
    fbs['DataReliability'] = weighted_average(df, 'ReliabilityScore',
                                              'FlowAmount', grouping_vars)
    fbs.reset_index(inplace=True)

    # apply flow mapping
    fbs = map_elementary_flows(fbs, inventory_list)

    # rename columns to match flowbysector format
    fbs = fbs.rename(columns={"NAICS_lvl": "SectorProducedBy"})

    # add hardcoded data, depending on the source data, some of these fields
    # may need to change
    fbs['Class'] = 'Chemicals'
    fbs['SectorConsumedBy'] = 'None'
    fbs['SectorSourceName'] = 'NAICS_2012_Code'
    fbs['FlowType'] = 'ELEMENTARY_FLOW'

    # the year of the first listed inventory selects the FIPS location system
    fbs = assign_fips_location_system(fbs, list(inventory_dict.values())[0])

    # add missing flow by sector fields
    fbs = add_missing_flow_by_fields(fbs, flow_by_sector_fields)

    # sort dataframe and reset index
    fbs = fbs.sort_values(list(
        flow_by_sector_fields.keys())).reset_index(drop=True)

    return fbs
def test_RCRAInfo_generation():
    """Check that stewi returns an RCRAInfo inventory for the module-level year."""
    rcrainfo_inventory = stewi.getInventory('RCRAInfo', year)
    assert rcrainfo_inventory is not None
def test_DMR_generation():
    """Check that stewi returns a DMR inventory for the module-level year."""
    dmr_inventory = stewi.getInventory('DMR', year)
    assert dmr_inventory is not None
def test_GHGRP_generation():
    """Check that stewi returns a GHGRP inventory for the module-level year."""
    ghgrp_inventory = stewi.getInventory('GHGRP', year)
    assert ghgrp_inventory is not None
def test_eGRID_generation():
    """Check that stewi returns an eGRID inventory for the module-level year."""
    egrid_inventory = stewi.getInventory('eGRID', year)
    assert egrid_inventory is not None
def test_TRI_generation():
    """Check that stewi returns a TRI inventory for the module-level year."""
    tri_inventory = stewi.getInventory('TRI', year)
    assert tri_inventory is not None
Пример #16
0
import stewi
from electricitylci.model_config import egrid_year

# Load the eGRID inventory from stewi for the configured egrid_year; used to
# get net generation per facility.
egrid_flowbyfacility = stewi.getInventory("eGRID", egrid_year)

# Peek at the first 50 rows (only displayed in an interactive session; as a
# plain script the returned frame is discarded)
egrid_flowbyfacility.head(50)
Пример #17
0
import stewi
import stewicombo

# Show which inventories and years stewi offers (per the function name)
stewi.seeAvailableInventoriesandYears()

# Inventory acronym and year used for the single-inventory calls below
inventory='TRI'
year = '2016'

# Get the selected inventory
tri2016 = stewi.getInventory(inventory,year)
# See the first 50 rows
tri2016.head(50)

# Look at all the unique flows in this inventory
tri2016flows = stewi.getInventoryFlows(inventory,year)
# See the first 50 rows
tri2016flows.head(50)

# Look at all the unique facilities in this inventory
tri2016facilities = stewi.getInventoryFacilities(inventory,year)
# See the first 50 rows
tri2016facilities.head(50)

# Now combine this inventory with other inventories based on facilities.
# Enter the inventories to combine as a dictionary in
# {"inventory_acronym": "year"} format.
inventories_to_get = {"TRI":"2016","NEI":"2016","RCRAInfo":"2015","eGRID":"2016"}

# The inventory loaded above serves as the base inventory for the combination
base_inventory = inventory
combinedinventories = stewicombo.combineInventoriesforFacilitiesinOneInventory(base_inventory, inventories_to_get)
# See the first 50 rows
combinedinventories.head(50)