示例#1
0
def stewicombo_to_sector(inventory_dict, NAICS_level, geo_scale, compartments):
    """
    Build emissions from stewicombo in fbs format, requires stewi >= 0.9.5

    The combined inventory is joined to facility data and to the NAICS codes
    from facility matcher, a truncated NAICS column is added, and the result
    is handed to prepare_stewi_fbs.
    :param inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
    :param NAICS_level: desired NAICS aggregation level, using
                sector_level_key, should match target_sector_level
    :param geo_scale: desired geographic aggregation level ('national',
                'state', 'county'), should match target_geoscale
    :param compartments: list of compartments to include (e.g., 'water',
                'air', 'soil'), use None to include all compartments
    :return: the output of prepare_stewi_fbs
    """

    import stewicombo
    from flowsa.EPA_NEI import drop_GHGs

    # number of NAICS digits kept for the requested sector level
    sector_length = sector_level_key[NAICS_level]

    # combine inventories, filter for LCI, remove overlap
    combined = stewicombo.combineFullInventories(
        inventory_dict,
        filter_for_LCI=True,
        remove_overlap=True,
        compartments=compartments,
    )
    unused_columns = ['SRS_CAS', 'SRS_ID', 'FacilityIDs_Combined']
    combined.drop(columns=unused_columns, inplace=True)

    inventories = list(inventory_dict.keys())
    includes_nei = 'NEI' in inventories

    # GHGs reported in NEI are dropped unless GHGRP is also being combined
    if includes_nei and 'GHGRP' not in inventories:
        combined = drop_GHGs(combined)

    # NAICS comes from facility matcher below, so discard the facility-file
    # NAICS before attaching facility information by facility ID
    facility_info = extract_facility_data(inventory_dict)
    facility_info.drop(columns=['NAICS'], inplace=True)
    combined = combined.merge(facility_info, how='left', on='FacilityID')

    naics_lookup = obtain_NAICS_from_facility_matcher(inventories)
    combined = combined.merge(naics_lookup, how='left',
                              on=['FRS_ID', 'Source'])

    # levelized NAICS code, needed before aggregation
    combined['NAICS_lvl'] = combined['NAICS'].str[:sector_length]

    if includes_nei:
        combined = reassign_airplane_emissions(
            combined, inventory_dict['NEI'], sector_length)

    combined['MetaSources'] = combined['Source']

    return prepare_stewi_fbs(combined, inventory_dict, NAICS_level, geo_scale)
示例#2
0
def stewicombo_to_sector(yaml_load):
    """
    Returns emissions from stewicombo in fbs format, requires stewi >= 0.9.5
    :param yaml_load: which may contain the following elements:
        local_inventory_name: (optional) a string naming the file from which to
                source a pregenerated stewicombo file stored locally (e.g.,
                'CAP_HAP_national_2017_v0.9.7_5cf36c0.parquet' or
                'CAP_HAP_national_2017')
        inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
        NAICS_level: desired NAICS aggregation level, using sector_level_key,
                should match target_sector_level
        geo_scale: desired geographic aggregation level ('national', 'state',
                'county'), should match target_geoscale
        compartments: list of compartments to include (e.g., 'water', 'air',
                'soil'), use None to include all compartments
        functions: (optional) list of functions (str) to call for additional
                processing. 'drop_GHGs' and 'reassign_airplane_emissions' are
                applied at fixed points in the pipeline; any other name is
                looked up in this module and applied to the final fbs, in
                list order. The list in yaml_load is not modified.
    :return: df, FBS format, or None when stewicombo returns no inventory
    """

    import stewicombo
    from flowsa.data_source_scripts.EPA_NEI import drop_GHGs

    # Work on a private copy of the requested functions: entries are removed
    # below as they are consumed, and removing them from the caller's list
    # would make a second call with the same config skip those steps.
    # `or []` also covers a yaml key that is present but empty.
    functions = list(yaml_load.get('functions') or [])

    inventory_name = yaml_load.get('local_inventory_name')

    NAICS_level_value = sector_level_key[yaml_load['NAICS_level']]

    df = None
    if inventory_name is not None:
        # try the pregenerated local file first
        df = stewicombo.getInventory(inventory_name, True)
    if df is None:
        # run stewicombo to combine inventories, filter for LCI, remove overlap
        log.info('generating inventory in stewicombo')
        df = stewicombo.combineFullInventories(
            yaml_load['inventory_dict'],
            filter_for_LCI=True,
            remove_overlap=True,
            compartments=yaml_load['compartments'])

    if df is None:
        # Inventories not found for stewicombo; nothing to convert
        return None

    df.drop(columns=['SRS_CAS', 'SRS_ID', 'FacilityIDs_Combined'],
            inplace=True)

    inventory_list = list(yaml_load['inventory_dict'].keys())

    if 'drop_GHGs' in functions:
        df = drop_GHGs(df)
        functions.remove('drop_GHGs')
    facility_mapping = extract_facility_data(yaml_load['inventory_dict'])
    # use NAICS from facility matcher so drop them here
    facility_mapping.drop(columns=['NAICS'], inplace=True)
    # merge dataframes to assign facility information based on facility IDs
    df = pd.merge(df, facility_mapping, how='left', on='FacilityID')

    all_NAICS = obtain_NAICS_from_facility_matcher(inventory_list)
    df = pd.merge(df, all_NAICS, how='left', on=['FRS_ID', 'Source'])

    # add levelized NAICS code prior to aggregation
    df['NAICS_lvl'] = df['NAICS'].str[0:NAICS_level_value]

    if 'reassign_airplane_emissions' in functions:
        df = reassign_airplane_emissions(df,
                                         yaml_load['inventory_dict']['NEI'],
                                         NAICS_level_value)
        functions.remove('reassign_airplane_emissions')

    df['MetaSources'] = df['Source']

    fbs = prepare_stewi_fbs(df, yaml_load['inventory_dict'],
                            yaml_load['NAICS_level'], yaml_load['geo_scale'])

    # remaining entries are post-processing functions defined in this module
    for function in functions:
        fbs = getattr(sys.modules[__name__], function)(fbs)

    return fbs
示例#3
0
def stewicombo_to_sector(inventory_dict, NAICS_level, geo_level, compartments):
    """
    Combine inventories with stewicombo, aggregate the emissions by NAICS
    sector and geography, and write the result in flowbysector format to
    output_dir + 'Chemicals_' + geo_level + '.csv'. Nothing is returned.

    This function takes the following inputs:
        - inventory_dict: a dictionary of inventory types and years (e.g., {'NEI':'2017', 'TRI':'2017'})
        - NAICS_level: desired NAICS aggregation level (2-6)
        - geo_level: desired geographic aggregation level ('National', 'State', 'County')
        - compartments: list of compartments to include (e.g., 'water', 'air', 'land')

    Raises ValueError if geo_level is not one of the three supported values.
    """

    # extra grouping column required by each supported geographic level;
    # checked first so a bad value fails before the inventories are combined
    geo_grouping = {'National': [], 'State': ['State'], 'County': ['County']}
    if geo_level not in geo_grouping:
        raise ValueError("geo_level must be one of 'National', 'State' or "
                         "'County', got " + repr(geo_level))

    ## run stewicombo to combine inventories, filter for LCI, remove overlap
    df = stewicombo.combineFullInventories(inventory_dict,
                                           filter_for_LCI=True,
                                           remove_overlap=True,
                                           compartments=compartments)

    ## create mapping to convert facility IDs --> NAICS codes
    # for all inventories in list:
    # - load facility data from stewi output directory, keeping only the facility IDs, NAICS codes, and geographic information
    # - create new column indicating inventory source (database and year)
    # - collect the frames and concatenate them once after the loop
    facility_frames = []
    for inventory, year in inventory_dict.items():
        # define inventory name as inventory type + inventory year (e.g., NEI_2017)
        inventory_name = inventory + '_' + year
        facilities = pd.read_csv(
            stw_output_dir + 'facility/' + inventory_name + '.csv',
            usecols=['FacilityID', 'NAICS', 'State', 'County'],
            dtype={
                'FacilityID': str,
                'NAICS': int
            })
        # rename counties as County + State (e.g., Bristol_MA), since some states share county names
        facilities['County'] = facilities['County'] + '_' + facilities['State']
        facilities['SourceYear'] = inventory_name
        facility_frames.append(facilities)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    if facility_frames:
        facility_mapping = pd.concat(facility_frames)
    else:
        facility_mapping = pd.DataFrame()

    ## merge dataframes to assign NAICS codes based on facility IDs
    df['SourceYear'] = df['Source'] + '_' + df['Year']
    df = pd.merge(df,
                  facility_mapping,
                  how='left',
                  on=['FacilityID', 'SourceYear'])

    ## subtract emissions for air transportation from airports
    # PLACEHOLDER TO SUBTRACT EMISSIONS FOR AIR TRANSPORT

    ## aggregate data based on NAICS code and chemical ID
    # add levelized NAICS code
    df['NAICS_lvl'] = df['NAICS'].astype(str).str[0:NAICS_level]
    # assign grouping variables based on desired geographic aggregation level
    grouping_vars = ['NAICS_lvl', 'SRS_ID', 'Compartment'
                     ] + geo_grouping[geo_level]
    # aggregate by NAICS code, chemical ID, compartment, and geographic level
    fbs = df.groupby(grouping_vars).agg({
        'FlowAmount': 'sum',
        'NAICS_lvl': 'first',
        'Compartment': 'first',
        'FlowName': 'first',
        'Year': 'first',
        'Unit': 'first',
        'State': 'first',
        'County': 'first'
    })
    # add reliability score
    fbs['DataReliability'] = weighted_average(df, 'ReliabilityScore',
                                              'FlowAmount', grouping_vars)

    ## perform operations to match flowbysector format
    # rename columns to match flowbysector format
    fbs = fbs.rename(
        columns={
            "NAICS_lvl": "SectorProducedBy",
            "FlowName": "Flowable",
            "Compartment": "Context"
        })
    # add hardcoded data
    fbs['National'] = 'United States'
    fbs['Class'] = 'Chemicals'
    fbs['SectorConsumedBy'] = 'None'
    # Location is taken from the column named after the geographic level
    # ('National' was just added above; 'State'/'County' come from the agg)
    fbs['Location'] = fbs[geo_level]
    # the year of the first listed inventory is passed as the FIPS year
    fbs = assign_fips_location_system(fbs, list(inventory_dict.values())[0])
    # add missing flow by sector fields
    fbs = add_missing_flow_by_fields(fbs, flow_by_sector_fields)
    # sort dataframe and reset index
    fbs = fbs.sort_values(list(
        flow_by_sector_fields.keys())).reset_index(drop=True)

    ## save result to output directory
    fbs.to_csv(output_dir + 'Chemicals_' + geo_level + '.csv')
示例#4
0
def stewicombo_to_sector(inventory_dict, NAICS_level, geo_scale, compartments):
    """
    Returns emissions from stewicombo in fbs format

    The combined inventory is joined to facility locations read from the
    stewi output directory and to primary NAICS codes from facilitymatcher.
    When NEI is included, air transportation emissions (SCC 2275020000) are
    subtracted from airport facilities (NAICS 4881) and re-added under the
    air transportation NAICS (481111, truncated to the requested level).
    The data are then aggregated by sector, flow, compartment and location.
    :param inventory_dict: a dictionary of inventory types and years (e.g.,
                {'NEI':'2017', 'TRI':'2017'})
    :param NAICS_level: desired NAICS aggregation level, using sector_level_key,
                should match target_sector_level
    :param geo_scale: desired geographic aggregation level ('national', 'state',
                'county'), should match target_geoscale
    :param compartments: list of compartments to include (e.g., 'water', 'air',
                'soil'), use None to include all compartments
    :return: df in flowbysector format, sorted by the flow_by_sector_fields
                columns
    """

    from stewi.globals import output_dir as stw_output_dir
    from stewi.globals import weighted_average
    import stewi
    import stewicombo
    import facilitymatcher
    from stewicombo.overlaphandler import remove_default_flow_overlaps
    from stewicombo.globals import addChemicalMatches

    NAICS_level_value = sector_level_key[NAICS_level]
    ## run stewicombo to combine inventories, filter for LCI, remove overlap
    df = stewicombo.combineFullInventories(inventory_dict,
                                           filter_for_LCI=True,
                                           remove_overlap=True,
                                           compartments=compartments)
    df = df.drop(columns=['SRS_CAS', 'SRS_ID', 'FacilityIDs_Combined'])

    # load facility data from stewi output directory, keeping only the facility IDs, and geographic information
    inventory_list = list(inventory_dict.keys())
    facility_frames = []
    for inventory, year in inventory_dict.items():
        # define inventory name as inventory type + inventory year (e.g., NEI_2017)
        inventory_name = inventory + '_' + year
        facilities = pd.read_csv(stw_output_dir + 'facility/' +
                                 inventory_name + '.csv',
                                 usecols=['FacilityID', 'State', 'County'],
                                 dtype={'FacilityID': str})
        if facilities.duplicated(subset='FacilityID').any():
            log.info('Duplicate facilities in ' + inventory_name +
                     ' - keeping first listed')
            facilities = facilities.drop_duplicates(subset='FacilityID',
                                                    keep='first')
        facility_frames.append(facilities)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    if facility_frames:
        facility_mapping = pd.concat(facility_frames)
    else:
        facility_mapping = pd.DataFrame()

    # Apply FIPS to facility locations
    facility_mapping = apply_county_FIPS(facility_mapping)

    ## merge dataframes to assign facility information based on facility IDs
    df = pd.merge(df, facility_mapping, how='left', on='FacilityID')

    ## Access NAICS From facility matcher and assign based on FRS_ID
    all_NAICS = facilitymatcher.get_FRS_NAICSInfo_for_facility_list(
        frs_id_list=None, inventories_of_interest_list=inventory_list)
    # keep primary NAICS only; chaining returns a new frame instead of
    # modifying a slice in place
    all_NAICS = all_NAICS.loc[
        all_NAICS['PRIMARY_INDICATOR'] == 'PRIMARY'].drop(
            columns=['PRIMARY_INDICATOR'])
    all_NAICS = naics_expansion(all_NAICS)
    if all_NAICS.duplicated(subset=['FRS_ID', 'Source']).any():
        log.info('Duplicate primary NAICS reported - keeping first')
        all_NAICS = all_NAICS.drop_duplicates(subset=['FRS_ID', 'Source'],
                                              keep='first')
    df = pd.merge(df, all_NAICS, how='left', on=['FRS_ID', 'Source'])

    # add levelized NAICS code prior to aggregation
    df['NAICS_lvl'] = df['NAICS'].str[0:NAICS_level_value]

    ## subtract emissions for air transportation from airports in NEI
    airport_NAICS = '4881'
    air_transportation_SCC = '2275020000'
    air_transportation_naics = '481111'
    if 'NEI' in inventory_list:
        log.info('Reassigning emissions from air transportation from airports')

        # obtain and prepare SCC dataset
        df_airplanes = stewi.getInventory('NEI',
                                          inventory_dict['NEI'],
                                          stewiformat='flowbySCC')
        # explicit copy so the column assignment below does not write to a
        # filtered view of the stewi inventory
        df_airplanes = df_airplanes[df_airplanes['SCC'] ==
                                    air_transportation_SCC].copy()
        df_airplanes['Source'] = 'NEI'
        df_airplanes = addChemicalMatches(df_airplanes)
        df_airplanes = remove_default_flow_overlaps(df_airplanes, SCC=True)
        df_airplanes = df_airplanes.drop(columns=['SCC'])

        # one NAICS per facility, taken from the already-mapped inventory
        facility_mapping_air = df[['FacilityID',
                                   'NAICS']].drop_duplicates(keep='first')
        # NOTE(review): this merge adds only 'NAICS' to df_airplanes. Unless
        # the flowbySCC inventory already carries a 'Location' column, the
        # rows re-added below will have a null Location and be removed by
        # the dropna further down - confirm against stewi's output.
        df_airplanes = df_airplanes.merge(facility_mapping_air,
                                          how='left',
                                          on='FacilityID')

        df_airplanes['Year'] = inventory_dict['NEI']
        # keep only airplane emissions reported at airport facilities
        df_airplanes = df_airplanes[
            df_airplanes['NAICS'].str[0:len(airport_NAICS)] ==
            airport_NAICS].copy()

        # subtract airplane emissions from airport NAICS at individual facilities
        df_planeemissions = df_airplanes[[
            'FacilityID', 'FlowName', 'FlowAmount'
        ]].rename(columns={'FlowAmount': 'PlaneEmissions'})
        df = df.merge(df_planeemissions,
                      how='left',
                      on=['FacilityID', 'FlowName'])
        # facilities/flows without airplane emissions subtract zero
        df['PlaneEmissions'] = df['PlaneEmissions'].fillna(value=0)
        df['FlowAmount'] = df['FlowAmount'] - df['PlaneEmissions']
        df = df.drop(columns=['PlaneEmissions'])

        # add airplane emissions under air transport NAICS
        df_airplanes['NAICS_lvl'] = air_transportation_naics[
            0:NAICS_level_value]
        df = pd.concat([df, df_airplanes], ignore_index=True)

    # update location to appropriate geoscale prior to aggregating
    df = df.dropna(subset=['Location'])
    df['Location'] = df['Location'].astype(str)
    df = update_geoscale(df, geo_scale)

    # assign grouping variables based on desired geographic aggregation level
    grouping_vars = ['NAICS_lvl', 'FlowName', 'Compartment', 'Location']

    # aggregate by NAICS code, FlowName, compartment, and geographic level
    fbs = df.groupby(grouping_vars).agg({
        'FlowAmount': 'sum',
        'Year': 'first',
        'Unit': 'first'
    })

    # add reliability score
    fbs['DataReliability'] = weighted_average(df, 'ReliabilityScore',
                                              'FlowAmount', grouping_vars)
    fbs = fbs.reset_index()

    # apply flow mapping
    fbs = map_elementary_flows(fbs, inventory_list)

    # rename columns to match flowbysector format
    fbs = fbs.rename(columns={"NAICS_lvl": "SectorProducedBy"})

    # add hardcoded data, depending on the source data, some of these fields may need to change
    fbs['Class'] = 'Chemicals'
    fbs['SectorConsumedBy'] = 'None'
    fbs['SectorSourceName'] = 'NAICS_2012_Code'
    fbs['FlowType'] = 'ELEMENTARY_FLOW'

    # the year of the first listed inventory is passed as the FIPS year
    fbs = assign_fips_location_system(fbs, list(inventory_dict.values())[0])

    # add missing flow by sector fields
    fbs = add_missing_flow_by_fields(fbs, flow_by_sector_fields)

    # sort dataframe and reset index
    fbs = fbs.sort_values(list(
        flow_by_sector_fields.keys())).reset_index(drop=True)

    return fbs
def test_generate_combined_inventories(name, compartment, inv_dict):
    """
    Combine the inventories in inv_dict for a single compartment and save
    the result with stewicombo under the given name.
    :param name: name passed to stewicombo.saveInventory
    :param compartment: the one compartment to include in the combination
    :param inv_dict: dictionary of inventories passed to stewicombo
    """
    combine_options = {
        'filter_for_LCI': True,
        'remove_overlap': True,
        'compartments': [compartment],
    }
    combined_inventory = stewicombo.combineFullInventories(
        inv_dict, **combine_options)
    stewicombo.saveInventory(name, combined_inventory, inv_dict)