Example #1
def get_generation_mix_process_df(regions=None):
    """
    Create a dataframe of generation mixes by fuel type in each subregion.

    This function imports and uses the parameter
    'gen_mix_from_model_generation_data' from model_config.py. If the value
    is False, it cannot currently handle regions other than 'all', 'NERC',
    'US', or a single eGRID subregion.

    Parameters
    ----------
    regions : str, optional
        Which regions to include (the default is 'all', which includes all eGRID
        subregions)

    Returns
    -------
    DataFrame
        Sample output:
        >>> all_gen_mix_db.head()
            Subregion FuelCategory   Electricity  NERC  Generation_Ratio
        0        AKGD         COAL  5.582922e+05  ASCC          0.116814
        22       AKGD          OIL  3.355753e+05  ASCC          0.070214
        48       AKGD          GAS  3.157474e+06  ASCC          0.660651
        90       AKGD        HYDRO  5.477350e+05  ASCC          0.114605
        114      AKGD      BIOMASS  5.616577e+04  ASCC          0.011752
    """
    from electricitylci.egrid_filter import (
        electricity_for_selected_egrid_facilities, )
    from electricitylci.generation_mix import (
        create_generation_mix_process_df_from_model_generation_data,
        create_generation_mix_process_df_from_egrid_ref_data,
    )
    from electricitylci.model_config import (
        eia_gen_year,
        gen_mix_from_model_generation_data,
        regional_aggregation,  # assumed model_config variable; used below
        replace_egrid,
    )
    from electricitylci.eia923_generation import build_generation_data

    if regions is None:
        regions = regional_aggregation

    if replace_egrid:
        # assert regions == 'BA' or regions == 'NERC', 'Regions must be BA or NERC'
        print("Actual generation data is used when replacing eGRID")
        generation_data = build_generation_data(
            generation_years=[eia_gen_year])
        generation_mix_process_df = create_generation_mix_process_df_from_model_generation_data(
            generation_data, regions)
    else:
        if gen_mix_from_model_generation_data:
            generation_mix_process_df = create_generation_mix_process_df_from_model_generation_data(
                electricity_for_selected_egrid_facilities, regions)
        else:
            generation_mix_process_df = create_generation_mix_process_df_from_egrid_ref_data(
                regions)
    return generation_mix_process_df
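
A minimal usage sketch (hypothetical call; assumes the package is installed and a model configuration has been loaded so the defaults above resolve):

# Hypothetical usage: build the generation mix table for all eGRID
# subregions, then inspect the fuel shares for one subregion.
gen_mix = get_generation_mix_process_df(regions="all")
akgd = gen_mix[gen_mix["Subregion"] == "AKGD"]
print(akgd[["FuelCategory", "Generation_Ratio"]])
# Within a subregion, Generation_Ratio values should sum to ~1.0.
assert abs(akgd["Generation_Ratio"].sum() - 1.0) < 1e-3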
Example #2
def create_generation_process_df():
    """
    Read emissions and generation data from different sources to provide
    facility-level emissions. The most important inputs to this process come
    from the model configuration file.

    Parameters
    ----------
    None

    Returns
    -------
    dataframe
        Dataframe includes all facility-level emissions.
    """
    from electricitylci.eia923_generation import (build_generation_data,
                                                  eia923_primary_fuel)
    from electricitylci.egrid_filter import (
        egrid_facilities_to_include,
        emissions_and_waste_for_selected_egrid_facilities,
    )
    from electricitylci.generation import (
        egrid_facilities_w_fuel_region,
        add_technological_correlation_score,
        add_temporal_correlation_score,
    )
    import electricitylci.emissions_other_sources as em_other
    import electricitylci.ampd_plant_emissions as ampd
    from electricitylci.combinator import ba_codes
    import electricitylci.manual_edits as edits
    # The names below are used in this function but were module-level in the
    # original; imported here so the snippet is self-contained (module paths
    # are assumptions).
    import pandas as pd
    from electricitylci.model_config import model_specs
    from electricitylci.elementaryflows import map_emissions_to_fedelemflows

    COMPARTMENT_DICT = {
        "emission/air": "air",
        "emission/water": "water",
        "emission/ground": "ground",
        "input": "input",
        "output": "output",
        "waste": "waste",
        "air": "air",
        "water": "water",
        "ground": "ground",
    }
    if model_specs.replace_egrid:
        generation_data = build_generation_data().drop_duplicates()
        cems_df = ampd.generate_plant_emissions(model_specs.eia_gen_year)
        cems_df.drop(columns=["FlowUUID"], inplace=True)
        emissions_and_waste_for_selected_egrid_facilities = em_other.integrate_replace_emissions(
            cems_df, emissions_and_waste_for_selected_egrid_facilities)
    else:
        from electricitylci.egrid_filter import electricity_for_selected_egrid_facilities
        generation_data = electricity_for_selected_egrid_facilities
        generation_data["Year"] = model_specs.egrid_year
        generation_data["FacilityID"] = generation_data["FacilityID"].astype(
            int)


    #     generation_data = build_generation_data(
    #         egrid_facilities_to_include=egrid_facilities_to_include
    #     )
    # The duplicate FacilityID columns created by the merges below are
    # dropped explicitly near the end of this function.
    emissions_and_waste_for_selected_egrid_facilities[
        "eGRID_ID"] = emissions_and_waste_for_selected_egrid_facilities[
            "eGRID_ID"].astype(int)
    final_database = pd.merge(
        left=emissions_and_waste_for_selected_egrid_facilities,
        right=generation_data,
        right_on=["FacilityID", "Year"],
        left_on=["eGRID_ID", "Year"],
        how="left",
    )
    egrid_facilities_w_fuel_region[
        "FacilityID"] = egrid_facilities_w_fuel_region["FacilityID"].astype(
            int)
    final_database = pd.merge(
        left=final_database,
        right=egrid_facilities_w_fuel_region,
        left_on="eGRID_ID",
        right_on="FacilityID",
        how="left",
        suffixes=["", "_right"],
    )
    if model_specs.replace_egrid:
        primary_fuel_df = eia923_primary_fuel(year=model_specs.eia_gen_year)
        primary_fuel_df.rename(columns={'Plant Id': "eGRID_ID"}, inplace=True)
        primary_fuel_df["eGRID_ID"] = primary_fuel_df["eGRID_ID"].astype(int)
        key_df = (primary_fuel_df[[
            "eGRID_ID", "FuelCategory"
        ]].dropna().drop_duplicates(subset="eGRID_ID").set_index("eGRID_ID"))
        final_database["FuelCategory"] = final_database["eGRID_ID"].map(
            key_df["FuelCategory"])
    else:
        key_df = (final_database[[
            "eGRID_ID", "FuelCategory"
        ]].dropna().drop_duplicates(subset="eGRID_ID").set_index("eGRID_ID"))
        final_database.loc[final_database["FuelCategory"].isnull(),
                           "FuelCategory"] = final_database.loc[
                               final_database["FuelCategory"].isnull(),
                               "eGRID_ID"].map(key_df["FuelCategory"])
    # if replace_egrid:
    #     final_database["FuelCategory"].fillna(
    #         final_database["FuelCategory_right"], inplace=True
    #     )
    final_database["Final_fuel_agg"] = final_database["FuelCategory"]
    # if model_specs.use_primaryfuel_for_coal:
    #     final_database.loc[
    #         final_database["FuelCategory"] == "COAL", ["Final_fuel_agg"]
    #     ] = final_database.loc[
    #         final_database["FuelCategory"] == "COAL", "PrimaryFuel"
    #     ]
    try:
        year_filter = final_database["Year_x"] == final_database["Year_y"]
        final_database = final_database.loc[year_filter, :]
        final_database.drop(columns="Year_y", inplace=True)
    except KeyError:
        pass
    final_database.rename(columns={"Year_x": "Year"}, inplace=True)
    final_database = map_emissions_to_fedelemflows(final_database)
    dup_cols_check = [
        "FacilityID",
        "FuelCategory",
        "FlowName",
        "FlowAmount",
        "Compartment",
    ]
    final_database = final_database.loc[:,
                                        ~final_database.columns.duplicated()]
    final_database = final_database.drop_duplicates(subset=dup_cols_check)
    final_database.drop(
        columns=["FuelCategory", "FacilityID_x", "FacilityID_y"], inplace=True)
    final_database.rename(
        columns={
            "Final_fuel_agg": "FuelCategory",
            "TargetFlowUUID": "FlowUUID",
        },
        inplace=True,
    )
    final_database = add_temporal_correlation_score(
        final_database, model_specs.electricity_lci_target_year)
    final_database = add_technological_correlation_score(final_database)
    final_database["DataCollection"] = 5
    final_database["GeographicalCorrelation"] = 1

    final_database["eGRID_ID"] = final_database["eGRID_ID"].astype(int)

    final_database.sort_values(by=["eGRID_ID", "Compartment", "FlowName"],
                               inplace=True)
    final_database["stage_code"] = "Power plant"
    final_database["Compartment_path"] = final_database["Compartment"]
    final_database["Compartment"] = final_database["Compartment_path"].map(
        COMPARTMENT_DICT)
    final_database["Balancing Authority Name"] = final_database[
        "Balancing Authority Code"].map(ba_codes["BA_Name"])
    final_database["EIA_Region"] = final_database[
        "Balancing Authority Code"].map(ba_codes["EIA_Region"])
    final_database["FERC_Region"] = final_database[
        "Balancing Authority Code"].map(ba_codes["FERC_Region"])
    final_database = edits.check_for_edits(final_database, "generation.py",
                                           "create_generation_process_df")
    return final_database
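
A minimal usage sketch (hypothetical; assumes a model configuration has been selected so that model_specs is populated):

# Hypothetical usage: build the facility-level emissions table and inspect
# the data-quality columns added at the end of the function.
final_db = create_generation_process_df()
print(final_db[["eGRID_ID", "FlowName", "Compartment", "FlowAmount",
                "TemporalCorrelation", "DataCollection"]].head())
# Every record is tagged with the power plant stage code.
assert (final_db["stage_code"] == "Power plant").all()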
Example #3
def get_generation_mix_process_df(regions=None):
    """
    Create a dataframe of generation mixes by fuel type in each subregion.

    This function imports and uses the parameters 'replace_egrid' and
    'gen_mix_from_model_generation_data' from model_config.py. If
    'replace_egrid' is true, or if the specified 'regions' is 'BA', 'FERC',
    or 'US', then the generation mix will come from EIA 923 data. If
    'replace_egrid' is false, then the generation mix will either come from
    the eGRID reference data ('gen_mix_from_model_generation_data' is false)
    or from the generation data from this model
    ('gen_mix_from_model_generation_data' is true).

    Parameters
    ----------
    regions : str, optional
        Which regions to include (the default is 'all', which includes all eGRID
        subregions)

    Returns
    -------
    DataFrame
        Sample output:
        >>> all_gen_mix_db.head()
            Subregion FuelCategory   Electricity  NERC  Generation_Ratio
        0        AKGD         COAL  5.582922e+05  ASCC          0.116814
        22       AKGD          OIL  3.355753e+05  ASCC          0.070214
        48       AKGD          GAS  3.157474e+06  ASCC          0.660651
        90       AKGD        HYDRO  5.477350e+05  ASCC          0.114605
        114      AKGD      BIOMASS  5.616577e+04  ASCC          0.011752
    """
    import logging

    from electricitylci.egrid_filter import (
        electricity_for_selected_egrid_facilities,
    )
    from electricitylci.generation_mix import (
        create_generation_mix_process_df_from_model_generation_data,
        create_generation_mix_process_df_from_egrid_ref_data,
    )
    from electricitylci.eia923_generation import build_generation_data
    # 'config' and 'logger' are used below but were not defined in the
    # original snippet; these bindings are assumed.
    import electricitylci.model_config as config

    logger = logging.getLogger(__name__)

    if regions is None:
        regions = config.model_specs.regional_aggregation

    if config.model_specs.replace_egrid or regions in ["BA", "FERC", "US"]:
        # assert regions == 'BA' or regions == 'NERC', 'Regions must be BA or NERC'
        if regions in ["BA", "FERC", "US"] and not config.model_specs.replace_egrid:
            logger.info(
                "EIA923 generation data is being used for the generation mix "
                "despite replace_egrid = False. The reference eGRID "
                "electricity data cannot be reorganized to match BA or FERC "
                "regions, and the eGRID reference data function does not "
                "support aggregating to the US region.")
        print("EIA923 generation data is used when replacing eGRID")
        generation_data = build_generation_data(
            generation_years=[config.model_specs.eia_gen_year])
        generation_mix_process_df = create_generation_mix_process_df_from_model_generation_data(
            generation_data, regions)
    else:
        if config.model_specs.gen_mix_from_model_generation_data:
            generation_mix_process_df = create_generation_mix_process_df_from_model_generation_data(
                electricity_for_selected_egrid_facilities, regions)
        else:
            generation_mix_process_df = create_generation_mix_process_df_from_egrid_ref_data(
                regions)
    return generation_mix_process_df
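
A short sketch of the branch logic above (hypothetical calls; requesting 'BA', 'FERC', or 'US' aggregation forces the EIA 923 path even when replace_egrid is False, while an eGRID subregion request falls through to the eGRID-based branches):

# Hypothetical usage: compare the EIA 923-backed BA mix with the eGRID
# subregion mix.
ba_mix = get_generation_mix_process_df(regions="BA")
egrid_mix = get_generation_mix_process_df(regions="all")
print(ba_mix.head())
print(egrid_mix.head())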
Example #4
def get_generation_process_df(use_alt_gen_process=None,
                              regions=None,
                              **kwargs):
    """
    Create a dataframe of emissions from power generation by fuel type in each
    region. kwargs would include the upstream emissions dataframe (upstream_df) if
    upstream emissions are being included.

    Parameters
    ----------
    use_alt_gen_process : bool, optional
        If the NETL alternate generation process method should be used (the
        default is None, which uses the value read from a settings YAML
        file).
    regions : str, optional
        Regions to include in the analysis (the default is None, which uses
        the value read from a settings YAML file). Other options include
        "eGRID", "NERC", "BA", "US", "FERC", and "EIA".

    Returns
    -------
    DataFrame
        Each row represents information about a single emission from a fuel
        category in a single region. Columns are:

        'Subregion', 'FuelCategory', 'FlowName', 'FlowUUID', 'Compartment',
        'Year', 'Source', 'Unit', 'ElementaryFlowPrimeContext',
        'TechnologicalCorrelation', 'TemporalCorrelation', 'DataCollection',
        'Emission_factor', 'Reliability_Score', 'GeographicalCorrelation',
        'GeomMean', 'GeomSD', 'Maximum', 'Minimum'
    """
    if use_alt_gen_process is None:
        use_alt_gen_process = model_specs['use_alt_gen_process']
    if regions is None:
        regions = model_specs['regional_aggregation']

    if use_alt_gen_process is True:
        try:
            upstream_df = kwargs['upstream_df']
        except KeyError:
            print(
                "A kwarg named 'upstream_df' must be included if "
                "use_alt_gen_process is True")
            raise
        # upstream_df = get_upstream_process_df()
        if model_specs['include_upstream_processes'] is True:
            upstream_dict = write_upstream_process_database_to_dict(
                upstream_df)
            upstream_dict = write_upstream_dicts_to_jsonld(upstream_dict)
            gen_df = get_alternate_gen_plus_netl()
            combined_df, canadian_gen = combine_upstream_and_gen_df(
                gen_df, upstream_df)
            gen_plus_fuels = add_fuels_to_gen(gen_df, upstream_df,
                                              canadian_gen, upstream_dict)
        else:
            gen_df = get_alternate_gen_plus_netl()
            upstream_df = pd.DataFrame(columns=gen_df.columns)
            upstream_dict = {}
            gen_plus_fuels = gen_df
        # This change has been made to accommodate the new method of
        # generating consumption mixes for FERC regions. They now pull BAs
        # to provide a more accurate inventory. The tradeoff is that it is
        # no longer possible to make a FERC region generation mix and also
        # provide the consumption mix. Or it could be possible, but it would
        # require running through aggregate twice.
        #     generation_process_df = aggregate_gen(
        #         gen_plus_fuels, subregion=regions
        #     )
        generation_process_df = aggregate_gen(gen_plus_fuels, subregion="BA")
        return generation_process_df

    else:
        from electricitylci.egrid_filter import (
            electricity_for_selected_egrid_facilities,
            egrid_facilities_to_include,
            emissions_and_waste_for_selected_egrid_facilities,
        )
        from electricitylci.eia923_generation import build_generation_data
        from electricitylci.generation import create_generation_process_df
        from electricitylci.model_config import replace_egrid

        if replace_egrid is True:
            # This is a dummy function that doesn't exist yet
            # updated_emissions = build_new_emissions(year)

            generation_data = build_generation_data()
            generation_process_df = create_generation_process_df(
                generation_data,
                emissions_and_waste_for_selected_egrid_facilities,
                subregion=regions,
            )

        else:
            electricity_for_selected_egrid_facilities["Year"] = model_specs[
                "egrid_year"]
            generation_process_df = create_generation_process_df(
                electricity_for_selected_egrid_facilities,
                emissions_and_waste_for_selected_egrid_facilities,
                subregion=regions,
            )
        return generation_process_df
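
A hypothetical usage sketch for the alternate-generation path. The upstream inventory comes from an upstream helper (the commented-out get_upstream_process_df call above names one); it is passed through the 'upstream_df' kwarg, as required by the code:

# Hypothetical usage: build the upstream inventory first, then pass it in.
upstream_df = get_upstream_process_df()  # helper named in the comment above
gen_process_df = get_generation_process_df(
    use_alt_gen_process=True,
    regions="BA",
    upstream_df=upstream_df,
)
print(gen_process_df.columns.tolist())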
def generate_regional_grid_loss(final_database, year, subregion="all"):
    """This function generates transmission and distribution losses for the
    provided generation data and given year, aggregated by subregion.

    Arguments:
        final_database: dataframe
            The database containing plant-level emissions.
        year: int
            Analysis year for the transmission and distribution loss data.
            Ideally this should match the year of your final_database.
        subregion: str, optional
            Level of regional aggregation for the output (default "all").
    Returns:
        td_by_region: dataframe
            A dataframe of transmission and distribution loss rates as a
            fraction. This dataframe can be used to generate unit processes
            for transmission and distribution to match the regionally-
            aggregated emissions unit processes.
    """
    print("Generating factors for transmission and distribution losses")
    from electricitylci.eia923_generation import build_generation_data
    from electricitylci.combinator import ba_codes
    from electricitylci.egrid_facilities import egrid_facilities
    # numpy and pandas are used below; they were module-level imports in the
    # original, so they are imported here to keep the snippet runnable.
    import numpy as np
    import pandas as pd
    td_calc_columns = [
        "State",
        "NERC",
        "FuelCategory",
        "PrimaryFuel",
        "Balancing Authority Name",
        "Electricity",
        "Year",
        "Subregion",
        "FRS_ID",
        "eGRID_ID",
    ]
    #    plant_generation = final_database[td_calc_columns].drop_duplicates()
    egrid_facilities_w_fuel_region = egrid_facilities[[
        "FacilityID", "Subregion", "PrimaryFuel", "FuelCategory", "NERC",
        "PercentGenerationfromDesignatedFuelCategory",
        "Balancing Authority Name", "Balancing Authority Code", "State"
    ]].copy()  # copy() avoids a SettingWithCopyWarning on the astype below
    egrid_facilities_w_fuel_region[
        "FacilityID"] = egrid_facilities_w_fuel_region["FacilityID"].astype(
            int)
    plant_generation = build_generation_data(generation_years=[year])
    plant_generation["FacilityID"] = plant_generation["FacilityID"].astype(int)
    plant_generation = plant_generation.merge(egrid_facilities_w_fuel_region,
                                              on=["FacilityID"],
                                              how="left")
    plant_generation["Balancing Authority Name"] = plant_generation[
        "Balancing Authority Code"].map(ba_codes["BA_Name"])
    plant_generation["FERC_Region"] = plant_generation[
        "Balancing Authority Code"].map(ba_codes["FERC_Region"])
    plant_generation["EIA_Region"] = plant_generation[
        "Balancing Authority Code"].map(ba_codes["EIA_Region"])
    td_rates = eia_trans_dist_download_extract(f"{year}")
    td_by_plant = pd.merge(
        left=plant_generation,
        right=td_rates,
        left_on="State",
        right_index=True,
        how="left",
    )
    td_by_plant.dropna(subset=["t_d_losses"], inplace=True)
    td_by_plant["t_d_losses"] = td_by_plant["t_d_losses"].astype(float)

    from electricitylci.aggregation_selector import subregion_col
    aggregation_column = subregion_col(subregion)
    wm = lambda x: np.average(x,
                              weights=td_by_plant.loc[x.index, "Electricity"])
    if aggregation_column is not None:
        td_by_region = td_by_plant.groupby(
            aggregation_column, as_index=False).agg({"t_d_losses": wm})
    else:
        td_by_region = pd.DataFrame(td_by_plant.agg({"t_d_losses": wm}),
                                    columns=["t_d_losses"])
        td_by_region["Region"] = "US"
    return td_by_region
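
A small self-contained illustration of the generation-weighted loss aggregation used above (toy numbers, not real loss rates):

import numpy as np
import pandas as pd

# Two plants in one subregion; t_d_losses are weighted by each plant's
# generation, mirroring the groupby/agg pattern in the function above.
toy = pd.DataFrame({
    "Subregion": ["AKGD", "AKGD"],
    "Electricity": [100.0, 300.0],
    "t_d_losses": [0.08, 0.04],
})
wm = lambda x: np.average(x, weights=toy.loc[x.index, "Electricity"])
td_by_region = toy.groupby("Subregion", as_index=False).agg({"t_d_losses": wm})
print(td_by_region)  # weighted loss = (100*0.08 + 300*0.04) / 400 = 0.05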
def ba_io_trading_model(year=None, subregion=None, regions_to_keep=None):
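    """
    Use EIA bulk (EBA) data on hourly net generation, demand, and BA-to-BA
    interchange to build an annual trading matrix between balancing
    authority areas and derive consumption mixes with the input-output
    approach of Qu et al. (2018).

    Parameters
    ----------
    year : int, optional
        Trading year (default is the model config NETL_IO_trading_year).
    subregion : str, optional
        Aggregation level; must be 'BA', 'FERC', or 'US' (default is the
        model config regional_aggregation).
    regions_to_keep : list, optional
        If given, restrict exporting balancing authorities to these names.

    Returns
    -------
    dict
        Keys 'BA', 'FERC', and 'US', each mapping to a dataframe of
        consumption-mix fractions.

    Notes
    -----
    Module-level imports (pandas as pd, numpy as np, json, zipfile, logging,
    datetime, data_dir/output_dir, and the EBA helper functions) are
    assumed, as in the original module.
    """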
    REGION_NAMES = [
        'California', 'Carolinas', 'Central',
        'Electric Reliability Council of Texas, Inc.', 'Florida',
        'Mid-Atlantic', 'Midwest', 'New England ISO',
        'New York Independent System Operator', 'Northwest', 'Southeast',
        'Southwest', 'Tennessee Valley Authority'
    ]

    REGION_ACRONYMS = [
        'TVA', 'MIDA', 'CAL', 'CAR', 'CENT', 'ERCO', 'FLA',
        'MIDW', 'ISNE', 'NYIS', 'NW', 'SE', 'SW',
    ]
    if year is None:
        year = model_specs.NETL_IO_trading_year
    if subregion is None:
        subregion = model_specs.regional_aggregation
    if subregion not in ['BA', 'FERC', 'US']:
        raise ValueError(
            f'subregion or regional_aggregation must have a value of "BA", '
            f'"FERC", or "US" when calculating trading with input-output, '
            f'not {subregion}'
        )

    # Read in BAA file which contains the names and abbreviations
    df_BA = pd.read_excel(data_dir + '/BA_Codes_930.xlsx', sheet_name = 'US', header = 4)
    df_BA.rename(columns={'etag ID': 'BA_Acronym', 'Entity Name': 'BA_Name','NCR_ID#': 'NRC_ID', 'Region': 'Region'}, inplace=True)
    BA = np.array(df_BA['BA_Acronym'])  # the pd.np alias is removed in newer pandas
    US_BA_acronyms = df_BA['BA_Acronym'].tolist()

    # Read in BAA file which contains the names and abbreviations
    # Original df_BAA does not include the Canadian balancing authorities
    # Import them here, then concatenate to make a single df_BAA_NA (North America)

    df_BA_CA = pd.read_excel(data_dir + '/BA_Codes_930.xlsx', sheet_name = 'Canada', header = 4)
    df_BA_CA.rename(columns={'etag ID': 'BA_Acronym', 'Entity Name': 'BA_Name','NCR_ID#': 'NRC_ID', 'Region': 'Region'}, inplace=True)
    df_BA_NA = pd.concat([df_BA, df_BA_CA])
    ferc_list = df_BA_NA['FERC_Region_Abbr'].unique().tolist()

    # Read in the bulk data

#    download_EBA()
    path = join(data_dir, 'bulk_data', 'EBA.zip')
    NET_GEN_ROWS = []
    BA_TO_BA_ROWS = []
    DEMAND_ROWS = []
    TOTAL_INTERCHANGE_ROWS = []
    try:
        logging.info("Using existing bulk data download")
        z = zipfile.ZipFile(path, 'r')

    except FileNotFoundError:
        logging.info("Downloading new bulk data")
        download_EBA()
        z = zipfile.ZipFile(path, 'r')
    logging.info("Loading bulk data to json")
    with z.open('EBA.txt') as f:
        for line in f:
            # All but one BA is currently reporting net generation in UTC and local time
            # for that one BA (GRMA) only UTC time is reported - so only pulling that
            # for now.
            if b'EBA.NG.H' in line and b'EBA.NG.HL' not in line:
                NET_GEN_ROWS.append(json.loads(line))
            # Similarly there are 5 interchanges that report interchange in UTC but not in
            # local time.
            elif b'EBA.ID.H' in line and b'EBA.ID.HL' not in line:
                exchange_line = json.loads(line)
                if exchange_line['series_id'].split('-')[0][4:] not in REGION_ACRONYMS:
                    # Filtering by region acronym here to hopefully save
                    # some time down the road.
                    BA_TO_BA_ROWS.append(exchange_line)
            # Keeping these here just in case
            elif b'EBA.D.H' in line and b'EBA.D.HL' not in line:
                DEMAND_ROWS.append(json.loads(line))
#            elif b'EBA.TI.H' in line:
#                TOTAL_INTERCHANGE_ROWS.append(json.loads(line))
    logging.info(f"Net gen rows: {len(NET_GEN_ROWS)}; BA to BA rows:{len(BA_TO_BA_ROWS)}; Demand rows:{len(DEMAND_ROWS)}")
    eia923_gen = eia923.build_generation_data(generation_years=[year])
    eia860_df = eia860.eia860_balancing_authority(year)
    eia860_df["Plant Id"] = eia860_df["Plant Id"].astype(int)

    eia_combined_df = eia923_gen.merge(eia860_df,
                                       left_on=["FacilityID"],
                                       right_on=["Plant Id"],
                                       how="left")
    eia_gen_ba = eia_combined_df.groupby(
        by=["Balancing Authority Code"], as_index=False)["Electricity"].sum()

    # Subset for specified eia_gen_year
    start_datetime = '{}-01-01 00:00:00+00:00'.format(year)
    end_datetime = '{}-12-31 23:00:00+00:00'.format(year)

    start_datetime = datetime.strptime(start_datetime, '%Y-%m-%d %H:%M:%S%z')
    end_datetime = datetime.strptime(end_datetime, '%Y-%m-%d %H:%M:%S%z')

    # Net Generation Data Import
    logging.info("Generating df with datetime")
    df_net_gen = row_to_df(NET_GEN_ROWS, 'net_gen')
    del(NET_GEN_ROWS)
    logging.info("Pivoting")
    df_net_gen = df_net_gen.pivot(index = 'datetime', columns = 'region', values = 'net_gen')
    ba_cols = US_BA_acronyms

    gen_cols = list(df_net_gen.columns.values)

    gen_cols_set = set(gen_cols)
    ba_ref_set = set(ba_cols)

    col_diff = list(ba_ref_set - gen_cols_set)
    col_diff.sort(key = str.upper)
    logging.info("Cleaning net_gen dataframe")

    # Add in missing columns, then sort in alphabetical order
    for i in col_diff:
        df_net_gen[i] = 0

    # Keep only the columns that match the balancing authority names, there are several other columns included in the dataset
    # that represent states (e.g., TEX, NY, FL) and other areas (US48)

    df_net_gen = df_net_gen[ba_cols]

    # Resort columns so the headers are in alpha order
    df_net_gen = df_net_gen.sort_index(axis=1)
    df_net_gen = df_net_gen.fillna(value = 0)

    df_net_gen = df_net_gen.loc[start_datetime:end_datetime]

    # Sum values in each column
    df_net_gen_sum = df_net_gen.sum(axis = 0).to_frame()
    logging.info("Reading canadian import data")
    # Add Canadian import data to the net generation dataset, concatenate and put in alpha order
    df_CA_Imports_Gen = pd.read_csv(data_dir + '/CA_Imports_Gen.csv', index_col = 0)
    df_CA_Imports_Gen = df_CA_Imports_Gen[str(year)]

    logging.info("Combining US and Canadian net gen data")
    df_net_gen_sum = pd.concat([df_net_gen_sum,df_CA_Imports_Gen]).sum(axis=1)
    df_net_gen_sum = df_net_gen_sum.to_frame()
    df_net_gen_sum = df_net_gen_sum.sort_index(axis=0)

    # Check the net generation of each Balancing Authority against EIA 923 data.
    # If the percent change of a given area is greater than the mean absolute difference
    # of all of the areas, it will be treated as an error and replaced with the
    # value in EIA923.
    logging.info("Checking against EIA 923 generation data")
    net_gen_check = df_net_gen_sum.merge(
            right=eia_gen_ba,
            left_index=True,
            right_on=["Balancing Authority Code"],
            how="left"
            ).reset_index()
    net_gen_check["diff"] = abs(
        net_gen_check["Electricity"] - net_gen_check[0]) / net_gen_check[0]
    # pandas Series.mad() was removed in pandas 2.0; compute the mean
    # absolute deviation explicitly.
    diff_mad = (net_gen_check["diff"]
                - net_gen_check["diff"].mean()).abs().mean()
    net_gen_swap = net_gen_check.loc[
        net_gen_check["diff"] > diff_mad,
        ["Balancing Authority Code", "Electricity"]
    ].set_index("Balancing Authority Code")
    df_net_gen_sum.loc[net_gen_swap.index, [0]] = np.nan
    net_gen_swap.rename(columns={"Electricity": 0}, inplace=True)
    df_net_gen_sum = df_net_gen_sum.combine_first(net_gen_swap)
    # First, work on the trading data from the BA-to-BA rows.
    # This block does the following:
    # 1. reformats the data to an annual basis
    # 2. formats the BA names in the corresponding columns
    # 3. evaluates the trade values from both BA perspectives
    #    (e.g., BA1 as exporter and importer in a transaction with BA2)
    # 4. evaluates the trading data for any results that don't make sense
    #       a. both BAs designate as importers (negative value)
    #       b. both BAs designate as exporters (positive value)
    #       c. one of the BAs in the transaction reports a zero value and
    #          the other is nonzero
    # 5. calculates the percent difference in the transaction values
    #    reported by the BAs
    # 6. sets the final exchange value based on logic:
    #       a. if the percent diff is less than 20%, take the mean
    #       b. if not, use the value as reported by the exporting BAA
    #       c. designate each BA in the transaction as either the importer
    #          or the exporter
    # Output is a pivot with index (rows) representing exporting BAs,
    #   columns representing importing BAs, and values for the traded amount

    # Group and resample trading data so that it is on an annual basis

    logging.info("Creating trading dataframe")
    df_ba_trade = ba_exchange_to_df(BA_TO_BA_ROWS, data_type='ba_to_ba')
    del(BA_TO_BA_ROWS)
    df_ba_trade = df_ba_trade.set_index('datetime')
    df_ba_trade['transacting regions'] = df_ba_trade['from_region'] + '-' + df_ba_trade['to_region']

    logging.info("Filtering trading dataframe")
    # Keep only the columns that match the balancing authority names, there are several other columns included in the dataset
    # that represent states (e.g., TEX, NY, FL) and other areas (US48)
    filt1 = df_ba_trade['from_region'].isin(ba_cols)
    filt2 = df_ba_trade['to_region'].isin(ba_cols)
    filt = filt1 & filt2
    df_ba_trade = df_ba_trade[filt]

    # Subset for eia_gen_year, need to pivot first because of non-unique datetime index
    df_ba_trade_pivot = df_ba_trade.pivot(columns = 'transacting regions', values = 'ba_to_ba')

    df_ba_trade_pivot = df_ba_trade_pivot.loc[start_datetime:end_datetime]

    # Sum columns - represents the net transactced amount between the two BAs
    df_ba_trade_sum = df_ba_trade_pivot.sum(axis = 0).to_frame()
    df_ba_trade_sum = df_ba_trade_sum.reset_index()
    df_ba_trade_sum.columns = ['BAAs','Exchange']

    # Split the BAA string into exporting and importing BAA columns.
    # (Two-variable unpacking via .str was removed in newer pandas; use
    # expand=True instead.)
    df_ba_trade_sum[['BAA1', 'BAA2']] = df_ba_trade_sum['BAAs'].str.split(
        '-', n=1, expand=True)
    df_ba_trade_sum = df_ba_trade_sum.rename(columns={'BAAs': 'Transacting BAAs'})

    # Create two perspectives - import and export to use for comparison in selection of the final exchange value between the BAAs
    df_trade_sum_1_2 = df_ba_trade_sum.groupby(['BAA1', 'BAA2','Transacting BAAs'], as_index=False)[['Exchange']].sum()
    df_trade_sum_2_1 = df_ba_trade_sum.groupby(['BAA2', 'BAA1', 'Transacting BAAs'], as_index=False)[['Exchange']].sum()
    df_trade_sum_1_2.columns = ['BAA1_1_2', 'BAA2_1_2','Transacting BAAs_1_2', 'Exchange_1_2']
    df_trade_sum_2_1.columns = ['BAA2_2_1', 'BAA1_2_1','Transacting BAAs_2_1', 'Exchange_2_1']

    # Combine two grouped tables for comparison for exchange values
    df_concat_trade = pd.concat([df_trade_sum_1_2,df_trade_sum_2_1], axis = 1)
    df_concat_trade['Exchange_1_2_abs'] = df_concat_trade['Exchange_1_2'].abs()
    df_concat_trade['Exchange_2_1_abs'] = df_concat_trade['Exchange_2_1'].abs()

    # Create new column to check if BAAs designate as either both exporters or both importers
    # or if one of the entities in the transaction reports a zero value
    # Drop combinations where any of these conditions are true, keep everything else
    df_concat_trade['Status_Check'] = np.where(((df_concat_trade['Exchange_1_2'] > 0) & (df_concat_trade['Exchange_2_1'] > 0)) \
                   |((df_concat_trade['Exchange_1_2'] < 0) & (df_concat_trade['Exchange_2_1'] < 0)) \
                   | ((df_concat_trade['Exchange_1_2'] == 0) | (df_concat_trade['Exchange_2_1'] == 0)), 'drop', 'keep')

    # Calculate the difference in exchange values
    df_concat_trade['Delta'] = df_concat_trade['Exchange_1_2_abs'] - df_concat_trade['Exchange_2_1_abs']

    # Calculate percent diff of exchange_abs values - this can be done two
    # ways: relative to the 1_2 exchange or relative to the 2_1 exchange -
    # perform the calc both ways and take the average
    df_concat_trade['Percent_Diff_Avg'] = ((abs((df_concat_trade['Exchange_1_2_abs']/df_concat_trade['Exchange_2_1_abs'])-1)) \
        + (abs((df_concat_trade['Exchange_2_1_abs']/df_concat_trade['Exchange_1_2_abs'])-1)))/2

    # Mean exchange value
    df_concat_trade['Exchange_mean'] = df_concat_trade[['Exchange_1_2_abs', 'Exchange_2_1_abs']].mean(axis=1)

    # Percent diff equation creates NaN where both values are 0; fill with 0
    df_concat_trade['Percent_Diff_Avg'].fillna(0, inplace = True)

    # Final exchange value based on logic: if the percent diff is less than
    # 20%, take the mean; if not, use the value as reported by the exporting
    # BAA. First figure out which BAA is the exporter by checking the value
    # of Exchange_1_2. If that value is positive, BAA1 exported to BAA2; if
    # negative, use the value from Exchange_2_1.
    df_concat_trade['Final_Exchange'] = np.where((df_concat_trade['Percent_Diff_Avg'].abs() < 0.2),
                   df_concat_trade['Exchange_mean'],np.where((df_concat_trade['Exchange_1_2'] > 0),
                                  df_concat_trade['Exchange_1_2'],df_concat_trade['Exchange_2_1']))


    # Assign final designation of BAA as exporter or importer based on logical assignment
    df_concat_trade['Export_BAA'] = np.where((df_concat_trade['Exchange_1_2'] > 0), df_concat_trade['BAA1_1_2'],
                   np.where((df_concat_trade['Exchange_1_2'] < 0), df_concat_trade['BAA2_1_2'],''))

    df_concat_trade['Import_BAA'] = np.where((df_concat_trade['Exchange_1_2'] < 0), df_concat_trade['BAA1_1_2'],
                   np.where((df_concat_trade['Exchange_1_2'] > 0), df_concat_trade['BAA2_1_2'],''))

    df_concat_trade = df_concat_trade[df_concat_trade['Status_Check'] == 'keep']


    # Create the final trading matrix; first grab the necessary columns, rename the columns and then pivot
    df_concat_trade_subset = df_concat_trade[['Export_BAA', 'Import_BAA', 'Final_Exchange']]

    df_concat_trade_subset.columns = ['Exporting_BAA', 'Importing_BAA', 'Amount']

    df_trade_pivot = df_concat_trade_subset.pivot_table(index = 'Exporting_BAA', columns = 'Importing_BAA', values = 'Amount').fillna(0)


    # This cell continues formatting the df_trade
    # Find missing BAs - need to add them in so that we have a square matrix
    # Not all BAs are involved in transactions

    trade_cols = list(df_trade_pivot.columns.values)
    trade_rows = list(df_trade_pivot.index.values)

    trade_cols_set = set(trade_cols)
    trade_rows_set = set(trade_rows)
    trade_ba_ref_set = set(ba_cols)

    trade_col_diff = list(trade_ba_ref_set - trade_cols_set)
    trade_col_diff.sort(key = str.upper)

    trade_row_diff = list(trade_ba_ref_set - trade_rows_set)
    trade_row_diff.sort(key=str.upper)

    # Add in missing columns, then sort in alphabetical order
    for i in trade_col_diff:
        df_trade_pivot[i] = 0

    df_trade_pivot = df_trade_pivot.sort_index(axis=1)

    # Add in missing rows, then sort in alphabetical order
    for i in trade_row_diff:
        df_trade_pivot.loc[i,:] = 0

    df_trade_pivot = df_trade_pivot.sort_index(axis=0)

    # Add Canadian Imports to the trading matrix
    # CA imports are specified in an external file
    df_CA_Imports_Cols = pd.read_csv(data_dir + '/CA_Imports_Cols.csv', index_col = 0)

    df_CA_Imports_Rows = pd.read_csv(data_dir + '/CA_Imports_Rows.csv', index_col = 0)
    df_CA_Imports_Rows = df_CA_Imports_Rows[['us_ba', str(year)]]
    df_CA_Imports_Rows = df_CA_Imports_Rows.pivot(columns = 'us_ba', values = str(year))

    df_concat_trade_CA = pd.concat([df_trade_pivot, df_CA_Imports_Rows])
    df_concat_trade_CA = pd.concat([df_concat_trade_CA, df_CA_Imports_Cols], axis = 1)
    df_concat_trade_CA.fillna(0, inplace = True)
    df_trade_pivot = df_concat_trade_CA
    df_trade_pivot = df_trade_pivot.sort_index(axis=0)
    df_trade_pivot = df_trade_pivot.sort_index(axis=1)

    # Perform trading calculations as provided in Qu et al (2018) to
    # determine the composition of a BA consumption mix

    # Create total inflow vector x and then convert to a diagonal matrix x-hat
    logging.info("Inflow vector")
    x = []
    for i in range (len(df_net_gen_sum)):
        x.append(df_net_gen_sum.iloc[i] + df_trade_pivot.sum(axis = 0).iloc[i])

    x_np = np.array(x)

    # If values are zero, x_hat matrix will be singular, set BAAs with 0 to small value (1)
    df_x = pd.DataFrame(data = x_np, index = df_trade_pivot.index)
    df_x = df_x.rename(columns = {0:'inflow'})
    df_x.loc[df_x['inflow'] == 0] = 1

    x_np = df_x.values

    x_hat = np.diagflat(x_np)


    # Create consumption vector c and then convert to a diagonal matrix c-hat
    # Calculate c based on x and T
    logging.info("consumption vector")
    c = []

    for i in range(len(df_net_gen_sum)):
        c.append(x[i] - df_trade_pivot.sum(axis = 1).iloc[i])

    c_np = np.array(c)
    c_hat = np.diagflat(c_np)

    # Convert df_trade_pivot to matrix
    T = df_trade_pivot.values

    # Create matrix to split T into distinct interconnections - i.e., prevent trading between eastern and western interconnects
    # Connections between the western and eastern interconnects are through SWPP and WAUE
    logging.info("Matrix operations")
    interconnect = df_trade_pivot.copy()
    interconnect[:] = 1
    interconnect.loc['SWPP',['EPE', 'PNM', 'PSCO', 'WACM']] = 0
    interconnect.loc['WAUE',['WAUW', 'WACM']] = 0
    interconnect_mat = interconnect.values
    T_split = np.multiply(T, interconnect_mat)

    # Matrix trading math (see Qu et al. 2018 ES&T paper)
    x_hat_inv = np.linalg.inv(x_hat)

    B = np.matmul(T_split, x_hat_inv)

    I = np.identity(len(df_net_gen_sum))

    diff_I_B = I - B

    G = np.linalg.inv(diff_I_B)

    # H = G * c_hat * x_hat_inv. np.matmul only multiplies two arrays (a
    # third positional argument is treated as the 'out' buffer), so the
    # product is chained here.
    G_c = np.matmul(G, c_hat)
    H = np.matmul(G_c, x_hat_inv)

    df_G = pd.DataFrame(G)
    df_B = pd.DataFrame(B)
    df_H = pd.DataFrame(H)

    # Convert H to pandas dataframe, populate index and columns

    df_final_trade_out = df_H
    df_final_trade_out.columns = df_net_gen_sum.index
    df_final_trade_out.index = df_net_gen_sum.index

    # Develop trading input for the eLCI code. Melt the dataframe to end up
    # with a three-column dataframe; repeat for both possible aggregation
    # levels - BA and FERC market region.

    # Establish a threshold of 0.00001 for inclusion in the final trading
    # matrix; the matrix calculation leaves lots of really small values
    # (e.g., 2.0e-15).

    df_final_trade_out_filt = df_final_trade_out.copy()
    col_list = df_final_trade_out.columns.tolist()
    # Adding in a filter for balancing authorities that are not associated
    # with any specific plants in EIA860 - there won't be any data for them
    # in the emissions dataframes. We'll set their quantities to 0 so that
    # the consumption mixes are made up of the rest of the incoming
    # balancing authority areas.
    eia860_bas = sorted(
        list(eia860_df["Balancing Authority Code"].dropna().unique())
        + list(df_CA_Imports_Cols.columns)
    )
    keep_rows = [x for x in df_final_trade_out_filt.index if x in eia860_bas]
    keep_cols = [x for x in df_final_trade_out_filt.columns if x in eia860_bas]
    df_final_trade_out_filt = df_final_trade_out_filt.loc[keep_rows, keep_cols]
    col_list = df_final_trade_out_filt.columns.tolist()
    for i in col_list:
        df_final_trade_out_filt[i] = np.where(
            df_final_trade_out_filt[i].abs()
            / df_final_trade_out_filt[i].sum() < 0.00001,
            0, df_final_trade_out_filt[i].abs())

    df_final_trade_out_filt = df_final_trade_out_filt.reset_index()
    df_final_trade_out_filt = df_final_trade_out_filt.rename(columns = {'index':'Source BAA'})

    df_final_trade_out_filt_melted = df_final_trade_out_filt.melt(id_vars = 'Source BAA' , value_vars=col_list)
    df_final_trade_out_filt_melted = df_final_trade_out_filt_melted.rename(columns = {'Source BAA':'export BAA', 'variable':'import BAA'})


    # Merge to bring in import region name matched with BAA
    df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted.merge(df_BA_NA, left_on = 'import BAA', right_on = 'BA_Acronym')
    df_final_trade_out_filt_melted_merge.rename(columns={'FERC_Region': 'import ferc region', 'FERC_Region_Abbr':'import ferc region abbr'}, inplace=True)
    df_final_trade_out_filt_melted_merge.drop(columns = ['BA_Acronym', 'BA_Name', 'NCR ID#', 'EIA_Region', 'EIA_Region_Abbr'], inplace = True)

    # Merge to bring in export region name matched with BAA
    df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted_merge.merge(df_BA_NA, left_on = 'export BAA', right_on = 'BA_Acronym')
    if regions_to_keep is not None:
        # module_logger.info(f"{regions_to_keep}")
        # module_logger.info(f"{df_final_trade_out_filt_melted_merge['BA_Name'].unique()}")
        df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted_merge.loc[
            df_final_trade_out_filt_melted_merge["BA_Name"].isin(regions_to_keep), :]
    df_final_trade_out_filt_melted_merge.rename(columns={'FERC_Region': 'export ferc region', 'FERC_Region_Abbr':'export ferc region abbr'}, inplace=True)
    df_final_trade_out_filt_melted_merge.drop(columns = ['BA_Acronym', 'BA_Name', 'NCR ID#', 'EIA_Region', 'EIA_Region_Abbr'], inplace = True)
#    if subregion == 'BA':
    # Develop final df for BAA
    BAA_import_grouped_tot = df_final_trade_out_filt_melted_merge.groupby(['import BAA'])['value'].sum().reset_index()
    BAA_final_trade = df_final_trade_out_filt_melted_merge.copy()
    BAA_final_trade = BAA_final_trade.drop(columns = ['import ferc region', 'export ferc region', 'import ferc region abbr', 'export ferc region abbr'])
    BAA_final_trade = BAA_final_trade.merge(BAA_import_grouped_tot, left_on = 'import BAA', right_on = 'import BAA')
    BAA_final_trade = BAA_final_trade.rename(columns = {'value_x':'value','value_y':'total'})
    BAA_final_trade['fraction'] = BAA_final_trade['value']/BAA_final_trade['total']
    BAA_final_trade = BAA_final_trade.fillna(value = 0)
    BAA_final_trade = BAA_final_trade.drop(columns = ['value', 'total'])
    # Remove Canadian BAs in import list
    BAA_filt = BAA_final_trade['import BAA'].isin(eia860_bas)
    BAA_final_trade = BAA_final_trade[BAA_filt]
    # There are some BAs that will have 0 trade. Some of these are
    # legitimate - Alcoa Yadkin has no demand (i.e., all power generation is
    # exported) - while others seem to be errors. For those BAs with actual
    # demand, we'll set the consumption mix to 100% from that BA. For those
    # without demand, the fraction will be set to near 0 just to make sure
    # systems can be built in openLCA.
    BAA_zero_trade = [
        x for x in list(BAA_final_trade["import BAA"].unique())
        if BAA_final_trade.loc[
            BAA_final_trade["import BAA"] == x, "fraction"].sum() == 0
    ]
    BAAs_from_zero_trade_with_demand = []
    for d_row in DEMAND_ROWS:
        if d_row["series_id"].split('.')[1].split('-')[0] in BAA_zero_trade:
            BAAs_from_zero_trade_with_demand.append(
                d_row["series_id"].split('.')[1].split('-')[0])
    BAAs_from_zero_trade_with_demand = list(
        set(BAAs_from_zero_trade_with_demand))
    del(DEMAND_ROWS)
    for baa in BAAs_from_zero_trade_with_demand:
        # .loc (not .at) is needed for boolean-mask assignment; .at only
        # accepts a single row/column label pair.
        BAA_final_trade.loc[
            (BAA_final_trade["import BAA"] == baa)
            & (BAA_final_trade["export BAA"] == baa), "fraction"] = 1
    for baa in list(set(BAA_zero_trade) - set(BAAs_from_zero_trade_with_demand)):
        BAA_final_trade.loc[
            (BAA_final_trade["import BAA"] == baa)
            & (BAA_final_trade["export BAA"] == baa), "fraction"] = 1E-15
        # It was later decided not to create consumption mixes for BAs that
        # don't have imports.
        BAA_final_trade.drop(
            BAA_final_trade[BAA_final_trade["import BAA"] == baa].index,
            inplace=True)
    BAA_final_trade.to_csv(output_dir + '/BAA_final_trade_{}.csv'.format(year))
    BAA_final_trade["export_name"] = BAA_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    BAA_final_trade["import_name"] = BAA_final_trade["import BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"])
#        return BAA_final_trade
#    elif subregion == 'FERC':
    ferc_import_grouped_tot = df_final_trade_out_filt_melted_merge.groupby(['import ferc region'])['value'].sum().reset_index()
    # Develop final df for FERC Market Region
    ferc_final_trade = df_final_trade_out_filt_melted_merge.copy()
#    ferc_final_trade = ferc_final_trade.groupby(['import ferc region abbr', 'import ferc region', 'export ferc region','export ferc region abbr'])['value'].sum().reset_index()
    ferc_final_trade = ferc_final_trade.groupby(['import ferc region abbr', 'import ferc region', 'export BAA'])['value'].sum().reset_index()
    ferc_final_trade = ferc_final_trade.merge(ferc_import_grouped_tot, left_on = 'import ferc region', right_on = 'import ferc region')
    ferc_final_trade = ferc_final_trade.rename(columns = {'value_x':'value','value_y':'total'})
    ferc_final_trade['fraction'] = ferc_final_trade['value']/ferc_final_trade['total']
    ferc_final_trade = ferc_final_trade.fillna(value = 0)
    ferc_final_trade = ferc_final_trade.drop(columns = ['value', 'total'])
    # Remove Canadian entry in import list
    ferc_list.remove('CAN')
    ferc_filt = ferc_final_trade['import ferc region abbr'].isin(ferc_list)
    ferc_final_trade = ferc_final_trade[ferc_filt]
    ferc_final_trade.to_csv(output_dir + '/ferc_final_trade_{}.csv'.format(year))
    ferc_final_trade["export_name"]=ferc_final_trade["export BAA"].map(df_BA_NA[["BA_Acronym","BA_Name"]].set_index("BA_Acronym")["BA_Name"])
#        return ferc_final_trade
#    elif subregion== 'US':
    us_import_grouped_tot = df_final_trade_out_filt_melted_merge['value'].sum()
    us_final_trade = df_final_trade_out_filt_melted_merge.copy()
    us_final_trade = us_final_trade.groupby(['export BAA'])['value'].sum().reset_index()
    us_final_trade["fraction"] = us_final_trade["value"] / us_import_grouped_tot
    us_final_trade = us_final_trade.fillna(value=0)
    us_final_trade = us_final_trade.drop(columns=["value"])
    us_final_trade["export_name"] = us_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"])
#        return us_final_trade
    return {'BA': BAA_final_trade, 'FERC': ferc_final_trade, 'US': us_final_trade}
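
A small self-contained sketch of the Qu et al. (2018) trading math used above, on a hypothetical two-BA system (toy numbers; the real function builds T from the EIA bulk data):

import numpy as np

# Toy system: BA0 generates 100 and exports 20 to BA1; BA1 generates 50.
T = np.array([[0.0, 20.0],    # trade matrix: rows export, columns import
              [0.0, 0.0]])
net_gen = np.array([100.0, 50.0])

x = net_gen + T.sum(axis=0)        # total inflow = generation + imports
x_hat_inv = np.linalg.inv(np.diagflat(x))
c = x - T.sum(axis=1)              # consumption = inflow - exports
c_hat = np.diagflat(c)

B = T @ x_hat_inv                  # traded share of each BA's inflow
G = np.linalg.inv(np.identity(2) - B)
H = G @ c_hat @ x_hat_inv          # consumption-mix coefficients

# As in the function above, each column of H is later normalized into
# import fractions for the corresponding consuming BA.
print(H)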