Exemplo n.º 1
0
def create_generation_process_df():
    """
    Build the facility-level emissions dataframe for power plants.

    Reads emissions and generation data from different sources to provide
    facility-level emissions. Most important inputs to this process come
    from the model configuration, which is read from the module-level
    ``model_specs`` object (``replace_egrid``, ``eia_gen_year``,
    ``egrid_year`` and ``electricity_lci_target_year``).

    Parameters
    ----------
    None

    Returns
    -------
    dataframe
        Dataframe that includes all facility-level emissions, merged with
        generation and facility/fuel/region data. Data quality columns,
        ``stage_code``, ``Compartment_path`` and balancing authority
        name/region columns are added before the result is passed through
        ``manual_edits.check_for_edits``.
    """
    from electricitylci.eia923_generation import (build_generation_data,
                                                  eia923_primary_fuel)
    from electricitylci.egrid_filter import (
        emissions_and_waste_for_selected_egrid_facilities as emissions_df,
    )
    from electricitylci.generation import (
        egrid_facilities_w_fuel_region,
        add_technological_correlation_score,
        add_temporal_correlation_score,
    )
    import electricitylci.emissions_other_sources as em_other
    import electricitylci.ampd_plant_emissions as ampd
    from electricitylci.combinator import ba_codes
    import electricitylci.manual_edits as edits

    # Maps both full compartment paths and bare compartment names onto the
    # short compartment names written to the "Compartment" column.
    COMPARTMENT_DICT = {
        "emission/air": "air",
        "emission/water": "water",
        "emission/ground": "ground",
        "input": "input",
        "output": "output",
        "waste": "waste",
        "air": "air",
        "water": "water",
        "ground": "ground",
    }

    # NOTE: the objects imported from egrid_filter / generation are
    # module-level objects; the column assignments below modify them in
    # place, exactly as the original implementation did.
    if model_specs.replace_egrid:
        generation_data = build_generation_data().drop_duplicates()
        cems_df = ampd.generate_plant_emissions(model_specs.eia_gen_year)
        cems_df.drop(columns=["FlowUUID"], inplace=True)
        emissions_df = em_other.integrate_replace_emissions(
            cems_df, emissions_df)
    else:
        from electricitylci.egrid_filter import electricity_for_selected_egrid_facilities
        generation_data = electricity_for_selected_egrid_facilities
        generation_data["Year"] = model_specs.egrid_year
        generation_data["FacilityID"] = generation_data["FacilityID"].astype(
            int)

    # The emissions frame intentionally keeps its "FacilityID" column: the
    # previous code called .drop(columns=["FacilityID"]) here but discarded
    # the result, and the later drop of "FacilityID_x"/"FacilityID_y"
    # depends on both merge inputs carrying a "FacilityID" column.
    emissions_df["eGRID_ID"] = emissions_df["eGRID_ID"].astype(int)
    final_database = pd.merge(
        left=emissions_df,
        right=generation_data,
        right_on=["FacilityID", "Year"],
        left_on=["eGRID_ID", "Year"],
        how="left",
    )
    egrid_facilities_w_fuel_region[
        "FacilityID"] = egrid_facilities_w_fuel_region["FacilityID"].astype(
            int)
    final_database = pd.merge(
        left=final_database,
        right=egrid_facilities_w_fuel_region,
        left_on="eGRID_ID",
        right_on="FacilityID",
        how="left",
        suffixes=["", "_right"],
    )

    if model_specs.replace_egrid:
        # Overwrite the fuel category of every row with the EIA 923
        # primary fuel category of its plant.
        primary_fuel_df = eia923_primary_fuel(year=model_specs.eia_gen_year)
        primary_fuel_df.rename(columns={'Plant Id': "eGRID_ID"}, inplace=True)
        primary_fuel_df["eGRID_ID"] = primary_fuel_df["eGRID_ID"].astype(int)
        key_df = (primary_fuel_df[[
            "eGRID_ID", "FuelCategory"
        ]].dropna().drop_duplicates(subset="eGRID_ID").set_index("eGRID_ID"))
        final_database["FuelCategory"] = final_database["eGRID_ID"].map(
            key_df["FuelCategory"])
    else:
        # Only fill rows whose fuel category is missing, using the first
        # non-null category found for the same eGRID_ID.
        key_df = (final_database[[
            "eGRID_ID", "FuelCategory"
        ]].dropna().drop_duplicates(subset="eGRID_ID").set_index("eGRID_ID"))
        missing_fuel = final_database["FuelCategory"].isnull()
        final_database.loc[missing_fuel, "FuelCategory"] = final_database.loc[
            missing_fuel, "eGRID_ID"].map(key_df["FuelCategory"])

    # Working copy of the fuel category; it replaces "FuelCategory" once
    # duplicates have been removed further down.
    final_database["Final_fuel_agg"] = final_database["FuelCategory"]

    # If the merges produced separate "Year_x"/"Year_y" columns, keep only
    # rows where they agree. When either column is absent there is nothing
    # to filter.
    try:
        year_filter = final_database["Year_x"] == final_database["Year_y"]
    except KeyError:
        pass
    else:
        final_database = final_database.loc[year_filter, :].drop(
            columns="Year_y")
    final_database.rename(columns={"Year_x": "Year"}, inplace=True)

    final_database = map_emissions_to_fedelemflows(final_database)
    dup_cols_check = [
        "FacilityID",
        "FuelCategory",
        "FlowName",
        "FlowAmount",
        "Compartment",
    ]
    # Keep only the first occurrence of any repeated column label before
    # de-duplicating rows.
    final_database = final_database.loc[:,
                                        ~final_database.columns.duplicated()]
    final_database = final_database.drop_duplicates(subset=dup_cols_check)
    final_database.drop(
        columns=["FuelCategory", "FacilityID_x", "FacilityID_y"], inplace=True)
    final_database.rename(
        columns={
            "Final_fuel_agg": "FuelCategory",
            "TargetFlowUUID": "FlowUUID",
        },
        inplace=True,
    )

    final_database = add_temporal_correlation_score(
        final_database, model_specs.electricity_lci_target_year)
    final_database = add_technological_correlation_score(final_database)
    final_database["DataCollection"] = 5
    final_database["GeographicalCorrelation"] = 1

    final_database["eGRID_ID"] = final_database["eGRID_ID"].astype(int)

    final_database.sort_values(by=["eGRID_ID", "Compartment", "FlowName"],
                               inplace=True)
    final_database["stage_code"] = "Power plant"
    # Preserve the original compartment value before collapsing it to the
    # short name.
    final_database["Compartment_path"] = final_database["Compartment"]
    final_database["Compartment"] = final_database["Compartment_path"].map(
        COMPARTMENT_DICT)
    final_database["Balancing Authority Name"] = final_database[
        "Balancing Authority Code"].map(ba_codes["BA_Name"])
    final_database["EIA_Region"] = final_database[
        "Balancing Authority Code"].map(ba_codes["EIA_Region"])
    final_database["FERC_Region"] = final_database[
        "Balancing Authority Code"].map(ba_codes["FERC_Region"])
    final_database = edits.check_for_edits(final_database, "generation.py",
                                           "create_generation_process_df")
    return final_database
Exemplo n.º 2
0
def add_flow_representativeness_data_quality_scores(db, total_gen):
    """
    Apply the flow representativeness data quality scoring helpers to db.

    The technological correlation, temporal correlation and data collection
    scoring helpers are applied in that order, each one receiving the
    result of the previous one.

    Parameters
    ----------
    db : object
        Passed to the first scoring helper; each helper's return value is
        handed to the next.
    total_gen : object
        Forwarded unchanged to ``add_data_collection_score``.

    Returns
    -------
    object
        The value returned by ``add_data_collection_score``.
    """
    with_tech_score = add_technological_correlation_score(db)
    with_temporal_score = add_temporal_correlation_score(with_tech_score)
    return add_data_collection_score(with_temporal_score, total_gen)
Exemplo n.º 3
0
def add_fuel_inputs(gen_df, upstream_df, upstream_dict):
    """
    Converts the upstream emissions database to fuel inputs and adds them
    to the generator dataframe. This is in preparation of generating unit
    processes for openLCA.

    Parameters
    ----------
    gen_df : dataframe
        The generator df containing power plant emissions.
    upstream_df : dataframe
        The combined upstream dataframe. The columns ``plant_id``,
        ``stage_code``, ``quantity`` and ``Year`` are read from it.
    upstream_dict : dictionary
        This is the dictionary of upstream "unit processes" as generated by
        electricitylci.upstream_dict after the upstream_dict has been written
        to json-ld. This is important because the uuids for the upstream
        "unit processes" are only generated when written to json-ld.
        Entries are looked up by stage code and their ``q_reference_name``,
        ``q_reference_id`` and ``q_reference_unit`` values are used.

    Returns
    -------
    dataframe
        ``gen_df`` with the fuel input rows appended (index reset), after
        being passed through ``fill_nans``.
    """
    from electricitylci.generation import add_temporal_correlation_score

    # One row per unique plant / stage code / quantity combination.
    upstream_reduced = upstream_df.drop_duplicates(
        subset=["plant_id", "stage_code", "quantity"]
    )
    # Start from an empty frame with the generator columns so that the fuel
    # rows line up with gen_df when the two are concatenated. Assigning the
    # first Series gives the frame the index of upstream_reduced; every
    # later Series assignment aligns on that index.
    fuel_df = pd.DataFrame(columns=gen_df.columns)
    # Map the upstream dictionary onto each plant's stage code, then expand
    # the mapped dictionaries into columns. The goal is to generate the
    # fuels and associated metadata for each plant, to be merged with the
    # generation database.
    fuel_df["flowdict"] = upstream_reduced["stage_code"].map(upstream_dict)

    expand_fuel_df = fuel_df["flowdict"].apply(pd.Series)
    fuel_df.drop(columns=["flowdict"], inplace=True)

    fuel_df["Compartment"] = "input"
    fuel_df["FlowName"] = expand_fuel_df["q_reference_name"]
    fuel_df["stage_code"] = upstream_reduced["stage_code"]
    fuel_df["FlowAmount"] = upstream_reduced["quantity"]
    fuel_df["FlowUUID"] = expand_fuel_df["q_reference_id"]
    fuel_df["Unit"] = expand_fuel_df["q_reference_unit"]
    # Read the remaining columns from the de-duplicated frame as well, so
    # that every column comes from the same rows instead of relying on
    # index alignment against the full upstream_df.
    fuel_df["eGRID_ID"] = upstream_reduced["plant_id"]
    fuel_df["FacilityID"] = upstream_reduced["plant_id"]
    fuel_df["Year"] = upstream_reduced["Year"]
    # "FuelCategory" is not taken from the upstream data: it is assigned
    # from gen_df further down.

    # Plant-level attributes are taken from gen_df rather than left empty.
    merge_cols = [
        "Age",
        "Balancing Authority Code",
        "Balancing Authority Name",
        "Electricity",
        "NERC",
        "Subregion",
    ]
    fuel_df.drop(columns=merge_cols, inplace=True)
    gen_df_reduced = gen_df[merge_cols + ["eGRID_ID"]].drop_duplicates(
        subset=["eGRID_ID"]
    )

    fuel_df = fuel_df.merge(
        right=gen_df_reduced,
        left_on="eGRID_ID",
        right_on="eGRID_ID",
        how="left",
    )
    # Rows without an "Electricity" value after the left merge (for example
    # plants that have no match in gen_df) are discarded.
    fuel_df.dropna(subset=["Electricity"], inplace=True)
    fuel_df["Source"] = "eia"
    # NOTE(review): the helper is called with a single argument here;
    # confirm this matches the helper's current signature.
    fuel_df = add_temporal_correlation_score(fuel_df)
    fuel_df["DataCollection"] = 5
    fuel_df["GeographicalCorrelation"] = 1
    fuel_df["TechnologicalCorrelation"] = 1
    fuel_df["ReliabilityScore"] = 1
    fuel_df["ElementaryFlowPrimeContext"] = "input"
    # Use the fuel category recorded in gen_df for each facility (first
    # occurrence per FacilityID).
    fuel_cat_key = (
        gen_df[["FacilityID", "FuelCategory"]]
        .drop_duplicates(subset="FacilityID")
        .set_index("FacilityID")
    )
    fuel_df["FuelCategory"] = fuel_df["FacilityID"].map(
        fuel_cat_key["FuelCategory"]
    )
    gen_plus_up_df = pd.concat([gen_df, fuel_df], ignore_index=True)
    gen_plus_up_df = fill_nans(gen_plus_up_df)
    return gen_plus_up_df