def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get the exogenous project availability inputs from the database and
    validate them, writing any validation errors to the database.

    :param scenario_id:
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    availabilities = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    df = cursor_to_df(availabilities)
    idx_cols = ["project", "timepoint"]
    value_cols = ["availability_derate"]

    # Check data types availability
    expected_dtypes = get_expected_dtypes(conn, [
        "inputs_project_availability",
        "inputs_project_availability_exogenous"
    ])
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="High",
        errors=dtype_errors,
    )

    # Check for missing inputs
    msg = ("If not specified, availability is assumed to be 100%. If you "
           "don't want to specify any availability derates, simply leave the "
           "exogenous_availability_scenario_id empty and this message will "
           "disappear.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="Low",
        errors=validate_missing_inputs(df, value_cols, idx_cols, msg),
    )

    # Check for correct sign (derates must be within [0, 1])
    # BUG FIX: the value column is "availability_derate", not "availability",
    # so the previous guard ("availability" not in error_columns) always
    # passed and the sign check could run on a column that had already
    # failed the dtype validation.
    if "availability_derate" not in error_columns:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_availability_exogenous",
            severity="High",
            errors=validate_values(df, value_cols, min=0, max=1),
        )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Pull the endogenous-availability parameters for this scenario/stage from
    the database and run input validations, writing the results to the
    validation table.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    endog_df = cursor_to_df(
        get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                 stage, conn)
    )

    # Validate the data types against the expected schema
    dtypes_expected = get_expected_dtypes(conn, [
        "inputs_project_availability",
        "inputs_project_availability_endogenous"
    ])
    type_errs, bad_cols = validate_dtypes(endog_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=type_errs)

    # Flag projects with any of the required parameters missing
    msg = ""
    required_cols = [
        "unavailable_hours_per_period",
        "unavailable_hours_per_event_min",
        "available_hours_between_events_min"
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="Low",
        errors=validate_missing_inputs(endog_df, required_cols, "project",
                                       msg))

    # The minimum unavailable hours per event must not exceed the total
    # unavailable hours per period
    monotonic_cols = ["unavailable_hours_per_event_min",
                      "unavailable_hours_per_period"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=validate_column_monotonicity(df=endog_df,
                                            cols=monotonic_cols,
                                            idx_col=["project"]))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Fetch the period-level temporal inputs from the database and validate
    them, writing any problems to the validation results table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that hours in full period is within x and y
    #  ("within" check or "validate" check in param definition returns
    #  obscure error message that isn't helpful).

    period_df = cursor_to_df(
        get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                 stage, conn)
    )

    # Expected data types; hours_in_period_timepoints is hard-coded to
    # "numeric" since it is not in the table schema lookup
    dtypes_expected = get_expected_dtypes(
        conn=conn, tables=["inputs_temporal_periods"])
    dtypes_expected["hours_in_period_timepoints"] = "numeric"

    # Validate data types
    type_errs, bad_cols = validate_dtypes(period_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="High",
        errors=type_errs,
    )

    # Numeric columns that passed the dtype check must be non-negative
    numeric_ok = {
        col for col in period_df.columns
        if dtypes_expected[col] == "numeric"
    } - set(bad_cols)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="Mid",
        errors=validate_values(period_df, numeric_ok, "period", min=0),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Retrieve this module's specified-capacity and fixed-cost inputs from the
    database and validate them, recording any issues in the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    spec_params = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    portfolio_projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "gen_ret_bin")

    # Load the query results into a DataFrame and pull out the projects
    # that actually have specified-capacity rows
    spec_df = cursor_to_df(spec_params)
    projects_w_spec = spec_df["project"].unique()

    # Expected data types for the relevant input tables
    dtypes_expected = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_specified_capacity",
            "inputs_project_specified_fixed_cost"
        ])

    # Validate data types
    type_errs, bad_cols = validate_dtypes(spec_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=type_errs)

    # Numeric columns that passed the dtype check must be non-negative
    numeric_ok = {
        col for col in spec_df.columns if dtypes_expected[col] == "numeric"
    } - set(bad_cols)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_values(spec_df, numeric_ok, min=0))

    # Ensure project capacity & fixed cost is specified in at least 1 period
    msg = "Expected specified capacity & fixed costs for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_idxs(actual_idxs=projects_w_spec,
                             req_idxs=portfolio_projects,
                             idx_label="project",
                             msg=msg))

    # Check for missing values (vs. missing row entries above)
    value_cols = ["specified_capacity_mw", "annual_fixed_cost_per_mw_year"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_missing_inputs(spec_df, value_cols))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs.

    Improvement over the previous version: the (min, max) cumulative new
    build checks for the MW and MWh columns were duplicated verbatim; they
    are now driven by a single loop over the two column pairs, preserving
    the original check order.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    new_stor_costs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    df_cols = cost_df.columns

    # get the project lists
    cost_projects = cost_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_project_new_cost",
                           "inputs_project_new_potential"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [col for col in cost_df.columns
                       if expected_dtypes[col] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check that all projects of this capacity type have cost data for at
    # least one vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects,
            idx_label="project", msg=msg
        ),
    )

    # Run the new-potential checks once for the MW column pair and once for
    # the MWh column pair (previously duplicated code)
    for cols in (
        ["min_cumulative_new_build_mw", "max_cumulative_new_build_mw"],
        ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"],
    ):
        # Check that maximum new build doesn't decrease over vintages
        if cols[1] in df_cols:
            write_validation_to_database(
                conn=conn,
                scenario_id=scenario_id,
                subproblem_id=subproblem,
                stage_id=stage,
                gridpath_module=__name__,
                db_table="inputs_project_new_potential",
                severity="Mid",
                errors=validate_row_monotonicity(
                    df=cost_df, col=cols[1], rank_col="vintage"
                ),
            )

        # check that min build <= max build
        if set(cols).issubset(set(df_cols)):
            write_validation_to_database(
                conn=conn,
                scenario_id=scenario_id,
                subproblem_id=subproblem,
                stage_id=stage,
                gridpath_module=__name__,
                db_table="inputs_project_new_potential",
                severity="High",
                errors=validate_column_monotonicity(
                    df=cost_df, cols=cols, idx_col=["project", "vintage"]
                ),
            )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs.

    Bug fix relative to the previous version: the db_table argument for the
    invalid cap-type/op-type combo check was a broken multi-line string
    literal containing the typo "inputs_tranmission_portfolios"; it is now
    a single valid string with the correct table name.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the transmission inputs
    transmission_lines = get_inputs_from_database(scenario_id, subscenarios,
                                                  subproblem, stage, conn)

    # Convert input data into pandas DataFrame
    df = cursor_to_df(transmission_lines)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn,
        [
            "inputs_transmission_portfolios",
            "inputs_transmission_availability",
            "inputs_transmission_load_zones",
            "inputs_transmission_operational_chars",
        ],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_portfolios, "
                 "inputs_transmission_load_zones, "
                 "inputs_transmission_operational_chars",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # (use "col", not "c", so the cursor name isn't shadowed)
    numeric_columns = [col for col in df.columns
                       if expected_dtypes[col] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Ensure we're not combining incompatible capacity and operational types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {}
        FROM mod_tx_capacity_and_tx_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        # FIXED: was a broken multi-line literal with a typo
        # ("inputs_tranmission_portfolios")
        db_table="inputs_transmission_operational_chars, "
                 "inputs_transmission_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos),
    )

    # Check reactance > 0
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, ["reactance_ohms"], min=0,
                               strict_min=True),
    )

    # Check that all portfolio tx lines are present in the opchar inputs
    msg = ("All tx lines in the portfolio should have an operational type "
           "specified in the inputs_transmission_operational_chars table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df, ["operational_type"],
                                       idx_col="transmission_line",
                                       msg=msg),
    )

    # Check that all portfolio tx lines are present in the load zone inputs
    msg = ("All tx lines in the portfolio should have a load zone from/to "
           "specified in the inputs_transmission_load_zones table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_load_zones",
        severity="High",
        errors=validate_missing_inputs(df,
                                       ["load_zone_from", "load_zone_to"],
                                       idx_col="transmission_line",
                                       msg=msg),
    )

    # Check that all tx load zones are part of the active load zones
    load_zones = get_load_zones(conn, subscenarios)
    for col in ["load_zone_from", "load_zone_to"]:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_transmission_load_zones",
            severity="High",
            errors=validate_columns(df, col, valids=load_zones),
        )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get the project inputs for this scenario from the database and validate
    them, writing every validation result back to the database.

    Checks performed: column data types, non-negativity of numeric columns,
    invalid capacity-type/operational-type combinations, validity of the
    capacity and operational types, and presence of availability, opchar,
    and load zone inputs for every portfolio project.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the project inputs
    projects = get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                        stage, conn)

    # Convert input data into pandas DataFrame
    df = cursor_to_df(projects)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn, ["inputs_project_portfolios",
               "inputs_project_availability",
               "inputs_project_load_zones",
               "inputs_project_operational_chars"]
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative
    # NOTE(review): the comprehension variable `c` shadows the cursor name
    # only inside the comprehension's own scope (Python 3), so the
    # c.execute(...) calls below still see the cursor.
    numeric_columns = [c for c in df.columns
                       if expected_dtypes[c] == "numeric"]
    # Skip columns that already failed the dtype check
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0)
    )

    # Check that we're not combining incompatible cap-types and op-types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {}
        FROM mod_capacity_and_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos)
    )

    # Check that capacity type is valid
    # Note: foreign key already ensures this!
    valid_cap_types = c.execute(
        """SELECT capacity_type from mod_capacity_types"""
    ).fetchall()
    # Flatten the 1-tuples returned by fetchall into plain values
    valid_cap_types = [v[0] for v in valid_cap_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "capacity_type", valids=valid_cap_types)
    )

    # Check that operational type is valid
    # Note: foreign key already ensures this!
    valid_op_types = c.execute(
        """SELECT operational_type from mod_operational_types"""
    ).fetchall()
    # Flatten the 1-tuples returned by fetchall into plain values
    valid_op_types = [v[0] for v in valid_op_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "operational_type", valids=valid_op_types)
    )

    # Check that all portfolio projects are present in the availability inputs
    msg = "All projects in the portfolio should have an availability type " \
          "specified in the inputs_project_availability table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability",
        severity="High",
        errors=validate_missing_inputs(df, "availability_type", msg=msg)
    )

    # Check that all portfolio projects are present in the opchar inputs
    msg = "All projects in the portfolio should have an operational type " \
          "and balancing type specified in the " \
          "inputs_project_operational_chars table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df,
                                       ["operational_type",
                                        "balancing_type_project"],
                                       msg=msg)
    )

    # Check that all portfolio projects are present in the load zone inputs
    msg = "All projects in the portfolio should have a load zone " \
          "specified in the inputs_project_load_zones table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_load_zones",
        severity="High",
        errors=validate_missing_inputs(df, "load_zone", msg=msg)
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn): """ Get inputs from database and validate the inputs :param subscenarios: SubScenarios object with all subscenario info :param subproblem: :param stage: :param conn: database connection :return: """ # Get the fuel input data fuels, fuel_prices = get_inputs_from_database(scenario_id, subscenarios, subproblem, stage, conn) # Get the projects fuels c1 = conn.cursor() projects = c1.execute("""SELECT project, fuel FROM inputs_project_portfolios INNER JOIN (SELECT project, fuel FROM inputs_project_operational_chars WHERE project_operational_chars_scenario_id = {} AND fuel IS NOT NULL) AS op_char USING (project) WHERE project_portfolio_scenario_id = {}""".format( subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID, subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID)) # Get the relevant periods and months c2 = conn.cursor() periods_months = c2.execute("""SELECT DISTINCT period, month FROM inputs_temporal WHERE temporal_scenario_id = {} AND subproblem_id = {} AND stage_id = {};""".format(subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage)) # Convert input data into pandas DataFrame fuels_df = cursor_to_df(fuels) fuel_prices_df = cursor_to_df(fuel_prices) prj_df = cursor_to_df(projects) # Get relevant lists fuels = fuels_df["fuel"].to_list() actual_fuel_periods_months = list( fuel_prices_df[["fuel", "period", "month"]].itertuples(index=False, name=None)) req_fuel_periods_months = [(f, p, m) for (p, m) in periods_months for f in fuels] # Check data types expected_dtypes = get_expected_dtypes( conn, ["inputs_project_fuels", "inputs_project_fuel_prices"]) dtype_errors, error_columns = validate_dtypes(fuels_df, expected_dtypes) write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuels", severity="High", errors=dtype_errors) dtype_errors, error_columns = validate_dtypes(fuel_prices_df, expected_dtypes) 
write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuel_prices", severity="High", errors=dtype_errors) # TODO: couldn't this be a simple foreign key or is NULL not allowed then? # TODO: should this check be in projects.init instead? # Check that fuels specified for projects are valid fuels write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_operational_chars", severity="High", errors=validate_columns(prj_df, "fuel", valids=fuels)) # Check that fuel prices exist for the period and month write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuel_prices", severity="High", errors=validate_idxs( actual_idxs=actual_fuel_periods_months, req_idxs=req_fuel_periods_months, idx_label="(fuel, period, month)"))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Fetch the transmission hurdle rate inputs from the database and run the
    input validations, writing the results to the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    rates_df = cursor_to_df(
        get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                 stage, conn)
    )

    # Expected data types for the hurdle rates table
    dtypes_expected = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_hurdle_rates"])

    # Validate data types
    type_errs, bad_cols = validate_dtypes(rates_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=type_errs,
    )

    # Numeric columns that passed the dtype check must be non-negative
    numeric_ok = {
        col for col in rates_df.columns if dtypes_expected[col] == "numeric"
    } - set(bad_cols)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=validate_values(rates_df, numeric_ok, "transmission_line",
                               min=0),
    )

    # Hurdle rates should be present for every line/period when the feature
    # is enabled
    msg = ("Expected hurdle rates specified for each modeling period when "
           "transmission hurdle rates feature is on.")
    rate_cols = [
        "hurdle_rate_positive_direction_per_mwh",
        "hurdle_rate_negative_direction_per_mwh",
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="Low",
        errors=validate_missing_inputs(df=rates_df,
                                       col=rate_cols,
                                       idx_col=["transmission_line",
                                                "period"],
                                       msg=msg),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the inputs.

    Bug fixes relative to the previous version:
    - the min/max monotonicity check wrote its errors against
      "inputs_project_new_potential" (a copy-paste from a project module);
      it now reports against "inputs_transmission_specified_capacity".
    - that check also indexed on ["project", "period"], but this DataFrame
      is keyed by transmission line; it now uses
      ["transmission_line", "period"].

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_capacities = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    tx_lines = get_tx_lines(conn, scenario_id, subscenarios,
                            "capacity_type", "tx_spec")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(tx_capacities)
    spec_tx_lines = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_specified_capacity"])

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=dtype_errors)

    # Ensure tx_line capacity is specified in at least 1 period
    msg = "Expected specified capacity for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_idxs(actual_idxs=spec_tx_lines,
                             req_idxs=tx_lines,
                             idx_label="transmission_line",
                             msg=msg))

    # Check for missing values (vs. missing row entries above)
    cols = ["min_mw", "max_mw"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_missing_inputs(df, cols))

    # check that min_mw <= max_mw
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        # FIXED: was "inputs_project_new_potential"
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_column_monotonicity(
            df=df, cols=cols,
            # FIXED: was ["project", "period"]
            idx_col=["transmission_line", "period"]))
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get the binary-build storage inputs (costs and build sizes) from the
    database and validate them, writing all results to the database.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that there are no minimum duration inputs for this type
    #   (duration is specified by specifying the build size in mw and mwh)
    #   Maybe also check all other required / not required inputs?
    #   --> see example in gen_must_run operational_type. Seems very verbose
    #   and hard to maintain. Is there a way to generalize this?

    # Get the binary build generator inputs
    new_stor_costs, new_stor_build_size = \
        get_module_specific_inputs_from_database(
            scenario_id, subscenarios, subproblem, stage, conn)

    projects = get_projects(conn, scenario_id, subscenarios,
                            "capacity_type", "stor_new_bin")

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    bld_size_df = cursor_to_df(new_stor_build_size)

    # get the project lists
    cost_projects = cost_df["project"].unique()
    # NOTE(review): unlike cost_projects, this is not de-duplicated with
    # .unique(); presumably the build-size table has one row per project —
    # confirm against the schema.
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_new_cost",
            "inputs_project_new_binary_build_size"
        ])

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=dtype_errors)

    # Check valid numeric columns are non-negative - cost_df
    # (columns that failed the dtype check above are excluded)
    numeric_columns = [
        c for c in cost_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=validate_values(cost_df,
                                                        valid_numeric_columns,
                                                        min=0))

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df,
                                                  expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors)

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [
        c for c in bld_size_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0))

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=validate_idxs(
                                     actual_idxs=cost_projects,
                                     req_idxs=projects,
                                     idx_label="project",
                                     msg=msg))

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(actual_idxs=bld_size_projects,
                             req_idxs=projects,
                             idx_label="project"))
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Fetch the new-build transmission cost inputs from the database and run
    the input validations, writing the results to the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    cost_df = cursor_to_df(
        get_module_specific_inputs_from_database(scenario_id, subscenarios,
                                                 subproblem, stage, conn)
    )

    portfolio_tx_lines = get_tx_lines(conn, scenario_id, subscenarios,
                                      "capacity_type", "tx_new_lin")

    # Transmission lines that actually have cost rows
    tx_lines_w_cost = cost_df["transmission_line"].unique()

    # Expected data types for the new-cost table
    dtypes_expected = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_new_cost"])

    # Validate data types
    type_errs, bad_cols = validate_dtypes(cost_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=type_errs)

    # Numeric columns that passed the dtype check must be non-negative
    numeric_ok = {
        col for col in cost_df.columns if dtypes_expected[col] == "numeric"
    } - set(bad_cols)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=validate_values(cost_df, numeric_ok, "transmission_line",
                               min=0))

    # Every tx line of this capacity type needs cost data for at least one
    # vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="Mid",
        errors=validate_idxs(actual_idxs=tx_lines_w_cost,
                             req_idxs=portfolio_tx_lines,
                             idx_label="transmission_line",
                             msg=msg))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get the horizon and horizon-timepoint inputs from the database and
    validate them, writing any problems found to the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hrzs, hrz_tmps = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    c = conn.cursor()
    # Use bound parameters instead of str.format to build the query
    periods_horizons = c.execute(
        """SELECT balancing_type_horizon, period, horizon
        FROM periods_horizons
        WHERE temporal_scenario_id = ?
        AND subproblem_id = ?
        AND stage_id = ?;""",
        (subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage)
    )

    df_hrzs = cursor_to_df(hrzs)
    df_hrz_tmps = cursor_to_df(hrz_tmps)
    df_periods_hrzs = cursor_to_df(periods_horizons)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_temporal_horizons",
                "inputs_temporal_horizon_timepoints"]
    )

    # Check dtypes horizons. Keep each table's dtype-error columns in their
    # own variables: previously the horizon_timepoints dtype check clobbered
    # error_columns before the horizons numeric check below could use it.
    hrz_dtype_errors, hrz_error_columns = validate_dtypes(
        df_hrzs, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="High",
        errors=hrz_dtype_errors
    )

    # Check dtypes horizon_timepoints
    hrz_tmp_dtype_errors, hrz_tmp_error_columns = validate_dtypes(
        df_hrz_tmps, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=hrz_tmp_dtype_errors
    )

    # Check valid numeric columns are non-negative - horizons
    numeric_columns = [
        col for col in df_hrzs.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="Mid",
        errors=validate_values(df_hrzs, valid_numeric_columns, "horizon",
                               min=0)
    )

    # Check valid numeric columns are non-negative - horizon_timepoints
    # Bug fix: scan df_hrz_tmps' columns (was copy-pasted as df_hrzs) and
    # subtract that table's own dtype-error columns.
    numeric_columns = [
        col for col in df_hrz_tmps.columns
        if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_tmp_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="Mid",
        errors=validate_values(df_hrz_tmps, valid_numeric_columns,
                               ["horizon", "timepoint"], min=0)
    )

    # One horizon cannot straddle multiple periods
    msg = "All timepoints within a horizon should belong to the same period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_single_input(
            df=df_periods_hrzs,
            idx_col=["balancing_type_horizon", "horizon"],
            msg=msg)
    )

    # Make sure there are no missing horizon inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_missing_inputs(
            df=df_hrz_tmps,
            col="horizon",
            idx_col=["balancing_type_horizon", "timepoint"])
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get the gen_new_bin cost and build-size inputs from the database and
    validate them, writing any problems found to the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Get the binary build generator inputs
    new_gen_costs, new_build_size = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    projects = get_projects(conn, scenario_id, subscenarios, "capacity_type",
                            "gen_new_bin")

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_gen_costs)
    bld_size_df = cursor_to_df(new_build_size)

    # Get the project lists. Consistency fix: deduplicate bld_size_projects
    # with .unique() like cost_projects (was the raw "project" Series).
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost",
                "inputs_project_new_binary_build_size"]
    )

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative - cost_df
    numeric_columns = [c for c in cost_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0)
    )

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df,
                                                  expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [c for c in bld_size_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0)
    )

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(actual_idxs=cost_projects,
                             req_idxs=projects,
                             idx_label="project",
                             msg=msg)
    )

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(actual_idxs=bld_size_projects,
                             req_idxs=projects,
                             idx_label="project")
    )
def test_validate_dtypes(self): """ :return: """ df_columns = ["project", "capacity"] test_cases = { # Make sure correct inputs don't throw error 1: {"df": pd.DataFrame( columns=df_columns, data=[["gas_ct", 10], ["coal_plant", 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ([], []) }, # Test invalid string column 2: {"df": pd.DataFrame( columns=df_columns, data=[["gas_ct", 10], ["coal_plant", "string"]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ( ["Invalid data type for column 'capacity'; expected numeric"], ["capacity"] )}, # Test invalid numeric column 3: {"df": pd.DataFrame( columns=df_columns, data=[[1, 10], [1, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ( ["Invalid data type for column 'project'; expected string"], ["project"] )}, # If at least one string in the column, pandas will convert # all column data to string so there will be no error 4: {"df": pd.DataFrame( columns=df_columns, data=[["gas_ct", 10], [1, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ([], []) }, # Columns with all None are ignored 5: {"df": pd.DataFrame( columns=df_columns, data=[[None, 10], [None, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ([], []) }, # Columns with all NaN are ignored 6: {"df": pd.DataFrame( columns=df_columns, data=[[np.nan, 10], [np.nan, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ([], []) }, # Columns with some None are not ignored 7: {"df": pd.DataFrame( columns=df_columns, data=[[10, 10], [None, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ( ["Invalid data type for column 'project'; expected string"], ["project"] )}, # Test multiple error columns 8: {"df": pd.DataFrame( columns=df_columns, data=[[10, "string"], [10, "string"]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": 
( ["Invalid data type for column 'project'; expected string", "Invalid data type for column 'capacity'; expected numeric"], ["project", "capacity"] )} } for test_case in test_cases.keys(): expected_tuple = test_cases[test_case]["result"] actual_tuple = module_to_test.validate_dtypes( df=test_cases[test_case]["df"], expected_dtypes=test_cases[test_case]["expected_dtypes"] ) self.assertTupleEqual(expected_tuple, actual_tuple)