def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get the exogenous project availability inputs from the database and
    validate them, writing any validation errors to the database.

    :param scenario_id:
    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    availabilities = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    df = cursor_to_df(availabilities)
    idx_cols = ["project", "timepoint"]
    value_cols = ["availability_derate"]

    # Check data types availability
    expected_dtypes = get_expected_dtypes(conn, [
        "inputs_project_availability",
        "inputs_project_availability_exogenous"
    ])
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="High",
        errors=dtype_errors,
    )

    # Check for missing inputs
    msg = ("If not specified, availability is assumed to be 100%. If you "
           "don't want to specify any availability derates, simply leave the "
           "exogenous_availability_scenario_id empty and this message will "
           "disappear.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_exogenous",
        severity="Low",
        errors=validate_missing_inputs(df, value_cols, idx_cols, msg),
    )

    # Check for correct sign (derates must be within [0, 1])
    # BUG FIX: the value column is "availability_derate", not "availability",
    # so the previous guard ("availability" not in error_columns) always
    # passed and the sign check could run on a column that had already
    # failed the dtype validation.
    if "availability_derate" not in error_columns:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_project_availability_exogenous",
            severity="High",
            errors=validate_values(df, value_cols, min=0, max=1),
        )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Pull the endogenous-availability parameters for this scenario/stage from
    the database and run input validations, writing the results to the
    validation table.

    :param subscenarios:
    :param subproblem:
    :param stage:
    :param conn:
    :return:
    """
    endog_df = cursor_to_df(
        get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                 stage, conn)
    )

    # Validate the data types against the expected schema
    dtypes_expected = get_expected_dtypes(conn, [
        "inputs_project_availability",
        "inputs_project_availability_endogenous"
    ])
    type_errs, bad_cols = validate_dtypes(endog_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=type_errs)

    # Flag projects with any of the required parameters missing
    msg = ""
    required_cols = [
        "unavailable_hours_per_period",
        "unavailable_hours_per_event_min",
        "available_hours_between_events_min"
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="Low",
        errors=validate_missing_inputs(endog_df, required_cols, "project",
                                       msg))

    # The minimum unavailable hours per event must not exceed the total
    # unavailable hours per period
    monotonic_cols = ["unavailable_hours_per_event_min",
                      "unavailable_hours_per_period"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability_endogenous",
        severity="High",
        errors=validate_column_monotonicity(df=endog_df,
                                            cols=monotonic_cols,
                                            idx_col=["project"]))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Fetch the period-level temporal inputs from the database and validate
    them, writing any problems to the validation results table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that hours in full period is within x and y
    #  ("within" check or "validate" check in param definition returns
    #  obscure error message that isn't helpful).

    period_df = cursor_to_df(
        get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                 stage, conn)
    )

    # Expected data types; hours_in_period_timepoints is hard-coded to
    # "numeric" since it is not in the table schema lookup
    dtypes_expected = get_expected_dtypes(
        conn=conn, tables=["inputs_temporal_periods"])
    dtypes_expected["hours_in_period_timepoints"] = "numeric"

    # Validate data types
    type_errs, bad_cols = validate_dtypes(period_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="High",
        errors=type_errs,
    )

    # Numeric columns that passed the dtype check must be non-negative
    numeric_ok = {
        col for col in period_df.columns
        if dtypes_expected[col] == "numeric"
    } - set(bad_cols)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_periods",
        severity="Mid",
        errors=validate_values(period_df, numeric_ok, "period", min=0),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Retrieve this module's specified-capacity and fixed-cost inputs from the
    database and validate them, recording any issues in the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    spec_params = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    portfolio_projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "gen_ret_bin")

    # Load the query results into a DataFrame and pull out the projects
    # that actually have specified-capacity rows
    spec_df = cursor_to_df(spec_params)
    projects_w_spec = spec_df["project"].unique()

    # Expected data types for the relevant input tables
    dtypes_expected = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_specified_capacity",
            "inputs_project_specified_fixed_cost"
        ])

    # Validate data types
    type_errs, bad_cols = validate_dtypes(spec_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=type_errs)

    # Numeric columns that passed the dtype check must be non-negative
    numeric_ok = {
        col for col in spec_df.columns if dtypes_expected[col] == "numeric"
    } - set(bad_cols)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_values(spec_df, numeric_ok, min=0))

    # Ensure project capacity & fixed cost is specified in at least 1 period
    msg = "Expected specified capacity & fixed costs for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_idxs(actual_idxs=projects_w_spec,
                             req_idxs=portfolio_projects,
                             idx_label="project",
                             msg=msg))

    # Check for missing values (vs. missing row entries above)
    value_cols = ["specified_capacity_mw", "annual_fixed_cost_per_mw_year"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_specified_capacity, "
                 "inputs_project_specified_fixed_cost",
        severity="High",
        errors=validate_missing_inputs(spec_df, value_cols))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs.

    Improvement over the previous version: the (min, max) cumulative new
    build checks for the MW and MWh columns were duplicated verbatim; they
    are now driven by a single loop over the two column pairs, preserving
    the original check order.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    new_stor_costs = get_model_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn
    )

    projects = get_projects(
        conn, scenario_id, subscenarios, "capacity_type", "stor_new_lin"
    )

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    df_cols = cost_df.columns

    # get the project lists
    cost_projects = cost_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_project_new_cost",
                           "inputs_project_new_potential"]
    )

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    numeric_columns = [col for col in cost_df.columns
                       if expected_dtypes[col] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0),
    )

    # Check that all projects of this capacity type have cost data for at
    # least one vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(
            actual_idxs=cost_projects, req_idxs=projects,
            idx_label="project", msg=msg
        ),
    )

    # Run the new-potential checks once for the MW column pair and once for
    # the MWh column pair (previously duplicated code)
    for cols in (
        ["min_cumulative_new_build_mw", "max_cumulative_new_build_mw"],
        ["min_cumulative_new_build_mwh", "max_cumulative_new_build_mwh"],
    ):
        # Check that maximum new build doesn't decrease over vintages
        if cols[1] in df_cols:
            write_validation_to_database(
                conn=conn,
                scenario_id=scenario_id,
                subproblem_id=subproblem,
                stage_id=stage,
                gridpath_module=__name__,
                db_table="inputs_project_new_potential",
                severity="Mid",
                errors=validate_row_monotonicity(
                    df=cost_df, col=cols[1], rank_col="vintage"
                ),
            )

        # check that min build <= max build
        if set(cols).issubset(set(df_cols)):
            write_validation_to_database(
                conn=conn,
                scenario_id=scenario_id,
                subproblem_id=subproblem,
                stage_id=stage,
                gridpath_module=__name__,
                db_table="inputs_project_new_potential",
                severity="High",
                errors=validate_column_monotonicity(
                    df=cost_df, cols=cols, idx_col=["project", "vintage"]
                ),
            )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get inputs from database and validate the inputs.

    Bug fix relative to the previous version: the db_table argument for the
    invalid cap-type/op-type combo check was a broken multi-line string
    literal containing the typo "inputs_tranmission_portfolios"; it is now
    a single valid string with the correct table name.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the transmission inputs
    transmission_lines = get_inputs_from_database(scenario_id, subscenarios,
                                                  subproblem, stage, conn)

    # Convert input data into pandas DataFrame
    df = cursor_to_df(transmission_lines)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn,
        [
            "inputs_transmission_portfolios",
            "inputs_transmission_availability",
            "inputs_transmission_load_zones",
            "inputs_transmission_operational_chars",
        ],
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_portfolios, "
                 "inputs_transmission_load_zones, "
                 "inputs_transmission_operational_chars",
        severity="High",
        errors=dtype_errors,
    )

    # Check valid numeric columns are non-negative
    # (use "col", not "c", so the cursor name isn't shadowed)
    numeric_columns = [col for col in df.columns
                       if expected_dtypes[col] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0),
    )

    # Ensure we're not combining incompatible capacity and operational types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {}
        FROM mod_tx_capacity_and_tx_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        # FIXED: was a broken multi-line literal with a typo
        # ("inputs_tranmission_portfolios")
        db_table="inputs_transmission_operational_chars, "
                 "inputs_transmission_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos),
    )

    # Check reactance > 0
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_values(df, ["reactance_ohms"], min=0,
                               strict_min=True),
    )

    # Check that all portfolio tx lines are present in the opchar inputs
    msg = ("All tx lines in the portfolio should have an operational type "
           "specified in the inputs_transmission_operational_chars table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df, ["operational_type"],
                                       idx_col="transmission_line",
                                       msg=msg),
    )

    # Check that all portfolio tx lines are present in the load zone inputs
    msg = ("All tx lines in the portfolio should have a load zone from/to "
           "specified in the inputs_transmission_load_zones table.")
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_load_zones",
        severity="High",
        errors=validate_missing_inputs(df,
                                       ["load_zone_from", "load_zone_to"],
                                       idx_col="transmission_line",
                                       msg=msg),
    )

    # Check that all tx load zones are part of the active load zones
    load_zones = get_load_zones(conn, subscenarios)
    for col in ["load_zone_from", "load_zone_to"]:
        write_validation_to_database(
            conn=conn,
            scenario_id=scenario_id,
            subproblem_id=subproblem,
            stage_id=stage,
            gridpath_module=__name__,
            db_table="inputs_transmission_load_zones",
            severity="High",
            errors=validate_columns(df, col, valids=load_zones),
        )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get the project inputs for this scenario from the database and validate
    them, writing every validation result back to the database.

    Checks performed: column data types, non-negativity of numeric columns,
    invalid capacity-type/operational-type combinations, validity of the
    capacity and operational types, and presence of availability, opchar,
    and load zone inputs for every portfolio project.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    c = conn.cursor()

    # Get the project inputs
    projects = get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                        stage, conn)

    # Convert input data into pandas DataFrame
    df = cursor_to_df(projects)

    # Check data types:
    expected_dtypes = get_expected_dtypes(
        conn, ["inputs_project_portfolios",
               "inputs_project_availability",
               "inputs_project_load_zones",
               "inputs_project_operational_chars"]
    )
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative
    # NOTE(review): the comprehension variable `c` shadows the cursor name
    # only inside the comprehension's own scope (Python 3), so the
    # c.execute(...) calls below still see the cursor.
    numeric_columns = [c for c in df.columns
                       if expected_dtypes[c] == "numeric"]
    # Skip columns that already failed the dtype check
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_values(df, valid_numeric_columns, min=0)
    )

    # Check that we're not combining incompatible cap-types and op-types
    cols = ["capacity_type", "operational_type"]
    invalid_combos = c.execute(
        """
        SELECT {}
        FROM mod_capacity_and_operational_type_invalid_combos
        """.format(",".join(cols))
    ).fetchall()
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars, inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, cols, invalids=invalid_combos)
    )

    # Check that capacity type is valid
    # Note: foreign key already ensures this!
    valid_cap_types = c.execute(
        """SELECT capacity_type from mod_capacity_types"""
    ).fetchall()
    # Flatten the 1-tuples returned by fetchall into plain values
    valid_cap_types = [v[0] for v in valid_cap_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "capacity_type", valids=valid_cap_types)
    )

    # Check that operational type is valid
    # Note: foreign key already ensures this!
    valid_op_types = c.execute(
        """SELECT operational_type from mod_operational_types"""
    ).fetchall()
    # Flatten the 1-tuples returned by fetchall into plain values
    valid_op_types = [v[0] for v in valid_op_types]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_portfolios",
        severity="High",
        errors=validate_columns(df, "operational_type", valids=valid_op_types)
    )

    # Check that all portfolio projects are present in the availability inputs
    msg = "All projects in the portfolio should have an availability type " \
          "specified in the inputs_project_availability table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_availability",
        severity="High",
        errors=validate_missing_inputs(df, "availability_type", msg=msg)
    )

    # Check that all portfolio projects are present in the opchar inputs
    msg = "All projects in the portfolio should have an operational type " \
          "and balancing type specified in the " \
          "inputs_project_operational_chars table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_operational_chars",
        severity="High",
        errors=validate_missing_inputs(df,
                                       ["operational_type",
                                        "balancing_type_project"],
                                       msg=msg)
    )

    # Check that all portfolio projects are present in the load zone inputs
    msg = "All projects in the portfolio should have a load zone " \
          "specified in the inputs_project_load_zones table."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_load_zones",
        severity="High",
        errors=validate_missing_inputs(df, "load_zone", msg=msg)
    )
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn): """ Get inputs from database and validate the inputs :param subscenarios: SubScenarios object with all subscenario info :param subproblem: :param stage: :param conn: database connection :return: """ # Get the fuel input data fuels, fuel_prices = get_inputs_from_database(scenario_id, subscenarios, subproblem, stage, conn) # Get the projects fuels c1 = conn.cursor() projects = c1.execute("""SELECT project, fuel FROM inputs_project_portfolios INNER JOIN (SELECT project, fuel FROM inputs_project_operational_chars WHERE project_operational_chars_scenario_id = {} AND fuel IS NOT NULL) AS op_char USING (project) WHERE project_portfolio_scenario_id = {}""".format( subscenarios.PROJECT_OPERATIONAL_CHARS_SCENARIO_ID, subscenarios.PROJECT_PORTFOLIO_SCENARIO_ID)) # Get the relevant periods and months c2 = conn.cursor() periods_months = c2.execute("""SELECT DISTINCT period, month FROM inputs_temporal WHERE temporal_scenario_id = {} AND subproblem_id = {} AND stage_id = {};""".format(subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage)) # Convert input data into pandas DataFrame fuels_df = cursor_to_df(fuels) fuel_prices_df = cursor_to_df(fuel_prices) prj_df = cursor_to_df(projects) # Get relevant lists fuels = fuels_df["fuel"].to_list() actual_fuel_periods_months = list( fuel_prices_df[["fuel", "period", "month"]].itertuples(index=False, name=None)) req_fuel_periods_months = [(f, p, m) for (p, m) in periods_months for f in fuels] # Check data types expected_dtypes = get_expected_dtypes( conn, ["inputs_project_fuels", "inputs_project_fuel_prices"]) dtype_errors, error_columns = validate_dtypes(fuels_df, expected_dtypes) write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuels", severity="High", errors=dtype_errors) dtype_errors, error_columns = validate_dtypes(fuel_prices_df, expected_dtypes) 
write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuel_prices", severity="High", errors=dtype_errors) # TODO: couldn't this be a simple foreign key or is NULL not allowed then? # TODO: should this check be in projects.init instead? # Check that fuels specified for projects are valid fuels write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_operational_chars", severity="High", errors=validate_columns(prj_df, "fuel", valids=fuels)) # Check that fuel prices exist for the period and month write_validation_to_database(conn=conn, scenario_id=scenario_id, subproblem_id=subproblem, stage_id=stage, gridpath_module=__name__, db_table="inputs_project_fuel_prices", severity="High", errors=validate_idxs( actual_idxs=actual_fuel_periods_months, req_idxs=req_fuel_periods_months, idx_label="(fuel, period, month)"))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Fetch the transmission hurdle rate inputs from the database and run the
    input validations, writing the results to the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    rates_df = cursor_to_df(
        get_inputs_from_database(scenario_id, subscenarios, subproblem,
                                 stage, conn)
    )

    # Expected data types for the hurdle rates table
    dtypes_expected = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_hurdle_rates"])

    # Validate data types
    type_errs, bad_cols = validate_dtypes(rates_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=type_errs,
    )

    # Numeric columns that passed the dtype check must be non-negative
    numeric_ok = {
        col for col in rates_df.columns if dtypes_expected[col] == "numeric"
    } - set(bad_cols)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="High",
        errors=validate_values(rates_df, numeric_ok, "transmission_line",
                               min=0),
    )

    # Hurdle rates should be present for every line/period when the feature
    # is enabled
    msg = ("Expected hurdle rates specified for each modeling period when "
           "transmission hurdle rates feature is on.")
    rate_cols = [
        "hurdle_rate_positive_direction_per_mwh",
        "hurdle_rate_negative_direction_per_mwh",
    ]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_hurdle_rates",
        severity="Low",
        errors=validate_missing_inputs(df=rates_df,
                                       col=rate_cols,
                                       idx_col=["transmission_line",
                                                "period"],
                                       msg=msg),
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get inputs from database and validate the inputs.

    Bug fixes relative to the previous version:
    - the min/max monotonicity check wrote its errors against
      "inputs_project_new_potential" (a copy-paste from a project module);
      it now reports against "inputs_transmission_specified_capacity".
    - that check also indexed on ["project", "period"], but this DataFrame
      is keyed by transmission line; it now uses
      ["transmission_line", "period"].

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    tx_capacities = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    tx_lines = get_tx_lines(conn, scenario_id, subscenarios,
                            "capacity_type", "tx_spec")

    # Convert input data into pandas DataFrame and extract data
    df = cursor_to_df(tx_capacities)
    spec_tx_lines = df["transmission_line"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_specified_capacity"])

    # Check dtypes
    dtype_errors, error_columns = validate_dtypes(df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=dtype_errors)

    # Ensure tx_line capacity is specified in at least 1 period
    msg = "Expected specified capacity for at least one period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_idxs(actual_idxs=spec_tx_lines,
                             req_idxs=tx_lines,
                             idx_label="transmission_line",
                             msg=msg))

    # Check for missing values (vs. missing row entries above)
    cols = ["min_mw", "max_mw"]
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_missing_inputs(df, cols))

    # check that min_mw <= max_mw
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        # FIXED: was "inputs_project_new_potential"
        db_table="inputs_transmission_specified_capacity",
        severity="High",
        errors=validate_column_monotonicity(
            df=df, cols=cols,
            # FIXED: was ["project", "period"]
            idx_col=["transmission_line", "period"]))
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Get the binary-build storage inputs (costs and build sizes) from the
    database and validate them, writing all results to the database.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # TODO: check that there are no minimum duration inputs for this type
    #   (duration is specified by specifying the build size in mw and mwh)
    #   Maybe also check all other required / not required inputs?
    #   --> see example in gen_must_run operational_type. Seems very verbose
    #   and hard to maintain. Is there a way to generalize this?

    # Get the binary build generator inputs
    new_stor_costs, new_stor_build_size = \
        get_module_specific_inputs_from_database(
            scenario_id, subscenarios, subproblem, stage, conn)

    projects = get_projects(conn, scenario_id, subscenarios,
                            "capacity_type", "stor_new_bin")

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_stor_costs)
    bld_size_df = cursor_to_df(new_stor_build_size)

    # get the project lists
    cost_projects = cost_df["project"].unique()
    # NOTE(review): unlike cost_projects, this is not de-duplicated with
    # .unique(); presumably the build-size table has one row per project —
    # confirm against the schema.
    bld_size_projects = bld_size_df["project"]

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=[
            "inputs_project_new_cost",
            "inputs_project_new_binary_build_size"
        ])

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=dtype_errors)

    # Check valid numeric columns are non-negative - cost_df
    # (columns that failed the dtype check above are excluded)
    numeric_columns = [
        c for c in cost_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=validate_values(cost_df,
                                                        valid_numeric_columns,
                                                        min=0))

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df,
                                                  expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors)

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [
        c for c in bld_size_df.columns if expected_dtypes[c] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0))

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(conn=conn,
                                 scenario_id=scenario_id,
                                 subproblem_id=subproblem,
                                 stage_id=stage,
                                 gridpath_module=__name__,
                                 db_table="inputs_project_new_cost",
                                 severity="High",
                                 errors=validate_idxs(
                                     actual_idxs=cost_projects,
                                     req_idxs=projects,
                                     idx_label="project",
                                     msg=msg))

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(actual_idxs=bld_size_projects,
                             req_idxs=projects,
                             idx_label="project"))
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem,
                                    stage, conn):
    """
    Fetch the new-build transmission cost inputs from the database and run
    the input validations, writing the results to the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    cost_df = cursor_to_df(
        get_module_specific_inputs_from_database(scenario_id, subscenarios,
                                                 subproblem, stage, conn)
    )

    portfolio_tx_lines = get_tx_lines(conn, scenario_id, subscenarios,
                                      "capacity_type", "tx_new_lin")

    # Transmission lines that actually have cost rows
    tx_lines_w_cost = cost_df["transmission_line"].unique()

    # Expected data types for the new-cost table
    dtypes_expected = get_expected_dtypes(
        conn=conn, tables=["inputs_transmission_new_cost"])

    # Validate data types
    type_errs, bad_cols = validate_dtypes(cost_df, dtypes_expected)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=type_errs)

    # Numeric columns that passed the dtype check must be non-negative
    numeric_ok = {
        col for col in cost_df.columns if dtypes_expected[col] == "numeric"
    } - set(bad_cols)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="High",
        errors=validate_values(cost_df, numeric_ok, "transmission_line",
                               min=0))

    # Every tx line of this capacity type needs cost data for at least one
    # vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_transmission_new_cost",
        severity="Mid",
        errors=validate_idxs(actual_idxs=tx_lines_w_cost,
                             req_idxs=portfolio_tx_lines,
                             idx_label="transmission_line",
                             msg=msg))
def validate_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get the horizon and horizon-timepoint inputs from the database and
    validate them, writing any problems found to the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    hrzs, hrz_tmps = get_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    c = conn.cursor()
    # Use bound parameters instead of str.format to build the query
    periods_horizons = c.execute(
        """SELECT balancing_type_horizon, period, horizon
        FROM periods_horizons
        WHERE temporal_scenario_id = ?
        AND subproblem_id = ?
        AND stage_id = ?;""",
        (subscenarios.TEMPORAL_SCENARIO_ID, subproblem, stage)
    )

    df_hrzs = cursor_to_df(hrzs)
    df_hrz_tmps = cursor_to_df(hrz_tmps)
    df_periods_hrzs = cursor_to_df(periods_horizons)

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_temporal_horizons",
                "inputs_temporal_horizon_timepoints"]
    )

    # Check dtypes horizons. Keep each table's dtype-error columns in their
    # own variables: previously the horizon_timepoints dtype check clobbered
    # error_columns before the horizons numeric check below could use it.
    hrz_dtype_errors, hrz_error_columns = validate_dtypes(
        df_hrzs, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="High",
        errors=hrz_dtype_errors
    )

    # Check dtypes horizon_timepoints
    hrz_tmp_dtype_errors, hrz_tmp_error_columns = validate_dtypes(
        df_hrz_tmps, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=hrz_tmp_dtype_errors
    )

    # Check valid numeric columns are non-negative - horizons
    numeric_columns = [
        col for col in df_hrzs.columns if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizons",
        severity="Mid",
        errors=validate_values(df_hrzs, valid_numeric_columns, "horizon",
                               min=0)
    )

    # Check valid numeric columns are non-negative - horizon_timepoints
    # Bug fix: scan df_hrz_tmps' columns (was copy-pasted as df_hrzs) and
    # subtract that table's own dtype-error columns.
    numeric_columns = [
        col for col in df_hrz_tmps.columns
        if expected_dtypes[col] == "numeric"
    ]
    valid_numeric_columns = set(numeric_columns) - set(hrz_tmp_error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="Mid",
        errors=validate_values(df_hrz_tmps, valid_numeric_columns,
                               ["horizon", "timepoint"], min=0)
    )

    # One horizon cannot straddle multiple periods
    msg = "All timepoints within a horizon should belong to the same period."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_single_input(
            df=df_periods_hrzs,
            idx_col=["balancing_type_horizon", "horizon"],
            msg=msg)
    )

    # Make sure there are no missing horizon inputs
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_temporal_horizon_timepoints",
        severity="High",
        errors=validate_missing_inputs(
            df=df_hrz_tmps,
            col="horizon",
            idx_col=["balancing_type_horizon", "timepoint"])
    )
def validate_module_specific_inputs(scenario_id, subscenarios, subproblem, stage, conn):
    """
    Get the gen_new_bin cost and build-size inputs from the database and
    validate them, writing any problems found to the validation table.

    :param subscenarios: SubScenarios object with all subscenario info
    :param subproblem:
    :param stage:
    :param conn: database connection
    :return:
    """
    # Get the binary build generator inputs
    new_gen_costs, new_build_size = get_module_specific_inputs_from_database(
        scenario_id, subscenarios, subproblem, stage, conn)

    projects = get_projects(conn, scenario_id, subscenarios, "capacity_type",
                            "gen_new_bin")

    # Convert input data into pandas DataFrame
    cost_df = cursor_to_df(new_gen_costs)
    bld_size_df = cursor_to_df(new_build_size)

    # Get the project lists. Consistency fix: deduplicate bld_size_projects
    # with .unique() like cost_projects (was the raw "project" Series).
    cost_projects = cost_df["project"].unique()
    bld_size_projects = bld_size_df["project"].unique()

    # Get expected dtypes
    expected_dtypes = get_expected_dtypes(
        conn=conn,
        tables=["inputs_project_new_cost",
                "inputs_project_new_binary_build_size"]
    )

    # Check dtypes - cost_df
    dtype_errors, error_columns = validate_dtypes(cost_df, expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative - cost_df
    numeric_columns = [c for c in cost_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="High",
        errors=validate_values(cost_df, valid_numeric_columns, min=0)
    )

    # Check dtypes - bld_size_df
    dtype_errors, error_columns = validate_dtypes(bld_size_df,
                                                  expected_dtypes)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=dtype_errors
    )

    # Check valid numeric columns are non-negative - bld_size_df
    numeric_columns = [c for c in bld_size_df.columns
                       if expected_dtypes[c] == "numeric"]
    valid_numeric_columns = set(numeric_columns) - set(error_columns)
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_values(bld_size_df, valid_numeric_columns, min=0)
    )

    # Check that all binary new build projects are available in >=1 vintage
    msg = "Expected cost data for at least one vintage."
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_cost",
        severity="Mid",
        errors=validate_idxs(actual_idxs=cost_projects,
                             req_idxs=projects,
                             idx_label="project",
                             msg=msg)
    )

    # Check that all binary new build projects have build size specified
    write_validation_to_database(
        conn=conn,
        scenario_id=scenario_id,
        subproblem_id=subproblem,
        stage_id=stage,
        gridpath_module=__name__,
        db_table="inputs_project_new_binary_build_size",
        severity="High",
        errors=validate_idxs(actual_idxs=bld_size_projects,
                             req_idxs=projects,
                             idx_label="project")
    )
def test_validate_dtypes(self): """ :return: """ df_columns = ["project", "capacity"] test_cases = { # Make sure correct inputs don't throw error 1: {"df": pd.DataFrame( columns=df_columns, data=[["gas_ct", 10], ["coal_plant", 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ([], []) }, # Test invalid string column 2: {"df": pd.DataFrame( columns=df_columns, data=[["gas_ct", 10], ["coal_plant", "string"]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ( ["Invalid data type for column 'capacity'; expected numeric"], ["capacity"] )}, # Test invalid numeric column 3: {"df": pd.DataFrame( columns=df_columns, data=[[1, 10], [1, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ( ["Invalid data type for column 'project'; expected string"], ["project"] )}, # If at least one string in the column, pandas will convert # all column data to string so there will be no error 4: {"df": pd.DataFrame( columns=df_columns, data=[["gas_ct", 10], [1, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ([], []) }, # Columns with all None are ignored 5: {"df": pd.DataFrame( columns=df_columns, data=[[None, 10], [None, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ([], []) }, # Columns with all NaN are ignored 6: {"df": pd.DataFrame( columns=df_columns, data=[[np.nan, 10], [np.nan, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ([], []) }, # Columns with some None are not ignored 7: {"df": pd.DataFrame( columns=df_columns, data=[[10, 10], [None, 20]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": ( ["Invalid data type for column 'project'; expected string"], ["project"] )}, # Test multiple error columns 8: {"df": pd.DataFrame( columns=df_columns, data=[[10, "string"], [10, "string"]]), "expected_dtypes": { "project": "string", "capacity": "numeric"}, "result": 
( ["Invalid data type for column 'project'; expected string", "Invalid data type for column 'capacity'; expected numeric"], ["project", "capacity"] )} } for test_case in test_cases.keys(): expected_tuple = test_cases[test_case]["result"] actual_tuple = module_to_test.validate_dtypes( df=test_cases[test_case]["df"], expected_dtypes=test_cases[test_case]["expected_dtypes"] ) self.assertTupleEqual(expected_tuple, actual_tuple)