def evaluate_submission(submission, intermediate_data_dir):
    """Score a submission against the observed data for the same datetimes.

    Parameters
    ----------
    submission : str or pd.DataFrame
        Either a filepath to a submission CSV or an already-loaded submission
        dataframe. A CSV is read with ``pd.read_csv`` and re-indexed by its
        ``datetime`` column parsed as UTC; a dataframe is used as-is.
    intermediate_data_dir
        Forwarded to ``clean.combine_training_datasets`` to load the
        observed data.

    Returns
    -------
    pd.DataFrame
        Dataframe with the columns ``total_score``, ``charge_score``,
        ``discharge_score`` and ``max_charge_score``.

    Raises
    ------
    AssertionError
        If ``submission`` is neither a string nor a dataframe.
    """
    # Time-of-day windows used to split both the submission and observations
    charge_window = ('00:00', '15:00')
    discharge_window = ('15:30', '20:30')

    # Resolving the submission into a datetime-indexed dataframe
    if not isinstance(submission, str):
        assert isinstance(submission, pd.DataFrame), '`submission` must either be a valid submission dataframe or a filepath to the submission'
        solution = submission
    else:
        raw_submission = pd.read_csv(submission)
        solution = raw_submission.set_index(
            pd.to_datetime(raw_submission.datetime, utc=True)
        )

    # Keeping only the observations that share a timestamp with the submission
    observed = clean.combine_training_datasets(intermediate_data_dir)
    in_submission = observed.index.isin(solution.index)
    observed = observed[in_submission]

    # Splitting both frames into the charge and discharge windows
    solution_charge = solution.between_time(*charge_window)
    solution_discharge = solution.between_time(*discharge_window)
    observed_charge = observed.between_time(*charge_window)
    observed_discharge = observed.between_time(*discharge_window)

    scores = calculate_score_s(
        solution_discharge.charge_MW,
        solution_charge.charge_MW,
        observed_discharge.demand_MW,
        observed_charge.pv_power_mw,
    )
    total_score, charge_score, discharge_score = scores

    return pd.DataFrame({
        'total_score': total_score,
        'charge_score': charge_score,
        'discharge_score': discharge_score,
        'max_charge_score': max_charge_score(observed_charge.pv_power_mw),
    })
def prepare_training_input_data(intermediate_data_dir, start_hour=4):
    """Build the feature matrix and charge target used for model training.

    Parameters
    ----------
    intermediate_data_dir
        Forwarded to ``clean.combine_training_datasets`` to load the
        combined dataset.
    start_hour : int, default 4
        Hour of the day at which the charging window starts. It is
        zero-padded to form the ``HH:00`` start time handed to
        ``construct_charge_s`` and is also forwarded unchanged to
        ``extract_charging_datetimes``.

    Returns
    -------
    X : pd.DataFrame
        Charge features restricted to the charging datetimes.
    y : pd.Series
        Charge series (from ``construct_charge_s``) for the same datetimes.
    """
    # Loading input data (interpolation is limited to one consecutive gap)
    df = clean.combine_training_datasets(intermediate_data_dir).interpolate(limit=1)
    df_features = construct_df_charge_features(df)

    # Filtering for overlapping feature and target data; the range ends one
    # half-hour step before the last non-null PV reading.
    # '30min' is used instead of the deprecated '30T' alias.
    dt_idx = pd.date_range(
        df_features.index.min(),
        df['pv_power_mw'].dropna().index.max() - pd.Timedelta(minutes=30),
        freq='30min',
    )
    s_pv = df.loc[dt_idx, 'pv_power_mw']
    df_features = df_features.loc[dt_idx]

    # Constructing the charge series. The hour is zero-padded to two digits so
    # that start hours >= 10 give e.g. '10:00' rather than the invalid '010:00'.
    start_time = f'{int(start_hour):02d}:00'
    s_charge = construct_charge_s(s_pv, start_time=start_time, end_time='15:00')

    # Filtering for charging datetimes
    charging_datetimes = extract_charging_datetimes(df_features, start_hour=start_hour)

    X = df_features.loc[charging_datetimes]
    y = s_charge.loc[charging_datetimes]

    return X, y
def prepare_training_input_data(intermediate_data_dir):
    """Build the feature matrix and discharge target used for model training.

    Parameters
    ----------
    intermediate_data_dir
        Forwarded to ``clean.combine_training_datasets`` to load the
        combined dataset.

    Returns
    -------
    X : pd.DataFrame
        Discharge features restricted to the evening datetimes.
    y : pd.Series
        Discharge series (from ``construct_discharge_s`` over the
        15:30-20:30 window) for the same datetimes.
    """
    # Loading input data (interpolation is limited to one consecutive gap)
    df = clean.combine_training_datasets(intermediate_data_dir).interpolate(limit=1)
    df_features = construct_df_discharge_features(df)

    # Filtering for overlapping feature and target data; the range ends one
    # half-hour step before the last non-null demand reading.
    # '30min' is used instead of the deprecated '30T' alias.
    dt_idx = pd.date_range(
        df_features.index.min(),
        df['demand_MW'].dropna().index.max() - pd.Timedelta(minutes=30),
        freq='30min',
    )
    s_demand = df.loc[dt_idx, 'demand_MW']
    df_features = df_features.loc[dt_idx]

    # Constructing the discharge series
    s_discharge = construct_discharge_s(s_demand, start_time='15:30', end_time='20:30')

    # Filtering for evening datetimes
    evening_datetimes = extract_evening_datetimes(df_features)

    X = df_features.loc[evening_datetimes]
    y = s_discharge.loc[evening_datetimes]

    return X, y
def prepare_test_feature_data(raw_data_dir, intermediate_data_dir, test_start_date=None, test_end_date=None, start_time='08:00', end_time='23:59'):
    """Return the charge features for the datetimes to be predicted.

    Parameters
    ----------
    raw_data_dir
        Forwarded to ``discharge.load_latest_submission_template``; only used
        when no explicit test window is given.
    intermediate_data_dir
        Forwarded to ``clean.combine_training_datasets`` to load the
        combined dataset.
    test_start_date, test_end_date : optional
        Bounds used to slice the feature index. Both must be supplied for the
        slice to be used; if either is ``None`` the index of the latest
        submission template is used instead.
    start_time, end_time : str
        Time-of-day bounds passed to ``between_time`` on the selected rows.

    Returns
    -------
    pd.DataFrame
        Charge features for the selected datetimes within the daily window.
    """
    # Building the charge features from the gap-filled combined dataset
    combined = clean.combine_training_datasets(intermediate_data_dir)
    features = construct_df_charge_features(combined.interpolate(limit=1))

    # Choosing the target index: explicit window if fully specified,
    # otherwise the latest submission template
    window_given = test_start_date is not None and test_end_date is not None
    if window_given:
        target_index = features[test_start_date:test_end_date].index
    else:
        target_index = discharge.load_latest_submission_template(raw_data_dir).index

    # Restricting the features to the target datetimes and daily time window
    return features.loc[target_index].between_time(start_time, end_time)
def prepare_test_feature_data(raw_data_dir, intermediate_data_dir, test_start_date=None, test_end_date=None):
    """Return the discharge features for the datetimes to be predicted.

    Parameters
    ----------
    raw_data_dir
        Forwarded to ``load_latest_submission_template``; only used when no
        explicit test window is given.
    intermediate_data_dir
        Forwarded to ``clean.combine_training_datasets`` to load the
        combined dataset.
    test_start_date, test_end_date : optional
        Bounds used to slice the feature index. Both must be supplied for the
        slice to be used; if either is ``None`` the index of the latest
        submission template is used instead.

    Returns
    -------
    pd.DataFrame
        Discharge features for the selected datetimes.
    """
    # Building the discharge features from the gap-filled combined dataset
    combined = clean.combine_training_datasets(intermediate_data_dir)
    gap_filled = combined.interpolate(limit=1)
    features = construct_df_discharge_features(gap_filled)

    # Choosing the target index: explicit window if fully specified,
    # otherwise the latest submission template
    window_given = test_start_date is not None and test_end_date is not None
    if window_given:
        target_index = features[test_start_date:test_end_date].index
    else:
        target_index = load_latest_submission_template(raw_data_dir).index

    # Restricting the features to the target datetimes
    return features.loc[target_index]