def create_time_features(df_input):
    """
    Add calendar-based time features to a copy of the input dataframe.

    The steps are, in order:

    1. copy ``df_input`` and store a string version of the ``DATE_COL`` column in ``Datum_dt_bckp``,
    2. call ``utils.set_datecol_as_index_if_needed`` so the calendar attributes can be read from the index,
    3. add the plain columns ``day_in_month``, ``calendar_week``, ``weekday``, ``dayofyear`` and ``month``,
    4. pass each of these columns through ``make_time_feature_cyclical``,
    5. pass the result through ``add_number_of_days_since_last_surbey``.

    :param df_input: Dataframe that provides the ``DATE_COL`` column. It is not modified; all work happens on a copy.
    :return: pandas.DataFrame: The dataframe returned by ``add_number_of_days_since_last_surbey``.
    """
    logger.info("Start create_time_features()")

    frame = utils.unset_datecol_as_index_if_needed(df_input.copy())
    # keep a string copy of the date column before the index helper is applied
    frame['Datum_dt_bckp'] = frame[DATE_COL].astype(str)
    indexed = utils.set_datecol_as_index_if_needed(frame)

    # generate simple time features to model seasonal behaviour
    dates = indexed.index
    indexed['day_in_month'] = dates.day
    indexed['calendar_week'] = dates.isocalendar().week
    indexed['weekday'] = dates.weekday
    indexed['dayofyear'] = dates.dayofyear
    indexed['month'] = dates.month

    # generate cyclical time features to model seasonal behaviour
    # NOTE(review): the number is presumably the cycle length of the column; 30, 365 and 52
    # do not cover day 31, day 366 or ISO week 53 - confirm that this approximation is intended.
    cyclical_columns = (
        ('day_in_month', 30),
        ('dayofyear', 365),
        ('weekday', 7),
        ('calendar_week', 52),
        ('month', 12),
    )
    for column_name, cycle_length in cyclical_columns:
        indexed = make_time_feature_cyclical(indexed, column_name, cycle_length)

    # generate the number of days since the last survey
    return add_number_of_days_since_last_surbey(indexed)
def export_results(df_input):
    """
    This function writes the generated predictions to various sinks so that the preds
    can be consumed by other applications.

    At the moment the only sink is the Azure SQL table ``sonntagsfrage.predictions_questionaire``,
    and it is written only when ``WRITE_TO_AZURE`` is truthy. The database connection is opened
    only in that case.

    :param df_input: The dataframe with predictions. After ``utils.unset_datecol_as_index_if_needed``
        it must provide ``DATE_COL``, the columns returned by ``get_pred_col_names()`` and ``estimator``.
    """
    logger.info("Start export_results()")

    df_working = df_input.copy()
    df_working = utils.unset_datecol_as_index_if_needed(df_working)

    output_col_names = [DATE_COL] + get_pred_col_names() + ['estimator']
    target_table_name = 'sonntagsfrage.predictions_questionaire'
    # select the columns up front so a missing column is reported even when the export is disabled
    df_output = df_working[output_col_names]

    if not WRITE_TO_AZURE:
        # nothing to write, so do not open a database connection at all
        return

    # open connection
    conn, cursor = connect_to_azure_sql_db()

    # write to Azure SQL DB
    # NOTE(review): the connection is not closed here - confirm that write_df_to_sql_db releases it.
    write_df_to_sql_db(df_output, conn, cursor, target_table_name, header=False, delete_dates=False)
def generate_features(df_input):
    """
    Build the feature dataframe from the base data.

    The input is copied, enriched via ``create_time_features``, passed through
    ``utils.unset_datecol_as_index_if_needed`` and then restricted to the configured columns.
    The selected dataframe is also handed to ``utils.write_df_to_file`` under the name
    ``generate_features_all_features``.

    :param df_input: Dataframe with the base data. It is copied before any processing.
    :return: pandas.DataFrame: Dataframe restricted to ``FEATURE_COLS_USED``, ``DATE_COL`` and ``TARGET_COLS``, in that order.
    """
    logger.info("Start generate_features()")

    with_time_features = create_time_features(df_input.copy())
    flat_frame = utils.unset_datecol_as_index_if_needed(with_time_features)

    # keep only the configured feature columns, the date column and the targets
    selected_columns = FEATURE_COLS_USED + [DATE_COL] + TARGET_COLS
    selected_frame = flat_frame[selected_columns]

    utils.write_df_to_file(selected_frame, 'generate_features_all_features')

    return selected_frame
# --- calc predictions
# The progress message used to read "Add features to survey data ...", which belongs to the feature step.
print("Calculate predictions ...")
df_with_preds, df_metrics = model.combined_restults_from_all_algorithms(df_with_features)

# --- define output parameters
output_fname = 'df_with_preds'
mode = 'parquet'

# --- get the workspace of the current run and its default datastore
run = Run.get_context()
ws = run.experiment.workspace
datastore = Datastore.get_default(ws)

# --- register preds as dataset 'sonntagsfrage_preds'
df_for_register = utils.unset_datecol_as_index_if_needed(df_with_preds)
Dataset.Tabular.register_pandas_dataframe(df_for_register, (datastore, 'azure-ml-datasets'), 'sonntagsfrage_preds')

# --- register metrics as dataset 'sonntagsfrage_metrics'
df_for_register = utils.unset_datecol_as_index_if_needed(df_metrics)
Dataset.Tabular.register_pandas_dataframe(df_for_register, (datastore, 'azure-ml-datasets'), 'sonntagsfrage_metrics')

# --- write output to Azure SQL DB
print("Writing file to Azure SQL DB ...")
output.export_results(df_with_preds)

# --- write output to file
# os.path.join with a single argument returns that path unchanged (as str for path-like input)
output_path = os.path.join(output_dir)

# --- load input
print("Load file from last step ...")
df_all_data = utils.load_df_from_file('df_all_data', input_path, 'parquet')

# --- add features
print("Add features to survey data ...")
df_with_features = feat.generate_features(df_all_data)

# --- define output parameters
output_fname = 'df_with_features'
mode = 'parquet'

# --- register dataset as 'survey_data_with_all_features'
df_for_register = utils.unset_datecol_as_index_if_needed(df_with_features)
Dataset.Tabular.register_pandas_dataframe(df_for_register, (datastore, 'azure-ml-datasets'), 'survey_data_with_all_features')

# --- write output
print(f"Writing file {output_fname}.{mode} to path {output_path} ...")
utils.write_df_to_file(df_with_features, output_fname, output_path, mode, force_write=True)

# --- Done
print("Done.")