def run(self):
    """Load the raw data and refresh its visualization page.

    Steps, in order: read environment variables, load the parameters
    file referenced by ``self.parameters_file``, build the environment
    and data-processing helpers, load the raw data, hand it to
    ``DataVisualization.update_page`` and close the script.
    """
    separator = "======================================================================"

    # -- Environment variables ---------------------------------------------
    load_dotenv(find_dotenv())
    warnings_level = os.getenv("PYTHON_WARNINGS")

    # -- Script setup --------------------------------------------------------
    # Parameters come from the JSON file configured on this instance.
    config = Util.load_parameters_from_file(path_file=self.parameters_file)

    # Validate parameters and build the environment helper.
    env_settings = EnvironmentParameters(**config.get("environment_parameters"))
    env = Environment(param=env_settings)

    # Validate parameters and build the raw-data loader.
    view_settings = Data2ViewParameters(**config.get("view_data_parameters"))
    processor = DataProcessing(param=view_settings)

    # -- Environment start-up ------------------------------------------------
    env.init_script(
        script_name=os.path.basename(__file__),
        warnings_level=warnings_level,
    )

    # -- Load data -----------------------------------------------------------
    logging.info(separator)
    logging.info("Loading Raw Data:")
    raw_data = processor.load_data()

    # -- Visualization -------------------------------------------------------
    logging.info(separator)
    logging.info("Update visualization of data:")
    page = DataVisualization(title=env.param.app_name, data_param=view_settings)
    page.update_page(data=raw_data)

    # -- Script performance --------------------------------------------------
    env.close_script()
def run(self):
    """Train a regression model and evaluate it on the train and test subsets.

    Pipeline, in order:

    1. Read environment variables and the parameters file referenced by
       ``self.parameters_file``.
    2. Load the ``'train'`` subset and fit/transform it, keeping every
       encoder and dictionary returned by ``fit_transform_train_data``.
    3. Select a model through ``self.model_selection`` and fit it.
    4. Predict on the training data, evaluate, save the prediction report
       and (optionally) plot.
    5. Load the ``'test'`` subset, transform it with the artifacts obtained
       in step 2, predict, evaluate, save and (optionally) plot.
    6. Publish histories and artifacts when tracking is enabled, then close
       the script.
    """
    separator = "======================================================================"

    # ===========================================================================================
    # Environment variables
    load_dotenv(find_dotenv())
    warnings_level = os.getenv("PYTHON_WARNINGS")

    # ===========================================================================================
    # Script Setup

    # Loading json file
    data_config = Util.load_parameters_from_file(path_file=self.parameters_file)

    # Validate parameters and load environment class
    env_param = EnvironmentParameters(**data_config.get("environment_parameters"))
    env = Environment(param=env_param)

    # Validate parameters and load data processing class
    data_param = Static2ValueParameters(**data_config.get("static2value_parameters"))
    ds = DataProcessing(param=data_param)

    # ===========================================================================================
    # Setup environment
    env.init_script(
        script_name=os.path.basename(__file__),
        warnings_level=warnings_level,
    )

    # ===========================================================================================
    # Loading data
    logging.info(separator)
    logging.info("Loading Training and Test Data:")
    data_train_input, data_train_target = ds.load_dataset(subset='train')

    logging.info(separator)
    logging.info("Fit and Transform Training Data:")
    (
        data_train_input,
        data_train_target,
        input_var_dict,
        target_var_dict,
        numerical_input_encoder_list,
        categorical_input_encoder_int_list,
        categorical_input_encoder_hot_list,
        categorical_input_encoder_bin_list,
        categorical_input_int_to_cat_dict_list,
        categorical_input_cat_to_int_dict_list,
        txt_int_to_word_dict_list_input,
        txt_word_to_int_dict_list_input,
        numerical_output_encoder_list,
        categorical_output_encoder_int_list,
        categorical_output_encoder_hot_list,
        categorical_output_encoder_bin_list,
        int_to_cat_dict_list_output_list,
        cat_to_int_dict_list_output_list,
    ) = ds.fit_transform_train_data(
        data_train_input=data_train_input,
        data_train_target=data_train_target,
    )

    logging.info(separator)
    logging.info("Building Model:")
    model = self.model_selection(
        data_config=data_config,
        data_param=data_param,
        environment=env,
    )
    model.fit(
        data_input=data_train_input,
        data_target=data_train_target,
        input_var_dict=input_var_dict,
        target_var_dict=target_var_dict,
        target_cat_dict=cat_to_int_dict_list_output_list,
    )

    logging.info(separator)
    logging.info("Building predictions:")
    data_train_predict = model.eval_predict(
        data_input=data_train_input,
        input_var_dict=input_var_dict,
        int_to_cat_dict_target=None,
    )

    logging.info(separator)
    logging.info("Training Results")
    model_eval_train = RegressionModelEvaluation(
        Y_target=data_train_target[data_param.output_target],
        Y_predict=data_train_predict[['predict']],
        Y_reliability=data_train_predict[['reliability']],
        subset_label="eval_train_",
        regression_type=data_param.regression_type,
        train_history=model.history,
    )

    # checking metrics
    model_eval_train.execute()

    # ===========================================================================================
    # Saving files
    logging.info(separator)
    logging.info("Saving Training Results:")

    # prediction report
    prediction_report = model_eval_train.get_prediction_report()
    Util.save_dataframe(
        data=prediction_report,
        folder_path=env.run_folder,
        prefix=env.prefix_name + "pred_train_report",
    )

    # ===========================================================================================
    # Plotting results
    if env_param.view_plots or env_param.save_plots:
        logging.info(separator)
        logging.info("Plotting training result graphs")
        model_eval_train.plot_training_results(
            view=env_param.view_plots,
            save=env_param.save_plots,
            path=env.run_folder,
            prefix=env.prefix_name + "train_",
        )

    # ===========================================================================================
    # Evaluating test dataset
    # ===========================================================================================
    # Loading data
    logging.info(separator)
    logging.info("Loading Test Data:")

    # Drop the local references to the training frames before the test
    # subset is loaded (intended as a memory optimization).
    del data_train_input, data_train_target, data_train_predict

    # loading test data
    data_test_input, data_test_target = ds.load_dataset(subset='test')

    logging.info(separator)
    logging.info("Transform Test Data:")
    (
        data_test_input,
        data_test_target,
    ) = ds.transform_test_data(
        data_test_input=data_test_input,
        data_test_target=data_test_target,
        input_var_dict=input_var_dict,
        # Fix: the target dictionary fitted on the training data must be
        # used here; previously the *input* dictionary was passed by mistake.
        target_var_dict=target_var_dict,
        numerical_input_encoder_list=numerical_input_encoder_list,
        categorical_input_encoder_int_list=categorical_input_encoder_int_list,
        categorical_input_encoder_hot_list=categorical_input_encoder_hot_list,
        categorical_input_encoder_bin_list=categorical_input_encoder_bin_list,
        categorical_int_to_cat_dict_list_input=categorical_input_int_to_cat_dict_list,
        categorical_cat_to_int_dict_list_input=categorical_input_cat_to_int_dict_list,
        txt_int_to_word_dict_list_input=txt_int_to_word_dict_list_input,
        txt_word_to_int_dict_list_input=txt_word_to_int_dict_list_input,
        numerical_output_encoder_list=numerical_output_encoder_list,
        categorical_output_encoder_int_list=categorical_output_encoder_int_list,
        categorical_output_encoder_hot_list=categorical_output_encoder_hot_list,
        categorical_output_encoder_bin_list=categorical_output_encoder_bin_list,
        int_to_cat_dict_list_output_list=int_to_cat_dict_list_output_list,
        cat_to_int_dict_list_output_list=cat_to_int_dict_list_output_list,
    )

    logging.info(separator)
    logging.info("Test Results")
    data_test_predict = model.eval_predict(
        data_input=data_test_input,
        input_var_dict=input_var_dict,
        int_to_cat_dict_target=None,
    )

    model_eval_test = RegressionModelEvaluation(
        Y_target=data_test_target[data_param.output_target],
        Y_predict=data_test_predict[['predict']],
        Y_reliability=data_test_predict[['reliability']],
        subset_label="eval_test_",
        regression_type=data_param.regression_type,
        train_history=model.history,
    )

    # checking metrics
    model_eval_test.execute()

    # ===========================================================================================
    # Saving files
    logging.info(separator)
    logging.info("Saving Testing Results:")

    # prediction report
    prediction_report = model_eval_test.get_prediction_report()
    Util.save_dataframe(
        data=prediction_report,
        folder_path=env.run_folder,
        prefix=env.prefix_name + "pred_test_report",
    )

    # ===========================================================================================
    # Plotting results
    if env_param.view_plots or env_param.save_plots:
        logging.info(separator)
        logging.info("Plotting test result graphs")
        model_eval_test.plot_test_results(
            view=env_param.view_plots,
            save=env_param.save_plots,
            path=env.run_folder,
            prefix=env.prefix_name + "test_",
        )

    # ===========================================================================================
    # Register tracking info
    if env.param.tracking:
        env.publish_results(history=ds.history)
        env.publish_results(history=model.history)
        env.publish_results(history=model_eval_train.history)
        env.publish_results(history=model_eval_test.history)
        env.tracking.log_artifacts_folder(local_dir=env.run_folder)

    # ===========================================================================================
    # Script Performance
    env.close_script()
def run(self):
    """Fit a clustering model on the prepared training data.

    Reads the parameters file referenced by ``self.parameters_file``,
    loads and prepares the train/test inputs, selects a model through
    ``self.model_selection`` and fits it on the input variables of the
    training data. Prediction, evaluation and result saving are still
    placeholders (see the TODO markers).
    """
    separator = "======================================================================"

    # -- Environment variables ---------------------------------------------
    load_dotenv(find_dotenv())
    warnings_level = os.getenv("PYTHON_WARNINGS")

    # -- Script setup --------------------------------------------------------
    config = Util.load_parameters_from_file(path_file=self.parameters_file)

    # Validate parameters and build the environment helper.
    env_settings = EnvironmentParameters(**config.get("environment_parameters"))
    env = Environment(param=env_settings)

    # Validate parameters and build the data-processing helper.
    cluster_settings = Static2ClusterParameters(
        **config.get("static2cluster_parameters")
    )
    processor = DataProcessing(param=cluster_settings)

    # -- Environment start-up ------------------------------------------------
    env.init_script(
        script_name=os.path.basename(__file__),
        warnings_level=warnings_level,
    )

    # -- Load data -----------------------------------------------------------
    logging.info(separator)
    logging.info("Loading Training and Test Data:")
    train_input, _ = processor.load_dataset()
    test_input, _ = processor.load_test_data()

    # -- Preprocess ----------------------------------------------------------
    logging.info(separator)
    logging.info("Preprocessing Training Data:")
    prepared = processor.prepare_train_test_data(
        data_train_input=train_input,
        data_test_input=test_input,
    )
    # Only the training input, the test input and the input-variable list
    # are kept; the remaining five returned values are discarded.
    train_input, _, test_input, _, input_columns, _, _, _ = prepared

    # -- Model ---------------------------------------------------------------
    logging.info(separator)
    logging.info("Building Model:")
    # NOTE(review): no ``environment`` argument is passed here - confirm
    # this matches the signature of ``model_selection`` in this class.
    model = self.model_selection(data_config=config, data_param=cluster_settings)
    model.fit(data_input=train_input[input_columns])

    # -- Pending stages ------------------------------------------------------
    logging.info(separator)
    logging.info("Building predictions:")
    # TODO

    logging.info(separator)
    logging.info("Training Results")
    # TODO

    logging.info(separator)
    logging.info("Test Results")
    # TODO

    # -- Saving model --------------------------------------------------------
    logging.info(separator)
    logging.info("Saving Results:")

    # -- Tracking ------------------------------------------------------------
    if env.param.tracking:
        env.publish_results(history=processor.history)
        env.tracking.log_artifacts_folder(local_dir=env.run_folder)

    # -- Script performance --------------------------------------------------
    env.close_script()
def run(self):
    """Fit a text-embedding model on the corpus and save it.

    Reads the parameters file referenced by ``self.parameters_file``,
    loads the dataset, prepares the corpus, selects a model through
    ``self.model_selection``, fits it on the first input variable and
    calls ``model.save_model()``. Evaluation of the embedding is not
    implemented yet.
    """
    separator = "======================================================================"

    # -- Environment variables ---------------------------------------------
    load_dotenv(find_dotenv())
    warnings_level = os.getenv("PYTHON_WARNINGS")

    # -- Script setup --------------------------------------------------------
    config = Util.load_parameters_from_file(path_file=self.parameters_file)

    # Validate parameters and build the environment helper.
    env_settings = EnvironmentParameters(**config.get("environment_parameters"))
    env = Environment(param=env_settings)

    # Validate parameters and build the data-processing helper.
    txt_settings = Txt2VecParameters(**config.get("txt2vec_parameters"))
    processor = DataProcessing(param=txt_settings)

    # -- Environment start-up ------------------------------------------------
    env.init_script(
        script_name=os.path.basename(__file__),
        warnings_level=warnings_level,
    )

    # -- Load data -----------------------------------------------------------
    logging.info(separator)
    logging.info("Loading Training and Test Data:")
    # The target part of the dataset is not used by this pipeline.
    corpus_frame, _unused_target = processor.load_dataset()

    # -- Preprocess ----------------------------------------------------------
    logging.info(separator)
    logging.info("Preprocessing Training Data:")
    corpus_frame, input_columns = processor.prepare_corpus_data(data=corpus_frame)

    # -- Model ---------------------------------------------------------------
    logging.info(separator)
    logging.info("Building Model:")
    # select model technology
    model = self.model_selection(
        data_config=config,
        data_param=txt_settings,
        environment=env,
    )
    # build model on the first input variable
    model.fit(dataframe=corpus_frame, corpus_col=input_columns[0])

    # -- Training results ----------------------------------------------------
    logging.info(separator)
    logging.info("Training Results")
    # TODO: add an embedding evaluation (scores, tracking publication and
    # evaluation plots); until then only the plot destination is logged.

    wants_view = env_settings.view_plots
    wants_save = env_settings.save_plots
    if wants_view or wants_save:
        logging.info(separator)
        logging.info("Plotting training result graphs")
        if wants_save:
            logging.info("Plots will save in " + env.run_folder)
        if wants_view:
            logging.info("Plots will view in window popup")

    # -- Saving model --------------------------------------------------------
    logging.info(separator)
    logging.info("Saving Results:")
    model.save_model()

    # -- Tracking ------------------------------------------------------------
    if env.param.tracking:
        env.publish_results(history=processor.history)
        env.tracking.log_artifacts_folder(local_dir=env.run_folder)

    # -- Script performance --------------------------------------------------
    env.close_script()
def run(self):
    """Split the raw data into train/test datasets, analyse and save them.

    Reads the parameters file referenced by ``self.parameters_file``,
    loads the raw data, builds the train and test subsets, runs the
    descriptive analysis on each subset, saves both datasets into the run
    folder and publishes the processing history when tracking is enabled.
    """
    separator = "======================================================================"

    # -- Environment variables ---------------------------------------------
    load_dotenv(find_dotenv())
    warnings_level = os.getenv("PYTHON_WARNINGS")

    # -- Script setup --------------------------------------------------------
    config = Util.load_parameters_from_file(path_file=self.parameters_file)

    # Validate parameters and build the environment helper.
    env_settings = EnvironmentParameters(**config.get("environment_parameters"))
    env = Environment(param=env_settings)

    # Validate parameters and build the data-processing helper.
    dataset_settings = Dataprep2DatasetParameters(**config.get("dataset_parameters"))
    processor = DataProcessing(param=dataset_settings)

    # -- Environment start-up ------------------------------------------------
    env.init_script(
        script_name=os.path.basename(__file__),
        warnings_level=warnings_level,
    )

    # -- Load data -----------------------------------------------------------
    logging.info(separator)
    logging.info("Loading Raw Data:")
    raw_data = processor.load_data()

    logging.info(separator)
    logging.info("Split train and test data subsets:")
    train_subset, test_subset = processor.build_dataset(data=raw_data)

    # -- Analysis of data subsets ----------------------------------------------
    # Same analysis for both subsets, training first; nothing is saved as
    # an analysis file at this stage (save_analysis=False).
    analysis_stages = (
        ("Descritive Analysis - Training Data:", train_subset),
        ("Descritive Analysis - Test Data:", test_subset),
    )
    for heading, subset in analysis_stages:
        logging.info(separator)
        logging.info(heading)
        processor.descriptive_analysis(
            data=subset,
            view_plots=env.param.view_plots,
            save_plots=env.param.save_plots,
            save_analysis=False,
            folder_path=env.run_folder,
            prefix=env.prefix_name,
        )

    # -- Saving dataset ----------------------------------------------------------
    logging.info(separator)
    logging.info("Saving Datasets:")
    processor.save_datasets(
        data_train=train_subset,
        data_test=test_subset,
        folder_path=env.run_folder,
        prefix=env.prefix_name,
    )

    # -- Tracking ------------------------------------------------------------
    if env.param.tracking:
        env.publish_results(history=processor.history)

    # -- Script performance --------------------------------------------------
    env.close_script()
def run(self):
    """Preprocess the raw data, analyse it and save the result.

    Reads the parameters file referenced by ``self.parameters_file``,
    loads the raw data, applies ``prep_rawdata``, runs the descriptive
    analysis (with ``save_analysis=True``), saves the preprocessed frame
    into the run folder and publishes the processing history when
    tracking is enabled.
    """
    separator = "======================================================================"

    # -- Environment variables ---------------------------------------------
    load_dotenv(find_dotenv())
    warnings_level = os.getenv("PYTHON_WARNINGS")

    # -- Script setup --------------------------------------------------------
    config = Util.load_parameters_from_file(path_file=self.parameters_file)

    # Validate parameters and build the environment helper.
    env_settings = EnvironmentParameters(**config.get("environment_parameters"))
    env = Environment(param=env_settings)

    # Validate parameters and build the data-processing helper.
    prep_settings = Data2DataprepParameters(**config.get("prep_data_parameters"))
    processor = DataProcessing(param=prep_settings)

    # -- Environment start-up ------------------------------------------------
    env.init_script(
        script_name=os.path.basename(__file__),
        warnings_level=warnings_level,
    )

    # -- Load data -----------------------------------------------------------
    logging.info(separator)
    logging.info("Loading Raw Data:")
    frame = processor.load_data()

    # -- Preprocess ----------------------------------------------------------
    logging.info(separator)
    logging.info("Preprocessing Raw Data:")
    frame = processor.prep_rawdata(data=frame)

    # -- Analysis of dataprep ------------------------------------------------
    logging.info(separator)
    logging.info("Descritive Analysis:")
    processor.descriptive_analysis(
        data=frame,
        view_plots=env.param.view_plots,
        save_plots=env.param.save_plots,
        save_analysis=True,
        folder_path=env.run_folder,
        prefix=env.prefix_name,
    )

    # -- Saving data ---------------------------------------------------------
    logging.info(separator)
    logging.info("Saving preprocessed data:")
    processor.save_dataframe(
        data=frame,
        folder_path=env.run_folder,
        prefix=env.prefix_name,
    )

    # -- Tracking ------------------------------------------------------------
    if env.param.tracking:
        env.publish_results(history=processor.history)

    # -- Script performance --------------------------------------------------
    env.close_script()