@staticmethod
def _build_exec_analysis_source(parameters):
    """Build the source text of a ``controller.exec_analysis(...)`` call.

    String values are single-quoted except ``amode``, whose value must
    evaluate to a symbolic constant (e.g. ``POC``, ``NORMAL``) in the
    caller's scope.  The ``datapath`` argument references the caller's
    local ``aux_dataset``/``variables``.

    NOTE(review): the returned string is fed to ``eval()`` by the caller;
    workflow definitions must therefore come from a trusted source.

    :param parameters: workflow "parameters" mapping (kwarg name -> value)
    :return: source string ready for ``eval`` in ``train_workflow``'s scope
    """
    parts = ['controller.exec_analysis(',
             'datapath=aux_dataset.loc[:, variables]']
    for key, value in parameters.items():
        parts.append(',')
        parts.append(key)
        parts.append('=')
        if isinstance(value, str) and key != "amode":
            parts.append('\'')
            parts.append(value)
            parts.append('\'')
        else:
            parts.append(str(value))
    parts.append(')')
    return ''.join(parts)

def _store_summary(self, pfix, suffix, frame, sheet_name):
    """Export *frame* as an xls summary when workflow summaries are enabled.

    The file is written under the 'train' storage area as
    ``<pfix>_<suffix>.xls`` and replicated to the configured mirrors.

    :param pfix: filename prefix built from the workflow prefix chain
    :param suffix: logical name of the summary ('train_performance', ...)
    :param frame: pandas DataFrame to persist
    :param sheet_name: Excel sheet name
    """
    if self._config['common']['workflow_summary_enabled']:
        filename = self.storage_path('train', str(pfix) + '_' + suffix, 'xls')
        frame.to_excel(filename, index=False, sheet_name=sheet_name)
        self.replicate_file('train', filename=filename)

def _publish_prediction(self, pfix, wkey, prediction_frame):
    """Rename the prediction column to *wkey*, log it and export the summary.

    An ``AttributeError`` (e.g. ``exec_prediction`` returned something that
    is not a DataFrame) is logged and swallowed so the workflow continues.

    :param pfix: filename prefix for the exported summary
    :param wkey: workflow key used as the new prediction column name
    :param prediction_frame: frame returned by ``controller.exec_prediction``
    """
    try:
        if 'predict' in prediction_frame.columns.values:
            prediction_frame.rename(columns={"predict": wkey}, inplace=True)
        elif 'prediction' in prediction_frame.columns.values:
            prediction_frame.rename(columns={"prediction": wkey}, inplace=True)
        self._logging.log_info(
            'gDayF', 'workflow', self._labels["results"] + '\n',
            prediction_frame.to_string(index_names=False, justify="left"))
        self._store_summary(pfix, 'prediction', prediction_frame,
                            'train_prediction')
    except AttributeError as oexecution_error:
        self._logging.log_info('gDayF', "Workflow",
                               self._labels["failed_model"],
                               str(repr(oexecution_error)))

def train_workflow(self, datapath, wkey, workflow, prefix='main',
                   remove_models=EACH_BEST):
    """Run the training phase of a workflow definition over a dataset.

    Loads the dataset behind *datapath*, applies the workflow's column and
    quantile row filters, runs the analysis through a :class:`Controller`
    (once, or once per distinct value of the "for_each" column), stores
    performance/prediction summaries, and recurses into the workflow's
    "Next" step feeding it the prediction frame.

    :param datapath: dataset locator accepted by ``self.check_path``
    :param wkey: workflow key; the prediction column is renamed to it
    :param workflow: workflow definition dict with "data", "parameters"
                     and "Next" entries
    :param prefix: prefix used when building summary filenames
    :param remove_models: policy forwarded to ``Controller.remove_models``
    :return: the error descriptor from ``check_path`` when the dataset
             cannot be loaded; otherwise ``None``
    """
    set_option('display.max_rows', 500)
    set_option('display.max_columns', 500)
    set_option('display.width', 1000)

    wf = workflow
    pfix = prefix
    error, dataset = self.check_path(datapath)
    if dataset is None:
        return error

    controller = Controller(e_c=self._ec)
    if controller.config_checks():
        variables = dataset.columns.tolist()

        # Drop the workflow's excluded columns; a missing column is logged
        # and ignored (best effort, matches previous behavior).
        if wf["data"]["filtered_columns"] is not None:
            for delete in wf["data"]["filtered_columns"]:
                try:
                    variables.remove(delete)
                except Exception:
                    self._logging.log_info('gDayF', "Workflow",
                                           self._labels["failed_var"],
                                           delete)
        self._logging.log_info('gDayF', "Workflow",
                               self._labels["variables_desc"], variables)

        # Fix: guard the "Next" dispatch below against an unbound name when
        # no parameters block is present (previously a NameError).
        prediction_frame = None

        if wf["data"]["for_each"] is not None:
            # One analysis per distinct value of the "for_each" column.
            fe_column = wf["data"]["for_each"]
            fe_data_exclusions = wf["data"]["for_each_exclusions"]
            fe_filtered_data = wf["data"]["filtered_data"]
            fe_parameters = wf["parameters"]
            fe_next = wf["Next"]

            # Direct indexing replaces the previous string-built eval()
            # expressions; equivalent for valid column names and safe for
            # names that are not Python identifiers.
            for each in dataset[fe_column].unique():
                if fe_data_exclusions is not None and each in fe_data_exclusions:
                    continue
                aux_dataset = dataset[dataset[fe_column] == each]
                pfix = xstr(prefix + '_' + str(each))

                if fe_filtered_data is not None:
                    # Keep only rows at or below the requested quantile.
                    qcolumn = fe_filtered_data["column"]
                    quantile = aux_dataset[qcolumn].quantile(
                        q=fe_filtered_data["quantile"])
                    aux_dataset = aux_dataset.loc[
                        aux_dataset[qcolumn] <= quantile]
                    pfix = xstr(pfix + '_' + str(fe_filtered_data["quantile"]))

                if fe_parameters is not None:
                    source = self._build_exec_analysis_source(fe_parameters)
                    self._logging.log_info('gDayF', "Workflow",
                                           self._labels["desc_operation"],
                                           source)
                    # NOTE(review): eval of workflow-supplied parameters;
                    # required so that amode/metric values resolve to the
                    # framework's symbolic constants. Trusted input only.
                    status, recomendations = eval(source)
                    controller.remove_models(recomendations,
                                             mode=remove_models)
                    controller.reconstruct_execution_tree(
                        recomendations, metric=fe_parameters['metric'],
                        store=True)
                    table_model_list = controller.table_model_list(
                        ar_list=recomendations,
                        metric=eval(fe_parameters['metric']))
                    self._logging.log_info(
                        'gDayF', 'workflow', self._labels["results"] + '\n',
                        table_model_list.to_string(justify="left"))
                    self._store_summary(pfix, 'train_performance',
                                        table_model_list, 'performance')

                    prediction_frame = controller.exec_prediction(
                        datapath=aux_dataset,
                        model_file=recomendations[0]['json_path'][0]['value'])
                    self._publish_prediction(pfix, wkey, prediction_frame)

                    try:
                        if fe_next is not None and prediction_frame is not None:
                            self.workflow(prediction_frame, fe_next, pfix,
                                          remove_models=remove_models)
                    except Exception:
                        self._logging.log_critical(
                            'gDayF', "Workflow", self._labels["failed_wf"],
                            str(fe_next))
        else:
            aux_dataset = dataset

            if wf["data"]["filtered_data"] is not None:
                # BUG FIX: previously called the non-existent
                # DataFrame.quatile() and %-formatted its (DataFrame) result
                # into a .query() string. Compute a scalar quantile and
                # filter exactly as the for_each branch does.
                qcolumn = wf["data"]["filtered_data"]["column"]
                quantile = aux_dataset[qcolumn].quantile(
                    q=wf["data"]["filtered_data"]["quantile"])
                aux_dataset = aux_dataset.loc[aux_dataset[qcolumn] <= quantile]

            if wf['parameters'] is not None:
                source = self._build_exec_analysis_source(wf['parameters'])
                self._logging.log_info('gDayF', "Workflow",
                                       self._labels["desc_operation"],
                                       source)
                # NOTE(review): eval of workflow-supplied parameters; see above.
                status, recomendations = eval(source)
                controller.remove_models(recomendations, mode=remove_models)
                controller.reconstruct_execution_tree(
                    recomendations, metric=wf['parameters']['metric'],
                    store=True)
                table_model_list = controller.table_model_list(
                    ar_list=recomendations,
                    metric=eval(wf['parameters']['metric']))
                self._logging.log_info(
                    'gDayF', 'workflow', self._labels["results"] + '\n',
                    table_model_list.to_string(justify="left"))
                # Sheet name fixed from the misspelled "performace" so both
                # branches produce identically named sheets.
                self._store_summary(pfix, 'train_performance',
                                    table_model_list, 'performance')

                prediction_frame = controller.exec_prediction(
                    datapath=aux_dataset,
                    model_file=recomendations[0]['json_path'][0]['value'])
                self._publish_prediction(pfix, wkey, prediction_frame)

                if wf['Next'] is not None and prediction_frame is not None:
                    try:
                        self.workflow(datapath=prediction_frame,
                                      workflow=wf['Next'], prefix=pfix,
                                      remove_models=remove_models)
                    except Exception as oexecution_error:
                        self._logging.log_critical(
                            'gDayF', "Workflow", self._labels["failed_wf"],
                            str(wf['Next']))
                        self._logging.log_critical(
                            'gDayF', "Workflow", self._labels["failed_wf"],
                            repr(oexecution_error))

    controller.clean_handlers()
    del controller
#Analysis controller = Controller() if controller.config_checks(): data_train, data_test = DataLoad().dm() status, recomendations = controller.exec_analysis( datapath=data_train, objective_column='Weather_Temperature', amode=POC, metric='test_rmse', deep_impact=5) controller.reconstruct_execution_tree(arlist=None, metric='test_rmse', store=True) controller.remove_models(recomendations, mode=EACH_BEST) set_option('display.max_rows', 500) set_option('display.max_columns', 50) set_option('display.max_colwidth', 100) set_option('display.precision', 4) set_option('display.width', 1024) #Prediction print('Starting Prediction\'s Phase') prediction_frame = controller.exec_prediction( datapath=data_test, model_file=recomendations[0]['json_path'][0]['value']) if 'predict' in prediction_frame.columns.values:
"Strong-braking100-B", "out-rpm-B", "Braking100-N", "Rollout100-N", "Idling100-N", "Cruise-Control-N", "V_Cluster" ] model_columns = modification(columns, ignore_columns) print(model_columns) status, recomendations = controller.exec_analysis( datapath=model_data[model_columns], objective_column=objective_column, amode=NORMAL, metric='test_rmse', deep_impact=8) controller.reconstruct_execution_tree(metric='test_rmse', store=True) controller.remove_models(arlist=recomendations, mode=BEST) print( controller.table_model_list(ar_list=recomendations, metric='test_rmse')) prediction_frame = controller.exec_prediction( datapath=model_data, model_file=recomendations[0]['json_path'][0]['value']) model_data['predict'] = prediction_frame['predict'] source_3_data = list() source_3_data.append( '/Data/gdayf-v1/experiments/Crulogic-r2/CRULOGIC-avg-fuel-speed-predicted_1537911231.396293/' ) source_3_data.append('summary/predict/') source_3_data.append('Ensemble_Avg-fuel_prediction.xls')