def predict(self, mode='predict', ignore_columns=None):
    """Run the saved Ludwig model over the transaction's data.

    Args:
        mode: Forwarded to ``_create_ludwig_dataframe`` to select which
            dataframe is built.
        ignore_columns: Names of columns whose values are replaced with
            ``None`` before predicting. ``None`` (the default) blanks
            nothing.

    Returns:
        The result of ``LudwigModel.predict`` (presumably a pandas
        DataFrame -- confirm) with ``'_predictions'`` removed from every
        column name.
    """
    if ignore_columns is None:
        ignore_columns = []

    # The definition returned by the helper is discarded: prediction uses
    # the definition that was stored on the transaction at training time.
    predict_dataframe, _ = self._create_ludwig_dataframe(mode)
    model_definition = self.transaction.hmd['ludwig_data']['model_definition']

    # Each entry of 'model_order_by' is indexable; its first element is the
    # column name.
    order_by = self.transaction.lmd['model_order_by']
    timeseries_cols = [] if order_by is None else [entry[0] for entry in order_by]

    if timeseries_cols:
        predict_dataframe, model_definition = self._translate_df_to_timeseries_format(
            predict_dataframe, model_definition, timeseries_cols)

    for ignore_col in ignore_columns:
        try:
            predict_dataframe[ignore_col] = [None] * len(predict_dataframe[ignore_col])
        except KeyError:
            # The column is not present under its own name; fall back to
            # the '_year' / '_month' / '_day' columns derived from it.
            for date_appendage in ['_year', '_month', '_day']:
                date_col = ignore_col + date_appendage
                predict_dataframe[date_col] = [None] * len(predict_dataframe[date_col])

    # Load the model exactly once, inside the output-suppression context.
    with disable_ludwig_output():
        model = LudwigModel.load(self.transaction.lmd['ludwig_data']['ludwig_save_path'])
        predictions = model.predict(data_df=predict_dataframe)

    # Strip the '_predictions' marker from all column names in one rename
    # instead of copying the whole frame once per column.
    return predictions.rename(
        columns={col_name: col_name.replace('_predictions', '')
                 for col_name in predictions})
def predict(self, mode='predict', ignore_columns=None):
    """Run the saved Ludwig model over the transaction's data.

    Args:
        mode: Forwarded to ``_create_ludwig_dataframe`` to select which
            dataframe is built.
        ignore_columns: Names of columns whose values are replaced with
            ``None`` before predicting. ``None`` (the default) blanks
            nothing.

    Returns:
        The result of ``LudwigModel.predict`` (presumably a pandas
        DataFrame -- confirm) with ``'_predictions'`` removed from every
        column name.
    """
    if ignore_columns is None:
        ignore_columns = []

    # The definition returned by the helper is discarded: prediction uses
    # the definition that was stored on the transaction at training time.
    predict_dataframe, _, timeseries_cols = self._create_ludwig_dataframe(mode)
    model_definition = self.transaction.hmd['ludwig_data']['model_definition']

    if len(timeseries_cols) > 0:
        predict_dataframe, model_definition = self._translate_df_to_timeseries_format(
            predict_dataframe, model_definition, timeseries_cols)

    for ignore_col in ignore_columns:
        try:
            predict_dataframe[ignore_col] = [None] * len(predict_dataframe[ignore_col])
        except KeyError:
            # The column is not present under its own name; fall back to
            # the '_year' / '_month' / '_day' columns derived from it.
            for date_appendage in ['_year', '_month', '_day']:
                date_col = ignore_col + date_appendage
                predict_dataframe[date_col] = [None] * len(predict_dataframe[date_col])

    with disable_ludwig_output(True):
        model = LudwigModel.load(model_dir=self.get_model_dir())
        predictions = model.predict(data_df=predict_dataframe,
                                    gpus=self.get_useable_gpus())

    # Strip the '_predictions' marker from all column names in one rename
    # instead of copying the whole frame once per column.
    return predictions.rename(
        columns={col_name: col_name.replace('_predictions', '')
                 for col_name in predictions})
def train(self):
    """Train a Ludwig model on the transaction's data and persist it.

    When ``lmd['rebuild_model']`` is ``True`` a new model is created from
    the model definition; otherwise the model previously saved at
    ``lmd['ludwig_data']['ludwig_save_path']`` is loaded and trained
    further.

    Side effects:
        * ``lmd['model_accuracy']`` is updated from the training stats.
        * The model is saved under ``CONFIG.MINDSDB_STORAGE_PATH`` and
          closed.
        * ``lmd['ludwig_data']`` and ``hmd['ludwig_data']`` are replaced
          with the save path and the model definition respectively.
    """
    training_dataframe, model_definition = self._create_ludwig_dataframe('train')

    # Each entry of 'model_order_by' is indexable; its first element is the
    # column name.
    order_by = self.transaction.lmd['model_order_by']
    timeseries_cols = [] if order_by is None else [entry[0] for entry in order_by]

    if timeseries_cols:
        training_dataframe, model_definition = self._translate_df_to_timeseries_format(
            training_dataframe, model_definition, timeseries_cols, 'train')

    with disable_ludwig_output(True):
        # Online training (initialize_model + train_online) is not used:
        # the model could not be initialised without train_set_metadata,
        # which in turn could not be obtained without running train. See
        # https://github.com/uber/ludwig/issues/295
        if self.transaction.lmd['rebuild_model'] is True:
            # Only build a fresh model when it is actually going to be used.
            model = LudwigModel(model_definition)
        else:
            model = LudwigModel.load(self.transaction.lmd['ludwig_data']['ludwig_save_path'])

        train_stats = model.train(data_df=training_dataframe,
                                  model_name=self.transaction.lmd['name'],
                                  skip_save_model=True)

    accuracy = self.transaction.lmd['model_accuracy']
    for k in train_stats['train']:
        if k not in accuracy['train']:
            # NOTE(review): a key seen for the first time only gets empty
            # lists; its accuracy from this run is not recorded. Confirm
            # this is intended.
            accuracy['train'][k] = []
            accuracy['test'][k] = []
        elif k == 'combined':
            # Accuracy is only available for 'combined', so other outputs
            # are left untouched for now; this only affects multi-output
            # scenarios.
            accuracy['train'][k].extend(train_stats['train'][k]['accuracy'])
            accuracy['test'][k].extend(train_stats['test'][k]['accuracy'])

    # NOTE: repeatedly calling model.train_online(data_df=...) on a loaded
    # model was tried here, but the returned train_stats were None.

    ludwig_model_savepath = os.path.join(
        CONFIG.MINDSDB_STORAGE_PATH,
        self.transaction.lmd['name'] + '_ludwig_data')

    model.save(ludwig_model_savepath)
    model.close()

    self.transaction.lmd['ludwig_data'] = {'ludwig_save_path': ludwig_model_savepath}
    self.transaction.hmd['ludwig_data'] = {'model_definition': model_definition}
def train(self):
    """Train a Ludwig model on the transaction's data and persist it.

    When ``lmd['rebuild_model']`` is ``True`` a new model is created and
    initialised from freshly built training-set metadata; otherwise the
    model stored in ``self.get_model_dir()`` is loaded and trained
    further.

    Side effects:
        * The local ``'results'`` directory is deleted before training.
        * ``lmd['model_accuracy']`` is updated from the training stats.
        * The trained model ends up under ``CONFIG.MINDSDB_STORAGE_PATH``
          (any previous directory at that path is removed first).
        * ``lmd['ludwig_data']`` and ``hmd['ludwig_data']`` are replaced
          with the save path and the model definition respectively.
    """
    training_dataframe, model_definition, timeseries_cols = self._create_ludwig_dataframe(
        'train')

    if len(timeseries_cols) > 0:
        training_dataframe, model_definition = self._translate_df_to_timeseries_format(
            training_dataframe, model_definition, timeseries_cols, 'train')

    # While this is False, model.save() is not used: training is allowed to
    # write its own output (skip_save_model=False) and the first entry of
    # the local 'results' directory is moved to the final location below.
    ludwig_save_is_working = False

    with disable_ludwig_output(True):
        if not ludwig_save_is_working:
            # Start from a clean 'results' directory so that the entry
            # picked up after training belongs to this run.
            shutil.rmtree('results', ignore_errors=True)

        gpus = self.get_useable_gpus()

        if self.transaction.lmd['rebuild_model'] is True:
            # Context: https://github.com/uber/ludwig/issues/295 (a model
            # could not be initialised without train_set_metadata). The
            # metadata is built explicitly here before initialising.
            model = LudwigModel(model_definition)
            merged_model_definition = model.model_definition
            train_set_metadata = build_metadata(
                training_dataframe,
                (merged_model_definition['input_features'] +
                 merged_model_definition['output_features']),
                merged_model_definition['preprocessing'])
            model.initialize_model(train_set_metadata=train_set_metadata,
                                   gpus=gpus)
        else:
            model = LudwigModel.load(model_dir=self.get_model_dir())

        train_stats = model.train(
            data_df=training_dataframe,
            model_name=self.transaction.lmd['name'],
            skip_save_model=ludwig_save_is_working,
            skip_save_progress=True,
            gpus=gpus)

    accuracy = self.transaction.lmd['model_accuracy']
    for k in train_stats['train']:
        if k not in accuracy['train']:
            # NOTE(review): a key seen for the first time only gets empty
            # lists; its accuracy from this run is not recorded. Confirm
            # this is intended.
            accuracy['train'][k] = []
            accuracy['test'][k] = []
        elif k == 'combined':
            # Accuracy is only available for 'combined', so other outputs
            # are left untouched for now; this only affects multi-output
            # scenarios.
            accuracy['train'][k].extend(train_stats['train'][k]['accuracy'])
            accuracy['test'][k].extend(train_stats['test'][k]['accuracy'])

    # NOTE: repeatedly calling model.train_online(data_df=...) on a loaded
    # model was tried here, but the returned train_stats were None.

    ludwig_model_savepath = os.path.join(
        CONFIG.MINDSDB_STORAGE_PATH,
        self.transaction.lmd['name'] + '_ludwig_data')

    if ludwig_save_is_working:
        model.save(ludwig_model_savepath)
        model.close()
    else:
        # NOTE(review): the model is not closed on this path -- confirm
        # whether model.close() should be called here as well.
        shutil.rmtree(ludwig_model_savepath, ignore_errors=True)
        shutil.move(os.path.join('results', os.listdir('results')[0]),
                    ludwig_model_savepath)

    self.transaction.lmd['ludwig_data'] = {
        'ludwig_save_path': ludwig_model_savepath
    }
    self.transaction.hmd['ludwig_data'] = {
        'model_definition': model_definition
    }