def run(self):
    """Scan the hyper-parameters for the current mode and save the best model.

    Steps:
      1. Build the train / validation arrays via ``self.activateTestSplit()``.
      2. Run a talos ``Scan`` over ``self.params`` with ``self.model`` as the
         model-building function and keep the model with the lowest
         ``val_loss`` in ``self.best_model``.
      3. If ``self.plot`` is truthy, plot the base-40 logarithm of the
         training and validation loss stored in ``self.history``.
      4. Remove the ``<mode>_model`` directory and save the best model to
         ``models/<mode>_model_<timestamp in ms>.h5``.

    Returns:
        None. Results are stored on ``self.best_model`` and written to disk.
    """
    # Local import so this method does not depend on a file-level ``import os``.
    import os

    self.activateTestSplit()

    experiment_dir = self.mode + '_model'
    scan = Scan(self.dataset['X_train'],
                self.dataset['Y_train'],
                model=self.model,
                params=self.params,
                print_params=True,
                experiment_name=experiment_dir,
                reduction_metric='val_loss')
    # asc=True: a lower val_loss is better.
    self.best_model = scan.best_model(metric='val_loss', asc=True)

    if self.plot:
        # NOTE(review): self.history is expected to have been set while the
        # scan ran; if several parameter permutations are scanned it may not
        # belong to the model kept in self.best_model -- confirm.
        plt.plot(np.log(self.history.history['loss']) / np.log(40))
        plt.plot(np.log(self.history.history['val_loss']) / np.log(40))
        plt.title(self.mode + ' Training Results')
        plt.ylabel('Log(40) Linear Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Test'], loc='upper left')
        plt.show()

    print('Model trained on ', self.dataset['X_train'].shape[0],
          ' samples, verified on ', self.dataset['X_val'].shape[0],
          ' samples')

    # Presumably the folder talos created for this experiment -- it shares
    # the name passed as experiment_name above.
    shutil.rmtree(experiment_dir)

    model_path = ('models/' + self.mode + '_model' + '_' +
                  str(int(round(time.time() * 1000))) + '.h5')
    # Saving fails if the target directory is missing, so create it first.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    self.best_model.save(model_path)
    print('Model saved to: ' + model_path)
model.add(Flatten()) model.add(Dense(params['first_neuron'], activation=params['activation'])) model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) # Fit the model out = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=2, batch_size=params['batch_size'], verbose=0) return out, model p = { "first_neuron": [125, 250, 300], "activation": ["relu", "elu"], "batch_size": [32, 64, 128], } exp_name = "movie_review_test" t = Scan( x=X_train, y=Y_train, x_val=X_val, y_val=Y_val, params=p, model=movie_review, experiment_name=exp_name, )
epochs=params['epochs'], verbose=1, validation_data=[X_val_resampled, y_val_resampled], callbacks=[ early_stopper(epochs=params['epochs'], mode='moderate', monitor='val_loss') ]) return history, model h = Scan(X_train_resampled, y_train_resampled, model=fraud_model, params=p, grid_downsample=0.1, print_params=True, dataset_name="creditcardfraud", experiment_no='1', reduction_metric="val_loss", reduce_loss=True) e = ta.Evaluate(h) evaluation = e.evaluate(X_test, y_test, model_id=None, folds=folds, shuffle=True, metric='val_loss', asc=True) #deploy and restore
Dense(param['first_neuron'], input_dim=x_train.shape[1], activation=param['activation'], kernel_initializer='orthogonal')) model.add(Dropout(param['dropout'])) hidden_layers(model, param, 1) model.add( Dense(y_train.shape[1], activation='softmax', kernel_initializer='orthogonal')) opt = keras.optimizers.Adam(lr=param['lr'], decay=param['decay']) model.compile(optimizer=opt, loss=param['losses'], metrics=['acc']) out = model.fit(x_train, y_train, batch_size=param['batch_size'], epochs=param['epochs'], verbose=0, validation_data=[x_val, y_val]) return out, model h = Scan(x=train_data, y=train_y, x_val=valid_data, y_val=valid_y, params=haper, dataset_name='Hbb_optimization', experiment_no='1', model=hbb_model, grid_downsample=0.5)
metrics=['accuracy']) model.summary() # LOGS tb_callback = TensorBoard(log_dir=logs_name(p)) history = model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=p['batch_size'], epochs=p['epochs'], callbacks=[ tb_callback, LambdaCallback(on_train_begin=logs_refresh_mlf_run, on_epoch_end=logs_on_epoch_end) ], verbose=0) return history, model x_train, x_test, y_train, y_test = data_processing() scanResults = Scan(x=x_train, y=y_train, x_val=x_test, y_val=y_test, model=cactus_model, params=params, dataset_name="cactus", experiment_no=str(round(time.time())))
def HyperScan(self, data, list_inputs, list_outputs, task, model_idx=None, generator=False, resume=False):
    """
    Run the talos hyperparameter scan, then compute an evaluation error
    for the trained models and write the results to ``<name_model>.csv``.

    Args:
        data: pandas DataFrame holding inputs, outputs and unused columns.
            Only read when ``generator`` is falsy; a ``learning_weights``
            column is then required, plus a ``mask`` column when
            ``model_idx`` is given.
        list_inputs: list of column names used as network inputs.
        list_outputs: list of column names used as network targets.
        task: ``''`` to scan the full ``parameters.p`` dictionary, otherwise
            the file name of a pickled sub-dictionary located in
            ``<parameters.main_path>/split/<self.name>/``.
        model_idx: ``None`` for a random train/validation split, otherwise
            the cross-validation slice index handed to
            ``GenerateSliceIndices``.
        generator: when truthy, dummy arrays are given to talos and the
            models are evaluated through a ``DataGenerator``.
        resume: when truthy, continue training ``parameters.resume_model``
            using the parameter dictionary stored with that model.

    Sets ``self.x_train``, ``self.y_train``, ``self.x_val``, ``self.y_val``,
    ``self.task``, ``self.p``, ``self.name_model``, ``self.h`` and
    ``self.autom8`` (plus ``self.x`` / ``self.y`` / ``self.h_with_eval``
    when ``generator`` is falsy).

    Reference : talos/scan/Scan.py
    """
    logging.info(' Starting scan '.center(80, '-'))

    # Report the columns used as features and as targets
    logging.info('Number of features : %d' % len(list_inputs))
    for feature in list_inputs:
        logging.info('..... %s' % feature)
    logging.info('Number of outputs : %d' % len(list_outputs))
    for target in list_outputs:
        logging.info('..... %s' % target)

    if generator:
        # talos still needs arrays to start; the generator is used in Model
        placeholder_x = np.ones((1, len(list_inputs)))
        placeholder_y = np.ones((1, len(list_outputs) + 1))  # outputs + weights
        self.x_train = placeholder_x
        self.y_train = placeholder_y
        self.x_val = placeholder_x
        self.y_val = placeholder_y
    else:
        self.x = data[list_inputs].values
        # The learning weights travel as the last column of y
        self.y = data[list_outputs + ['learning_weights']].values

        if model_idx is None:
            train_fraction = parameters.training_ratio / (
                parameters.training_ratio + parameters.evaluation_ratio)
            split = train_test_split(self.x, self.y, train_size=train_fraction)
            self.x_train, self.x_val, self.y_train, self.y_val = split
        else:
            # Cross validation: the slice `model_idx` is the one the model is
            # later applied to, so it is neither trained nor evaluated on
            _, eval_idx, train_idx = GenerateSliceIndices(model_idx)
            eval_mask = GenerateSliceMask(eval_idx, data['mask'])
            train_mask = GenerateSliceMask(train_idx, data['mask'])
            self.x_train = self.x[train_mask]
            self.y_train = self.y[train_mask]
            self.x_val = self.x[eval_mask]
            self.y_val = self.y[eval_mask]

        logging.info("Training set   : %d" % self.x_train.shape[0])
        logging.info("Evaluation set : %d" % self.x_val.shape[0])

    # Parameter dictionary: full one from parameters.py, or a pickled split
    self.task = task
    if self.task == '':
        self.p = parameters.p
    else:
        task_file = os.path.join(parameters.main_path, 'split', self.name, self.task)
        with open(task_file, 'rb') as handle:
            self.p = pickle.load(handle)

    if resume:
        logging.info("Will resume training of model %s" % parameters.resume_model)
        restored = Restore(parameters.resume_model, custom_objects=self.custom_objects)
        initial_epoch = restored.params['epochs'][0]
        # Read these before self.p is replaced by the restored dictionary
        supp_epochs = self.p['epochs'][0]
        batch_size_save = self.p['batch_size']  # batch size may change on retraining

        self.p = restored.params
        self.p['resume'] = [parameters.resume_model]
        self.p['initial_epoch'] = [initial_epoch]  # passed on to Model
        self.p['epochs'][0] = initial_epoch + supp_epochs
        self.p['batch_size'] = batch_size_save

        logging.warning("Since you asked to resume training of model %s, the parameters dictionary has been set to the one used to train the model" % parameters.resume_model)
        logging.info("Will train the model from epoch %d to %d" % (self.p['initial_epoch'][0], self.p['epochs'][0]))

    # Output name: optional cross-validation suffix, then either a free
    # experiment number (frontend) or the task name (cluster job)
    exp_no = 1
    base_name = self.name
    if model_idx is not None:
        base_name += '_crossval%d' % model_idx

    if self.task == '':
        # Increase the number until no CSV of that name exists yet
        while os.path.exists(os.path.join(parameters.path_model, self.name + '_' + str(exp_no) + '.csv')):
            exp_no += 1
        self.name_model = base_name + '_' + str(exp_no)
    else:
        self.name_model = base_name + '_' + self.task.replace('.pkl', '')

    # Scan options not used here: grid_downsample, random_method,
    # reduction_method, reduction_window, reduction_interval, last_epoch_value
    self.h = Scan(
        x=self.x_train,
        y=self.y_train,
        params=self.p,
        dataset_name=self.name,
        experiment_no=str(exp_no),
        model=getattr(Model, parameters.model),  # model function named in parameters.py
        val_split=0.1,
        reduction_metric='val_loss',
        print_params=True,
        repetition=parameters.repetition,
        path_model=parameters.path_model,
        custom_objects=self.custom_objects,
    )

    csv_path = self.name_model + '.csv'
    if not generator:
        self.h_with_eval = Autom8(
            scan_object=self.h,
            x_val=self.x_val,
            y_val=self.y_val[:, :-1],  # drop the weight column
            n=-1,                      # evaluate every model
            folds=5,
            metric='val_loss',
            asc=True,                  # loss: smaller is better
            shuffle=True,
            average='micro',
        )
        self.h_with_eval.data.to_csv(csv_path)
    else:
        n_models = self.h.data.shape[0]
        error_arr = np.zeros(n_models)
        for idx in range(n_models):
            logging.info("Evaluating model %d" % idx)
            # Rebuild the model from its stored architecture and weights
            model_eval = model_from_json(self.h.saved_models[idx], custom_objects=self.custom_objects)
            model_eval.set_weights(self.h.saved_weights[idx])
            model_eval.compile(optimizer=Adam(),
                               loss={'OUT': parameters.p['loss_function']},
                               metrics=['accuracy'])
            evaluation_generator = DataGenerator(
                path=parameters.path_gen_evaluation,
                inputs=parameters.inputs,
                outputs=parameters.outputs,
                batch_size=parameters.p['batch_size'][0],
                state_set='evaluation',
            )
            eval_metric = model_eval.evaluate_generator(
                generator=evaluation_generator,
                workers=parameters.workers,
                use_multiprocessing=True,
            )
            error_arr[idx] = eval_metric[0]
            logging.info('Error is %f' % error_arr[idx])

        self.h.data['eval_mean'] = error_arr
        self.h.data.to_csv(csv_path)
    self.autom8 = True

    # Experiment configuration details
    logging.info('=' * 80)
    logging.debug('Details')
    logging.debug(self.h.details)
metrics=['accuracy'], ) out = model.fit( x_train, y_train, validation_data=(x_val, y_val), batch_size=params['batch_size'], epochs=params['epochs'], verbose=2, ) return out, model p = {'first_filter': [64, 128, 256], 'batch_size': [32, 64, 128], 'epochs': [100, 500, 1000], 'min_lr': [4e-6, 1e-5, 7e-5, 0.01] } exp_name = 'forda_jako' t = Scan( x=x_train, y=y_train, x_val=x_val, y_val=y_val, params=p, model=FordaModel, experiment_name=exp_name)
# encoder_Y = [0]*744 + [1]*722 + [2]*815 + [3]*1008 + [4]*811 # encoder_Y = [0]*744 + [1]*722 + [2]*815 + [3]*1008 # <== # one-hot encoding dummy_Y = np_utils.to_categorical(encoder_Y) # train test split X_train, X_test, Y_train, Y_test = train_test_split(X, dummy_Y, test_size=0.1, random_state=9) from keras.callbacks import EarlyStopping, ModelCheckpoint his = LossHistory() # call keras model # training h = Scan(x=X_train, y=Y_train, x_val=X_test, y_val=Y_test, params=parm, experiment_name='first_test', model=keras_model, fraction_limit=0.1) # from talos.utils.recover_best_model import recover_best_model # results, models = recover_best_model(x=X_train, # y=Y_train, # x_val=X_test, # y_val=Y_test, # experiment_log='minimal_iris.csv', # input_model=keras_model, # n_models=5, # task='multi_label')
class modelTrainer:
    """Train, select and save a small dense regression network for one mode.

    Constructing an instance with a known ``mode`` ('DoS', 'Elongation',
    'Tensile' or 'Yield') immediately loads the matching CSV dataset, runs
    the hyper-parameter scan and saves the best model (see ``run``).
    An unknown ``mode`` leaves the instance without dataset or parameters
    and does nothing else.
    """

    # mode -> (number of input columns, dataset path, hidden-layer width, epochs)
    _MODE_SETTINGS = {
        'DoS': (21, 'training_datasets/DoS_dataset_1.csv', 20, 900),
        'Elongation': (26, 'training_datasets/Mechanical_dataset_1.csv', 25, 200),
        'Tensile': (26, 'training_datasets/Mechanical_dataset_1.csv', 25, 30),
        'Yield': (26, 'training_datasets/Mechanical_dataset_1.csv', 30, 60),
    }

    # mode -> offsets (relative to input_dim) of the columns removed from the
    # raw table, so that the remaining column at index input_dim is the one
    # used as target by activateTestSplit.
    _DROPPED_TARGET_OFFSETS = {
        'Elongation': (1, 2),
        'Tensile': (0, 2),
        'Yield': (0, 1),
    }

    def __init__(self, mode, plot=False, verbosity=1):
        """Configure the trainer for ``mode`` and run the whole pipeline.

        Args:
            mode: one of the keys of ``_MODE_SETTINGS``.
            plot: when truthy, ``run`` plots the loss curves.
            verbosity: forwarded to Keras ``fit`` as ``verbose``.
        """
        self.mode = mode
        self.plot = plot
        self.history = None
        self.best_model = None
        self.test_proportion = 0.20
        self.verbosity = verbosity

        settings = self._MODE_SETTINGS.get(self.mode)
        if settings is None:
            # Unknown mode: nothing is loaded or trained.
            return

        self.input_dim, self.dataset_path, hidden_units, epochs = settings
        # Every value is a list because the scan expects a grid of candidates.
        self.params = {
            'activation1': [relu],
            'activation2': [relu],
            'optimizer': ['Nadam'],
            'losses': ['mean_absolute_error'],
            'first_hidden_layer': [hidden_units],
            'second_hidden_layer': [hidden_units],
            'dropout_probability': [0.2],
            'batch_size': [8],
            'epochs': [epochs]
        }

        self.dataset = {}
        self.loadDataset(self.dataset_path)
        self.run()

    def loadDataset(self, dataset_path):
        """Read the CSV at ``dataset_path`` into ``self.dataset``.

        The header line is skipped, rows are shuffled with a fixed seed, the
        target columns not belonging to ``self.mode`` are dropped and every
        row containing a non-numeric cell is discarded. The result is stored
        as float arrays under the keys ``'full'``, ``'testing'`` (the first
        ``test_proportion`` of the rows) and ``'training'`` (the rest).

        Args:
            dataset_path: path of a comma-separated file with a header line.
        """
        with open(dataset_path, "r", newline='') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            next(csv_reader)  # header
            # Blank lines would make the table ragged, so skip them.
            raw_rows = [row for row in csv_reader if row]

        dataset = np.asarray(raw_rows)
        # Fixed seed: the train / test partition is the same on every run.
        np.random.seed(2349138)
        np.random.shuffle(dataset)

        dropped = self._DROPPED_TARGET_OFFSETS.get(self.mode)
        if dropped is not None:
            columns = [self.input_dim + offset for offset in dropped]
            dataset = np.delete(dataset, columns, 1)

        # Parse straight into Python floats. Writing floats back into the
        # fixed-width string array would silently truncate values whose
        # float representation is longer than the widest cell.
        n_columns = dataset.shape[1]
        numeric_rows = []
        for raw in dataset:
            try:
                numeric_rows.append([float(cell) for cell in raw])
            except ValueError:
                continue  # row holds a non-numeric cell: drop it
        dataset = np.array(numeric_rows, dtype=float).reshape(
            len(numeric_rows), n_columns)

        self.dataset['full'] = dataset
        test_split = round(dataset.shape[0] * self.test_proportion)
        self.dataset['testing'] = dataset[0:test_split, :]
        self.dataset['training'] = dataset[test_split:, :]

    def activateTestSplit(self):
        """Split the training / testing tables into inputs (X) and target (Y).

        The first ``input_dim`` columns are the inputs, the column at index
        ``input_dim`` is the target.
        """
        training = self.dataset['training']
        testing = self.dataset['testing']
        self.dataset['X_train'] = training[:, 0:self.input_dim]
        self.dataset['Y_train'] = training[:, self.input_dim]
        self.dataset['X_val'] = testing[:, 0:self.input_dim]
        self.dataset['Y_val'] = testing[:, self.input_dim]

    def model(self, X_train, Y_train, X_val, Y_val, params):
        """Build and fit one network for the parameter set ``params``.

        The data arguments are accepted but not used: training and validation
        always use the arrays prepared by ``activateTestSplit``.

        Args:
            X_train, Y_train, X_val, Y_val: ignored (see above).
            params: dict with one concrete value per hyper-parameter.

        Returns:
            Tuple ``(history, model)``; ``history`` is also kept in
            ``self.history``.
        """
        model = Sequential()
        model.add(
            Dense(params['first_hidden_layer'],
                  input_dim=self.input_dim,
                  activation=params['activation1'],
                  use_bias=True))
        model.add(Dropout(params['dropout_probability']))
        model.add(
            Dense(params['second_hidden_layer'],
                  activation=params['activation2'],
                  use_bias=True))
        model.add(Dropout(params['dropout_probability']))
        model.add(Dense(1, activation=linear))

        model.compile(optimizer=params['optimizer'], loss=params['losses'])

        history = model.fit(
            self.dataset['X_train'],
            self.dataset['Y_train'],
            batch_size=params['batch_size'],
            epochs=params['epochs'],
            verbose=self.verbosity,
            # Keras documents validation_data as a tuple (x_val, y_val).
            validation_data=(self.dataset['X_val'], self.dataset['Y_val']))
        self.history = history

        return history, model

    def run(self):
        """Scan ``self.params``, keep the best model and save it as .h5.

        The best model (lowest ``val_loss``) is stored in ``self.best_model``
        and written to ``models/<mode>_model_<timestamp in ms>.h5``. When
        ``self.plot`` is truthy the loss curves from ``self.history`` are
        plotted on a base-40 logarithmic scale.
        """
        # Local import so this method does not depend on a file-level import.
        import os

        self.activateTestSplit()

        experiment_dir = self.mode + '_model'
        scan = Scan(self.dataset['X_train'],
                    self.dataset['Y_train'],
                    model=self.model,
                    params=self.params,
                    print_params=True,
                    experiment_name=experiment_dir,
                    reduction_metric='val_loss')
        # asc=True: a lower val_loss is better.
        self.best_model = scan.best_model(metric='val_loss', asc=True)

        if self.plot:
            # self.history is the History of the most recent model() call.
            plt.plot(np.log(self.history.history['loss']) / np.log(40))
            plt.plot(np.log(self.history.history['val_loss']) / np.log(40))
            plt.title(self.mode + ' Training Results')
            plt.ylabel('Log(40) Linear Loss')
            plt.xlabel('Epoch')
            plt.legend(['Train', 'Test'], loc='upper left')
            plt.show()

        print('Model trained on ', self.dataset['X_train'].shape[0],
              ' samples, verified on ', self.dataset['X_val'].shape[0],
              ' samples')

        # Presumably the folder talos created for this experiment -- it
        # shares the name passed as experiment_name above.
        shutil.rmtree(experiment_dir)

        model_path = ('models/' + self.mode + '_model' + '_' +
                      str(int(round(time.time() * 1000))) + '.h5')
        # Saving fails if the target directory is missing, so create it first.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        self.best_model.save(model_path)
        print('Model saved to: ' + model_path)
def HyperScan(self,
              data,
              list_inputs,
              list_outputs,
              task,
              generator=False,
              generator_weights=False,
              resume=False):
    """
    Run the talos hyperparameter scan, then compute an evaluation error
    for the trained models and write the results to ``<name_model>.csv``.

    Args:
        data: pandas DataFrame holding inputs, outputs and unused columns.
            Only read when ``generator`` is falsy; it must then contain a
            ``learning_weights`` column.
        list_inputs: list of column names used as network inputs.
        list_outputs: list of column names used as network targets.
        task: ``''`` to scan the full ``parameters.p`` dictionary, otherwise
            the file name of a pickled sub-dictionary located in
            ``<parameters.main_path>/split/<self.name>/``.
        generator: when truthy, dummy arrays are given to talos and the
            models are evaluated through a ``DataGenerator``.
        generator_weights: when truthy, ``'generator_weights': [True]`` is
            added to the parameter dictionary passed to the model function.
        resume: when truthy, continue training ``parameters.resume_model``
            using the parameter dictionary stored with that model.

    Sets ``self.x_train``, ``self.y_train``, ``self.x_val``, ``self.y_val``,
    ``self.task``, ``self.p``, ``self.name_model``, ``self.h`` and
    ``self.autom8``. When ``task`` is ``''`` it also appends
    ``'_' + self.sample`` to ``self.name`` and sets ``self.path_model``.

    Reference : talos/scan/Scan.py
    """
    logging.info(' Starting scan '.center(80, '-'))

    # Report the columns used as features and as targets
    logging.info('Number of features : %d' % len(list_inputs))
    for name in list_inputs:
        logging.info('..... %s' % name)
    logging.info('Number of outputs : %d' % len(list_outputs))
    for name in list_outputs:
        logging.info('..... %s' % name)

    if not generator:
        self.x = data[list_inputs].values
        # The learning weights travel as the last column of y
        self.y = data[list_outputs + ['learning_weights']].values

        size = parameters.training_ratio / (parameters.training_ratio +
                                            parameters.validation_ratio)
        self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(
            self.x, self.y, train_size=size)
        logging.info("Training set   : %d" % self.x_train.shape[0])
        logging.info("Evaluation set : %d" % self.x_val.shape[0])
    else:
        # talos still needs arrays to start; the generator is used in Model
        dummyX = np.ones((1, len(list_inputs)))
        dummyY = np.ones((1, len(list_outputs) + 1))  # outputs + weights
        self.x_train = dummyX
        self.y_train = dummyY
        self.x_val = dummyX
        self.y_val = dummyY

    # Parameter dictionary: a pickled split, or the full one from parameters.py
    self.task = task
    if self.task != '':
        task_file = os.path.join(parameters.main_path, 'split', self.name,
                                 self.task)
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load task files produced by this project.
        with open(task_file, 'rb') as f:
            self.p = pickle.load(f)
    else:
        # Work on a shallow copy: keys added below (e.g. 'generator_weights')
        # must not leak into the module-level parameters.p and affect later
        # calls.
        self.p = dict(parameters.p)

    if resume:
        logging.info("Will resume training of model %s" %
                     parameters.resume_model)
        a = Restore(parameters.resume_model,
                    custom_objects=self.custom_objects)
        initial_epoch = a.params['epochs'][0]
        # Read these before self.p is replaced by the restored dictionary
        supp_epochs = self.p['epochs'][0]
        batch_size_save = self.p['batch_size']  # batch size may change on retraining

        self.p = a.params
        self.p['resume'] = [parameters.resume_model]
        self.p['initial_epoch'] = [initial_epoch]  # passed on to Model
        # New end epoch = last epoch of the trained model + extra epochs
        self.p['epochs'][0] = initial_epoch + supp_epochs
        self.p['batch_size'] = batch_size_save

        logging.warning(
            "Since you asked to resume training of model %s, the parameters dictionary has been set to the one used to train the model"
            % parameters.resume_model)
        logging.info("Will train the model from epoch %d to %d" %
                     (self.p['initial_epoch'][0], self.p['epochs'][0]))

    if generator_weights:
        # Tells the model function that the generator must use the weights
        self.p['generator_weights'] = [True]

    # Pick an experiment number whose CSV does not exist yet (frontend), or
    # name the output after the task (cluster job)
    no = 1
    if self.task == '':
        self.name = self.name + '_' + self.sample
        self.path_model = os.path.join(parameters.main_path, 'model',
                                       self.name)
        while os.path.exists(
                os.path.join(parameters.path_model,
                             self.name + '_' + str(no) + '.csv')):
            no += 1
        self.name_model = self.name + '_' + str(no)
    else:
        # NOTE(review): no '_' between sample and task name -- confirm that
        # this is intended.
        self.name_model = self.name + '_' + self.sample + self.task.replace(
            '.pkl', '')

    # Scan options not used here: grid_downsample, random_method,
    # reduction_method, reduction_window, reduction_interval, last_epoch_value
    self.h = Scan(
        x=self.x_train,
        y=self.y_train,
        params=self.p,
        dataset_name=self.name,
        experiment_no=str(no),
        model=getattr(Model, parameters.model),  # model function named in parameters.py
        val_split=0.1,
        reduction_metric='val_loss',
        print_params=True,
        repetition=parameters.repetition,
        path_model=parameters.path_model,
        custom_objects=self.custom_objects,
    )

    if not generator:
        self.h_with_eval = Autom8(
            scan_object=self.h,
            x_val=self.x_val,
            y_val=self.y_val[:, :-1],  # last column is weight
            n=-1,
            folds=10,
            metric='val_loss',
            asc=True,
            shuffle=True,
            average=None)
        self.h_with_eval.data.to_csv(self.name_model + '.csv')
        self.autom8 = True
    else:
        error_arr = np.zeros(self.h.data.shape[0])
        for i in range(self.h.data.shape[0]):
            logging.info("Evaluating model %d" % i)
            # Rebuild the model from its stored architecture and weights
            model_eval = model_from_json(
                self.h.saved_models[i], custom_objects=self.custom_objects)
            model_eval.set_weights(self.h.saved_weights[i])
            model_eval.compile(optimizer=Adam(),
                               loss={'OUT': mean_squared_error},
                               metrics=['accuracy'])
            evaluation_generator = DataGenerator(
                path=parameters.path_gen_evaluation,
                inputs=parameters.inputs,
                outputs=parameters.outputs,
                batch_size=parameters.p['batch_size'][0],
                state_set='evaluation')
            eval_metric = model_eval.evaluate_generator(
                generator=evaluation_generator,
                workers=parameters.workers,
                use_multiprocessing=True)
            # First entry of the evaluation result is stored as the error
            error_arr[i] = eval_metric[0]
            logging.info('Error is %f' % error_arr[i])

        self.h.data['eval_mean'] = error_arr
        self.h.data.to_csv(self.name_model + '.csv')
        self.autom8 = True

    # Experiment configuration details
    logging.info('=' * 80)
    logging.debug('Details')
    logging.debug(self.h.details)