def test_integration(self):
    """Run a condensed version of the tutorial workflow end to end.

    Generates two CNN candidates, compares them with
    ``train_models_on_samples``, briefly fits the best-scoring candidate,
    saves and reloads it, and checks that both output files exist.
    """
    X_train, X_val, y_train, y_val = self.generate_random_data_sets()
    num_classes = y_train.shape[1]

    # CNNs are used because they are quick to train.
    models = modelgen.generate_models(
        X_train.shape,
        number_of_classes=num_classes,
        number_of_models=2,
        model_type='CNN',
    )

    comparison = find_architecture.train_models_on_samples(
        X_train, y_train, X_val, y_val, models,
        nr_epochs=5,
        subset_size=150,
        verbose=True,
        outputfile=self.outputfile,
    )
    _, val_accuracies, _ = comparison

    # Continue training the candidate with the highest validation accuracy.
    best_model, _, _ = models[np.argmax(val_accuracies)]
    best_model.fit(
        X_train[:200, :, :],
        y_train[:200, :],
        epochs=2,
        validation_data=(X_val, y_val),
    )

    # Round-trip the model through disk.
    best_model.save(self.modelfile)
    load_model(self.modelfile)

    assert os.path.exists(self.outputfile)
    assert os.path.exists(self.modelfile)
def train_model(model, xtrain, ytrain, xval, yval, epochs, i):
    """Select the best candidate by validation score, then fit and time it.

    ``model`` is indexed as a sequence of ``(model, params, model_type)``
    tuples. All candidates are first compared with
    ``find_architecture.train_models_on_samples`` using every training
    sample (``subset_size`` is the number of rows in ``xtrain``). The
    candidate with the highest validation score is then fitted for
    ``epochs`` epochs, and the wall-clock duration of that fit is logged
    under the 1-based iteration number ``i + 1``.

    Returns:
        ``(history, best_model)``: the value returned by ``fit`` and the
        fitted model.
    """
    n_train_samples = xtrain.shape[0]

    _, val_scores, _ = find_architecture.train_models_on_samples(
        xtrain, ytrain, xval, yval, model,
        nr_epochs=epochs,
        subset_size=n_train_samples,
        verbose=False,
    )
    winner, _, _ = model[np.argmax(val_scores)]

    started = time.time()
    fit_history = winner.fit(
        xtrain, ytrain,
        epochs=epochs,
        validation_data=(xval, yval),
        verbose=False,
    )
    elapsed = time.time() - started

    message = "TRAINTIME: Training time Iteration " + str(i + 1) + ": " + str(elapsed)
    logging.info(message)
    return fit_history, winner
def test_train_models_on_samples_with_dataset(self):
    """Model should be able to train using a dataset as an input.

    Builds batched ``tf.data.Dataset`` objects for training and validation,
    passes them in place of the x arrays (with ``None`` for the y
    arguments), and checks that one history is returned for the single
    candidate model.
    """
    num_timesteps = 100
    num_channels = 2
    num_samples_train = 5
    num_samples_val = 3
    X_train = np.random.rand(num_samples_train, num_timesteps, num_channels)
    y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
    X_val = np.random.rand(num_samples_val, num_timesteps, num_channels)
    y_val = to_categorical(np.array([0, 1, 1]))
    batch_size = 20

    data_train = tf.data.Dataset.from_tensor_slices(
        (X_train, y_train)).batch(batch_size)
    data_val = tf.data.Dataset.from_tensor_slices(
        (X_val, y_val)).batch(batch_size)

    custom_settings = get_default_settings()
    model_type = CNN(X_train.shape, 2, **custom_settings)
    hyperparams = model_type.generate_hyperparameters()
    model = model_type.create_model(**hyperparams)
    models = [(model, hyperparams, "CNN")]

    histories, _, _ = \
        find_architecture.train_models_on_samples(
            data_train, None, data_val, None, models,
            nr_epochs=1, subset_size=None, verbose=False,
            outputfile=None, early_stopping_patience='auto',
            batch_size=batch_size)

    # Same check as the sibling x/y and generator tests: one history per
    # candidate model. Without it the test only verified "does not raise".
    assert len(histories) == 1
def train_model(model, xtrain, ytrain, xval, yval, epochs):
    """Select the best candidate by validation score and fit it.

    ``model`` is indexed as a sequence of ``(model, params, model_type)``
    tuples. All candidates are first compared with
    ``find_architecture.train_models_on_samples`` using every training
    sample (``subset_size`` is the number of rows in ``xtrain``); the
    candidate with the highest validation score is then fitted for
    ``epochs`` epochs.

    Returns:
        ``(history, best_model)``: the value returned by ``fit`` and the
        fitted model.
    """
    n_train_samples = xtrain.shape[0]

    _, val_scores, _ = find_architecture.train_models_on_samples(
        xtrain, ytrain, xval, yval, model,
        nr_epochs=epochs,
        subset_size=n_train_samples,
        verbose=False,
    )
    winner, _, _ = model[np.argmax(val_scores)]

    fit_history = winner.fit(
        xtrain, ytrain,
        epochs=epochs,
        validation_data=(xval, yval),
        verbose=False,
    )
    return fit_history, winner
def test_train_models_on_samples_with_x_and_y(self):
    """Model should be able to train using separated x and y values."""
    train_shape = (5, 100, 2)  # (samples, timesteps, channels)
    val_shape = (3, 100, 2)
    batch_size = 20

    X_train = np.random.rand(*train_shape)
    y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
    X_val = np.random.rand(*val_shape)
    y_val = to_categorical(np.array([0, 1, 1]))

    # Build a single CNN candidate from the default settings.
    cnn = CNN(X_train.shape, 2, **get_default_settings())
    cnn_params = cnn.generate_hyperparameters()
    candidates = [(cnn.create_model(**cnn_params), cnn_params, "CNN")]

    result = find_architecture.train_models_on_samples(
        X_train, y_train, X_val, y_val, candidates,
        nr_epochs=1,
        subset_size=10,
        verbose=False,
        outputfile=None,
        early_stopping_patience='auto',
        batch_size=batch_size,
    )
    histories, _, _ = result

    assert len(histories) == 1
def test_integration(self):
    """Run a condensed version of the tutorial workflow end to end.

    Generates two CNN candidates compiled with the ``accuracy`` metric,
    compares them, briefly fits the best-scoring candidate, saves and
    reloads it, and checks the histories and the written files.
    """
    X_train, X_val, y_train, y_val = self.generate_random_data_sets()
    num_classes = y_train.shape[1]
    metric = 'accuracy'

    # CNNs are used because they are quick to train.
    models = modelgen.generate_models(
        X_train.shape,
        number_of_classes=num_classes,
        number_of_models=2,
        metrics=[metric],
        model_type='CNN',
    )

    comparison = find_architecture.train_models_on_samples(
        X_train, y_train, X_val, y_val, models,
        nr_epochs=5,
        subset_size=150,
        verbose=True,
        outputfile=self.outputfile,
    )
    histories, val_accuracies, _ = comparison

    # The validation results are looked up by metric name.
    winner_index = np.argmax(val_accuracies[metric])
    best_model, _, _ = models[winner_index]
    best_model.fit(
        X_train[:200, :, :],
        y_train[:200, :],
        epochs=2,
        validation_data=(X_val, y_val),
    )

    # Round-trip the model through disk.
    best_model.save(self.modelfile)
    model_reloaded = load_model(self.modelfile)

    assert model_reloaded is not None, "Expected model"  # TODO: check if it's a real model
    assert len(histories) == 2, "Expected two models in histories"
    assert os.path.exists(self.outputfile)
    assert os.path.exists(self.modelfile)
def test_train_models_on_samples_with_generators(self):
    """Model should be able to train using a generator as an input."""
    train_shape = (5, 100, 2)  # (samples, timesteps, channels)
    val_shape = (3, 100, 2)
    batch_size = 20

    X_train = np.random.rand(*train_shape)
    y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
    X_val = np.random.rand(*val_shape)
    y_val = to_categorical(np.array([0, 1, 1]))

    class DataGenerator(Sequence):
        """Serve consecutive ``batch_size``-sized slices of x and y."""

        def __init__(self, x_set, y_set, batch_size):
            self.x = x_set
            self.y = y_set
            self.batch_size = batch_size

        def __len__(self):
            # Number of batches, counting a final partial batch.
            return math.ceil(len(self.x) / self.batch_size)

        def __getitem__(self, idx):
            window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
            return self.x[window], self.y[window]

    data_train = DataGenerator(X_train, y_train, batch_size)
    data_val = DataGenerator(X_val, y_val, batch_size)

    # Build a single CNN candidate from the default settings.
    cnn = CNN(X_train.shape, 2, **get_default_settings())
    cnn_params = cnn.generate_hyperparameters()
    candidates = [(cnn.create_model(**cnn_params), cnn_params, "CNN")]

    # The generators carry both x and y, so the y arguments are None.
    result = find_architecture.train_models_on_samples(
        data_train, None, data_val, None, candidates,
        nr_epochs=1,
        subset_size=None,
        verbose=False,
        outputfile=None,
        early_stopping_patience='auto',
        batch_size=batch_size,
    )
    histories, _, _ = result

    assert len(histories) == 1
def train_models_on_samples_empty(self):
    """Training with an empty list of models should return no histories.

    NOTE(review): the name lacks the ``test_`` prefix, so default test
    discovery will presumably not collect it - confirm whether this check
    was disabled on purpose.
    """
    train_shape = (5, 100, 2)  # (samples, timesteps, channels)
    val_shape = (3, 100, 2)

    X_train = np.random.rand(*train_shape)
    y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
    X_val = np.random.rand(*val_shape)
    y_val = to_categorical(np.array([0, 1, 1]))

    no_models = []
    result = find_architecture.train_models_on_samples(
        X_train, y_train, X_val, y_val, no_models,
        nr_epochs=1,
        subset_size=10,
        verbose=False,
        outputfile=None,
        early_stopping=False,
        batch_size=20,
        metric='accuracy',
    )
    histories, _, _ = result

    assert len(histories) == 0
def train_models_on_samples_empty(self):
    """Training with an empty list of models should return no histories.

    The NumPy random generator is seeded so the generated data is
    reproducible.

    NOTE(review): the name lacks the ``test_`` prefix, so default test
    discovery will presumably not collect it - confirm whether this check
    was disabled on purpose.
    """
    np.random.seed(123)
    train_shape = (5, 100, 2)  # (samples, timesteps, channels)
    val_shape = (3, 100, 2)

    X_train = np.random.rand(*train_shape)
    y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
    X_val = np.random.rand(*val_shape)
    y_val = to_categorical(np.array([0, 1, 1]))

    no_models = []
    result = find_architecture.train_models_on_samples(
        X_train, y_train, X_val, y_val, no_models,
        nr_epochs=1,
        subset_size=10,
        verbose=False,
        outputfile=None,
        early_stopping=False,
        batch_size=20,
        metric='accuracy',
    )
    histories, _, _ = result

    assert len(histories) == 0
def test_train_models_on_samples_empty(self):
    """Training with an empty list of models should return no histories.

    The call is routed through a noodles process runner passed as
    ``use_noodles``.
    """
    np.random.seed(123)
    train_shape = (5, 100, 2)  # (samples, timesteps, channels)
    val_shape = (3, 100, 2)

    X_train = np.random.rand(*train_shape)
    y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
    X_val = np.random.rand(*val_shape)
    y_val = to_categorical(np.array([0, 1, 1]))

    def run_workflow(wf):
        # Execute the workflow on four worker processes.
        return noodles.run_process(wf, n_processes=4,
                                   registry=serial_registry)

    no_models = []
    result = find_architecture.train_models_on_samples(
        X_train, y_train, X_val, y_val, no_models,
        nr_epochs=1,
        subset_size=10,
        verbose=False,
        outputfile=None,
        early_stopping=False,
        batch_size=20,
        metric='accuracy',
        use_noodles=run_workflow,
    )
    histories, _, _ = result

    assert len(histories) == 0
# Train every candidate model once per fold and collect the per-fold results.
for j in range(len(Xs)):
    print('fold ' + str(j))
    # Replace each candidate with a fresh copy before training on this fold;
    # hyperparameters and model type are carried over unchanged.
    models = [(get_fresh_copy(model, params['learning_rate']), params, model_type)
              for model, params, model_type in models]
    X_train, y_train, X_val, y_val = split_train_small_val(
        Xs, ys, j, trainsize=trainsize, valsize=valsize)
    histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
        X_train, y_train, X_val, y_val, models,
        nr_epochs=nr_epochs, subset_size=subset_size, verbose=True,
        outputfile=os.path.join(resultpath, 'experiment' + str(j) + '.json'),
        early_stopping=True)
    histories_list.append(histories)
    val_accuracies_list.append(val_accuracies)
    # Collect this fold's losses alongside the other per-fold results. The
    # original appended `val_losses` to itself, so they were never collected.
    # NOTE(review): assumes `val_losses_list` is initialised next to
    # `histories_list` / `val_accuracies_list` - confirm.
    val_losses_list.append(val_losses)
# Total elapsed time since `t` was taken.
print(time.time() - t)


# In[6]:

# Read them all back in
import json
model_jsons = []
# What is the fraction of classes in the validation set?
pd.Series(y_val.mean(axis=0), index=labels)

# Make sure the directory for the training results exists.
if not os.path.exists(result_path):
    os.makedirs(result_path)

# Compare all candidate models on a subset of the training data.
histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train, y_train, X_val, y_val, models,
    nr_epochs=5,
    subset_size=512,
    verbose=True,
    batch_size=32,
    outputpath=result_path,
    early_stopping=True,
)
print('Details of the training process were stored in ',os.path.join(result_path, 'models.json'))

# Pick the candidate with the highest validation accuracy.
best_model_index = np.argmax(val_accuracies)
best_model, best_params, best_model_types = models[best_model_index]
print('Model type and parameters of the best model:')
print(best_model_types)
print(best_params)
# In[13]:

# Define directory where the results, e.g. json file, will be stored
resultpath = os.path.join(data_path, '..', 'data/models')
if not os.path.exists(resultpath):
    os.makedirs(resultpath)


# In[14]:

# Compare all candidate models on a subset of the training data and write
# the details of the comparison to a JSON file.
outputfile = os.path.join(resultpath, 'modelcomparison_pamap.json')
histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train, y_train_binary, X_val, y_val_binary, models,
    nr_epochs=5,
    subset_size=1000,
    verbose=True,
    outputfile=outputfile,
)
print('Details of the training process were stored in ', outputfile)


# In[15]:

# Pick the candidate with the highest validation accuracy.
best_model_index = np.argmax(val_accuracies)
best_model, best_params, best_model_types = models[best_model_index]
print('Model type and parameters of the best model:')
print(best_model_types)
print(best_params)


# ## Train the best model on the full dataset
number_of_models=number_of_models) # In[6]: #what is the fraction of a vs c in the validation set? y_val.mean(axis=0) # In[7]: if not os.path.exists(result_path): os.makedirs(result_path) # In[ ]: outputfile = os.path.join(result_path, 'modelcomparison.json') histories, val_accuracies, val_losses = find_architecture.train_models_on_samples( X_train, y_train, X_val, y_val, models, nr_epochs=nr_epochs, subset_size=subset_size, verbose=True, batch_size=batch_size, outputfile=outputfile, early_stopping=early_stopping) print('Details of the training process were stored in ', outputfile) # In[ ]:
def main(argv):
    """Run nested cross-validation of generated CNN models on UPenn sleep data.

    The outer loop splits samples by user (``GroupKFold``). For each outer
    fold the training part is augmented, an inner stratified CV generates
    and scores one candidate CNN per inner fold, and the hyperparameters of
    the best-scoring candidate are used to build a fresh CNN that is
    trained, saved, and evaluated on the outer test part. A classification
    report over all outer folds is produced at the end.

    Args:
        argv: Sequence with the input file (loaded with ``np.load`` and
            read through the keys ``data``, ``labels``, ``user`` and
            ``dataset``) at index 0 and the output directory at index 1.
    """
    infile = argv[0]
    outdir = argv[1]

    sleep_states = ['Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM']

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    resultdir = os.path.join(outdir, 'models')
    if not os.path.exists(resultdir):
        os.makedirs(resultdir)

    all_data = np.load(infile)
    # Keep only the UPenn samples. The same mask must be applied to every
    # per-sample array: previously `users` was left unfiltered, so the
    # group labels no longer lined up with X and y.
    is_upenn = all_data['dataset'] == 'UPenn'
    X = all_data['data'][is_upenn]
    y = all_data['labels'][is_upenn]
    users = all_data['user'][is_upenn]
    num_classes = y.shape[1]

    # Shuffle data
    shuf_idx = np.arange(X.shape[0])
    np.random.shuffle(shuf_idx)
    X = X[shuf_idx]
    y = y[shuf_idx]
    users = [users[i] for i in shuf_idx]

    # Use nested cross-validation based on users
    outer_cv_splits = 5
    inner_cv_splits = 3
    # Every inner fold writes its comparison details to the same file.
    outfile = os.path.join(resultdir, 'model_comparison.json')

    # Outer CV
    group_kfold = GroupKFold(n_splits=outer_cv_splits)
    predictions = []
    for fold, (train_indices, test_indices) in enumerate(
            group_kfold.split(X, y, users), start=1):
        print('Evaluating fold %d' % fold)
        out_X_train = X[train_indices]
        out_y_train = y[train_indices]
        # augment() returns the number of augmented samples; the augmented
        # arrays are then read from memory-mapped files under tmp/
        # (presumably written by augment() - confirm).
        naug_samp = augment(out_X_train, out_y_train, sleep_states,
                            fold=fold, aug_factor=1.5)
        out_X_train = np.memmap(
            'tmp/X_aug_fold' + str(fold) + '.np', dtype='float32', mode='r',
            shape=(naug_samp, out_X_train.shape[1], out_X_train.shape[2]))
        out_y_train = np.memmap(
            'tmp/y_aug_fold' + str(fold) + '.np', dtype='int32', mode='r',
            shape=(naug_samp, out_y_train.shape[1]))
        out_X_test = X[test_indices]
        out_y_test = y[test_indices]
        out_lbl = out_y_train.argmax(axis=1)

        # Inner CV
        val_acc = []
        models = []
        # random_state is omitted: it has no effect without shuffling and
        # current scikit-learn rejects it when shuffle=False.
        strat_kfold = StratifiedKFold(n_splits=inner_cv_splits, shuffle=False)
        for grp_train_indices, grp_test_indices in strat_kfold.split(
                out_X_train, out_lbl):
            # Shuffle the training indices of this inner fold.
            grp_train_indices = sample(list(grp_train_indices),
                                       len(grp_train_indices))
            in_X_train = out_X_train[grp_train_indices]
            in_y_train = out_y_train[grp_train_indices]
            # Validate on at most 1000 random samples; the cap avoids a
            # ValueError when the inner test fold is smaller than that.
            grp_test_indices = sample(list(grp_test_indices),
                                      min(1000, len(grp_test_indices)))
            in_X_test = out_X_train[grp_test_indices]
            in_y_test = out_y_train[grp_test_indices]

            limit_mem()
            # Generate candidate architectures
            model = modelgen.generate_models(
                in_X_train.shape,
                number_of_classes=num_classes,
                number_of_models=1, metrics=[macro_f1], model_type='CNN')

            # Compare generated architectures on a subset of data for few epochs
            hist, acc, loss = find_architecture.train_models_on_samples(
                in_X_train, in_y_train, in_X_test, in_y_test, model,
                nr_epochs=5, subset_size=5000, verbose=True, batch_size=50,
                outputfile=outfile, metric='macro_f1')
            val_acc.append(acc[0])
            models.append(model[0])

        # Choose best model and evaluate values on validation data
        print('Evaluating on best model for fold %d' % fold)
        best_model_index = np.argmax(val_acc)
        _, best_params, best_model_type = models[best_model_index]
        print('Best model type and parameters:')
        print(best_model_type)
        print(best_params)

        nr_epochs = 5
        ntrain = out_X_train.shape[0]
        nval = ntrain // 5
        # randint draws with replacement, so val_idx may contain duplicates.
        val_idx = np.random.randint(ntrain, size=nval)
        # Everything not drawn for validation is used for training. A
        # boolean mask yields the same indices as scanning val_idx for
        # every i, without the quadratic cost.
        is_val = np.zeros(ntrain, dtype=bool)
        is_val[val_idx] = True
        train_idx = np.flatnonzero(~is_val)
        trainX = out_X_train[train_idx]
        trainY = out_y_train[train_idx]
        valX = out_X_train[val_idx]
        valY = out_y_train[val_idx]

        limit_mem()
        # Rebuild an untrained CNN from the best hyperparameters.
        best_model = modelgen.generate_CNN_model(
            trainX.shape, num_classes,
            filters=best_params['filters'],
            fc_hidden_nodes=best_params['fc_hidden_nodes'],
            learning_rate=best_params['learning_rate'],
            regularization_rate=best_params['regularization_rate'],
            metrics=[macro_f1])
        best_model.fit(trainX, trainY, epochs=nr_epochs, batch_size=50,
                       validation_data=(valX, valY))

        # Save model
        best_model.save(
            os.path.join(resultdir, 'best_model_fold' + str(fold) + '.h5'))

        # Predict probability on the outer test data. predict() is used
        # because predict_proba() is not available in recent Keras releases.
        probs = best_model.predict(out_X_test, batch_size=1)
        y_pred = probs.argmax(axis=1)
        y_true = out_y_test.argmax(axis=1)
        predictions.append((y_true, y_pred))

    get_classification_report(predictions, sleep_states)
# Show the current model and its type.
print(" ")
print("Model description:")
model.summary()
print(" ")
print("Model type:")
print(model_types)
print(" ")

# Compare all candidate models and write the details to a JSON file.
modelcomparison = "modelcomparison4"
outputfile = path_predict+modelcomparison+'.json'
histories, val_metrics, val_losses = find_architecture.train_models_on_samples(
    X_train, y_train, X_val, y_val, models,
    nr_epochs=3,
    subset_size=80000,
    verbose=True,
    # metric='acc',
    outputfile=outputfile,
)
print(np.asarray(val_metrics).shape)
print(val_metrics)
print(len(histories))
# The script stops here, so the table below is never built.
sys.exit()

# Final-epoch training and validation scores, one row per candidate model.
modelcomparisons = pd.DataFrame({
    'model': [str(hyperparameters) for _, hyperparameters, _ in models],
    'train_acc': [h.history['acc'][-1] for h in histories],
    'train_loss': [h.history['loss'][-1] for h in histories],
    'val_acc': [h.history['val_acc'][-1] for h in histories],
    'val_loss': [h.history['val_loss'][-1] for h in histories],
})
# Convert the data to arrays and add a trailing axis of length 1 to the
# inputs.
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Generate a single candidate model and train it briefly on a subset.
num_classes = Y_train.shape[1]
models = modelgen.generate_models(
    X_train.shape,
    number_of_classes=num_classes,
    number_of_models=1,
)
histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train, Y_train, X_test, Y_test, models,
    nr_epochs=1,
    subset_size=300,
    verbose=True,
    outputfile=outputfile,
)
print('Details of the training process were stored in ', outputfile)

# Final-epoch training and validation scores, one row per candidate model.
modelcomparisons = pd.DataFrame({
    'model': [str(hyperparameters) for _, hyperparameters, _ in models],
    'train_acc': [h.history['accuracy'][-1] for h in histories],
    'train_loss': [h.history['loss'][-1] for h in histories],
    'val_accuracy': [h.history['val_accuracy'][-1] for h in histories],
    'val_loss': [h.history['val_loss'][-1] for h in histories],
})
def main(argv):
    """Run nested, user-based cross-validation with generator-fed models.

    Users are shuffled and split into outer folds. For each outer fold the
    remaining users are divided 80/20 into training and validation users,
    an inner CV generates and scores one candidate model per inner fold,
    and the hyperparameters of the best-scoring candidate are used to
    build a fresh model that is trained with early stopping and
    checkpointing and then evaluated on the outer test users. Per-fold and
    overall user reports are written to the result directory.

    Args:
        argv: Sequence with the input directory (containing ``labels.txt``
            and the ``.npy`` files it lists) at index 0, the mode
            (``'binary'`` or ``'multiclass'``) at index 1, and the output
            directory at index 2.
    """
    indir = argv[0]
    mode = argv[1]  # binary or multiclass
    outdir = argv[2]

    if mode == 'multiclass':
        sleep_states = [
            'Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM', 'Nonwear', 'Wake_ext'
        ]
    else:
        sleep_states = ['Wake', 'Sleep', 'Nonwear', 'Wake_ext']
    # Stages merged into a single 'Sleep' label in binary mode.
    collate_sleep = ['NREM 1', 'NREM 2', 'NREM 3', 'REM']

    valid_sleep_states = [
        state for state in sleep_states if state != 'Wake_ext'
    ]
    num_classes = len(valid_sleep_states)

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    resultdir = os.path.join(outdir, mode, 'models')
    if not os.path.exists(resultdir):
        os.makedirs(resultdir)

    # Read data from disk
    data = pd.read_csv(os.path.join(indir, 'labels.txt'), sep='\t')
    files = [os.path.join(indir, fname) + '.npy' for fname in data['filename']]
    labels = data['labels'].tolist()
    users = data['user'].tolist()
    if mode == 'binary':
        labels = ['Sleep' if lbl in collate_sleep else lbl for lbl in labels]

    early_stopping = EarlyStopping(monitor='val_macro_f1', mode='max',
                                   verbose=1, patience=2)

    # All files are assumed to share the shape of the first one.
    seqlen, n_channels = np.load(files[0]).shape
    batch_size = 32

    # Report files differ only in this suffix; any mode other than
    # 'binary' is reported as multiclass.
    report_kind = 'binary' if mode == 'binary' else 'multiclass'
    # Every inner fold writes its comparison details to the same file.
    outfile = os.path.join(resultdir, 'model_comparison.json')

    # Use nested cross-validation based on users
    # Outer CV
    unique_users = list(set(users))
    random.shuffle(unique_users)
    out_cv_splits = 5
    in_cv_splits = 5
    out_fold_nusers = len(unique_users) // out_cv_splits
    out_n_epochs = 10
    in_n_epochs = 1
    predictions = []
    for out_fold in range(out_cv_splits):
        print('Evaluating fold %d' % (out_fold + 1))
        test_users = unique_users[out_fold * out_fold_nusers:
                                  (out_fold + 1) * out_fold_nusers]
        trainval_users = [
            user for user in unique_users if user not in test_users
        ]
        train_users = trainval_users[:int(0.8 * len(trainval_users))]
        val_users = trainval_users[len(train_users):]

        out_train_fnames, out_train_labels, _ = get_partition(
            files, labels, users, train_users, sleep_states, is_train=True)
        out_val_fnames, out_val_labels, _ = get_partition(
            files, labels, users, val_users, sleep_states)
        out_test_fnames, out_test_labels, out_test_users = get_partition(
            files, labels, users, test_users, sleep_states)

        out_train_gen = DataGenerator(
            out_train_fnames, out_train_labels, valid_sleep_states,
            partition='out_train', batch_size=batch_size, seqlen=seqlen,
            n_channels=n_channels, n_classes=num_classes, shuffle=True,
            augment=True, aug_factor=0.75, balance=True)
        print('Fold {}: Computing mean and standard deviation'.format(
            out_fold + 1))
        # The statistics from the outer training generator are passed to
        # every other generator of this fold.
        mean, std = out_train_gen.fit()
        out_val_gen = DataGenerator(
            out_val_fnames, out_val_labels, valid_sleep_states,
            partition='out_val', batch_size=batch_size, seqlen=seqlen,
            n_channels=n_channels, n_classes=num_classes,
            mean=mean, std=std)
        out_test_gen = DataGenerator(
            out_test_fnames, out_test_labels, valid_sleep_states,
            partition='out_test', batch_size=batch_size, seqlen=seqlen,
            n_channels=n_channels, n_classes=num_classes,
            mean=mean, std=std)

        # Get class weights. Arguments are passed by keyword because
        # current scikit-learn no longer accepts them positionally.
        out_class_wts = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(out_train_labels),
            y=out_train_labels)

        # Inner CV
        val_acc = []
        models = []
        in_fold_nusers = len(trainval_users) // in_cv_splits
        for in_fold in range(in_cv_splits):
            in_val_users = trainval_users[in_fold * in_fold_nusers:
                                          (in_fold + 1) * in_fold_nusers]
            in_train_users = [
                user for user in trainval_users if user not in in_val_users
            ]

            in_train_fnames, in_train_labels, _ = get_partition(
                files, labels, users, in_train_users, sleep_states,
                is_train=True)
            in_val_fnames, in_val_labels, _ = get_partition(
                files, labels, users, in_val_users, sleep_states)

            in_train_gen = DataGenerator(
                in_train_fnames, in_train_labels, valid_sleep_states,
                partition='in_train', batch_size=batch_size, seqlen=seqlen,
                n_channels=n_channels, n_classes=num_classes, shuffle=True,
                augment=True, aug_factor=0.75, balance=True,
                mean=mean, std=std)
            in_val_gen = DataGenerator(
                in_val_fnames, in_val_labels, valid_sleep_states,
                partition='in_val', batch_size=batch_size, seqlen=seqlen,
                n_channels=n_channels, n_classes=num_classes,
                mean=mean, std=std)

            # Generate candidate architectures
            model = modelgen.generate_models(
                (None, seqlen, n_channels),
                number_of_classes=num_classes,
                number_of_models=1, metrics=[macro_f1])  # , model_type='CNN')

            # Compare generated architectures on a subset of data for few epochs
            hist, acc, loss = find_architecture.train_models_on_samples(
                in_train_gen, in_val_gen, model, nr_epochs=in_n_epochs,
                n_steps=1000, class_weight=out_class_wts,
                verbose=True, outputfile=outfile, metric='macro_f1')
            val_acc.append(acc[0])
            models.append(model[0])

        # Choose best model and evaluate values on validation data.
        # The fold number is printed 1-based, like every other message and
        # filename in this function.
        print('Evaluating on best model for fold %d' % (out_fold + 1))
        best_model_index = np.argmax(val_acc)
        _, best_params, best_model_type = models[best_model_index]
        print('Best model type and parameters:')
        print(best_model_type)
        print(best_params)

        # Rebuild an untrained model from the best hyperparameters.
        if best_model_type == 'CNN':
            best_model = modelgen.generate_CNN_model(
                (None, seqlen, n_channels), num_classes,
                filters=best_params['filters'],
                fc_hidden_nodes=best_params['fc_hidden_nodes'],
                learning_rate=best_params['learning_rate'],
                regularization_rate=best_params['regularization_rate'],
                metrics=[macro_f1])
        else:
            best_model = modelgen.generate_DeepConvLSTM_model(
                (None, seqlen, n_channels), num_classes,
                filters=best_params['filters'],
                lstm_dims=best_params['lstm_dims'],
                learning_rate=best_params['learning_rate'],
                regularization_rate=best_params['regularization_rate'],
                metrics=[macro_f1])

        # Use early stopping and model checkpoints to handle overfitting
        # and save best model
        model_checkpt = ModelCheckpoint(
            os.path.join(resultdir,
                         'best_model_fold' + str(out_fold + 1) + '.h5'),
            monitor='val_macro_f1', mode='max', save_best_only=True)
        # NOTE(review): this history object is created but not passed to
        # `callbacks`, so it is never attached to training - add it there
        # if an F1 history is wanted.
        history = F1scoreHistory()
        best_model.fit_generator(
            out_train_gen, epochs=out_n_epochs,
            validation_data=out_val_gen, class_weight=out_class_wts,
            callbacks=[early_stopping, model_checkpt])

        # Predict probability on the outer test data
        probs = best_model.predict_generator(out_test_gen)
        y_pred = probs.argmax(axis=1)
        y_true = out_test_labels
        predictions.append((out_test_users, y_true, y_pred))

        # Save user report with the predictions accumulated so far
        save_user_report(
            predictions, valid_sleep_states,
            os.path.join(
                resultdir,
                'fold' + str(out_fold + 1) + '_deeplearning_' + report_kind
                + '_results.csv'))

    get_classification_report(predictions, valid_sleep_states)

    # Save user report
    save_user_report(
        predictions, valid_sleep_states,
        os.path.join(resultdir,
                     'deeplearning_' + report_kind + '_results.csv'))