def train(images, labels, fold, model_type, batch_size, num_epochs,
          subj_id=0, reuse_cnn=False, dropout_rate=dropout_rate,
          learning_rate_default=1e-3, Optimizer=tf.train.AdamOptimizer,
          log_path=log_path):
    """Train one model on a single fold, with early stopping.

    The requested network is built in the default TensorFlow graph and
    trained on the training split. After every epoch it is evaluated on the
    validation and test splits. A checkpoint is written whenever the
    validation accuracy improves, and training stops once it has failed to
    improve for 10 consecutive epochs.

    :param images: input images
    :param labels: target labels
    :param fold: tuple of (train, test) index numbers
    :param model_type: model type ('cnn', '1dconv', 'lstm', 'mix')
    :param batch_size: batch size for training
    :param num_epochs: maximum number of passes over the training set
    :param subj_id: the id of fold for storing log and the best model
    :param reuse_cnn: for non-'cnn' models, restore the variables in scope
        'VGG_NET_CNN' from the latest checkpoint found under
        ``<log_path>/cnn_<subj_id>/checkpoints`` before training
    :param dropout_rate: dropout rate handed to the model builders and to
        the dropout layers of the 'cnn' head
    :param learning_rate_default: base learning rate of the exponential decay
    :param Optimizer: optimizer class, instantiated with the decayed
        learning-rate tensor
    :param log_path: directory (relative to the current directory) that
        receives summaries and checkpoints
    :return: list ``[last_train_acc, best_validation_accu, test_acc_val,
        last_val_acc, last_test_acc]``; ``test_acc_val`` is the test
        accuracy of the epoch with the best validation accuracy (NaN if the
        validation accuracy never rose above 0)
    :raises ValueError: if ``model_type`` is not one of the supported types
    """
    with tf.name_scope('Inputs'):
        input_var = tf.placeholder(tf.float32,
                                   [None, None, 32, 32, n_colors],
                                   name='X_inputs')
        target_var = tf.placeholder(tf.int64, [None], name='y_inputs')
        tf_is_training = tf.placeholder(tf.bool, None, name='is_training')

    num_classes = len(np.unique(labels))
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(
        images, labels, fold)

    print('Train set label and proportion:\t',
          np.unique(y_train, return_counts=True))
    print('Val set label and proportion:\t',
          np.unique(y_val, return_counts=True))
    print('Test set label and proportion:\t',
          np.unique(y_test, return_counts=True))

    print('The shape of X_trian:\t', X_train.shape)
    print('The shape of X_val:\t', X_val.shape)
    print('The shape of X_test:\t', X_test.shape)

    print("Building model and compiling functions...")
    if model_type == '1dconv':
        network = build_convpool_conv1d(
            input_var, num_classes, train=tf_is_training,
            dropout_rate=dropout_rate,
            name='CNN_Conv1d' + '_sbj' + str(subj_id))
    elif model_type == 'lstm':
        network = build_convpool_lstm(
            input_var, num_classes, 100, train=tf_is_training,
            dropout_rate=dropout_rate,
            name='CNN_LSTM' + '_sbj' + str(subj_id))
    elif model_type == 'mix':
        network = build_convpool_mix(
            input_var, num_classes, 100, train=tf_is_training,
            dropout_rate=dropout_rate,
            name='CNN_Mix' + '_sbj' + str(subj_id))
    elif model_type == 'cnn':
        with tf.name_scope(name='CNN_layer' + '_fold' + str(subj_id)):
            network = build_cnn(input_var)  # output shape [None, 4, 4, 128]
            convpool_flat = tf.reshape(network, [-1, 4 * 4 * 128])
            h_fc1_drop1 = tf.layers.dropout(convpool_flat, rate=dropout_rate,
                                            training=tf_is_training,
                                            name='dropout_1')
            h_fc1 = tf.layers.dense(h_fc1_drop1, 256, activation=tf.nn.relu,
                                    name='fc_relu_256')
            h_fc1_drop2 = tf.layers.dropout(h_fc1, rate=dropout_rate,
                                            training=tf_is_training,
                                            name='dropout_2')
            # No activation here: the loss function applies the softmax.
            network = tf.layers.dense(h_fc1_drop2, num_classes,
                                      name='fc_softmax')
    else:
        # Only list the model types this function actually handles.
        raise ValueError(
            "Model not supported ['1dconv', 'lstm', 'mix', 'cnn']")

    Train_vars = tf.trainable_variables()
    logits = network

    with tf.name_scope('Loss'):
        # L2 penalty on the weight kernels only (biases are excluded).
        l2_loss = tf.add_n(
            [tf.nn.l2_loss(v) for v in Train_vars if 'kernel' in v.name])
        ce_loss = tf.losses.sparse_softmax_cross_entropy(labels=target_var,
                                                         logits=logits)
        _loss = ce_loss + weight_decay * l2_loss

    # The learning rate decays once every 3 epochs' worth of training steps.
    # Clamp to 1 so a batch size larger than the training set cannot produce
    # a zero decay interval.
    decay_steps = max(1, 3 * (len(y_train) // batch_size))
    with tf.name_scope('Optimizer'):
        # learning_rate =
        #   learning_rate_default * 0.95 ^ floor(global_steps / decay_steps)
        global_steps = tf.Variable(0, name="global_step", trainable=False)
        learning_rate = tf.train.exponential_decay(
            learning_rate_default,  # Base learning rate.
            global_steps,
            decay_steps,
            0.95,  # Decay rate.
            staircase=True)
        optimizer = Optimizer(learning_rate)
        train_op = optimizer.minimize(_loss, global_step=global_steps,
                                      var_list=Train_vars)

    with tf.name_scope('Accuracy'):
        predicted_class = tf.argmax(logits, axis=1)
        correct_prediction = tf.equal(predicted_class, target_var)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Output directory for models and summaries; a different path is used
    # for every (model type, subject) pair.
    out_dir = os.path.abspath(
        os.path.join(os.path.curdir, log_path,
                     (model_type + '_' + str(subj_id))))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss, accuracy and learning_rate
    loss_summary = tf.summary.scalar('loss', _loss)
    acc_summary = tf.summary.scalar('train_acc', accuracy)
    lr_summary = tf.summary.scalar('learning_rate', learning_rate)

    # Train summaries
    train_summary_op = tf.summary.merge(
        [loss_summary, acc_summary, lr_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                 tf.get_default_graph())

    # Dev summaries
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                               tf.get_default_graph())

    # Test summaries
    test_summary_op = tf.summary.merge([loss_summary, acc_summary])
    test_summary_dir = os.path.join(out_dir, "summaries", "test")
    test_summary_writer = tf.summary.FileWriter(test_summary_dir,
                                                tf.get_default_graph())

    # Checkpoint directory. Tensorflow assumes this directory already exists
    # so we need to create it.
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, model_type)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    restore_cnn = model_type != 'cnn' and reuse_cnn
    if restore_cnn:
        # Separate saver restricted to the CNN variables to be reused.
        reuse_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='VGG_NET_CNN')
        original_saver = tf.train.Saver(reuse_vars)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    print("Starting training...")
    total_start_time = time.time()
    best_validation_accu = 0

    # Defaults for values that are otherwise only bound inside the epoch
    # loop; without them the reporting below raises NameError when the
    # validation accuracy never exceeds 0 or when num_epochs is 0.
    test_acc_val = float('nan')
    eraly_stoping_epoch = -1
    av_val_acc = av_test_acc = float('nan')

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        if restore_cnn:
            cnn_model_path = os.path.abspath(
                os.path.join(os.path.curdir, log_path,
                             ('cnn_' + str(subj_id)), 'checkpoints'))
            cnn_model_path = tf.train.latest_checkpoint(cnn_model_path)
            print('-' * 20)
            print('Load cnn model weight for multi-frame model from {}'.format(
                cnn_model_path))
            original_saver.restore(sess, cnn_model_path)

        stop_count = 0  # epochs since the last validation improvement
        for epoch in range(num_epochs):
            print('-' * 50)
            # Train set
            train_err = train_acc = train_batches = 0
            start_time = time.time()
            for inputs, targets in iterate_minibatches(
                    X_train, y_train, batch_size, shuffle=False):
                summary, _, loss, acc = sess.run(
                    [train_summary_op, train_op, _loss, accuracy],
                    {input_var: inputs,
                     target_var: targets,
                     tf_is_training: True})
                train_acc += acc
                train_err += loss
                train_batches += 1
                train_summary_writer.add_summary(summary,
                                                 sess.run(global_steps))

            av_train_err = train_err / train_batches
            av_train_acc = train_acc / train_batches

            # Val set (evaluated in a single pass, dropout disabled)
            summary, av_val_err, av_val_acc = sess.run(
                [dev_summary_op, _loss, accuracy],
                {input_var: X_val,
                 target_var: y_val,
                 tf_is_training: False})
            dev_summary_writer.add_summary(summary, sess.run(global_steps))

            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))

            fmt_str = "Train \tEpoch [{:d}/{:d}] train_Loss: {:.4f}\ttrain_Acc: {:.2f}"
            print(fmt_str.format(epoch + 1, num_epochs, av_train_err,
                                 av_train_acc * 100))

            fmt_str = "Val \tEpoch [{:d}/{:d}] val_Loss: {:.4f}\tval_Acc: {:.2f}"
            print(fmt_str.format(epoch + 1, num_epochs, av_val_err,
                                 av_val_acc * 100))

            # Test set (evaluated in a single pass, dropout disabled)
            summary, av_test_err, av_test_acc = sess.run(
                [test_summary_op, _loss, accuracy],
                {input_var: X_test,
                 target_var: y_test,
                 tf_is_training: False})
            test_summary_writer.add_summary(summary, sess.run(global_steps))

            fmt_str = "Test \tEpoch [{:d}/{:d}] test_Loss: {:.4f}\ttest_Acc: {:.2f}"
            print(fmt_str.format(epoch + 1, num_epochs, av_test_err,
                                 av_test_acc * 100))

            if av_val_acc > best_validation_accu:
                # New best model: reset the patience counter and checkpoint.
                stop_count = 0
                eraly_stoping_epoch = epoch
                best_validation_accu = av_val_acc
                test_acc_val = av_test_acc
                saver.save(sess, checkpoint_prefix,
                           global_step=sess.run(global_steps))
            else:
                stop_count += 1
                # Stop training if val_acc does not improve for 10 epochs.
                if stop_count >= 10:
                    break

        # Training accuracy of the final weights with dropout disabled.
        # Each minibatch is fed on its own; the previous code pushed the
        # whole training set through the network once per minibatch.
        train_batches = train_acc = 0
        for inputs, targets in iterate_minibatches(
                X_train, y_train, batch_size, shuffle=False):
            acc = sess.run(accuracy,
                           {input_var: inputs,
                            target_var: targets,
                            tf_is_training: False})
            train_acc += acc
            train_batches += 1

        last_train_acc = train_acc / train_batches

    last_val_acc = av_val_acc
    last_test_acc = av_test_acc
    print('-' * 50)
    print('Time in total:', time.time() - total_start_time)
    print("Best validation accuracy:\t\t{:.2f} %".format(
        best_validation_accu * 100))
    print(
        "Test accuracy when got the best validation accuracy:\t\t{:.2f} %".
        format(test_acc_val * 100))
    print('-' * 50)
    print("Last train accuracy:\t\t{:.2f} %".format(last_train_acc * 100))
    print("Last validation accuracy:\t\t{:.2f} %".format(last_val_acc * 100))
    print("Last test accuracy:\t\t\t\t{:.2f} %".format(last_test_acc * 100))
    print('Early Stopping at epoch: {}'.format(eraly_stoping_epoch + 1))

    train_summary_writer.close()
    dev_summary_writer.close()
    test_summary_writer.close()
    return [
        last_train_acc, best_validation_accu, test_acc_val, last_val_acc,
        last_test_acc
    ]
def train(images, labels, fold, model_type, batch_size=32, num_epochs=5):
    """Train a Lasagne model on one fold and return its test accuracy.

    Loops over the training set for ``num_epochs`` epochs and evaluates the
    network on the validation set after each epoch. Whenever the validation
    accuracy improves, the network is also evaluated on the test set and its
    weights are dumped to ``weights_lasg_<model_type>.npz``.

    :param images: input images
    :param labels: target labels
    :param fold: tuple of (train, test) index numbers
    :param model_type: model type ('cnn', '1dconv', 'maxpool', 'lstm', 'mix')
    :param batch_size: batch size for training
    :param num_epochs: number of epochs of dataset to go over for training
    :return: test accuracy measured at the epoch with the best validation
        accuracy (NaN if the validation accuracy never rose above 0)
    :raises ValueError: if ``model_type`` is not one of the supported types
    """
    num_classes = len(np.unique(labels))
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(
        images, labels, fold)
    X_train = X_train.astype("float32", casting='unsafe')
    X_val = X_val.astype("float32", casting='unsafe')
    X_test = X_test.astype("float32", casting='unsafe')

    # Prepare Theano variables for inputs and targets
    input_var = T.TensorType('floatX', ((False, ) * 5))()
    target_var = T.ivector('targets')

    print("Building model and compiling functions...")
    if model_type == '1dconv':
        network = build_convpool_conv1d(input_var, num_classes)
    elif model_type == 'maxpool':
        network = build_convpool_max(input_var, num_classes)
    elif model_type == 'lstm':
        network = build_convpool_lstm(input_var, num_classes, 100)
    elif model_type == 'mix':
        network = build_convpool_mix(input_var, num_classes, 100)
    elif model_type == 'cnn':
        # The single-frame CNN takes a 4-D input instead of the 5-D tensor
        # declared above.
        input_var = T.tensor4('inputs')
        network, _ = build_cnn(input_var)
        network = DenseLayer(lasagne.layers.dropout(network, p=.5),
                             num_units=256,
                             nonlinearity=lasagne.nonlinearities.rectify)
        network = DenseLayer(lasagne.layers.dropout(network, p=.5),
                             num_units=num_classes,
                             nonlinearity=lasagne.nonlinearities.softmax)
    else:
        raise ValueError(
            "Model not supported ['1dconv', 'maxpool', 'lstm', 'mix', 'cnn']")

    # Training loss: mean cross-entropy plus an L2 penalty on the parameters.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    reg_factor = 1e-4
    l2_penalty = regularize_network_params(network, l2) * reg_factor
    loss += l2_penalty

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=0.001)

    # Validation/testing expressions use a deterministic forward pass
    # (dropout layers disabled) and carry no L2 penalty.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # One function performs a training step and returns the training loss;
    # the other returns (loss, accuracy) without updating any parameter.
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    def evaluate(X, y):
        """Return (mean loss, mean accuracy) of val_fn over minibatches."""
        err_sum = acc_sum = n_batches = 0
        for inputs, targets in iterate_minibatches(X, y, batch_size,
                                                   shuffle=False):
            err, acc = val_fn(inputs, targets)
            err_sum += err
            acc_sum += acc
            n_batches += 1
        return err_sum / n_batches, acc_sum / n_batches

    print("Starting training...")
    best_validation_accu = 0
    # Bound up front so the final report and the return value are defined
    # even when no epoch improves on the initial best accuracy of 0.
    av_test_acc = float('nan')
    for epoch in range(num_epochs):
        # Full pass over the training data
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(X_train, y_train,
                                                   batch_size, shuffle=False):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # Full pass over the validation data
        av_val_err, av_val_acc = evaluate(X_val, y_val)
        av_train_err = train_err / train_batches

        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(av_train_err))
        print(" validation loss:\t\t{:.6f}".format(av_val_err))
        print(" validation accuracy:\t\t{:.2f} %".format(av_val_acc * 100))

        if av_val_acc > best_validation_accu:
            best_validation_accu = av_val_acc
            # New best model: measure the test error and keep the weights.
            av_test_err, av_test_acc = evaluate(X_test, y_test)
            print("Final results:")
            print(" test loss:\t\t\t{:.6f}".format(av_test_err))
            print(" test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))
            np.savez('weights_lasg_{0}'.format(model_type),
                     *lasagne.layers.get_all_param_values(network))

    print('-' * 50)
    print("Best validation accuracy:\t\t{:.2f} %".format(best_validation_accu *
                                                         100))
    print("Best test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))
    return av_test_acc
def main():
    """Run leave-subject-out cross-validation of the Lasagne CNN.

    Loads the dataset (plus the augmented copy when ``augment`` is set),
    builds one (train, test) index pair per subject, trains a fresh network
    on every fold and finally stores the per-fold scores and per-epoch loss
    curves in ``cnn_lasg_results_orig`` via ``scipy.io.savemat``.

    Reads the module-level settings ``filename``, ``filename_aug``,
    ``subjectsFilename``, ``augment``, ``num_epochs`` and ``batch_size``.
    """
    print("Loading data...")
    data, labels = load_data(filename)
    mat = scipy.io.loadmat(subjectsFilename, mat_dtype=True)
    subjNumbers = np.squeeze(mat['subjectNum'])  # subject IDs for each trial

    if augment:
        # Aggregate augmented data and labels after the original samples.
        data_aug, labels_aug = load_data(filename_aug)
        data = np.vstack((data, data_aug))
        labels = np.vstack((labels, labels_aug))

    # Create folds based on subject numbers (leave-subject-out x-validation)
    fold_pairs = []
    for subj in np.unique(subjNumbers):
        is_test = subjNumbers == subj
        tr = np.squeeze(np.nonzero(np.bitwise_not(is_test)))
        ts = np.squeeze(np.nonzero(is_test))
        if augment:
            # The augmented rows were stacked after the originals, so their
            # indices are the original ones shifted by the trial count.
            # Only the training indices are extended.
            tr = np.concatenate((tr, tr + subjNumbers.size))
        np.random.shuffle(tr)  # Shuffle indices
        np.random.shuffle(ts)
        fold_pairs.append((tr, ts))

    # Initializing output variables
    validScores, testScores = [], []
    trainLoss = np.zeros((len(fold_pairs), num_epochs))
    validLoss = np.zeros((len(fold_pairs), num_epochs))
    validEpochAccu = np.zeros((len(fold_pairs), num_epochs))

    for foldNum, fold in enumerate(fold_pairs):
        print('Beginning fold {0} out of {1}'.format(foldNum + 1,
                                                     len(fold_pairs)))
        # Divide the dataset into train, validation and test sets.
        # The inputs are deliberately left un-normalized.
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(
            data, labels, fold)
        X_train = X_train.astype("float32", casting='unsafe')
        X_val = X_val.astype("float32", casting='unsafe')
        X_test = X_test.astype("float32", casting='unsafe')

        # Prepare Theano variables for inputs and targets
        input_var = T.tensor4('inputs')
        target_var = T.ivector('targets')

        print("Building model and compiling functions...")
        network = build_cnn(input_var)

        # Training loss: mean cross-entropy, optimized with Adam.
        prediction = lasagne.layers.get_output(network)
        loss = lasagne.objectives.categorical_crossentropy(
            prediction, target_var)
        loss = loss.mean()
        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = lasagne.updates.adam(loss, params, learning_rate=0.001)

        # Validation/testing expressions use a deterministic forward pass
        # (dropout layers disabled).
        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_loss = lasagne.objectives.categorical_crossentropy(
            test_prediction, target_var)
        test_loss = test_loss.mean()
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                          dtype=theano.config.floatX)

        # One function performs a training step and returns the training
        # loss; the other returns (loss, accuracy) without any update.
        train_fn = theano.function([input_var, target_var], loss,
                                   updates=updates)
        val_fn = theano.function([input_var, target_var],
                                 [test_loss, test_acc])

        def evaluate(X, y, val_fn=val_fn):
            """Return (mean loss, mean accuracy) of val_fn over minibatches."""
            err_sum = acc_sum = n_batches = 0
            for inputs, targets in iterate_minibatches(X, y, batch_size,
                                                       shuffle=False):
                err, acc = val_fn(inputs, targets)
                err_sum += err
                acc_sum += acc
                n_batches += 1
            return err_sum / n_batches, acc_sum / n_batches

        print("Starting training...")
        best_validation_accu = 0
        # Reset for every fold: otherwise a fold whose validation accuracy
        # never improves would raise NameError (first fold) or silently
        # report the previous fold's test accuracy.
        av_test_acc = float('nan')
        for epoch in range(num_epochs):
            # Full pass over the training data
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for inputs, targets in iterate_minibatches(
                    X_train, y_train, batch_size, shuffle=False):
                train_err += train_fn(inputs, targets)
                train_batches += 1

            # Full pass over the validation data
            av_val_err, av_val_acc = evaluate(X_val, y_val)
            av_train_err = train_err / train_batches

            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            print(" training loss:\t\t{:.6f}".format(av_train_err))
            print(" validation loss:\t\t{:.6f}".format(av_val_err))
            print(" validation accuracy:\t\t{:.2f} %".format(av_val_acc * 100))

            trainLoss[foldNum, epoch] = av_train_err
            validLoss[foldNum, epoch] = av_val_err
            validEpochAccu[foldNum, epoch] = av_val_acc * 100

            if av_val_acc > best_validation_accu:
                best_validation_accu = av_val_acc
                # New best model: measure the test error, keep the weights.
                av_test_err, av_test_acc = evaluate(X_test, y_test)
                print("Final results:")
                print(" test loss:\t\t\t{:.6f}".format(av_test_err))
                print(" test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))
                np.savez('weights_lasg{0}'.format(foldNum),
                         *lasagne.layers.get_all_param_values(network))

        validScores.append(best_validation_accu * 100)
        testScores.append(av_test_acc * 100)
        print('-' * 50)
        print("Best validation accuracy:\t\t{:.2f} %".format(
            best_validation_accu * 100))
        print("Best test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))

    scipy.io.savemat(
        'cnn_lasg_results_orig', {
            'validAccu': validScores,
            'testAccu': testScores,
            'trainLoss': trainLoss,
            'validLoss': validLoss,
            'validEpochAccu': validEpochAccu
        })
batch_size = 32
num_classes = len(np.unique(labels))

# CNN Mode
imsize = 32

# Find the average response over time windows: `feats` is cut column-wise
# into consecutive 206-column windows which are then averaged element-wise.
# Floor division keeps the window count an integer (true division makes
# `range()` fail on Python 3), and the builtin `sum` replaces `reduce`,
# which is not a builtin on Python 3.
# NOTE(review): 206 is presumably the number of features per time window;
# confirm against the feature extraction code.
n_windows = feats.shape[1] // 206
av_feats = sum(feats[:, i * 206:(i + 1) * 206] for i in range(n_windows))
av_feats = av_feats / n_windows

images = gen_images(np.array(locs_2d), av_feats, imsize, normalize=False)

(X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(
    images, labels, fold)
X_train = X_train.astype("float32", casting='unsafe')
X_val = X_val.astype("float32", casting='unsafe')
X_test = X_test.astype("float32", casting='unsafe')

print('Building model...')
network = Sequential()
def main():
    """Run leave-subject-out cross-validation of the Lasagne CNN.

    Loads the dataset (plus the augmented copy when ``augment`` is set),
    builds one (train, test) index pair per subject, trains a fresh network
    on every fold and finally stores the per-fold scores and per-epoch loss
    curves in ``cnn_lasg_results_orig`` via ``scipy.io.savemat``.

    Reads the module-level settings ``filename``, ``filename_aug``,
    ``subjectsFilename``, ``augment``, ``num_epochs`` and ``batch_size``.
    """
    print("Loading data...")
    data, labels = load_data(filename)
    mat = scipy.io.loadmat(subjectsFilename, mat_dtype=True)
    subjNumbers = np.squeeze(mat['subjectNum'])  # subject IDs for each trial

    if augment:
        # Aggregate augmented data and labels after the original samples.
        data_aug, labels_aug = load_data(filename_aug)
        data = np.vstack((data, data_aug))
        labels = np.vstack((labels, labels_aug))

    # Create folds based on subject numbers (leave-subject-out x-validation)
    fold_pairs = []
    for subj in np.unique(subjNumbers):
        is_test = subjNumbers == subj
        tr = np.squeeze(np.nonzero(np.bitwise_not(is_test)))
        ts = np.squeeze(np.nonzero(is_test))
        if augment:
            # The augmented rows were stacked after the originals, so their
            # indices are the original ones shifted by the trial count.
            # Only the training indices are extended.
            tr = np.concatenate((tr, tr + subjNumbers.size))
        np.random.shuffle(tr)  # Shuffle indices
        np.random.shuffle(ts)
        fold_pairs.append((tr, ts))

    # Initializing output variables
    validScores, testScores = [], []
    trainLoss = np.zeros((len(fold_pairs), num_epochs))
    validLoss = np.zeros((len(fold_pairs), num_epochs))
    validEpochAccu = np.zeros((len(fold_pairs), num_epochs))

    for foldNum, fold in enumerate(fold_pairs):
        print('Beginning fold {0} out of {1}'.format(foldNum + 1,
                                                     len(fold_pairs)))
        # Divide the dataset into train, validation and test sets.
        # The inputs are deliberately left un-normalized.
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(
            data, labels, fold)
        X_train = X_train.astype("float32", casting='unsafe')
        X_val = X_val.astype("float32", casting='unsafe')
        X_test = X_test.astype("float32", casting='unsafe')

        # Prepare Theano variables for inputs and targets
        input_var = T.tensor4('inputs')
        target_var = T.ivector('targets')

        print("Building model and compiling functions...")
        network = build_cnn(input_var)

        # Training loss: mean cross-entropy, optimized with Adam.
        prediction = lasagne.layers.get_output(network)
        loss = lasagne.objectives.categorical_crossentropy(
            prediction, target_var)
        loss = loss.mean()
        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = lasagne.updates.adam(loss, params, learning_rate=0.001)

        # Validation/testing expressions use a deterministic forward pass
        # (dropout layers disabled).
        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_loss = lasagne.objectives.categorical_crossentropy(
            test_prediction, target_var)
        test_loss = test_loss.mean()
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                          dtype=theano.config.floatX)

        # One function performs a training step and returns the training
        # loss; the other returns (loss, accuracy) without any update.
        train_fn = theano.function([input_var, target_var], loss,
                                   updates=updates)
        val_fn = theano.function([input_var, target_var],
                                 [test_loss, test_acc])

        def evaluate(X, y, val_fn=val_fn):
            """Return (mean loss, mean accuracy) of val_fn over minibatches."""
            err_sum = acc_sum = n_batches = 0
            for inputs, targets in iterate_minibatches(X, y, batch_size,
                                                       shuffle=False):
                err, acc = val_fn(inputs, targets)
                err_sum += err
                acc_sum += acc
                n_batches += 1
            return err_sum / n_batches, acc_sum / n_batches

        print("Starting training...")
        best_validation_accu = 0
        # Reset for every fold: otherwise a fold whose validation accuracy
        # never improves would raise NameError (first fold) or silently
        # report the previous fold's test accuracy.
        av_test_acc = float('nan')
        for epoch in range(num_epochs):
            # Full pass over the training data
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for inputs, targets in iterate_minibatches(
                    X_train, y_train, batch_size, shuffle=False):
                train_err += train_fn(inputs, targets)
                train_batches += 1

            # Full pass over the validation data
            av_val_err, av_val_acc = evaluate(X_val, y_val)
            av_train_err = train_err / train_batches

            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            print(" training loss:\t\t{:.6f}".format(av_train_err))
            print(" validation loss:\t\t{:.6f}".format(av_val_err))
            print(" validation accuracy:\t\t{:.2f} %".format(av_val_acc * 100))

            trainLoss[foldNum, epoch] = av_train_err
            validLoss[foldNum, epoch] = av_val_err
            validEpochAccu[foldNum, epoch] = av_val_acc * 100

            if av_val_acc > best_validation_accu:
                best_validation_accu = av_val_acc
                # New best model: measure the test error, keep the weights.
                av_test_err, av_test_acc = evaluate(X_test, y_test)
                print("Final results:")
                print(" test loss:\t\t\t{:.6f}".format(av_test_err))
                print(" test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))
                np.savez('weights_lasg{0}'.format(foldNum),
                         *lasagne.layers.get_all_param_values(network))

        validScores.append(best_validation_accu * 100)
        testScores.append(av_test_acc * 100)
        print('-' * 50)
        print("Best validation accuracy:\t\t{:.2f} %".format(
            best_validation_accu * 100))
        print("Best test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))

    scipy.io.savemat(
        'cnn_lasg_results_orig', {
            'validAccu': validScores,
            'testAccu': testScores,
            'trainLoss': trainLoss,
            'validLoss': validLoss,
            'validEpochAccu': validEpochAccu
        })
def main():
    """Train and evaluate a small Keras CNN with leave-subject-out cross-validation.

    Reads the module-level settings ``filename``, ``subjectsFilename``,
    ``nb_classes``, ``image_dimensions``, ``shapex``, ``shapey``,
    ``data_augmentation``, ``batch_size`` and ``nb_epoch``.

    For every unique subject id found in ``subjectsFilename`` one fold is
    built in which that subject's trials are the test indices and all other
    trials are the training indices.  ``reformatInput`` turns each fold into
    train / validation / test splits, a fresh model is trained on the fold,
    and one validation score and one test score are recorded per fold.

    Side effects: prints progress, writes ``weigths_<fold>`` weight files
    (augmentation branch only) and stores the per-fold scores in the
    ``cnn_results`` MAT-file.

    :return: none
    """
    data, labels = load_data(filename)
    mat = scipy.io.loadmat(subjectsFilename, mat_dtype=True)
    subjNumbers = np.squeeze(mat['subjectNum'])  # subject IDs for each trial

    # Leave-Subject-Out cross validation.
    fold_pairs = []
    for subj in np.unique(subjNumbers):
        is_test = subjNumbers == subj
        # flatnonzero always yields a 1-D index array, so a subject with a
        # single trial no longer produces a 0-d array that shuffle() rejects.
        tr = np.flatnonzero(np.bitwise_not(is_test))
        ts = np.flatnonzero(is_test)
        np.random.shuffle(tr)  # Shuffle indices
        np.random.shuffle(ts)
        fold_pairs.append((tr, ts))

    validScores, testScores = [], []
    for foldNum, fold in enumerate(fold_pairs):
        (X_train, y_train), (X_valid, y_valid), (X_test, y_test) = reformatInput(data, labels, fold)
        print('X_train shape:', X_train.shape)
        print(X_train.shape[0], 'train samples')
        print(X_valid.shape[0], 'valid samples')
        print(X_test.shape[0], 'test samples')

        # astype() copies, so the in-place scaling below never touches `data`.
        X_train = X_train.astype("float32", casting='unsafe')
        X_valid = X_valid.astype("float32", casting='unsafe')
        X_test = X_test.astype("float32", casting='unsafe')

        # convert class vectors to binary class matrices
        Y_train = np_utils.to_categorical(y_train, nb_classes)
        Y_valid = np_utils.to_categorical(y_valid, nb_classes)
        Y_test = np_utils.to_categorical(y_test, nb_classes)

        # Building the network
        model = Sequential()
        model.add(Convolution2D(40, 3, 3, border_mode='full',
                                input_shape=(image_dimensions, shapex, shapey)))
        model.add(Activation('relu'))
        model.add(Convolution2D(40, 3, 3))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        model.add(Flatten())
        model.add(Dense(1024))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(Dense(nb_classes))
        model.add(Activation('softmax'))

        # let's train the model using SGD + momentum (how original).
        sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=sgd)

        # Per-fold results.  Both are initialised up front so that every
        # branch below leaves them bound when the scores are recorded.
        best_validation_accu = 0
        fold_test_accu = float('nan')

        if not data_augmentation:
            print("Not using data augmentation or normalization")
            X_train /= 255.
            X_valid /= 255.
            X_test /= 255.

            model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True)
            # Evaluate the validation split too, so this branch contributes a
            # (validation, test) pair per fold just like the other branch.
            _, best_validation_accu = model.evaluate(X_valid, Y_valid, batch_size=batch_size,
                                                     show_accuracy=True)
            _, accu = model.evaluate(X_test, Y_test, batch_size=batch_size, show_accuracy=True)
            print('Test accuracy:', accu)
            fold_test_accu = accu
        else:
            print("Using real time data augmentation")

            # this will do preprocessing and realtime data augmentation
            datagen = ImageDataGenerator(
                featurewise_center=True,  # set input mean to 0 over the dataset
                samplewise_center=False,  # set each sample mean to 0
                featurewise_std_normalization=False,  # divide inputs by std of the dataset
                samplewise_std_normalization=False,  # divide each input by its std
                zca_whitening=False,  # apply ZCA whitening
                rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
                width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
                height_shift_range=0,  # randomly shift images vertically (fraction of total height)
                horizontal_flip=False,  # randomly flip images
                vertical_flip=False)  # randomly flip images

            # compute quantities required for featurewise normalization
            # (std, mean, and principal components if ZCA whitening is applied)
            datagen.fit(X_train)

            for e in range(nb_epoch):
                print('-' * 40)
                print('Epoch', e)
                print('-' * 40)
                print("Training...")
                # batch train with realtime data augmentation
                progbar = generic_utils.Progbar(X_train.shape[0])
                for X_batch, Y_batch in datagen.flow(X_train, Y_train, batch_size=batch_size):
                    _, trainAccu = model.train_on_batch(X_batch, Y_batch, accuracy=True)
                    progbar.add(X_batch.shape[0], values=[("train accuracy", trainAccu)])

                print("Validating...")
                # Validation time!
                progbar = generic_utils.Progbar(X_valid.shape[0])
                epochValidAccu = []
                for X_batch, Y_batch in datagen.flow(X_valid, Y_valid, batch_size=batch_size):
                    _, validAccu = model.test_on_batch(X_batch, Y_batch, accuracy=True)
                    epochValidAccu.append(validAccu)
                    progbar.add(X_batch.shape[0], values=[("validation accuracy", validAccu)])
                meanValidAccu = np.mean(epochValidAccu)

                # NOTE(review): nesting reconstructed from a whitespace-mangled
                # source -- the test pass and weight dump are assumed to run
                # only when validation accuracy improves; confirm.
                if meanValidAccu > best_validation_accu:
                    best_validation_accu = meanValidAccu
                    print("Testing...")
                    # test time!
                    progbar = generic_utils.Progbar(X_test.shape[0])
                    epochTestAccu = []
                    for X_batch, Y_batch in datagen.flow(X_test, Y_test, batch_size=batch_size):
                        _, testAccu = model.test_on_batch(X_batch, Y_batch, accuracy=True)
                        epochTestAccu.append(testAccu)
                        progbar.add(X_batch.shape[0], values=[("test accuracy", testAccu)])
                    fold_test_accu = np.mean(epochTestAccu)
                    model.save_weights('weigths_{0}'.format(foldNum), overwrite=True)

        validScores.append(best_validation_accu)
        testScores.append(fold_test_accu)
        # Rewritten after every fold so partial results survive a crash; the
        # file written after the last fold holds all scores.
        scipy.io.savemat('cnn_results', {'validAccu': validScores,
                                         'testAccu': testScores})

    print('Average valid accuracies: {0}'.format(np.mean(validScores)))
    print('Average test accuracies: {0}'.format(np.mean(testScores)))
def main():
    """Run leave-subject-out cross-validation of a small Keras CNN.

    Module-level names read here: ``filename``, ``subjectsFilename``,
    ``nb_classes``, ``image_dimensions``, ``shapex``, ``shapey``,
    ``data_augmentation``, ``batch_size`` and ``nb_epoch``.

    One fold is created per unique subject id in ``subjectsFilename``: the
    subject's trials form the test indices, every other trial is a training
    index.  ``reformatInput`` converts a fold into train / validation / test
    splits; a new model is trained per fold and a validation score plus a
    test score are stored for it.

    Side effects: console output, ``weigths_<fold>`` weight files (written
    in the augmentation branch only) and the ``cnn_results`` MAT-file with
    the per-fold scores.

    :return: none
    """
    data, labels = load_data(filename)
    mat = scipy.io.loadmat(subjectsFilename, mat_dtype=True)
    subjNumbers = np.squeeze(mat['subjectNum'])  # subject IDs for each trial

    # Leave-Subject-Out cross validation.
    fold_pairs = []
    for subj in np.unique(subjNumbers):
        held_out = subjNumbers == subj
        # np.flatnonzero keeps the index arrays 1-D even when a subject has
        # exactly one trial (squeeze(nonzero(...)) collapsed that to 0-d,
        # which np.random.shuffle cannot handle).
        tr = np.flatnonzero(np.bitwise_not(held_out))
        ts = np.flatnonzero(held_out)
        np.random.shuffle(tr)  # Shuffle indices
        np.random.shuffle(ts)
        fold_pairs.append((tr, ts))

    validScores, testScores = [], []
    for foldNum, fold in enumerate(fold_pairs):
        (X_train, y_train), (X_valid, y_valid), (X_test, y_test) = reformatInput(
            data, labels, fold)
        print('X_train shape:', X_train.shape)
        print(X_train.shape[0], 'train samples')
        print(X_valid.shape[0], 'valid samples')
        print(X_test.shape[0], 'test samples')

        # astype() returns copies, so later in-place scaling leaves `data` intact.
        X_train = X_train.astype("float32", casting='unsafe')
        X_valid = X_valid.astype("float32", casting='unsafe')
        X_test = X_test.astype("float32", casting='unsafe')

        # convert class vectors to binary class matrices
        Y_train = np_utils.to_categorical(y_train, nb_classes)
        Y_valid = np_utils.to_categorical(y_valid, nb_classes)
        Y_test = np_utils.to_categorical(y_test, nb_classes)

        # Building the network
        model = Sequential()
        model.add(
            Convolution2D(40, 3, 3, border_mode='full',
                          input_shape=(image_dimensions, shapex, shapey)))
        model.add(Activation('relu'))
        model.add(Convolution2D(40, 3, 3))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        model.add(Flatten())
        model.add(Dense(1024))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(Dense(nb_classes))
        model.add(Activation('softmax'))

        # let's train the model using SGD + momentum (how original).
        sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=sgd)

        # Fold results, bound before branching so the bookkeeping after the
        # if/else can never reference an undefined name.
        best_validation_accu = 0
        fold_test_accu = float('nan')

        if not data_augmentation:
            print("Not using data augmentation or normalization")
            X_train /= 255.
            X_valid /= 255.
            X_test /= 255.

            model.fit(X_train, Y_train, batch_size=batch_size,
                      nb_epoch=nb_epoch, show_accuracy=True)
            # Score the validation split as well so that this branch records
            # the same (validation, test) pair per fold as the other one.
            _, best_validation_accu = model.evaluate(
                X_valid, Y_valid, batch_size=batch_size, show_accuracy=True)
            _, accu = model.evaluate(
                X_test, Y_test, batch_size=batch_size, show_accuracy=True)
            print('Test accuracy:', accu)
            fold_test_accu = accu
        else:
            print("Using real time data augmentation")

            # this will do preprocessing and realtime data augmentation
            datagen = ImageDataGenerator(
                featurewise_center=True,  # set input mean to 0 over the dataset
                samplewise_center=False,  # set each sample mean to 0
                featurewise_std_normalization=False,  # divide inputs by std of the dataset
                samplewise_std_normalization=False,  # divide each input by its std
                zca_whitening=False,  # apply ZCA whitening
                rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
                width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
                height_shift_range=0,  # randomly shift images vertically (fraction of total height)
                horizontal_flip=False,  # randomly flip images
                vertical_flip=False)  # randomly flip images

            # compute quantities required for featurewise normalization
            # (std, mean, and principal components if ZCA whitening is applied)
            datagen.fit(X_train)

            for e in range(nb_epoch):
                print('-' * 40)
                print('Epoch', e)
                print('-' * 40)
                print("Training...")
                # batch train with realtime data augmentation
                progbar = generic_utils.Progbar(X_train.shape[0])
                for X_batch, Y_batch in datagen.flow(X_train, Y_train,
                                                     batch_size=batch_size):
                    _, trainAccu = model.train_on_batch(X_batch, Y_batch, accuracy=True)
                    progbar.add(X_batch.shape[0], values=[("train accuracy", trainAccu)])

                print("Validating...")
                # Validation time!
                progbar = generic_utils.Progbar(X_valid.shape[0])
                epochValidAccu = []
                for X_batch, Y_batch in datagen.flow(X_valid, Y_valid,
                                                     batch_size=batch_size):
                    _, validAccu = model.test_on_batch(X_batch, Y_batch, accuracy=True)
                    epochValidAccu.append(validAccu)
                    progbar.add(X_batch.shape[0],
                                values=[("validation accuracy", validAccu)])
                meanValidAccu = np.mean(epochValidAccu)

                # NOTE(review): the source had lost its indentation; the test
                # pass and weight dump are assumed to belong to the
                # "validation improved" case -- confirm.
                if meanValidAccu > best_validation_accu:
                    best_validation_accu = meanValidAccu
                    print("Testing...")
                    # test time!
                    progbar = generic_utils.Progbar(X_test.shape[0])
                    epochTestAccu = []
                    for X_batch, Y_batch in datagen.flow(
                            X_test, Y_test, batch_size=batch_size):
                        _, testAccu = model.test_on_batch(X_batch, Y_batch, accuracy=True)
                        epochTestAccu.append(testAccu)
                        progbar.add(X_batch.shape[0],
                                    values=[("test accuracy", testAccu)])
                    fold_test_accu = np.mean(epochTestAccu)
                    model.save_weights('weigths_{0}'.format(foldNum), overwrite=True)

        validScores.append(best_validation_accu)
        testScores.append(fold_test_accu)
        # Saved after each fold so an interrupted run keeps what it finished;
        # the last write contains every fold.
        scipy.io.savemat('cnn_results', {
            'validAccu': validScores,
            'testAccu': testScores
        })

    print('Average valid accuracies: {0}'.format(np.mean(validScores)))
    print('Average test accuracies: {0}'.format(np.mean(testScores)))
def train(images, labels, fold, model_type, batch_size=32, num_epochs=5, n_layer=(4, 2, 1)):
    """Build one of the supported TensorFlow models and train it on one fold.

    The graph is fed one-hot targets over a fixed ``num_classes = 4``.  After
    every epoch the validation split is scored; whenever the mean validation
    accuracy improves, the test split is scored as well and that test
    accuracy is reported as the "best" one at the end.

    Side effects: prints progress, writes the graph to ``logs`` and saves
    checkpoints under ``./tmp/<model_type>.ckpt``.

    :param images: array whose shape is unpacked either as
        (samples, height, width, colors) or as
        (windows, samples, height, width, colors)
    :param labels: target labels, passed through to ``reformatInput``
    :param fold: fold description, passed through to ``reformatInput``
    :param model_type: one of '1dconv', 'maxpool', 'mix', 'lstm', 'cnn'
    :param batch_size: mini-batch size used for all three splits
    :param num_epochs: number of passes over the training split
    :param n_layer: forwarded to ``build_cnn`` as ``n_layers`` ('cnn' only)
    :return: ``False`` when ``images`` is neither 4-D nor 5-D, otherwise none
    :raises ValueError: if ``model_type`` is not supported
    """
    print('model type:', model_type)

    # for data parsing
    num_classes = 4
    if len(images.shape) == 4:
        _, images_height, images_width, colorN = images.shape
        print('4:', images.shape)
        input_var = tf.placeholder(tf.float32, [None, images_height, images_width, colorN])
    elif len(images.shape) == 5:
        windowN, _, images_height, images_width, colorN = images.shape
        print('5:', images.shape)
        input_var = tf.placeholder(tf.float32,
                                   [None, windowN, images_height, images_width, colorN])
    else:
        print('warning!')
        return False
    output_var = tf.placeholder(tf.float32, [None, num_classes])

    # data divide
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(images, labels, fold)
    X_train = X_train.astype("float32", casting='unsafe')
    X_val = X_val.astype("float32", casting='unsafe')
    X_test = X_test.astype("float32", casting='unsafe')

    # Dropout keep probability: 0.5 is fed while optimising, 1.0 while scoring.
    keep_prob = tf.placeholder(tf.float32)

    # Build the logits for the requested model.
    if model_type == '1dconv':
        network = build_convpool_conv1d(input_var, num_classes)
    elif model_type == 'maxpool':
        network = build_convpool_max(input_var, num_classes)
    elif model_type == 'mix':
        network = build_convpool_mix(input_var, num_classes, 100)
    elif model_type == 'lstm':
        network = build_convpool_lstm(input_var, num_classes, 100)
    elif model_type == 'cnn':
        network, _ = build_cnn(input_var, n_layers=n_layer)
        # define fully connected network
        network = tf.reshape(network, [-1, 4 * 4 * 128])
        network = tf.add(tf.matmul(network, weight_variable([4 * 4 * 128, 512])),
                         bias_variable([512]))
        network = tf.nn.relu(network)
        # Driven by the placeholder (was a hard-coded 0.5) so that dropout is
        # really switched off when keep_prob=1.0 is fed for evaluation.
        network = tf.nn.dropout(network, keep_prob)
        # define out layer
        network = tf.add(tf.matmul(network, weight_variable([512, num_classes])),
                         bias_variable([num_classes]))
    else:
        raise ValueError("Model not supported ['1dconv', 'maxpool', 'lstm', 'mix', 'cnn']")

    # Every model shares the same loss and optimiser.
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=network, labels=output_var))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

    # for monitor final network
    print("here is our network shape")
    print(network.shape)
    print(output_var.shape)

    # evaluate model
    correct_pred = tf.equal(tf.argmax(network, 1), tf.argmax(output_var, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    print("start training")
    init = tf.global_variables_initializer()
    display_step = 1

    # for saving weight
    saver = tf.train.Saver()
    filepath = "./tmp/"
    filesave_path = filepath + model_type + ".ckpt"

    # gpu config (previously the first ConfigProto was overwritten by a second one)
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    def one_hot(targets):
        # Rows of the identity matrix indexed by class id give one-hot vectors.
        return np.eye(num_classes)[targets.reshape(-1)]

    # run session
    with tf.Session(config=config) as sess:
        writer = tf.summary.FileWriter('logs', sess.graph)
        sess.run(init)

        best_val_acc = 0
        # NaN until the first validation improvement triggers a test pass.
        av_test_acc = float('nan')
        for epoch in range(num_epochs):
            start_time = time.time()
            train_step = 1
            val_step = 1
            sum_train_loss = 0
            sum_train_acc = 0
            sum_val_acc = 0
            train_evals = 0  # number of terms accumulated in sum_train_*
            val_evals = 0    # number of terms accumulated in sum_val_acc

            # training pass
            for inputs, targets in iterate_minibatches(X_train, y_train, batch_size,
                                                       shuffle=False):
                targets = one_hot(targets)
                # check for monitor size
                if train_step == 1:
                    print("batch info is.. batch, inputs.shape, targets.shape")
                    print(X_train.shape)
                    print(y_train.shape)
                    print("batch info")
                    print(inputs.shape)
                    print(targets.shape)
                    print(input_var.shape)
                    print(output_var.shape)
                sess.run(optimizer,
                         feed_dict={input_var: inputs, output_var: targets, keep_prob: 0.5})
                if train_step % display_step == 0:
                    # calculate batch loss & accuracy
                    train_loss, train_acc = sess.run(
                        [cost, accuracy],
                        feed_dict={input_var: inputs, output_var: targets, keep_prob: 1.0})
                    print("Iter " + str(train_step*batch_size) + ", Minibatch Loss= " +
                          "{:.6f}".format(train_loss) + ", Training Accuracy= " +
                          "{:.5f}".format(train_acc))
                    sum_train_loss += train_loss
                    sum_train_acc += train_acc
                    train_evals += 1
                    print(train_loss, sum_train_loss, train_acc, sum_train_acc)
                # A checkpoint is written after every mini-batch.
                save_path = saver.save(sess, filesave_path, global_step=train_step)
                train_step += 1
            print("train Opt fin")

            # validation pass -- scoring only.  The optimiser must not run
            # here, otherwise the model is fitted to the validation split.
            for inputs, targets in iterate_minibatches(X_val, y_val, batch_size,
                                                       shuffle=False):
                targets = one_hot(targets)
                if val_step % display_step == 0:
                    # calculate batch accuracy
                    val_acc = sess.run(
                        accuracy,
                        feed_dict={input_var: inputs, output_var: targets, keep_prob: 1.0})
                    print("Iter " + str(val_step*batch_size) + ", Minibatch Loss= " +
                          ", cross validatino Accuracy= " + "{:.5f}".format(val_acc))
                    sum_val_acc += val_acc
                    val_evals += 1
                    print(val_acc, sum_val_acc)
                val_step += 1
            print("Validation Opt fin")

            # Average over the number of accumulated terms (the step counters
            # start at 1 and therefore overshoot the batch count by one).
            av_train_loss = sum_train_loss / max(train_evals, 1)
            av_train_acc = sum_train_acc / max(train_evals, 1)
            av_val_acc = sum_val_acc / max(val_evals, 1)
            print(av_train_loss, av_train_acc, av_val_acc)

            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                       time.time() - start_time))
            print(" training loss:\t\t{:.6f}".format(av_train_loss))
            print(" validation accuracy:\t\t{:.2f} %".format(av_val_acc * 100))

            # NOTE(review): nesting reconstructed from a whitespace-mangled
            # source -- the test pass is assumed to run only when validation
            # accuracy improves; confirm.
            if av_val_acc > best_val_acc:
                best_val_acc = av_val_acc
                # compute & print the test accuracy
                sum_test_acc = 0
                test_batches = 0
                for inputs, targets in iterate_minibatches(X_test, y_test, batch_size,
                                                           shuffle=False):
                    targets = one_hot(targets)
                    test_acc = sess.run(
                        accuracy,
                        feed_dict={input_var: inputs, output_var: targets, keep_prob: 1.0})
                    print(test_acc)
                    sum_test_acc += test_acc
                    test_batches += 1
                av_test_acc = sum_test_acc / test_batches if test_batches else float('nan')
                print("Final results:")
                print(" test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))

        print('-'*50)
        print("Best validation accuracy:\t\t{:.2f} %".format(best_val_acc * 100))
        print("Best test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))

        # modelsave part
        save_path = saver.save(sess, filesave_path)
        print("Model saved in file: %s" % save_path)
        # Flush the event file; the writer was previously re-created and never closed.
        writer.close()
batch_size = 32 num_epochs = 5 num_classes = len(np.unique(labels)) # CNN Mode imsize = 32 # Find the average response over time windows av_feats = reduce(lambda x, y: x + y, [feats[:, i * 206:(i + 1) * 206] for i in range(feats.shape[1] / 206)]) av_feats = av_feats / (feats.shape[1] / 206) images = gen_images(np.array(locs_2d), av_feats, imsize, normalize=False) # images = np.load('images.npy') np.save('images', images) (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(images, labels, fold) X_train = X_train.astype("float32", casting='unsafe') X_val = X_val.astype("float32", casting='unsafe') X_test = X_test.astype("float32", casting='unsafe') print('Building model...') network = Sequential() network.add(InputLayer(input_shape=(3, imsize, imsize))) network.add(Conv2D(32, 3, padding='same', data_format='channels_first', activation='relu')) network.add(Conv2D(32, 3, padding='same', data_format='channels_first', activation='relu')) network.add(Conv2D(32, 3, padding='same', data_format='channels_first', activation='relu')) network.add(Conv2D(32, 3, padding='same', data_format='channels_first', activation='relu'))