def test_X_is_dict(self, BatchIterator, X_dict, shuffle):
    bi = BatchIterator(2, shuffle=shuffle)(X_dict)
    batches = list(bi)
    assert len(batches) == 10

    X0, y0 = batches[0]
    assert X0['one'].shape == (2, 10)
    assert X0['two'].shape == (2, 10)
    assert y0 is None

    # use list comprehensions: np.vstack needs a sequence, not a generator
    Xt1 = np.vstack([b[0]['one'] for b in batches])
    Xt2 = np.vstack([b[0]['two'] for b in batches])
    assert Xt1.shape == X_dict['one'].shape
    assert Xt2.shape == X_dict['two'].shape
    np.testing.assert_equal(Xt1[:, 0], Xt2[:, 0] / 10)

    if shuffle is False:
        np.testing.assert_equal(X_dict['one'][:2], X0['one'])
        np.testing.assert_equal(X_dict['two'][:2], X0['two'])
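The test above asserts that BatchIterator accepts a dict of arrays and preserves the dict structure per batch. A minimal sketch of that behaviour, with an illustrative fixture inferred from the relations the test checks (20 rows of width 10, 'two' equal to 10 times 'one'); the fixture values are assumptions, not part of the test suite:

# Illustrative sketch only; fixture values are assumed from the test's asserts.
import numpy as np
from nolearn.lasagne import BatchIterator

one = np.random.rand(20, 10)
X_dict = {'one': one, 'two': one * 10}

bi = BatchIterator(2, shuffle=False)
for Xb, yb in bi(X_dict):
    assert sorted(Xb.keys()) == ['one', 'two']  # dict structure is preserved
    assert Xb['one'].shape == (2, 10)           # each value is sliced per batch
    assert yb is None                           # no labels were passed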
def get_accuracy_and_loss_in_batches(X, y):
    p = []
    sce = []
    batch_iterator = BatchIterator(batch_size=128)
    for x_batch, y_batch in batch_iterator(X, y):
        [p_batch, sce_batch] = session.run(
            [predictions, softmax_cross_entropy],
            feed_dict={
                tf_x_batch: x_batch,
                tf_y_batch: y_batch,
                is_training: False
            })
        p.extend(p_batch)
        sce.extend(sce_batch)
    p = np.array(p)
    sce = np.array(sce)
    accuracy = 100.0 * np.sum(np.argmax(p, 1) == np.argmax(y, 1)) / p.shape[0]
    loss = np.mean(sce)
    return (accuracy, loss)
def _create_nnet(self, input_dims, output_dims, learning_rate,
                 num_hidden_units=15, batch_size=32, max_train_epochs=1,
                 hidden_nonlinearity=nonlinearities.rectify,
                 output_nonlinearity=None, update_method=updates.sgd):
    """
    A subclass may override this if a different sort of network is desired.
    """
    nnlayers = [('input', layers.InputLayer),
                ('hidden', layers.DenseLayer),
                ('output', layers.DenseLayer)]
    nnet = NeuralNet(
        layers=nnlayers,

        # layer parameters:
        input_shape=(None, input_dims),
        hidden_num_units=num_hidden_units,
        hidden_nonlinearity=hidden_nonlinearity,
        output_nonlinearity=output_nonlinearity,
        output_num_units=output_dims,

        # optimization method:
        update=update_method,
        update_learning_rate=learning_rate,

        regression=True,  # flag to indicate we're dealing with regression problem
        max_epochs=max_train_epochs,
        batch_iterator_train=BatchIterator(batch_size=batch_size),
        train_split=nolearn.lasagne.TrainSplit(eval_size=0),
        verbose=0,
    )
    nnet.initialize()
    return nnet
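A hedged usage sketch of the factory above; `agent` and all argument values are illustrative, not from the source:

# Hypothetical call: a 4-input, 2-output regression net trained with SGD.
nnet = agent._create_nnet(input_dims=4, output_dims=2, learning_rate=0.001,
                          num_hidden_units=20, batch_size=32)
# The surrounding agent code can then drive nnet.fit / nnet.partial_fit.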
def get_predictions_in_batches(X, session):
    """
    Calculates predictions in batches of 128 examples at a time, using
    `session`'s calculation graph.

    Parameters
    ----------
    X : ndarray
        Dataset to get predictions for.
    session :
        TensorFlow session to be used for predicting. Is expected to have
        a `predictions` var in the graph along with a `tf_x_batch`
        placeholder for incoming data.

    Returns
    -------
    N-dimensional array of predictions.
    """
    p = []
    batch_iterator = BatchIterator(batch_size=128)
    for x_batch, _ in batch_iterator(X):
        [p_batch] = session.run([predictions],
                                feed_dict={tf_x_batch: x_batch})
        p.extend(p_batch)
    return p
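A short usage sketch, assuming a session whose graph matches the docstring's contract; the input shape is an illustrative assumption:

# Illustrative only: `session` must already hold the graph described in the
# docstring (a `predictions` op and a `tf_x_batch` placeholder).
import numpy as np

X_new = np.random.rand(1000, 32, 32, 1).astype(np.float32)
p = get_predictions_in_batches(X_new, session)
print(len(p))  # one prediction per example in X_new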
def build_net(vectorizer, batch_size=1024 * 10, r1_size=100):
    vocab_size = vectorizer.num_chars
    seq_length = vectorizer.seq_length
    net = NeuralNet(
        layers=[('input', layers.InputLayer),
                ('r1', layers.LSTMLayer),
                ('s1', layers.SliceLayer),
                ('output', layers.DenseLayer)],
        input_shape=(None, 25, vocab_size),
        r1_num_units=r1_size,
        s1_indices=-1,
        s1_axis=1,
        output_num_units=vocab_size,
        output_nonlinearity=softmax,
        update=nesterov_momentum,
        update_learning_rate=0.1,
        update_momentum=0.9,
        # update=adam,
        # update_learning_rate=0.01,
        max_epochs=10000,
        on_epoch_finished=[SaveBestModel('rnn', vectorizer)],
        batch_iterator_train=BatchIterator(batch_size),
        train_split=TrainSplit(eval_size=0.0),
        regression=False,
        verbose=2)
    return net
    hidden1_num_units=500,  # number of units in hidden layer
    hidden1_nonlinearity=nonlinearities.rectify,
    dropout1_p=0.5,
    hidden2_num_units=500,
    hidden2_nonlinearity=nonlinearities.rectify,
    dropout2_p=0.5,
    hidden3_num_units=500,
    hidden3_nonlinearity=nonlinearities.rectify,
    dropout3_p=0.5,
    output_nonlinearity=nonlinearities.softmax,  # output layer uses softmax
    output_num_units=9,  # 9 target classes
    objective=L2Regularization,
    objective_alpha=0.000005,
    eval_size=0.0,
    batch_iterator_train=BatchIterator(batch_size=1024),
    batch_iterator_test=BatchIterator(batch_size=1024),

    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=theano.shared(float32(0.03)),
    update_momentum=theano.shared(float32(0.9)),
    on_epoch_finished=[
        AdjustVariable('update_learning_rate', start=0.03, stop=0.01),
        AdjustVariable('update_momentum', start=0.9, stop=0.999),
        # EarlyStopping(patience=200),
    ],

    regression=False,  # flag to indicate we're dealing with a classification problem
    max_epochs=100,  # we want to train this many epochs
    verbose=1,
)
np.random.seed(15)
IMG_SHAPE = 224, 224
path = '../input'
X, y, drivers = load_train_data(path, grayscale=False, img_shape=IMG_SHAPE,
                                equalize=False, zeromean=True, usecache=False)
X, y, drivers = shuffle(X, y, drivers, random_state=0)
train_split = CVTrainSplit(LabelKFold(drivers, n_folds=2))
batch_iterator_train = RotateBatchIterator(10, 0.5, 12, True)
batch_iterator_test = BatchIterator(12, False)

layer = [
    (InputLayer, {'shape': (None, 3, 224, 224)}),
    (Conv2DDNNLayer, {'num_filters': 64, 'filter_size': 3, 'pad': 1}),
    (Conv2DDNNLayer, {'num_filters': 64, 'filter_size': 3, 'pad': 1}),
    (MaxPool2DLayer, {'pool_size': 2}),
    (Conv2DDNNLayer, {'num_filters': 128,
def train(self, data, loss_calculator):
    print 'Start training ...'
    x_train = data['train']['x']
    y_train = data['train']['y']
    x_val = data['val']['x']
    y_val = data['val']['y']
    x_test = data['test']['x']
    y_test = data['test']['y']

    graph, init_graph = self.graph_model.get_graph()
    optimizer = self.graph_model.optimizer
    x_placeholder, y_placeholder, is_training_placeholder = self.graph_model.get_placeholders()

    print 'Running a session ...'
    tf_config = tf.ConfigProto(device_count={'GPU': 0})
    tf_config.gpu_options.allow_growth = True
    with tf.Session(graph=graph, config=tf_config) as self.session:
        self.session.run(init_graph)
        saver = tf.train.Saver()
        summary_op = tf.summary.merge_all()
        writer = tf.summary.FileWriter(logdir=self.logdir, graph=self.session.graph)
        predictor = Predictor(
            sess=self.session,
            predict_graph=self.graph_model.predictions,
            feed_dict={'x': x_placeholder, 'training': is_training_placeholder},
            batch_size=self.batch_size)

        for epoch in range(self.epochs):
            print '%s / %s th epoch, training ...' % (epoch, self.epochs)
            batch_iterator = BatchIterator(batch_size=self.batch_size, shuffle=True)
            for x_train_batch, y_train_batch in batch_iterator(x_train, y_train):
                _, summary = self.session.run(
                    [optimizer, summary_op],
                    feed_dict={
                        x_placeholder: x_train_batch,
                        y_placeholder: y_train_batch,
                        is_training_placeholder: True
                    })

            if epoch % self.val_epoch == 0:
                print '[Validating Round]'
                predict_train = predictor.predict_in_batch(x_train)
                predict_val = predictor.predict_in_batch(x_val)
                loss_train = loss_calculator.calculate(predict_train, y_train)
                loss_val = loss_calculator.calculate(predict_val, y_val)
                print '%s th epoch:\n' \
                      ' train loss: %s' \
                      ' val loss: %s' \
                      % (epoch, loss_train, loss_val)

            writer.add_summary(summary, epoch)

            if (epoch % self.save_epoch == 0) or (epoch == self.epochs - 1):
                print '[Testing Round]'
                snapshot_path = saver.save(sess=self.session, save_path=self.save_path)
                print 'Snapshot of %s th epoch is saved to %s' % (epoch, snapshot_path)
                predict_test = predictor.predict_in_batch(x_test)
                loss_test = loss_calculator.calculate(predict_test, y_test)
                print '%s th epoch:\n' \
                      ' test loss: %s' \
                      % (epoch, loss_test)
net = NeuralNet(
    layers=outputLayer,
    update=updates.nesterov_momentum,
    # update=updates.adam,
    # update=updates.rmsprop,
    update_learning_rate=0.4,
    # update_beta1=0.9,
    # update_beta2=0.999,
    # update_epsilon=1e-8,
    update_momentum=0.9,
    # update_rho=0.9,
    # update_epsilon=1e-06,
    objective_loss_function=objectives.categorical_crossentropy,
    # objective=objectives.categorical_crossentropy,
    batch_iterator_train=BatchIterator(batch_size=batchSize),
    batch_iterator_test=BatchIterator(batch_size=batchSize),
    train_split=TrainSplit(eval_size=0.2, stratify=False),
    use_label_encoder=True,
    # use_label_encoder=False,
    regression=False,
    max_epochs=numberEpochs,
    verbose=1)

# x_fit, x_eval, y_fit, y_eval = cross_validation.train_test_split(xTrain, y, test_size=0.2)
net.fit(xTrain, y)
predictY = net.predict_proba(xTrain)
print(metrics.log_loss(originalY, predictY))
def test_shuffle_no_copy(self, BatchIterator, X, y):
    bi = BatchIterator(2, shuffle=True)(X, y)
    X0, y0 = list(bi)[0]
    assert X0.base is X  # make sure X0 is a view
# conv5Layer = layers.Conv2DLayer(dropout4Layer, num_filters=1024, filter_size=(2,2))
# hidden1Layer = layers.DenseLayer(conv4Layer, num_units=8192, nonlinearity=elu)
hidden2Layer = layers.DenseLayer(conv4Layer, num_units=4096, nonlinearity=rectify)
hidden3Layer = layers.DenseLayer(hidden2Layer, num_units=2048, nonlinearity=tanh)
hidden4Layer = layers.DenseLayer(hidden3Layer, num_units=1024, nonlinearity=elu)
hidden5Layer = layers.DenseLayer(hidden4Layer, num_units=512, nonlinearity=tanh)
hidden6Layer = layers.DenseLayer(hidden5Layer, num_units=256, nonlinearity=elu)
hidden7Layer = layers.DenseLayer(hidden6Layer, num_units=128, nonlinearity=tanh)
outputLayer = layers.DenseLayer(hidden7Layer, num_units=62, nonlinearity=softmax)'''

net = NeuralNet(
    layers=outputLayer,
    update_learning_rate=0.01,
    update_momentum=0.90,
    batch_iterator_train=BatchIterator(batch_size=100),
    batch_iterator_test=BatchIterator(batch_size=100),
    use_label_encoder=True,
    # use_label_encoder = False,
    regression=False,
    max_epochs=200,
    verbose=1)

'''
net = NeuralNet(
    layers=[
        ('input', layers.InputLayer),
        # ('hidden1', layers.DenseLayer),
        # ('transform', transformLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('dropout1', layers.DropoutLayer),
def make_memnn(vocab_size, cont_sl, cont_wl, quest_wl, answ_wl, rnn_size,
               rnn_type='LSTM', pool_size=4, answ_n=4, dence_l=[100],
               dropout=0.5, batch_size=16, emb_size=50, grad_clip=40,
               init_std=0.1, num_hops=3, rnn_style=False, nonlin=LN.softmax,
               init_W=None, rng=None, art_pool=4, lr=0.01, mom=0,
               updates=LU.adagrad, valid_indices=0.2, permute_answ=False,
               permute_cont=False):

    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

    # dence = dence + [1]
    RNN = select_rnn(rnn_type)

    # ----------------------------------------------------------------weights
    tr_variables = {}
    tr_variables['WQ'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WA'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WC'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WTA'] = theano.shared(
        init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WTC'] = theano.shared(
        init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WAnsw'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))

    # -----------------------------------------------------------input layers
    layers = [
        (LL.InputLayer, {'name': 'l_in_q',
                         'shape': (batch_size, 1, quest_wl),
                         'input_var': T.itensor3('l_in_q_')}),
        (LL.InputLayer, {'name': 'l_in_a',
                         'shape': (batch_size, answ_n, answ_wl),
                         'input_var': T.itensor3('l_in_a_')}),
        (LL.InputLayer, {'name': 'l_in_q_pe',
                         'shape': (batch_size, 1, quest_wl, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_a_pe',
                         'shape': (batch_size, answ_n, answ_wl, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_cont',
                         'shape': (batch_size, cont_sl, cont_wl),
                         'input_var': T.itensor3('l_in_cont_')}),
        (LL.InputLayer, {'name': 'l_in_cont_pe',
                         'shape': (batch_size, cont_sl, cont_wl, emb_size)}),
    ]

    # -----------------------------------------------------------slice layers
    # l_qs = []
    # l_cas = []
    l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)]
    l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_names[i],
                                        'incoming': 'l_in_a',
                                        'indices': slice(i, i + 1),
                                        'axis': 1})])
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_pe_names[i],
                                        'incoming': 'l_in_a_pe',
                                        'indices': slice(i, i + 1),
                                        'axis': 1})])

    # -----------------------------------------------------------MEMNN layers
    # question ---------------------------------------------------------------
    layers.extend([(EncodingFullLayer, {'name': 'l_emb_f_q',
                                        'incomings': ('l_in_q', 'l_in_q_pe'),
                                        'vocab_size': vocab_size,
                                        'emb_size': emb_size,
                                        'W': tr_variables['WQ'],
                                        'WT': None})])

    l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)]
    layers.extend([(MemoryLayer, {'name': l_mem_names[0],
                                  'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'),
                                  'vocab_size': vocab_size,
                                  'emb_size': emb_size,
                                  'A': tr_variables['WA'],
                                  'C': tr_variables['WC'],
                                  'AT': tr_variables['WTA'],
                                  'CT': tr_variables['WTC'],
                                  'nonlin': nonlin})])
    for i in range(1, num_hops):
        if i % 2:
            WC, WA = tr_variables['WA'], tr_variables['WC']
            WTC, WTA = tr_variables['WTA'], tr_variables['WTC']
        else:
            WA, WC = tr_variables['WA'], tr_variables['WC']
            WTA, WTC = tr_variables['WTA'], tr_variables['WTC']
        layers.extend([(MemoryLayer, {'name': l_mem_names[i],
                                      'incomings': ('l_in_cont', 'l_in_cont_pe',
                                                    l_mem_names[i - 1]),
                                      'vocab_size': vocab_size,
                                      'emb_size': emb_size,
                                      'A': WA, 'C': WC, 'AT': WTA, 'CT': WTC,
                                      'nonlin': nonlin})])

    # answers ----------------------------------------------------------------
    l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(EncodingFullLayer, {'name': l_emb_f_a_names[i],
                                            'incomings': (l_a_names[i], l_a_pe_names[i]),
                                            'vocab_size': vocab_size,
                                            'emb_size': emb_size,
                                            'W': tr_variables['WAnsw'],
                                            'WT': None})])

    # -----------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qma_concat',
                                     'incomings': l_mem_names + l_emb_f_a_names})])

    # -------------------------------------------------------------RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f',
                          'incoming': 'l_qma_concat',
                          # 'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': False,
                          'only_return_final': False,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b',
                          'incoming': 'l_qma_concat',
                          # 'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': True,
                          'only_return_final': False,
                          'grad_clipping': grad_clip})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_f_sl',
                                    'incoming': 'l_qa_rnn_f',
                                    'indices': slice(-answ_n, None),
                                    'axis': 1})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_b_sl',
                                    'incoming': 'l_qa_rnn_b',
                                    'indices': slice(-answ_n, None),
                                    'axis': 1})])
    layers.extend([(LL.ElemwiseMergeLayer, {'name': 'l_qa_rnn_conc',
                                            'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'),
                                            'merge_function': T.add})])

    # ----------------------------------------------------------pooling layer
    # layers.extend([(LL.DimshuffleLayer, {'name': 'l_qa_rnn_conc_',
    #                                      'incoming': 'l_qa_rnn_conc',
    #                                      'pattern': (0, 'x', 1)})])
    layers.extend([(LL.Pool1DLayer, {'name': 'l_qa_pool',
                                     'incoming': 'l_qa_rnn_conc',
                                     'pool_size': pool_size,
                                     'mode': 'max'})])

    # -----------------------------------------------------------dence layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)]
    if dropout:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do', 'p': dropout})])
    for i, d in enumerate(dence_l):
        if i < len(dence_l) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i],
                                        'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence_l) - 1 and dropout:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do',
                                              'p': dropout})])

    if isinstance(valid_indices, np.ndarray) or isinstance(valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    if permute_answ or permute_cont:
        batch_iterator_train = PermIterator(batch_size, permute_answ, permute_cont)
    else:
        batch_iterator_train = BatchIterator(batch_size=batch_size)

    def loss(x, t):
        return LO.aggregate(
            LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=updates,
        update_learning_rate=lr,
        # update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=batch_iterator_train,
        # integer division keeps batch_size an int under Python 3 as well
        batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        # batch_iterator_test=BatchIterator(batch_size=batch_size),
        # train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split,
        on_batch_finished=[zero_memnn])
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
def init_net(self, feature_count, class_count=NCLASSES, verbosity=VERBOSITY >= 2):
    """
    Initialize the network (needs to be done when data is available in order
    to set dimensions).
    """
    if VERBOSITY >= 1:
        print 'initializing network {0:s} {1:d}x{2:d}x{3:d}'.format(
            self.name, self.dense1_size or 0, self.dense2_size or 0,
            self.dense3_size or 0)
    if VERBOSITY >= 2:
        print 'parameters: ' + ', '.join(
            '{0:s} = {1:}'.format(k, v)
            for k, v in self.get_params(deep=False).items())
    self.feature_count = feature_count
    self.class_count = class_count

    """ Create the layers and their settings. """
    self.layers = [
        ('input', InputLayer),
    ]
    self.params = {
        'dense1_num_units': self.dense1_size,
        'dense1_nonlinearity': nonlinearities[self.dense1_nonlinearity],
        'dense1_W': initializers[self.dense1_init],
        'dense1_b': Constant(0.),
    }
    if self.dropout0_rate:
        self.layers += [('dropout0', DropoutLayer)]
        self.params['dropout0_p'] = self.dropout0_rate
    self.layers += [
        ('dense1', DenseLayer),
    ]
    if self.dropout1_rate:
        self.layers += [('dropout1', DropoutLayer)]
        self.params['dropout1_p'] = self.dropout1_rate
    if self.dense2_size:
        self.layers += [('dense2', DenseLayer)]
        self.params.update({
            'dense2_num_units': self.dense2_size,
            'dense2_nonlinearity': nonlinearities[self.dense2_nonlinearity],
            'dense2_W': initializers[self.dense2_init],
            'dense2_b': Constant(0.),
        })
    else:
        assert not self.dense3_size, 'There cannot be a third dense layer without a second one'
    if self.dropout2_rate:
        assert self.dense2_size is not None, 'There cannot be a second dropout layer without a second dense layer.'
        self.layers += [('dropout2', DropoutLayer)]
        self.params['dropout2_p'] = self.dropout2_rate
    if self.dense3_size:
        self.layers += [('dense3', DenseLayer)]
        self.params.update({
            'dense3_num_units': self.dense3_size,
            'dense3_nonlinearity': nonlinearities[self.dense3_nonlinearity],
            'dense3_W': initializers[self.dense3_init],
            'dense3_b': Constant(0.),
        })
    if self.dropout3_rate:
        assert self.dense3_size is not None, 'There cannot be a third dropout layer without a third dense layer.'
        self.layers += [('dropout3', DropoutLayer)]
        self.params['dropout3_p'] = self.dropout3_rate  # was self.dropout2_rate (copy-paste bug)
    self.layers += [('output', DenseLayer)]
    self.params.update({
        'output_nonlinearity': nonlinearities[self.output_nonlinearity],
        'output_W': GlorotUniform(),
        'output_b': Constant(0.),
    })

    """ Create meta parameters and special handlers. """
    if VERBOSITY >= 3:
        print 'learning rate: {0:.6f} -> {1:.6f}'.format(
            abs(self.learning_rate),
            abs(self.learning_rate) / float(self.learning_rate_scaling))
        print 'momentum: {0:.6f} -> {1:.6f}'.format(
            abs(self.momentum),
            1 - ((1 - abs(self.momentum)) / float(self.momentum_scaling)))
    self.step_handlers = [
        LinearVariable('update_learning_rate',
                       start=abs(self.learning_rate),
                       stop=abs(self.learning_rate) / float(self.learning_rate_scaling)),
        LinearVariable('update_momentum',
                       start=abs(self.momentum),
                       stop=1 - ((1 - abs(self.momentum)) / float(self.momentum_scaling))),
        StopNaN(),
    ]
    self.end_handlers = [
        SnapshotEndSaver(base_name=self.name),
        TrainProgressPlotter(base_name=self.name),
    ]
    snapshot_name = 'nn_' + params_name(self.params, prefix=self.name)[0]
    if self.save_snapshots_stepsize:
        self.step_handlers += [
            SnapshotStepSaver(every=self.save_snapshots_stepsize,
                              base_name=snapshot_name),
        ]
    if self.auto_stopping:
        self.step_handlers += [
            StopWhenOverfitting(loss_fraction=0.9, base_name=snapshot_name),
            StopAfterMinimum(patience=40, base_name=self.name),
        ]
    weight_decay = shared(float32(abs(self.weight_decay)), 'weight_decay')
    if self.adaptive_weight_decay:
        self.step_handlers += [
            AdaptiveWeightDecay(weight_decay),
        ]
    if self.epoch_steps:
        self.step_handlers += [
            BreakEveryN(self.epoch_steps),
        ]

    """ Create the actual nolearn network with information from __init__. """
    self.net = NeuralNet(
        layers=self.layers,
        objective=partial(WeightDecayObjective, weight_decay=weight_decay),
        input_shape=(None, feature_count),
        output_num_units=class_count,
        update=nesterov_momentum,  # todo: make parameter
        update_learning_rate=shared(float32(self.learning_rate)),
        update_momentum=shared(float32(self.momentum)),  # was self.weight_decay (copy-paste bug)
        on_epoch_finished=self.step_handlers,
        on_training_finished=self.end_handlers,
        regression=False,
        max_epochs=self.max_epochs,
        verbose=verbosity,
        batch_iterator_train=BatchIterator(batch_size=self.batch_size),
        batch_iterator_test=BatchIterator(batch_size=self.batch_size),
        eval_size=0.1,
        # custom_score = ('custom_loss', categorical_crossentropy),
        **self.params)
    self.net.parent = self
    self.net.initialize()
    return self.net
def gen_BatchIterator(self, batch_size=100):
    """Generate the batch iterator"""
    B = BatchIterator(batch_size=batch_size, shuffle=True)
    return B
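A brief usage sketch; `model`, `X_train`, and `y_train` are illustrative assumptions (the method only requires NumPy-style arrays):

# Hypothetical caller: one epoch of shuffled minibatches.
bi = model.gen_BatchIterator(batch_size=100)
for x_batch, y_batch in bi(X_train, y_train):
    pass  # run one training step on (x_batch, y_batch)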
def train_model(params, X_train, y_train, X_valid, y_valid, X_test, y_test):
    paths = Paths(params)
    log = logging.getLogger()
    fl = logging.FileHandler('train.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    fl.setFormatter(formatter)
    screen = logging.StreamHandler(sys.stdout)
    log.setLevel(logging.DEBUG)
    log.addHandler(fl)
    log.addHandler(screen)
    start = time.time()
    model_variable_scope = paths.var_scope
    log_parameters(log, params, y_train.shape[0], y_valid.shape[0], y_test.shape[0])

    graph = tf.Graph()
    with graph.as_default():
        tf_x_batch = tf.placeholder(tf.float32,
                                    shape=(None, params.image_size[0], params.image_size[1], 1))
        tf_y_batch = tf.placeholder(tf.float32, shape=(None, params.num_classes))
        is_training = tf.placeholder(tf.bool)
        current_epoch = tf.Variable(0, trainable=False)

        if params.learning_rate_decay:
            learning_rate = tf.train.exponential_decay(params.learning_rate,
                                                       current_epoch,
                                                       decay_steps=params.max_epochs,
                                                       decay_rate=0.01)
        else:
            learning_rate = params.learning_rate

        with tf.variable_scope(model_variable_scope):
            logits = model_pass(tf_x_batch, params, is_training)
            if params.l2_reg_enabled:
                with tf.variable_scope('fc4', reuse=True):
                    l2_loss = tf.nn.l2_loss(tf.get_variable('weights'))
            else:
                l2_loss = 0

        predictions = tf.nn.softmax(logits)
        # keyword arguments are required by the TF 1.x signature
        softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf_y_batch)
        loss = tf.reduce_mean(softmax_cross_entropy) + params.l2_lambda * l2_loss
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    with tf.Session(graph=graph) as session:
        session.run(tf.global_variables_initializer())

        def get_accuracy_and_loss_in_batches(X, y):
            p = []
            sce = []
            batch_iterator = BatchIterator(batch_size=128)
            for x_batch, y_batch in batch_iterator(X, y):
                [p_batch, sce_batch] = session.run(
                    [predictions, softmax_cross_entropy],
                    feed_dict={
                        tf_x_batch: x_batch,
                        tf_y_batch: y_batch,
                        is_training: False
                    })
                p.extend(p_batch)
                sce.extend(sce_batch)
            p = np.array(p)
            sce = np.array(sce)
            accuracy = 100.0 * np.sum(np.argmax(p, 1) == np.argmax(y, 1)) / p.shape[0]
            loss = np.mean(sce)
            return (accuracy, loss)

        if params.resume_training:
            try:
                tf.train.Saver().restore(session, paths.model_path)
            except Exception as e:
                print("Failed restoring previous model: File does not exist.")
                pass

        saver = tf.train.Saver()
        early_stopping = EarlyStopping(tf.train.Saver(), session,
                                       patience=params.early_stopping_patience,
                                       minimize=True)
        train_loss_history = np.empty([0], dtype=np.float32)
        train_accuracy_history = np.empty([0], dtype=np.float32)
        valid_loss_history = np.empty([0], dtype=np.float32)
        valid_accuracy_history = np.empty([0], dtype=np.float32)

        if params.max_epochs > 0:
            print("____________TRAINING______________")  # changed from log to print
        else:
            print('____________TESTING_______________')  # ditto
        print('Timestamp:' + utils.get_time_hhmmss())
        # log.sync()

        for epoch in range(params.max_epochs):
            current_epoch = epoch
            batch_iterator = BatchIterator(batch_size=params.batch_size, shuffle=True)
            for x_batch, y_batch in batch_iterator(X_train, y_train):
                session.run([optimizer], feed_dict={
                    tf_x_batch: x_batch,
                    tf_y_batch: y_batch,
                    is_training: True
                })

            if epoch % params.log_epoch == 0:
                valid_accuracy, valid_loss = get_accuracy_and_loss_in_batches(X_valid, y_valid)
                train_accuracy, train_loss = get_accuracy_and_loss_in_batches(X_train, y_train)
                if epoch % params.print_epoch == 0:
                    print("____________EPOCH %4d/%d______________" % (epoch, params.max_epochs))
                    print("     Train loss: %.8f, accuracy: %.2f%%" % (train_loss, train_accuracy))
                    print("Validation loss: %.8f, accuracy: %.2f%%" % (valid_loss, valid_accuracy))
                    print("      Best loss: %.8f at epoch %d" %
                          (early_stopping.best_monitored_value,
                           early_stopping.best_monitored_epoch))
                    print("   Elapsed time: " + utils.get_time_hhmmss(start))
                    print("      Timestamp: " + utils.get_time_hhmmss())
                    # log.sync
            else:
                valid_loss = 0.
                valid_accuracy = 0.
                train_loss = 0.
                train_accuracy = 0.

            valid_loss_history = np.append(valid_loss_history, [valid_loss])
            valid_accuracy_history = np.append(valid_accuracy_history, [valid_accuracy])
            train_loss_history = np.append(train_loss_history, [train_loss])
            train_accuracy_history = np.append(train_accuracy_history, [train_accuracy])

            if params.early_stopping_enabled:
                if valid_loss == 0:
                    _, valid_loss = get_accuracy_and_loss_in_batches(X_valid, y_valid)
                if early_stopping(valid_loss, epoch):
                    print("Early stopping.\nBest monitored loss was {:.8f} at epoch {}"
                          .format(early_stopping.best_monitored_value,
                                  early_stopping.best_monitored_epoch))
                    break

        test_accuracy, test_loss = get_accuracy_and_loss_in_batches(X_test, y_test)
        valid_accuracy, valid_loss = get_accuracy_and_loss_in_batches(X_valid, y_valid)
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        print(" Valid loss: %.8f, accuracy = %.2f%%" % (valid_loss, valid_accuracy))
        print("  Test loss: %.8f, accuracy = %.2f%%" % (test_loss, test_accuracy))
        print(" Total time: " + utils.get_time_hhmmss(start))
        print("  Timestamp: " + utils.get_time_hhmmss())

        saved_model_path = saver.save(session, paths.model_path)
        print("Model file: " + saved_model_path)
        np.savez(paths.training_history_path,
                 train_loss_history=train_loss_history,
                 train_accuracy_history=train_accuracy_history,
                 valid_loss_history=valid_loss_history,
                 valid_accuracy_history=valid_accuracy_history)
        print("Training history file:" + paths.training_history_path)
        plot_learning_curves(params)
# --- Initialise nolearn NN object ---
net_cnn = nolas.NeuralNet(
    layers=layers_lst,

    # Optimization:
    max_epochs=10,
    update=lasagne.updates.adadelta,

    # Objective:
    objective_loss_function=lasagne.objectives.binary_crossentropy,

    # Batch size & Splits:
    train_split=TrainSplit(eval_size=.3),
    batch_iterator_train=BatchIterator(batch_size=10, shuffle=False),
    batch_iterator_test=BatchIterator(batch_size=10, shuffle=False),

    # Custom scores:
    # 1) target; 2) preds:
    custom_scores=[('auc', lambda y_true, y_proba: roc_auc_score(y_true, y_proba[:, 0]))],
    # 1) preds; 2) target;
    scores_train=None,
    scores_valid=None,

    # misc:
    y_tensor_type=T.imatrix,
    regression=True,
    verbose=1,

    # CallBacks:
# loss = tf.reduce_mean(tf.square(predictions - tf_y_batch))

# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

every_epoch_to_log = 1

with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    train_loss_history = np.zeros(num_epochs)
    valid_loss_history = np.zeros(num_epochs)
    print("============ TRAINING =============")
    for epoch in range(num_epochs):
        current_epoch = epoch
        batch_iterator = BatchIterator(batch_size=batch_size, shuffle=True)
        for x_batch, y_batch in batch_iterator(voxels, onehot_labels):
            session.run(optimizer, feed_dict={
                tf_x_batch: x_batch,
                tf_y_batch: y_batch,
                is_training: True
            })

        # to log the losses, get predictions on the entire training set
        p = []
        total_loss = 0
        if epoch % every_epoch_to_log == 0:
            batch_iterator = BatchIterator(batch_size=128)
            for x_batch, y_batch in batch_iterator(voxels, onehot_labels):
                # [p_batch] = session.run([logits], feed_dict={tf_x_batch: x_batch, is_training: False})
    (MaxPool1DLayer, {'pool_size': (2)}),

    # two dense layers with dropout
    (DropoutLayer, {'p': 0.5}),
    (DenseLayer, {'num_units': 512}),

    # the output layer
    (DropoutLayer, {'p': 0.5}),
    (DenseLayer, {'num_units': 2, 'nonlinearity': softmax}),
]

# Network parameters
net0 = NeuralNet(
    layers=layers0,
    max_epochs=100,
    batch_iterator_train=BatchIterator(batch_size=100, shuffle=True),
    update=nesterov_momentum,
    update_learning_rate=0.01,
    objective_l2=0.001,
    train_split=TrainSplit(eval_size=0.2),
    verbose=2,
)

# Train
net0.fit(x_train, y_train)

# Plot learning curve
plot_loss(net0)
def train_model(train_samples, train_phenotypes, labels,
                valid_samples=None, valid_phenotypes=None, generate_valid_set=True,
                train_sample_flags=None, valid_sample_flags=None,
                landmark_norm=None, scale=True, ncell=500, nsubset=4096,
                subset_selection='random', nrun=10, pooling='max',
                ncell_pooled=None, regression=False, nfilter=2,
                learning_rate=0.03, momentum=0.9,
                l2_weight_decay_conv=1e-8, l2_weight_decay_out=1e-8,
                max_epochs=10, verbose=1, select_filters='consensus',
                accur_thres=.9, benchmark_scores=False):
    '''
    train_samples: list with input samples, e.g. cytometry samples
    train_phenotypes: phenotype associated with the samples in train_samples
    labels: labels of measured markers in train_samples
    '''

    # copy the list of samples so that they are not modified in place
    train_samples = copy.deepcopy(train_samples)
    if valid_samples is not None:
        valid_samples = copy.deepcopy(valid_samples)

    # create dummy single-cell flags if not given
    if train_sample_flags is None:
        train_sample_flags = [np.zeros((x.shape[0], 1), dtype=int)
                              for x in train_samples]
    if (valid_samples is not None) and (valid_sample_flags is None):
        valid_sample_flags = [np.zeros((x.shape[0], 1), dtype=int)
                              for x in valid_samples]

    if landmark_norm is not None:
        idx_to_normalize = [labels.index(label) for label in landmark_norm]
        train_samples = landmark_normalization(train_samples, idx_to_normalize)
        if valid_samples is not None:
            valid_samples = landmark_normalization(valid_samples, idx_to_normalize)

    # normalize extreme values
    # we assume that 0 corresponds to the control class
    if subset_selection == 'outlier':
        ctrl_list = [train_samples[i] for i in np.where(np.array(train_phenotypes) == 0)[0]]
        test_list = [train_samples[i] for i in np.where(np.array(train_phenotypes) == 1)[0]]
        train_samples = normalize_outliers_to_control(ctrl_list, test_list)
        if valid_samples is not None:
            ctrl_list = [valid_samples[i] for i in np.where(np.array(valid_phenotypes) == 0)[0]]
            test_list = [valid_samples[i] for i in np.where(np.array(valid_phenotypes) == 1)[0]]
            valid_samples = normalize_outliers_to_control(ctrl_list, test_list)

    if (valid_samples is None) and (not generate_valid_set):
        sample_ids = range(len(train_phenotypes))
        X_train, id_train, z_train = combine_samples(train_samples, sample_ids,
                                                     train_sample_flags)
    elif (valid_samples is None) and generate_valid_set:
        sample_ids = range(len(train_phenotypes))
        X, sample_id, z = combine_samples(train_samples, sample_ids,
                                          train_sample_flags)
        valid_phenotypes = train_phenotypes

        # split into train-validation partitions
        eval_folds = 5
        kf = StratifiedKFold(sample_id, eval_folds)
        train_indices, valid_indices = next(iter(kf))
        X_train, id_train, z_train = X[train_indices], sample_id[train_indices], z[train_indices]
        X_valid, id_valid, z_valid = X[valid_indices], sample_id[valid_indices], z[valid_indices]
    else:
        sample_ids = range(len(train_phenotypes))
        X_train, id_train, z_train = combine_samples(train_samples, sample_ids,
                                                     train_sample_flags)
        sample_ids = range(len(valid_phenotypes))
        X_valid, id_valid, z_valid = combine_samples(valid_samples, sample_ids,
                                                     valid_sample_flags)

    # scale all marker distributions to mu=0, std=1
    if scale:
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)

    X_train, z_train, id_train = shuffle(X_train, z_train, id_train)
    train_phenotypes = np.asarray(train_phenotypes)
    y_train = train_phenotypes[id_train]

    if (valid_samples is not None) or generate_valid_set:
        if scale:
            X_valid = scaler.transform(X_valid)
        X_valid, z_valid, id_valid = shuffle(X_valid, z_valid, id_valid)
        valid_phenotypes = np.asarray(valid_phenotypes)
        y_valid = valid_phenotypes[id_valid]

    # number of measured markers
    nmark = X_train.shape[1]

    # generate multi-cell inputs
    if subset_selection == 'outlier':
        # here we assume that class 0 is always the control class and class 1 is the test class
        # TODO: extend for more classes
        x_ctrl_train = X_train[y_train == 0]
        nsubset_ctrl = nsubset / np.sum(train_phenotypes == 0)
        nsubset_biased = nsubset / np.sum(train_phenotypes == 1)
        to_keep = int(0.01 * (X_train.shape[0] - x_ctrl_train.shape[0]))
        X_tr, y_tr = generate_biased_subsets(X_train, train_phenotypes, id_train,
                                             x_ctrl_train, nsubset_ctrl,
                                             nsubset_biased, ncell, to_keep,
                                             id_ctrl=np.where(train_phenotypes == 0)[0],
                                             id_biased=np.where(train_phenotypes == 1)[0])
        if (valid_samples is not None) or generate_valid_set:
            x_ctrl_valid = X_valid[y_valid == 0]
            nsubset_ctrl = nsubset / np.sum(valid_phenotypes == 0)
            nsubset_biased = nsubset / np.sum(valid_phenotypes == 1)
            to_keep = int(0.01 * (X_valid.shape[0] - x_ctrl_valid.shape[0]))
            X_v, y_v = generate_biased_subsets(X_valid, valid_phenotypes, id_valid,
                                               x_ctrl_valid, nsubset_ctrl,
                                               nsubset_biased, ncell, to_keep,
                                               id_ctrl=np.where(valid_phenotypes == 0)[0],
                                               id_biased=np.where(valid_phenotypes == 1)[0])
    # TODO: right now equal number of subsets is drawn from each sample
    # Do it per phenotype instead?
    elif subset_selection == 'kmeans':
        X_tr, y_tr = generate_subsets(X_train, train_phenotypes, id_train,
                                      nsubset, ncell, k_init=True)
        if (valid_samples is not None) or generate_valid_set:
            X_v, y_v = generate_subsets(X_valid, valid_phenotypes, id_valid,
                                        nsubset / 2, ncell, k_init=True)
    else:
        X_tr, y_tr = generate_subsets(X_train, train_phenotypes, id_train,
                                      nsubset, ncell, k_init=False)
        if (valid_samples is not None) or generate_valid_set:
            X_v, y_v = generate_subsets(X_valid, valid_phenotypes, id_valid,
                                        nsubset / 2, ncell, k_init=False)

    ## neural network configuration ##
    # batch size
    bs = 128

    # the input and convolutional layers
    input_conv_layers = [
        (layers.InputLayer, {'name': 'input', 'shape': (None, nmark, ncell)}),
        (layers.Conv1DLayer, {'name': 'conv',
                              'b': init.Constant(0.),
                              'W': init.Uniform(range=0.01),
                              'num_filters': nfilter, 'filter_size': 1})]

    # the pooling layer
    # max-pooling detects cell presence
    # mean-pooling detects cell frequency
    if pooling == 'max':
        if ncell_pooled is None:
            pooling_layers = [(layers.MaxPool1DLayer, {'name': 'maxPool',
                                                       'pool_size': ncell})]
        else:
            pooling_layers = [
                (SelectCellLayer, {'name': 'select', 'num_cell': ncell_pooled}),
                (layers.Pool1DLayer, {'name': 'maxPool',
                                      'pool_size': ncell_pooled,
                                      'mode': 'average_exc_pad'})]
    elif pooling == 'mean':
        pooling_layers = [(layers.Pool1DLayer, {'name': 'meanPool',
                                                'pool_size': ncell,
                                                'mode': 'average_exc_pad'})]
    else:
        sys.stderr.write("Undefined pooling type: %s\n" % pooling)
        sys.exit(-1)

    # the output layer
    if not regression:
        n_out = len(np.unique(train_phenotypes))
        output_nonlinearity = T.nnet.softmax
    else:
        n_out = 1
        output_nonlinearity = T.tanh
    output_layers = [(layers.DenseLayer, {'name': 'output',
                                          'num_units': n_out,
                                          'W': init.Uniform(range=0.01),
                                          'b': init.Constant(0.),
                                          'nonlinearity': output_nonlinearity})]

    # combine all the network layers
    layers_0 = input_conv_layers + pooling_layers + output_layers

    # train some neural networks with different parameter configurations
    w_store = dict()
    accuracies = np.empty(nrun)
    for irun in range(nrun):
        if verbose:
            print 'training network: %d' % (irun + 1)
        if (valid_samples is not None) or generate_valid_set:
            # build a convolutional neural network
            net1 = MyNeuralNet(
                layers=layers_0,
                # objective function and weight decay penalties
                objective=weight_decay_objective,
                objective_penalty_conv=l2_weight_decay_conv,
                objective_penalty_output=l2_weight_decay_out,
                # optimization method
                update=nesterov_momentum,
                update_learning_rate=theano.shared(float32(learning_rate)),
                update_momentum=theano.shared(float32(momentum)),
                # batches
                batch_iterator_train=BatchIterator(batch_size=bs),
                batch_iterator_test=BatchIterator(batch_size=bs),
                on_epoch_finished=[EarlyStopping(patience=3)],
                train_split=TrainSplit(eval_size=None),
                regression=regression,
                max_epochs=max_epochs,
                verbose=verbose)

            # train the model
            if regression:
                net1.fit(float32(X_tr), float32(y_tr.reshape(-1, 1)),
                         float32(X_v), float32(y_v.reshape(-1, 1)))
                valid_loss = net1.score(float32(X_v), float32(y_v.reshape(-1, 1)))
                valid_accuracy = -valid_loss
            else:
                net1.fit(float32(X_tr), int32(y_tr), float32(X_v), int32(y_v))
                valid_accuracy = net1.score(float32(X_v), int32(y_v))
        else:
            # build a convolutional neural network without validation set
            net1 = NeuralNet(
                layers=layers_0,
                # objective function and weight decay penalties
                objective=weight_decay_objective,
                objective_penalty_conv=l2_weight_decay_conv,
                objective_penalty_output=l2_weight_decay_out,
                # optimization method
                update=nesterov_momentum,
                update_learning_rate=theano.shared(float32(learning_rate)),
                update_momentum=theano.shared(float32(momentum)),
                # batches
                batch_iterator_train=BatchIterator(batch_size=bs),
                batch_iterator_test=BatchIterator(batch_size=bs),
                on_epoch_finished=[],
                train_split=TrainSplit(eval_size=None),
                regression=regression,
                max_epochs=max_epochs,
                verbose=verbose)

            # train the model
            if regression:
                net1.fit(float32(X_tr), float32(y_tr.reshape(-1, 1)))
                valid_accuracy = 0
            else:
                net1.fit(float32(X_tr), int32(y_tr))
                valid_accuracy = 0

        # extract the network parameters
        w_store[irun] = net1.get_all_params_values()
        accuracies[irun] = valid_accuracy

    # which filter weights should we return
    # 'best': return the filter weights of the model with highest validation accuracy
    # 'consensus': return consensus filters based on hierarchical clustering
    # 'consensus_priority': prioritize the consensus filter that corresponds
    #                       to the biggest cluster
    # this option only makes sense if validation samples were provided/generated
    best_net, w_best_net, best_accuracy = None, None, None
    if select_filters == 'best':
        best_net = w_store[np.argmax(accuracies)]
        w_best_net = param_vector(best_net, regression)
        best_accuracy = np.max(accuracies)
        w_cons, cluster_res = compute_consensus_profiles(w_store, accuracies,
                                                         accur_thres, regression,
                                                         prioritize=False)
    elif select_filters == 'consensus':
        w_cons, cluster_res = compute_consensus_profiles(w_store, accuracies,
                                                         accur_thres, regression,
                                                         prioritize=False)
    elif select_filters == 'consensus_priority':
        w_cons, cluster_res = compute_consensus_profiles(w_store, accuracies,
                                                         accur_thres, regression,
                                                         prioritize=True)
    else:
        sys.stderr.write("Undefined option for selecting filters: %s\n" % select_filters)
        sys.exit(-1)

    if (valid_samples is not None) or generate_valid_set:
        X = np.vstack([X_train, X_valid])
        y = np.hstack([y_train, y_valid])
        z = np.vstack([z_train, z_valid])
    else:
        X = X_train
        y = y_train
        z = z_train

    # predict using CellCnn
    if select_filters == 'consensus_priority':
        params = w_cons
        w, b = params[:-2], params[-2]
        x1 = X[y == 1]
        x0 = X[y == 0]
        cnn_pred = np.sum(w.reshape(1, -1) * x1, axis=1) + b
    else:
        cnn_pred = None

    results = {
        'clustering_result': cluster_res,
        'best_net': best_net,
        'w_best_net': w_best_net,
        'selected_filters': w_cons,
        'accuracies': accuracies,
        'best_accuracy': best_accuracy,
        'cnn_pred': cnn_pred,
        'labels': labels,
        'X': X, 'y': y, 'z': z}

    if benchmark_scores:
        # predict using outlier detection
        outlier_pred = knn_dist_memory_optimized(x1, x0, s=200000)

        # predict using multi-cell input logistic regression
        X_tr_mean = np.sum(X_tr, axis=-1)
        clf = LogisticRegression(C=10000, penalty='l2')
        clf.fit(X_tr_mean, y_tr)
        w_lr, b_lr = clf.coef_, clf.intercept_
        mean_pred = np.sum(w_lr.reshape(1, -1) * x1, axis=1) + b_lr[0]

        # predict using single-cell input logistic regression
        clf_sc = LogisticRegression(C=10000, penalty='l2')
        clf_sc.fit(X, y)
        w_lr, b_lr = clf_sc.coef_, clf_sc.intercept_
        sc_pred = np.sum(w_lr.reshape(1, -1) * x1, axis=1) + b_lr[0]

        # store the predictions
        results['outlier_pred'] = outlier_pred
        results['mean_pred'] = mean_pred
        results['sc_pred'] = sc_pred

    return results
# train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="fc3|fc4")
for var in train_vars:
    print(var.name)

every_epoch_to_log = 1

#####################################################################################
## Retrain the new layers with manually labelled data
session.run(tf.global_variables_initializer())
saver = tf.train.Saver()
train_loss_history = np.zeros(num_epochs)
valid_loss_history = np.zeros(num_epochs)
print("============ TRAINING =============")
for epoch in range(num_epochs):
    current_epoch = epoch
    batch_iterator = BatchIterator(batch_size=batch_size, shuffle=True)
    for x_batch, y_end_batch in batch_iterator(voxels_train, labels_endpoints_train):
        y_batch, x2_batch = np.split(y_end_batch, [9], axis=1)
        session.run(optimizer, feed_dict={
            tf_x_batch_new: x_batch,
            tf_endpoints_batch: x2_batch,
            tf_y_batch_new: y_batch,
            is_training_flag: True,
            flag: False
        })

    # to log the losses, get predictions on the entire training set
    p = []
    total_loss = 0
def build_net(randomize=False):
    layers0 = [('input', InputLayer),
               ('dropin', DropoutLayer),
               ('dense0', DenseLayer),
               ('dropout0', DropoutLayer),
               ('dense1', DenseLayer),
               ('dropout1', DropoutLayer),
               ('dense2', DenseLayer),
               ('dropout2', DropoutLayer),
               ('dense3', DenseLayer),
               ('dropout3', DropoutLayer),
               ('output', DenseLayer)]
    n = [256, 1024, 1024, 256]
    leak = [0.25, 0.00, 0.0, 0.0]
    drop = [0.12, 0.15, 0.2, 0.3, 0.5]
    if randomize:
        for i in range(4):
            n[i] += np.random.randint(low=-n[i] // 15, high=n[i] // 15)
        """
        for i in range(4):
            drop[i] *= np.random.uniform(0.8, 1.2)
        leak[0] = np.random.uniform(0.2, 0.3)
        leak[1] = np.random.uniform(0, 0.1)
        leak[2] = np.random.uniform(0.0, 0.05)
        """
    print "net: ", n, leak, drop
    net0 = NeuralNet(
        layers=layers0,
        input_shape=(None, num_features),
        dropin_p=drop[0],
        dense0_num_units=n[0],
        dense0_W=HeNormal(),
        dense0_nonlinearity=LeakyRectify(leak[0]),
        dropout0_p=drop[1],
        dense1_num_units=n[1],
        dense1_nonlinearity=LeakyRectify(leak[1]),
        dense1_W=HeNormal(),
        dropout1_p=drop[2],
        dense2_num_units=n[2],
        dense2_nonlinearity=LeakyRectify(leak[2]),
        dense2_W=HeNormal(),
        dropout2_p=drop[3],
        dense3_num_units=n[3],
        dense3_nonlinearity=LeakyRectify(leak[3]),
        dense3_W=HeNormal(),
        dropout3_p=drop[4],
        output_num_units=num_classes,
        output_nonlinearity=softmax,
        update=nesterov_momentum,
        update_learning_rate=theano.shared(tfloat32(0.02)),
        update_momentum=theano.shared(tfloat32(0.9)),
        eval_size=0.0,
        verbose=1,
        max_epochs=150,
        on_epoch_finished=[
            AdjustVariable('update_learning_rate', epochs=[50, 100],
                           rates=[2e-3, 2e-4])],
        regularization_rate=1e-5,
        batch_iterator_train=BatchIterator(batch_size=128)
    )
    return net0
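A short usage sketch under the snippet's own globals (`num_features`, `num_classes`); the training arrays are assumptions:

# Illustrative only: X and labels are assumed NumPy arrays of matching shape.
net = build_net(randomize=True)
net.fit(X.astype(np.float32), labels.astype(np.int32))
proba = net.predict_proba(X.astype(np.float32))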
    # (layers.Conv2DLayer, {'num_filters': 512, 'filter_size': 3}),
    # (layers.MaxPool2DLayer, {'pool_size': 2, 'stride': 2}),

    # two dense layers with dropout
    (layers.DenseLayer, {'num_units': 2048}),
    (layers.DropoutLayer, {}),
    (layers.DenseLayer, {'num_units': 1024}),

    # the output layer
    (layers.DenseLayer, {'num_units': 11,
                         'nonlinearity': lasagne.nonlinearities.softmax}),
]

net0 = NeuralNet(
    layers=layers0,
    update_learning_rate=0.0001,
    max_epochs=100,
    update=adam,
    objective_l2=0.0025,
    batch_iterator_train=BatchIterator(batch_size=60),
    batch_iterator_test=BatchIterator(batch_size=29),
    train_split=TrainSplit(eval_size=0.02),
    verbose=1,
)
        ('hidden1', layers.DenseLayer),
        ('output', layers.DenseLayer)],
    input_shape=(None, x_train.shape[1]),
    hidden0_num_units=15,
    hidden0_nonlinearity=scaled_tanh,
    hidden1_num_units=15,
    hidden1_nonlinearity=scaled_tanh,
    output_num_units=1,
    output_nonlinearity=nonlinearities.linear,
    regression=True,
    verbose=1,
    max_epochs=250,
    update=lasagne.updates.adagrad,
    # on_epoch_finished=[EarlyStopping(patience=100), rbw],
    # on_training_finished=[rbw.restore],
    train_split=TrainSplit(eval_size=0.3),
    batch_iterator_train=BatchIterator(batch_size=128),
)

# Set up the gridsearch
param_grid = {
    'hidden0_num_units': range(5, 32),
    'hidden1_num_units': range(5, 32),
}
grid_search = GridSearchCV(net, param_grid, verbose=0, n_jobs=20,
                           pre_dispatch='2*n_jobs',
                           scoring='mean_squared_error')
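A hedged continuation showing how the grid search above would be run; it uses only the names the snippet already defines:

# Illustrative continuation: run the search and inspect the best candidate.
grid_search.fit(x_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)  # sklearn maximises, so the MSE score is negated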
def run_cross_validation(nfolds=10):
    unique_drivers = trainingLabels['subject'].unique()
    # driver_id = {}
    # for d_id in unique_drivers:
    #     driver_id[d_id] = trainingLabels.loc[trainingLabels['subject'] == d_id]['img']
    driver_id = trainingLabels['subject']
    # print('Unique drivers ' + str(unique_drivers) + '\n' + str(driver_id))
    kf = cross_validation.KFold(len(unique_drivers), n_folds=nfolds, shuffle=True)
    num_fold = 1
    yfull_train = dict()
    yfull_test = []
    for train_drivers, test_drivers in kf:
        unique_list_train = [unique_drivers[i] for i in train_drivers]
        # print('Unique drivers train ' + str(unique_list_train))
        X_train, Y_train, train_index = copy_selected_drivers(xTrain, y, driver_id, unique_list_train)
        unique_list_valid = [unique_drivers[i] for i in test_drivers]
        # print('Unique drivers validation ' + str(unique_list_valid))
        X_valid, Y_valid, test_index = copy_selected_drivers(xTrain, y, driver_id, unique_list_valid)

        outputLayer = None
        if source == '1':
            outputLayer = getNet1()
            # outputLayer = getNet2()
            # outputLayer = getNet8()
            numberEpochs = 30
        elif source == '2':
            outputLayer = getNet3()
            numberEpochs = 30
        elif source == '3':
            outputLayer = getNet4()
            # outputLayer = getNet5()
            # outputLayer = getNet6()
            # outputLayer = getNet7()
            numberEpochs = 10
        elif source == '4':
            outputLayer = getNet9()
            # outputLayer = getNet10()
            numberEpochs = 30

        net = NeuralNet(
            layers=outputLayer,
            update=updates.nesterov_momentum,
            # update=updates.adam,
            # update=updates.rmsprop,
            # update=updates.adadelta,
            update_learning_rate=0.001,
            # update_beta1=0.9,
            # update_beta2=0.999,
            # update_epsilon=1e-8,
            update_momentum=0.9,
            # update_rho=0.95,
            # update_epsilon=1e-06,
            objective_loss_function=objectives.categorical_crossentropy,
            # objective=objectives.categorical_crossentropy,
            batch_iterator_train=BatchIterator(batch_size=batchSize),
            batch_iterator_test=BatchIterator(batch_size=batchSize),
            # custom_scores=[objectives.categorical_accuracy],
            use_label_encoder=True,
            # use_label_encoder=False,
            regression=False,
            max_epochs=numberEpochs,
            verbose=1
        )
        net.fit(X_train, Y_train)

        predictValidY = net.predict_proba(X_valid)
        score = metrics.log_loss(Y_valid, predictValidY)
        print('Fold ' + str(num_fold) + ' score ' + str(score))
        for i in range(len(test_index)):
            yfull_train[test_index[i]] = predictValidY[i]
        test_prediction = net.predict_proba(xTest)
        yfull_test.append(test_prediction)
        if gcEnabled:
            gc.collect()
        num_fold += 1

    score = metrics.log_loss(y, dict_to_list(yfull_train))
    print('Final log loss ' + str(score))
    test_res = merge_several_folds_mean(yfull_test, nfolds)
    predictions = pd.DataFrame(test_res, index=files)
    predictions.index.name = 'img'
    predictions.columns = ['c0', 'c1', 'c2', 'c3', 'c4',
                           'c5', 'c6', 'c7', 'c8', 'c9']
    predictions.to_csv(predictionsFile)
def test():
    if cfg['evaluation']['online_training']:
        print("Start evaluation and online training...")
        print("offline_validation...")
        prediction = predict(netSpec, xVal)
        probabilities = netSpec.predict_proba(xVal)
        print("Performance_on_relevant_data")
        result = yVal == prediction
        faults = yVal != prediction
        acc_val = float(np.sum(result)) / float(len(result))
        print "Accuracy_validation: ", acc_val
        print "Error_rate_(%): ", 100 * (1 - acc_val)
        relTrain = yTrain != label_values.noise
        relVal = yVal != label_values.noise
        print 'Ratio_validation_relevant_data:', float(
            np.count_nonzero(relVal)) / (np.count_nonzero(relVal) +
                                         np.count_nonzero(relTrain))
        rresult = yVal[relVal] == prediction[relVal]
        acc_val_relevant = float(np.sum(rresult)) / float(len(rresult))
        print "Accuracy_for_relevant_data: ", acc_val_relevant
        print "Error_rate_for_relevant_data_(%): ", 100 * (1 - acc_val_relevant)

        prediction = np.zeros((xVal.shape[0]), dtype=np.int32)
        probabilities = np.zeros((xVal.shape[0], 2), dtype=np.float32)
        batch_size = 128
        print "xVal.shape[0]", xVal.shape[0]
        for i in range(0, xVal.shape[0] - batch_size, batch_size):
            fragment_xVal = xVal[i:i + batch_size]
            fragment_prediction = predict(netSpec, fragment_xVal)
            prediction[i:i + batch_size] = fragment_prediction
            fragment_probabilities = netSpec.predict_proba(fragment_xVal)
            probabilities[i:i + batch_size] = fragment_probabilities
            new_fragment_probabilities = radicalize(fragment_probabilities)
            print "fragment_xVal.shape", fragment_xVal.shape
            print "new_fragment_probabilities", new_fragment_probabilities
            netSpec.partial_fit(fragment_xVal, new_fragment_probabilities)
    else:
        print("Validating...")
        if include_userdata:
            prediction = predict(netSpec, {'sensors': xVal, 'user': udVal})
            probabilities = netSpec.predict_proba({'sensors': xVal, 'user': udVal})
            print "probabilities.shape", probabilities.shape
        else:
            prediction = predict(netSpec, xVal)
            probabilities = netSpec.predict_proba(xVal)
            print "probabilities.shape", probabilities.shape
        print("Showing last 30 test samples..")
        print("Predictions:")
        print(prediction[-30:])
        print("Ground Truth:")
        print(yVal[-30:])
        print("Performance on relevant data")
        result = yVal == prediction
        faults = yVal != prediction
        acc_val = float(np.sum(result)) / float(len(result))
        print "Accuracy validation: ", acc_val
        print "Error rate (%): ", 100 * (1 - acc_val)
        # print np.nonzero(faults)
        print "yVal", yVal

    if args.plot_prob_dist:
        rrprobs = probabilities[relVal]
        rrprobs_idx = prediction[relVal]
        rrprobs = rrprobs[np.arange(rrprobs_idx.size), rrprobs_idx]
        rrprobs_correct = rrprobs[rresult]
        rrprobs_wrong = rrprobs[np.invert(rresult)]
        numBins = 40
        p1 = plt.hist(rrprobs_correct, numBins, color='green', alpha=0.5,
                      label="Correct samples")
        p2 = plt.hist(rrprobs_wrong, numBins, color='red', alpha=0.5,
                      label="Wrong samples")
        max_bin_size = max(max(p1[0]), max(p2[0]))
        plt.plot((np.median(rrprobs_correct), np.median(rrprobs_correct)),
                 (0, max_bin_size), 'g-', label="Median prob for correct samples")
        plt.plot((np.median(rrprobs_wrong), np.median(rrprobs_wrong)),
                 (0, max_bin_size), 'r-', label="Median prob for false samples")
        plt.title("Distribution of predicted probabilities")
        plt.legend(loc='upper center', numpoints=1, bbox_to_anchor=(0.5, -0.05),
                   ncol=2, fancybox=True, shadow=True)
        dest_str = ""
        for session in args.include_session:
            dest_str = dest_str + '_' + session
        plt.savefig('dist_proba' + dest_str + '.png', bbox_inches='tight')
        plt.show()

    # selVal = aVal['saturated']
    # tresult = yVal[selVal] == prediction[selVal]
    # print "Ratio selection:", float(np.count_nonzero(selVal)) / len(xVal)
    # acc_val_sel = float(np.sum(tresult)) / float(len(tresult) + 0.0001)
    # print "Accuracy for selection", acc_val_sel
    # print "Error rate for selection val data (%): ", 100 * (1 - acc_val_sel)

    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(yVal, prediction)
    print cm

    from sklearn.metrics import roc_auc_score, log_loss
    print "roc_auc:", roc_auc_score(yVal, probabilities[:, 1])
    print "log_loss", log_loss(yVal, probabilities[:, 1])

    print "Changing batch iterator test:"
    from nolearn.lasagne import BatchIterator
    netSpec.batch_iterator_test = BatchIterator(batch_size=256)

    print "Calculating final prediction for the hour long sessions"
    print "magnitudes_normal_val.shape", g.magnitudes_normal_val.shape
    probabilities_hour = []
    for mag_hour in g.magnitudes_normal_val:
        patches = rolling_window_ext(mag_hour, (magnitude_window, ceil - floor))
        patches = np.swapaxes(patches, 0, 2)
        predictions_patches = netSpec.predict_proba(patches[0])
        prediction_hour = np.sum(predictions_patches, axis=0) / predictions_patches.shape[0]
        probabilities_hour.append(prediction_hour[1])
    print "magnitudes_seizure_val.shape", g.magnitudes_seizure_val.shape
    for mag_hour in g.magnitudes_seizure_val:
        patches = rolling_window_ext(mag_hour, (magnitude_window, ceil - floor))
        patches = np.swapaxes(patches, 0, 2)
        predictions_patches = netSpec.predict_proba(patches[0])
        prediction_hour = np.sum(predictions_patches, axis=0) / predictions_patches.shape[0]
        print prediction_hour
        probabilities_hour.append(prediction_hour[1])
    yVal_hour = np.hstack((np.zeros(g.magnitudes_normal_val.shape[0]),
                           np.ones(g.magnitudes_seizure_val.shape[0])))
    print "roc_auc for the hours:", roc_auc_score(yVal_hour, probabilities_hour)
    print "log_loss for the hours", log_loss(yVal_hour, probabilities_hour)

    print "saving predictions to csv file"
    from datetime import datetime
    patient_str = '-'.join(args.patients)
    csv_filename = 'hours' + patient_str + '_' + cfg['training']['model'] + \
                   '_' + datetime.now().strftime("%m-%d-%H-%M-%S") + '.csv'
    print csv_filename
    csv = open('./results/' + csv_filename, 'w+')
    for i in range(yVal_hour.shape[0]):
        csv.write(str(yVal_hour[i]) + ',' + str(probabilities_hour[i]) + '\n')
    csv.close()  # was `csv.close` without parentheses, which never closed the file

    predictions_hour = np.round(probabilities_hour)
    result_hour = yVal_hour == predictions_hour
    acc_val_hour = float(np.sum(result_hour)) / float(len(result_hour))
    print "Accuracy validation for the hours: ", acc_val_hour

    print "Calculating the predictions for the test files"
    preprocess_test_data()
    probabilities_test = []
    for mag_test in magnitudes_test:
        patches = rolling_window_ext(mag_test, (magnitude_window, ceil - floor))
        patches = np.swapaxes(patches, 0, 2)
        predictions_patches = netSpec.predict_proba(patches[0])
        prediction_test = np.sum(predictions_patches, axis=0) / predictions_patches.shape[0]
        probabilities_test.append(prediction_test[1])

    print "saving predictions to csv file"
    csv_filename = patient_str + '_' + cfg['training']['model'] + \
                   '_' + datetime.now().strftime("%m-%d-%H-%M-%S") + '.csv'
    print csv_filename
    csv = open('./results/' + csv_filename, 'w+')
    counter = 0
    for dataset in datasets.all:
        if dataset.enabled and not dataset.trainset:
            for i in range(int(dataset.no_files * args.debug_sub_ratio)):
                filename = dataset.base_name + str(i + 1) + '.mat'
                csv.write(filename + ',' + str(probabilities_test[counter + i]) + '\n')
    csv.close()  # was `csv.close` without parentheses
    hidden2_num_units=NUM_HIDDEN_UNITS_2,
    dropout2_p=HIDDEN2_DROPOUT_P,  # hidden 2 dropout
    output_num_units=NUM_OUTPUT,

    # Changing rate and momentum
    update_learning_rate=theano.shared(float32(LEARNING_RATE_START)),
    update_momentum=theano.shared(float32(MOMENTUM_START)),

    #### changing learning rate and momentum
    on_epoch_finished=[
        AdjustVariable('update_learning_rate',
                       start=LEARNING_RATE_START_LOG, stop=LEARNING_RATE_END_LOG),
        AdjustVariable('update_momentum',
                       start=MOMENTUM_START_LOG, stop=MOMENTUM_END_LOG),
    ],

    # Other params
    batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE),
    max_epochs=MAX_EPOCHS,
    update=nesterov_momentum,
    verbose=1,
    output_nonlinearity=softmax,
    eval_size=EVAL_SIZE,
    l2_lambda=L2_LAMBDA,
    l1_lambda=L1_LAMBDA
)

### main process starts here ###
df = pd.read_csv('train.csv')  # reading data ...
df_test = pd.read_csv('test.csv')

# pre-processing data
df.target = df.target.apply(lambda x: x.split("_", 1)[1])  # convert class_x -> x
def train_model(params, X_train, y_train, X_valid, y_valid):
    """
    Performs model training on the provided training dataset according to the
    provided parameters, and then evaluates the trained model on the
    validation dataset. Part of the training dataset may be used for
    validation during training if specified in the model parameters.

    Parameters
    ----------
    params : Parameters
        Structure (`namedtuple`) containing model parameters.
    X_train :
        Training dataset.
    y_train :
        Training dataset labels.
    X_valid :
        Validation dataset.
    y_valid :
        Validation dataset labels.
    """
    # Initialisation routines: generate variable scope, create logger, note start time.
    paths = Paths(params)
    log = ModelCloudLog(
        os.path.join(paths.root_path, "logs")
        # dropbox_token=logger_config["dropbox_token"],
        # telegram_token=logger_config["telegram_token"],
        # telegram_chat_id=logger_config["telegram_chat_id"]
    )
    start = time.time()
    model_variable_scope = paths.var_scope

    log.log_parameters(params, y_train.shape[0], y_valid.shape[0])

    # Build the graph
    graph = tf.Graph()
    with graph.as_default():
        # Input data. For the training data, we use a placeholder that will be
        # fed at run time with a training minibatch.
        tf_x_batch = tf.placeholder(tf.float32, shape=(None, params.image_size[0], params.image_size[1], 1))
        tf_y_batch = tf.placeholder(tf.float32, shape=(None, params.num_classes))
        is_training = tf.placeholder(tf.bool)
        current_epoch = tf.Variable(0, trainable=False)  # counts the number of epochs

        # Model parameters.
        if params.learning_rate_decay:
            learning_rate = tf.train.exponential_decay(params.learning_rate, current_epoch,
                                                       decay_steps=params.max_epochs, decay_rate=0.01)
        else:
            learning_rate = params.learning_rate

        # Training computation.
        with tf.variable_scope(model_variable_scope):
            logits = model_pass(tf_x_batch, params, is_training)
            if params.l2_reg_enabled:
                with tf.variable_scope('fc4', reuse=True):
                    l2_loss = tf.nn.l2_loss(tf.get_variable('weights'))
            else:
                l2_loss = 0

        predictions = tf.nn.softmax(logits)
        softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_y_batch)
        loss = tf.reduce_mean(softmax_cross_entropy) + params.l2_lambda * l2_loss

        # Optimizer.
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    with tf.Session(graph=graph) as session:
        session.run(tf.global_variables_initializer())

        # A routine for evaluating current model parameters
        def get_accuracy_and_loss_in_batches(X, y):
            p = []
            sce = []
            batch_iterator = BatchIterator(batch_size=128)
            for x_batch, y_batch in batch_iterator(X, y):
                [p_batch, sce_batch] = session.run([predictions, softmax_cross_entropy], feed_dict={
                    tf_x_batch: x_batch,
                    tf_y_batch: y_batch,
                    is_training: False
                })
                p.extend(p_batch)
                sce.extend(sce_batch)
            p = np.array(p)
            sce = np.array(sce)
            accuracy = 100.0 * np.sum(np.argmax(p, 1) == np.argmax(y, 1)) / p.shape[0]
            loss = np.mean(sce)
            return (accuracy, loss)

        # If we chose to keep training a previously trained model, restore session.
        if params.resume_training:
            try:
                tf.train.Saver().restore(session, paths.model_path)
            except Exception as e:
                log("Failed restoring previously trained model: file does not exist.")
                pass

        saver = tf.train.Saver()
        early_stopping = EarlyStopping(tf.train.Saver(), session,
                                       patience=params.early_stopping_patience, minimize=True)
        train_loss_history = np.empty([0], dtype=np.float32)
        train_accuracy_history = np.empty([0], dtype=np.float32)
        valid_loss_history = np.empty([0], dtype=np.float32)
        valid_accuracy_history = np.empty([0], dtype=np.float32)
        if params.max_epochs > 0:
            log("================= TRAINING ==================")
        else:
            log("================== TESTING ==================")
        log(" Timestamp: " + get_time_hhmmss())
        log.sync()

        for epoch in range(params.max_epochs):
            # Update the graph's epoch counter so learning-rate decay advances
            # (a plain Python assignment would not affect the TF variable).
            current_epoch.load(epoch, session)

            # Train on whole randomised dataset in batches
            batch_iterator = BatchIterator(batch_size=params.batch_size, shuffle=True)
            for x_batch, y_batch in batch_iterator(X_train, y_train):
                session.run([optimizer], feed_dict={
                    tf_x_batch: x_batch,
                    tf_y_batch: y_batch,
                    is_training: True
                })

            # If another significant epoch ended, we log our losses.
            if (epoch % params.log_epoch == 0):
                # Get validation data predictions and log validation loss:
                valid_accuracy, valid_loss = get_accuracy_and_loss_in_batches(X_valid, y_valid)

                # Get training data predictions and log training loss:
                train_accuracy, train_loss = get_accuracy_and_loss_in_batches(X_train, y_train)

                if (epoch % params.print_epoch == 0):
                    log("-------------- EPOCH %4d/%d --------------" % (epoch, params.max_epochs))
                    log("     Train loss: %.8f, accuracy: %.2f%%" % (train_loss, train_accuracy))
                    log("Validation loss: %.8f, accuracy: %.2f%%" % (valid_loss, valid_accuracy))
                    log("      Best loss: %.8f at epoch %d" %
                        (early_stopping.best_monitored_value, early_stopping.best_monitored_epoch))
                    log("   Elapsed time: " + get_time_hhmmss(start))
                    log("      Timestamp: " + get_time_hhmmss())
                    log.sync()
            else:
                valid_loss = 0.
                valid_accuracy = 0.
                train_loss = 0.
                train_accuracy = 0.

            valid_loss_history = np.append(valid_loss_history, [valid_loss])
            valid_accuracy_history = np.append(valid_accuracy_history, [valid_accuracy])
            train_loss_history = np.append(train_loss_history, [train_loss])
            train_accuracy_history = np.append(train_accuracy_history, [train_accuracy])

            if params.early_stopping_enabled:
                # Get validation data predictions and log validation loss:
                if valid_loss == 0:
                    _, valid_loss = get_accuracy_and_loss_in_batches(X_valid, y_valid)
                if early_stopping(valid_loss, epoch):
                    # log("Early stopping.\nBest monitored loss was {:.8f} at epoch {}.".format(
                    #     early_stopping.best_monitored_value, early_stopping.best_monitored_epoch
                    # ))
                    break

        # Evaluate on the validation dataset (test-set evaluation is commented out).
        # test_accuracy, test_loss = get_accuracy_and_loss_in_batches(X_test, y_test)
        valid_accuracy, valid_loss = get_accuracy_and_loss_in_batches(X_valid, y_valid)
        log("=============================================")
        log(" Valid loss: %.8f, accuracy: %.2f%%" % (valid_loss, valid_accuracy))
        # log("  Test loss: %.8f, accuracy: %.2f%%" % (test_loss, test_accuracy))
        log(" Total time: " + get_time_hhmmss(start))
        log("  Timestamp: " + get_time_hhmmss())

        # Save model weights for future use.
        saved_model_path = saver.save(session, paths.model_path)
        log("Model file: " + saved_model_path)
        np.savez(paths.train_history_path,
                 train_loss_history=train_loss_history,
                 train_accuracy_history=train_accuracy_history,
                 valid_loss_history=valid_loss_history,
                 valid_accuracy_history=valid_accuracy_history)
        log("Train history file: " + paths.train_history_path)
        # log.sync(notify=True, message="Finished training with *%.2f%%* accuracy on the testing set (loss = *%.6f*)."
        #          % (test_accuracy, test_loss))

        plot_learning_curves(params)
        log.add_plot(notify=True, caption="Learning curves")

        pyplot.show()
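`train_model` assumes an `EarlyStopping` helper that is not defined in this snippet: it is constructed with a saver, a session, a `patience`, and a `minimize` flag, exposes `best_monitored_value` and `best_monitored_epoch`, and is called once per epoch, returning True when training should stop. A minimal sketch consistent with that usage (the actual implementation may differ, e.g. in where it writes its checkpoint):

import os
import tempfile
import numpy as np

class EarlyStopping(object):
    """Tracks a monitored value, checkpoints the best weights, and signals
    when training should stop after `patience` epochs without improvement."""

    def __init__(self, saver, session, patience=100, minimize=True):
        self.saver = saver
        self.session = session
        self.patience = patience
        self.minimize = minimize
        self.best_monitored_value = np.inf if minimize else 0.
        self.best_monitored_epoch = 0
        self.restore_path = None

    def __call__(self, value, epoch):
        # Did the monitored value improve?
        if (self.minimize and value < self.best_monitored_value) or \
           (not self.minimize and value > self.best_monitored_value):
            self.best_monitored_value = value
            self.best_monitored_epoch = epoch
            self.restore_path = self.saver.save(
                self.session, os.path.join(tempfile.gettempdir(), "early_stopping_checkpoint"))
        elif self.best_monitored_epoch + self.patience < epoch:
            # Patience exhausted: roll back to the best checkpoint and stop.
            if self.restore_path is not None:
                self.saver.restore(self.session, self.restore_path)
            return True
        return False

Restoring the best checkpoint on stop means the final validation numbers logged by `train_model` reflect the best weights seen, not the last epoch's.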
def single_hidden_layer():
    """
    Run the graph to train the model and predict the test data.
    :return:
    """
    # Load dataset
    X, y = load()
    img = X[11].reshape(96, 96)
    plt.imshow(img, cmap='gray')
    # plt.show()

    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    x_test, x_valid, y_test, y_valid = train_test_split(x_test, y_test, test_size=0.5)

    # Predefined parameters
    image_size = 96
    num_keypoints = 30
    batch_size = 36
    num_epochs = 1001
    learning_rate = 0.01
    momentum = 0.9

    model_name = "1fc_b" + str(batch_size) + "_e" + str(num_epochs - 1)
    model_variable_scope = model_name
    root_location = 'models/'
    model_path = root_location + model_name + '/model.ckpt'
    train_history_path = root_location + model_name + '/train_history'
    os.makedirs(root_location + model_name + '/', exist_ok=True)

    # Training
    graph = tf.Graph()
    with graph.as_default():
        # Input data. For the training data, we use a placeholder that will be
        # fed at run time with a training minibatch.
        tf_x_batch = tf.placeholder(tf.float32, shape=(None, image_size * image_size))
        tf_y_batch = tf.placeholder(tf.float32, shape=(None, num_keypoints))

        # Training computation.
        with tf.variable_scope(model_variable_scope):
            predictions = model_pass(tf_x_batch, num_keypoints)

        def get_predictions_in_batches(X, session):
            """
            Calculates predictions in batches of 128 examples at a time,
            using `session`'s calculation graph.

            Parameters
            ----------
            X : ndarray
                Dataset to get predictions for.
            session :
                TensorFlow session to be used for predicting. Is expected to
                have a `predictions` var in the graph along with a `tf_x_batch`
                placeholder for incoming data.

            Returns
            -------
            N-dimensional array of predictions.
            """
            p = []
            batch_iterator = BatchIterator(batch_size=128)
            for x_batch, _ in batch_iterator(X):
                [p_batch] = session.run([predictions], feed_dict={tf_x_batch: x_batch})
                p.extend(p_batch)
            return p

        loss = tf.reduce_mean(tf.square(predictions - tf_y_batch))

        # Optimizer.
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum,
                                               use_nesterov=True).minimize(loss)

    start = time.time()
    every_epoch_to_log = 5

    with tf.Session(graph=graph) as session:
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        train_loss_history = np.zeros(num_epochs)
        valid_loss_history = np.zeros(num_epochs)
        print("============TRAINING============")
        for epoch in range(num_epochs):
            # Train on whole randomised dataset in batches
            batch_iterator = BatchIterator(batch_size=batch_size, shuffle=True)
            for x_batch, y_batch in batch_iterator(x_train, y_train):
                session.run([optimizer], feed_dict={
                    tf_x_batch: x_batch,
                    tf_y_batch: y_batch
                })

            # If another significant epoch ended, we log our losses.
            if (epoch % every_epoch_to_log) == 0:
                # Get training data predictions and log training loss:
                train_loss = calc_loss(get_predictions_in_batches(x_train, session), y_train)
                train_loss_history[epoch] = train_loss

                # Get validation data predictions and log validation loss:
                valid_loss = calc_loss(get_predictions_in_batches(x_valid, session), y_valid)
                valid_loss_history[epoch] = valid_loss

                if (epoch % 100) == 0:
                    print('-----EPOCH %4d/%d' % (epoch, num_epochs))
                    print('     Train loss: %.8f' % train_loss)
                    print('Validation loss: %.8f' % valid_loss)
                    print('           Time: ' + get_time_hhmmss(start))

        # Evaluate on test dataset.
        test_loss = calc_loss(get_predictions_in_batches(x_test, session), y_test)
        print('==========================================')
        print('Test score: %.3f (loss = %.8f)' % (np.sqrt(test_loss) * 48.0, test_loss))
        print('Total time: ' + get_time_hhmmss(start))

        save_path = saver.save(session, model_path)
        print('Model file: ' + save_path)
        np.savez(train_history_path,
                 train_loss_history=train_loss_history,
                 valid_loss_history=valid_loss_history)
        print('Train history file: ' + train_history_path)

    new_model_epochs = plot_learning_curves(root_location, model_name)
    plt.grid()
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.ylim(0.0005, 0.01)
    plt.xlim(0, new_model_epochs)
    plt.yscale('log')
    # plt.show()

    X, _ = load(test=True)
    with graph.as_default():
        tf_x = tf.constant(X)
        with tf.variable_scope(model_variable_scope, reuse=True):
            # Pass `num_keypoints` explicitly; the original call omitted it.
            tf_p = model_pass(tf_x, num_keypoints)

    with tf.Session(graph=graph) as session:
        saver.restore(session, model_path)
        p = tf_p.eval()

    fig = plt.figure(figsize=(6, 6))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    for i in range(16):
        ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
        plot_sample(X[i], p[i], ax)
    plt.show()
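`single_hidden_layer` calls a `calc_loss` helper that is not defined in this snippet. Given that the training objective is the mean squared error over keypoint coordinates, a minimal sketch of a matching NumPy-side helper (the actual definition may differ):

import numpy as np

def calc_loss(predictions, labels):
    # Mean squared error between predicted and true keypoint coordinates,
    # mirroring the tf.reduce_mean(tf.square(...)) training objective.
    predictions = np.asarray(predictions)
    return np.mean(np.square(predictions - labels))

The `np.sqrt(test_loss) * 48.0` "test score" printed above then corresponds to the RMSE rescaled back from the [-1, 1] target range to pixels.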
    update=adam,
    update_learning_rate=theano.shared(float32(0.0003), borrow=True),
    # update_momentum=theano.shared(float32(0.001), borrow=True),
    update_beta1=0.9,
    update_beta2=0.99,
    update_epsilon=1e-06,
    on_epoch_finished=[
        # AdjustVariable('update_learning_rate', start=0.3, stop=0.05),
        # AdjustVariable('update_momentum', start=0.001, stop=0.00299),
        # EarlyStopping(patience=200),
    ],
    regression=True,
    train_split=TrainSplit(eval_size=0.00),
    y_tensor_type=T.matrix,
    verbose=1,
    batch_iterator_train=BatchIterator(3200),
    max_epochs=100)

# np.random.seed(7)
# net0_clone = clone(net0)
# net0_clone.fit(t1nn_conc_shared.get_value(), y)
# net0_clone.fit(X_encoded_shared.get_value(), y)

# Build (train_idx, valid_idx) index pairs by hand from precomputed fold labels.
cv_by_hand = [(np.where(cvFolds != fold)[0], np.where(cvFolds == fold)[0])
              for fold in np.unique(cvFolds)]

foldPred = np.zeros((t1nn_conc_shared.get_value().shape[0], 1))
bags = 10
for iter in xrange(0, bags):
    for fold in xrange(0, np.max(cvFolds)):
        np.random.seed(iter + 56)
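The bagged cross-validation loop above is cut off here. Purely for illustration, a plausible shape of one fold iteration, under the assumption that each fold's network is refit on the fold's training indices and its out-of-fold predictions are averaged over the bags (only `net0`, `cv_by_hand`, `foldPred`, `bags`, and `y` come from the code above; everything else is hypothetical and the actual code may differ):

X_all = t1nn_conc_shared.get_value()
for train_idx, valid_idx in cv_by_hand:
    net = clone(net0)  # fresh, unfitted copy with identical hyperparameters
    net.fit(X_all[train_idx], y[train_idx])
    # Accumulate out-of-fold predictions; dividing by `bags` averages them
    # across the bagging iterations of the outer loop.
    foldPred[valid_idx, 0] += net.predict(X_all[valid_idx]).ravel() / bags

Re-seeding with `iter + 56` on each fold, as the original loop does, makes each bag's weight initialisation reproducible while still differing across bags.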
            ('hidden1', DenseLayer),
            ('output', DenseLayer),
        ],
    input_shape=(None, 784),
    output_num_units=10,
    output_nonlinearity=softmax,
    eval_size=0.0,
    more_params=dict(
        hidden1_num_units=200,
    ),
    update=nesterov_momentum,
    update_learning_rate=0.02,
    update_momentum=0.9,
    batch_iterator_train=BatchIterator(batch_size=300),
    max_epochs=10,
    verbose=1)
classifiers.append(('nolearn.lasagne', clf))

RUNS = 10

for name, orig in classifiers:
    times = []
    accuracies = []
    for i in range(RUNS):
        start = time.time()
        clf = clone(orig)
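The benchmarking loop is truncated at the `clone` call. As a self-contained illustration of what such a loop presumably measures, here is a sketch of an equivalent helper that times repeated fresh fits and reports mean accuracy; the dataset names (`X_train`, `y_train`, `X_test`, `y_test`), the scoring via `accuracy_score`, and the report format are all assumptions, not the original code:

import time
import numpy as np
from sklearn.base import clone
from sklearn.metrics import accuracy_score

def benchmark(name, orig, X_train, y_train, X_test, y_test, runs=10):
    """Time `runs` fresh fits of `orig` and report mean duration and accuracy."""
    times, accuracies = [], []
    for _ in range(runs):
        start = time.time()
        clf = clone(orig)  # fresh, unfitted copy so each run trains from scratch
        clf.fit(X_train, y_train)
        times.append(time.time() - start)
        accuracies.append(accuracy_score(y_test, clf.predict(X_test)))
    print("%-16s mean time: %6.2fs, mean accuracy: %.4f"
          % (name, np.mean(times), np.mean(accuracies)))

Cloning before each run matters: refitting the same estimator object would let state from a previous run (e.g. initialised weights) leak into the timing.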