# Variant with two sentence inputs (SENT1_batch, SENT2_batch).
# Collect the layers that carry an L2 penalty.
l2_layers = []
for layer in lasagne.layers.get_all_layers(output_layer):
    if isinstance(layer, (DCNN.embeddings.SentenceEmbeddingLayer,
                          DCNN.convolutions.Conv1DLayerSplitted,
                          lasagne.layers.DenseLayer)):
        l2_layers.append(layer)

# Training objective: mean categorical cross-entropy plus a per-layer weighted L2 penalty
loss_train = lasagne.objectives.aggregate(
    lasagne.objectives.categorical_crossentropy(
        lasagne.layers.get_output(output_layer), y_batch),
    mode='mean') \
    + lasagne.regularization.regularize_layer_params_weighted(
        dict(zip(l2_layers, hyperparas["L2"])),
        lasagne.regularization.l2)

# Validating/testing: deterministic pass (disables dropout)
loss_eval = lasagne.objectives.categorical_crossentropy(
    lasagne.layers.get_output(output_layer, deterministic=True), y_batch)
pred = T.argmax(lasagne.layers.get_output(output_layer, deterministic=True), axis=1)
correct_predictions = T.eq(pred, y_batch)

# In the Matlab code, Kalchbrenner works with an AdaGrad reset mechanism;
# if the parameter --adagrad_reset has value 0, no reset will be applied.
all_params = lasagne.layers.get_all_params(output_layer)
updates, accumulated_grads = utils.adagrad(loss_train, all_params, hyperparas['learning_rate'])
#updates = lasagne.updates.adagrad(loss_train, all_params, hyperparas['learning_rate'])

train_model = theano.function(inputs=[SENT1_batch, SENT2_batch, y_batch],
                              outputs=loss_train, updates=updates,
                              on_unused_input='warn')
valid_model = theano.function(inputs=[SENT1_batch, SENT2_batch, y_batch],
                              outputs=correct_predictions, on_unused_input='warn')
test_model = theano.function(inputs=[SENT1_batch, SENT2_batch, y_batch],
                             outputs=correct_predictions, on_unused_input='warn')

###############
# TRAIN MODEL #
###############
print('Started training')
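# Hedged sketch of what utils.adagrad above presumably implements. utils.py is
# not shown in this section, so this is an assumption, not the author's code:
# plain AdaGrad that additionally returns its accumulator shared variables, so
# the caller can later zero them (Kalchbrenner's reset mechanism).
import numpy as np
import theano
import theano.tensor as T

def adagrad_sketch(loss, params, learning_rate, epsilon=1e-6):
    grads = T.grad(loss, params)
    # One accumulator of squared gradients per parameter, initialised to zero
    accumulators = [theano.shared(np.zeros(p.get_value(borrow=True).shape,
                                           dtype=theano.config.floatX))
                    for p in params]
    updates = []
    for p, g, acc in zip(params, grads, accumulators):
        acc_new = acc + g ** 2  # accumulate squared gradients
        updates.append((acc, acc_new))
        # Scale the step for each parameter by its accumulated gradient history
        updates.append((p, p - learning_rate * g / T.sqrt(acc_new + epsilon)))
    return updates, accumulators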
# Variant with a single sentence input (X_batch).
# Kalchbrenner uses a fine-grained L2 regularization in the Matlab code;
# the default values are taken from the Matlab code.

# Training objective
l2_layers = []
for layer in lasagne.layers.get_all_layers(output_layer):
    if isinstance(layer, (DCNN.embeddings.SentenceEmbeddingLayer,
                          DCNN.convolutions.Conv1DLayerSplitted,
                          lasagne.layers.DenseLayer)):
        l2_layers.append(layer)
loss_train = lasagne.objectives.aggregate(
    lasagne.objectives.categorical_crossentropy(
        lasagne.layers.get_output(output_layer, X_batch), y_batch),
    mode='mean') \
    + lasagne.regularization.regularize_layer_params_weighted(
        dict(zip(l2_layers, hyperparas["L2"])),
        lasagne.regularization.l2)

# Validating/testing
loss_eval = lasagne.objectives.categorical_crossentropy(
    lasagne.layers.get_output(output_layer, X_batch, deterministic=True), y_batch)
pred = T.argmax(lasagne.layers.get_output(output_layer, X_batch, deterministic=True), axis=1)
correct_predictions = T.eq(pred, y_batch)

# In the Matlab code, Kalchbrenner works with an AdaGrad reset mechanism;
# if the parameter --adagrad_reset has value 0, no reset will be applied.
all_params = lasagne.layers.get_all_params(output_layer)
updates, accumulated_grads = utils.adagrad(loss_train, all_params, hyperparas['learning_rate'])
#updates = lasagne.updates.adagrad(loss_train, all_params, hyperparas['learning_rate'])

train_model = theano.function(inputs=[X_batch, y_batch], outputs=loss_train, updates=updates)
valid_model = theano.function(inputs=[X_batch, y_batch], outputs=correct_predictions)
test_model = theano.function(inputs=[X_batch, y_batch], outputs=correct_predictions)

###############
# TRAIN MODEL #
###############
print('Started training')
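# Hypothetical helper showing how the accumulated_grads returned above could be
# used to implement the --adagrad_reset behaviour described in the comment (a
# value of 0 disables the reset). The helper name and the hyperparas key in the
# usage note below are illustrative assumptions, not the author's exact code.
import numpy as np

def maybe_reset_adagrad(accumulated_grads, epoch, reset_every):
    # Zero the AdaGrad accumulators every `reset_every` epochs; 0 disables the reset.
    if reset_every > 0 and epoch % reset_every == 0:
        for acc in accumulated_grads:
            acc.set_value(np.zeros_like(acc.get_value()))

# e.g. inside the epoch loop:
#     maybe_reset_adagrad(accumulated_grads, epoch, hyperparas['adagrad_reset'])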