# Input -> LSTM -> Output
inputs  = tf.placeholder(tf.int32,   [batch_size, maxlength])
labels  = tf.placeholder(tf.float32, [batch_size, labelspace])
lengths = tf.placeholder(tf.int32,   [batch_size])

# RNN
lstm = tf.nn.rnn_cell.LSTMCell(
    hiddendim,
    initializer=tf.contrib.layers.xavier_initializer(seed=20160501))
lstm = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=dropout)

# Prediction (the final LSTM state is [c; h], hence 2*hiddendim)
output_layer = Layer.W(2 * hiddendim, labelspace, 'Output')
output_bias  = Layer.b(labelspace, 'OutputBias')

outputs, fstate = tf.nn.dynamic_rnn(lstm, embeddings.lookup(inputs),
                                    sequence_length=lengths,
                                    dtype=tf.float32)
logits = tf.matmul(fstate, output_layer) + output_bias
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))

## Learning ##
# Optimizer: tf.train.AdamOptimizer or tf.train.RMSPropOptimizer
optimizer = tf.train.AdamOptimizer()
## Optional gradient clipping:
## tvars = tf.trainable_variables()
## grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 5.0)
## train_op = optimizer.apply_gradients(zip(grads, tvars))
train_op = optimizer.minimize(loss)

correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
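The listing above only builds the graph; the placeholders still have to be fed at run time. As a minimal sketch (not part of the original listing), assuming a tf.Session named sess and NumPy batches batch_inputs, batch_labels, and batch_lengths, one training step would look like:

# Sketch of one training step; sess, batch_inputs, batch_labels,
# and batch_lengths are assumed names, not from the original code.
_, batch_loss = sess.run(
    [train_op, loss],
    feed_dict={inputs: batch_inputs,
               labels: batch_labels,
               lengths: batch_lengths})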
onehot = True
inputdim = maxlength * vocabsize if onehot else maxlength * embeddingdim

# Define embeddings matrix
embeddings = Embedding(vocabsize, one_hot=onehot, embedding_size=embeddingdim)

# Input data
dataset = tf.placeholder(tf.int32,   shape=[batch_size, maxlength],  name='Train')
labels  = tf.placeholder(tf.float32, shape=[batch_size, labelspace], name='Label')

# Model
hidden_layer = Layer.W(inputdim, hiddendim, 'Hidden')
hidden_bias  = Layer.b(hiddendim, 'HiddenBias')

# Prediction
output_layer = Layer.W(hiddendim, labelspace, 'Output')
output_bias  = Layer.b(labelspace, 'OutputBias')

embedded = tf.reshape(embeddings.lookup(dataset), [batch_size, inputdim])
forward  = tf.nn.relu(tf.matmul(embedded, hidden_layer) + hidden_bias)
dropout  = tf.nn.dropout(forward, 0.5)
logits   = tf.matmul(dropout, output_layer) + output_bias
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))
train_op = tf.train.AdamOptimizer().minimize(loss)
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # Model params
    Trainer = Training(sess, correct_prediction, train_op, loss, dataset, labels)
    # Run training
    Trainer.train(training, training_labels, development, development_labels,
                  generate_batch)
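Layer.W, Layer.b, Embedding, and Training are helpers from the accompanying code and are not defined in these listings. Purely as an assumption about what they do, a plausible minimal version of the Layer helpers is:

# Hypothetical reconstruction of the Layer helpers used above;
# the originals live in the accompanying code.
class Layer:
    @staticmethod
    def W(in_dim, out_dim, name):
        # Xavier-initialized weight matrix
        return tf.get_variable(
            name, [in_dim, out_dim],
            initializer=tf.contrib.layers.xavier_initializer())

    @staticmethod
    def b(dim, name):
        # Zero-initialized bias vector
        return tf.get_variable(
            name, [dim], initializer=tf.constant_initializer(0.0))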
# RNN
dropout = 0.75
lstm = tf.nn.rnn_cell.LSTMCell(
    hiddendim,
    initializer=tf.contrib.layers.xavier_initializer(seed=20160501))
lstm = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=dropout)

# Encode the world with stacked VALID convolutions
# (each layer shrinks the grid by 2: 32x32 -> 22x22, or 18x18 -> 8x8)
l1 = conv2d('l1', cur_world, W['cl1'], B['cb1'], padding='VALID')  # 32->30, 18->16
l2 = conv2d('l2', l1, W['cl2'], B['cb2'], padding='VALID')         # 30->28, 16->14
l3 = conv2d('l3', l2, W['cl3'], B['cb3'], padding='VALID')         # 28->26, 14->12
l4 = conv2d('l4', l3, W['cl4'], B['cb4'], padding='VALID')         # 26->24, 12->10
l5 = conv2d('l5', l4, W['cl5'], B['cb5'], padding='VALID')         # 24->22, 10->8

outputs, fstate = tf.nn.dynamic_rnn(lstm, embeddings.lookup(inputs),
                                    sequence_length=lengths,
                                    dtype=tf.float32)

# Concatenate the RNN final state with the flattened CNN representation
logits = tf.matmul(
    tf.concat(1, [fstate,
                  tf.reshape(l5, [batch_size,
                                  final_size * final_size * filters])]),
    W['out']) + B['out']

correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(next_world, 1))
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, next_world))
optimizer = tf.train.AdamOptimizer()
train_op = optimizer.minimize(loss)
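The conv2d helper is likewise defined elsewhere in the accompanying code. Given the per-layer comments (each VALID layer shrinks the grid by 2), a 3x3 stride-1 convolution with bias and ReLU is consistent; the following is a sketch under that assumption, not the original definition:

# Assumed conv2d helper: stride-1 convolution (3x3 per W's shape
# to match the size comments above) + bias + ReLU.
def conv2d(name, x, W, b, padding='SAME'):
    conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding=padding)
    return tf.nn.relu(tf.nn.bias_add(conv, b), name=name)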