def lstm_decoder(H, y, opt, prefix='', feed_previous=False, is_reuse=None):
    """Build an LSTM decoder over ``y`` and score it with a next-token loss.

    Args:
        H: Tensor that is squeezed and used to seed the decoder state
            (presumably batch x n_hid after squeezing -- confirm with caller).
        y: Token-id tensor; it is unstacked along axis 1 into per-step
            decoder inputs (presumably batch x len -- confirm with caller).
        opt: Options object; ``n_hid``, ``n_words`` and ``embed_size`` are read.
        prefix: String prepended to the ``'lstm_decoder'`` variable scope name.
        feed_previous: Forwarded to ``embedding_rnn_decoder``. When true, the
            ``(W, b)`` projection is also handed to the decoder; otherwise
            ``None`` is passed as the output projection.
        is_reuse: ``reuse`` flag for the scope that owns ``W``, ``b`` and the
            decoder variables.

    Returns:
        A ``(loss, syn_sents, logits)`` tuple: the sequence loss, the per-step
        argmax ids stacked along axis 1, and the list of per-step logits.
    """
    step_inputs = tf.unstack(y, axis=1)

    hidden = tf.squeeze(H)
    # Original intent per the author's comment: "initialize H and C".
    # NOTE(review): TF LSTM state tuples are ordered (c, h), so this places H
    # in the first (cell) slot and zeros in the second -- confirm intended.
    initial_state = (hidden, tf.zeros_like(hidden))

    scope_name = prefix + 'lstm_decoder'

    # NOTE(review): reuse=True is hard-coded for this scope, independent of
    # ``is_reuse``; only the cell object is constructed inside it.
    with tf.variable_scope(scope_name, reuse=True):
        cell = tf.contrib.rnn.LSTMCell(opt.n_hid)

    with tf.variable_scope(scope_name, reuse=is_reuse):
        W = tf.get_variable(
            'W', [opt.n_hid, opt.n_words], initializer=weight_init)
        b = tf.get_variable('b', [opt.n_words], initializer=bias_init)

        if feed_previous:
            projection = (W, b)
        else:
            projection = None

        outputs, _ = embedding_rnn_decoder(
            decoder_inputs=step_inputs,
            initial_state=initial_state,
            cell=cell,
            feed_previous=feed_previous,
            output_projection=projection,
            num_symbols=opt.n_words,
            embedding_size=opt.embed_size)

    # Project every decoder output to vocabulary-sized logits.
    logits = [nn_ops.xw_plus_b(step_out, W, b) for step_out in outputs]

    # Greedy token per step, stacked along axis 1
    # (author's note: "outputs : batch * len").
    greedy_ids = [math_ops.argmax(step_logits, 1) for step_logits in logits]
    syn_sents = tf.stack(greedy_ids, 1)

    # Logits at step t are scored against the input token at step t + 1,
    # with a weight of 1.0 on every position.
    shifted_logits = logits[:-1]
    next_tokens = step_inputs[1:]
    uniform_weights = [
        tf.cast(tf.ones_like(token), tf.float32) for token in next_tokens
    ]
    loss = sequence_loss(shifted_logits, next_tokens, uniform_weights)

    return loss, syn_sents, logits
def build_model(self):
    """Assemble the encoder/decoder graph and, when training, loss + optimizer.

    Configuration is read from attributes on ``self`` (step counts, hidden and
    symbol sizes, and the ``use_embedding`` / ``use_attention`` /
    ``feed_previous`` / ``is_training`` flags). Every tensor that is created
    is stored back on ``self``; nothing is returned.
    """
    # ---- encoder ----
    encoder_shape = [None, self.encoder_n_steps, self.encoder_n_input]
    self.encoder_inputs = tf.placeholder(
        tf.float32, encoder_shape, name="encoder")
    encoded = self.encoder_RNN(self.encoder_inputs)
    self.encoder_outputs, self.encoder_hidden_state = encoded

    # ---- decoder placeholders ----
    # One more input than there are steps, so targets can be the inputs
    # shifted left by one position.
    self.decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(idx))
        for idx in range(self.decoder_n_steps + 1)
    ]
    self.target_weights = [
        tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(idx))
        for idx in range(self.decoder_n_steps)
    ]
    self.targets = self.decoder_inputs[1:]

    # ---- decoder cell and output projection ----
    decoder_cell = self.single_cell(self.decoder_n_hidden)
    proj_w = tf.get_variable(
        "proj_w", [self.decoder_n_hidden, self.decoder_symbols_size])
    proj_b = tf.get_variable("proj_b", [self.decoder_symbols_size])
    self.decoder_output_projection = (proj_w, proj_b)
    # NOTE(review): the projection was assigned a tuple on the line above, so
    # as written this guard never fires and the wrapper is never applied.
    if self.decoder_output_projection is None:
        decoder_cell = rnn.core_rnn_cell.OutputProjectionWrapper(
            decoder_cell, self.decoder_symbols_size)

    if not self.use_embedding:
        # Column table whose row i holds the single value float(i).
        symbol_ids = np.arange(self.decoder_symbols_size).astype(np.float32)
        self.fake_embedding = tf.constant(symbol_ids[:, np.newaxis])

    # ---- attention memory ----
    self.attns_weights = None
    if self.use_attention:
        per_step_states = [
            tf.reshape(enc_out, [-1, 1, self.decoder_n_hidden])
            for enc_out in self.encoder_outputs
        ]
        self.attention_states = tf.concat(per_step_states, 1)

    # ---- decoder: one of four variants, chosen by the two flags ----
    step_inputs = self.decoder_inputs[:self.decoder_n_steps]
    if self.use_embedding:
        if self.use_attention:
            # Batch norm on the encoder state is currently disabled; the raw
            # encoder hidden state is used unchanged.
            self.encoder_hidden_bn = self.encoder_hidden_state
            decoded = self.self_embedding_attention_decoder(
                step_inputs,
                self.encoder_hidden_bn,
                self.attention_states,
                decoder_cell,
                self.decoder_symbols_size,
                self.decoder_embedding_size,
                output_projection=self.decoder_output_projection,
                feed_previous=self.feed_previous,
                num_heads=self.num_heads,
                init_embedding=self.init_decoder_embedding)
            (self.outputs,
             self.decoder_hidden_state,
             self.attns_weights) = decoded
        else:
            decoded = embedding_rnn_decoder(
                step_inputs,
                self.encoder_hidden_state,
                decoder_cell,
                self.decoder_symbols_size,
                self.decoder_embedding_size,
                output_projection=self.decoder_output_projection,
                feed_previous=self.feed_previous)
            self.outputs, self.decoder_hidden_state = decoded
    elif self.use_attention:
        decoded = self.noembedding_attention_rnn_decoder(
            step_inputs,
            self.encoder_hidden_state,
            self.attention_states,
            decoder_cell,
            num_heads=self.num_heads)
        (self.outputs,
         self.decoder_hidden_state,
         self.attns_weights) = decoded
    else:
        decoded = self.noembedding_rnn_decoder(
            step_inputs, self.encoder_hidden_state, decoder_cell)
        self.outputs, self.decoder_hidden_state = decoded

    # ---- w*x + b on the decoder outputs ----
    # The final step is deliberately left unprojected (author's note:
    # "ignore last output, we only care 40 classes").
    weights, bias = self.decoder_output_projection
    for step in range(self.decoder_n_steps - 1):
        self.outputs[step] = tf.matmul(self.outputs[step], weights) + bias

    if self.feed_previous:
        # Softmax over every projected step (the last output is excluded).
        projected = self.outputs[:-1]
        self.logits = tf.nn.softmax(projected, dim=-1, name="output_softmax")
        # NOTE(review): the source this was recovered from had lost its
        # indentation; confirm this trim belongs under ``feed_previous``.
        if self.attns_weights is not None:
            self.attns_weights = self.attns_weights[:-1]

    # ---- cost function ----
    if self.is_training:
        # NOTE(review): the last entry of self.outputs was not projected by
        # the loop above yet is still passed in here -- confirm the final
        # target weight masks it out.
        self.cost = sequence_loss(
            self.outputs, self.targets, self.target_weights)
        # Adam is the active optimizer; a momentum-SGD alternative was
        # previously tried here and left disabled.
        adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.optimizer = adam.minimize(self.cost)