def __init__(self, in_channels, out_channels_1, out_channels_2, KT_1, KT_2,
             num_nodes, batch_size, frames, frames_0, num_generator):
    super(Baseline, self).__init__()
    self.dropout = nn.Dropout(0.1)
    self.st_1 = st_conv_block(in_channels, out_channels_1, out_channels_2, KT_1,
                              num_nodes, batch_size, frames, frames_0)
    self.st_2 = st_conv_block(out_channels_2, out_channels_2, out_channels_2, KT_2,
                              num_nodes, batch_size,
                              frames - 4 * (KT_1 - 1), frames_0 - 2 * (KT_1 - 1))
    self.output_layer = output_layer(out_channels_2, frames - 4 * (KT_1 + KT_2 - 2),
                                     num_nodes, num_generator)
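# A hypothetical forward pass matching the layers defined above (a sketch, not taken from
# the source). Each st_conv_block is assumed to shrink the temporal dimension by
# 4 * (KT - 1), which is consistent with st_2 receiving frames - 4 * (KT_1 - 1) frames and
# output_layer receiving frames - 4 * (KT_1 + KT_2 - 2) frames in the constructor.
def forward(self, x):
    x = self.st_1(x)     # assumed shape: (batch, C2, frames - 4*(KT_1 - 1), nodes)
    x = self.dropout(x)
    x = self.st_2(x)     # assumed shape: (batch, C2, frames - 4*(KT_1 + KT_2 - 2), nodes)
    return self.output_layer(x)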
def step_through_session(self, X, attention_mask, return_last_with_hidden_states=False,
                         return_softmax=False, reuse=False):
    """
    Train for a batch of sessions in the HRED. X can be a 3-D tensor (steps, batch, vocab).

    :param X: The input sessions. Lists of ints, where the ints correspond to words.
        Shape: (max_length x batch_size)
    :return: The logits (or the softmax output if return_softmax is set); if
        return_last_with_hidden_states is set, the last output together with the query
        encoder hidden states.
    """
    num_of_steps = tf.shape(X)[0]
    batch_size = tf.shape(X)[1]

    # Make embeddings for X
    embedder = layers.embedding_layer(X, vocab_dim=self.vocab_size,
                                      embedding_dim=self.embedding_dim, reuse=reuse)

    # Mask used to reset the query encoder when the symbol is the End-Of-Query (EoQ)
    # symbol, and to retain the state of the session encoder when the EoQ symbol has not
    # been seen yet. Shape: [max_length x batch_size x 1].
    eoq_mask = tf.expand_dims(tf.cast(tf.not_equal(X, self.eoq_symbol), tf.float32), 2)

    # Compute the encoded query state. tf.scan repeatedly applies gru_layer_with_reset to
    # (embedder, eoq_mask) and initializes the GRU layer with the zero tensor. In the
    # query encoder we need the possibility to reset the GRU layer, namely after the EoQ
    # symbol has been reached.
    query_encoder_packed = tf.scan(
        lambda result_prev, x: layers.gru_layer_with_reset(
            result_prev[1],  # h_reset_prev
            x,
            name='forward_query_encoder',
            x_dim=self.embedding_dim,
            y_dim=self.query_dim,
            reuse=reuse
        ),
        (embedder, eoq_mask),  # scan does not accept multiple tensors, so we pack and unpack
        initializer=tf.zeros((2, batch_size, self.query_dim))
    )
    query_encoder, hidden_query = tf.unstack(query_encoder_packed, axis=1)
    # query_encoder = tf.nn.dropout(query_encoder, keep_prob=0.5)

    # This part does the same, yet for the session encoder. Here we need the possibility
    # to keep the current state, namely if we have not seen a full query yet. If we have,
    # the session encoder state is updated.
    # session_encoder_packed = tf.scan(
    #     lambda result_prev, x: layers.gru_layer_with_retain(
    #         result_prev[1],  # h_retain_prev
    #         x,
    #         name='session_encoder',
    #         x_dim=self.query_dim,  # 2*
    #         y_dim=self.session_dim,
    #         reuse=reuse
    #     ),
    #     (query_encoder, eoq_mask),
    #     initializer=tf.zeros((2, batch_size, self.session_dim))
    # )
    # session_encoder, hidden_session = tf.unstack(session_encoder_packed, axis=1)
    # session_encoder = layers.gnn_attention(session_encoder, attention_mask, query_encoder_gnn,
    #                                        self.session_dim, self.query_dim, reuse=reuse)

    # This part makes the decoder for a step. The decoder uses the word embeddings, the
    # reset/retain vector and the session encoder, so we give three variables to the
    # decoder GRU.
    # The decoder GRU is somewhat special, as it incorporates the session_encoder into
    # each hidden state update.
    # decoder = tf.scan(
    #     lambda result_prev, x: layers.gru_layer_with_state_reset(
    #         result_prev,
    #         x,
    #         name='decoder',
    #         x_dim=self.embedding_dim,
    #         h_dim=self.query_dim,
    #         y_dim=self.decoder_dim,
    #         reuse=reuse
    #     ),
    #     (embedder, eoq_mask, query_encoder),
    #     # scan does not accept multiple tensors, so we pack and unpack
    #     initializer=tf.zeros((batch_size, self.decoder_dim))
    # )

    # After the decoder we add an additional output layer
    flatten_decoder = tf.reshape(query_encoder, (-1, self.decoder_dim))
    flatten_embedder = tf.reshape(embedder, (-1, self.embedding_dim))
    # flatten_session_encoder = tf.reshape(session_encoder, (-1, self.session_dim))

    # Attention (disabled): expand the query encoder states from
    # (num_of_steps x batch_size x query_dim) to
    # (num_of_steps x batch_size x num_of_steps x query_dim) so every decoder step can
    # attend over all encoder states.
    # query_encoder_T = tf.transpose(query_encoder, perm=[1, 0, 2])
    # query_decoder_T = tf.transpose(decoder, perm=[1, 0, 2])
    # query_encoder_expanded = tf.tile(tf.expand_dims(query_encoder, 2), (1, 1, num_of_steps, 1))
    # query_encoder_expanded = query_encoder_expanded * tf.tile(
    #     tf.expand_dims(attention_mask, 3), (1, 1, 1, self.query_dim))  # 2*
    # flatten_decoder_with_attention = layers.attention_session(
    #     query_encoder_expanded, flatten_decoder,
    #     enc_dim=self.query_dim, dec_dim=self.decoder_dim, reuse=reuse)  # 2*

    output_layer = layers.output_layer(
        flatten_embedder,
        flatten_decoder,
        x_dim=self.embedding_dim,
        h_dim=self.decoder_dim,  # 2*
        y_dim=self.output_dim,
        reuse=reuse
    )

    # We compute the output logits based on the output layer above
    flatten_logits, self.l2_loss = layers.logits_layer(
        output_layer,
        self.l2_loss,
        x_dim=self.output_dim,
        y_dim=self.vocab_size,
        reuse=reuse
    )
    logits = tf.reshape(flatten_logits, (num_of_steps, batch_size, self.vocab_size))

    # Return either the softmax from this step or just the logits for the loss function
    if return_softmax:
        output = self.softmax(logits)
    else:
        output = logits

    # If we want to continue decoding with single_step we need the hidden states of all
    # GRU layers. There is no reset decoder output; all query encoder states are returned
    # for the attention mechanism.
    if return_last_with_hidden_states:
        return output[-1, :, :], hidden_query[:, :, :]
    else:
        return output
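# A minimal, self-contained sketch (not from the source) of the reset/retain masking that
# gru_layer_with_reset and gru_layer_with_retain are assumed to implement: multiplying the
# previous hidden state by the EoQ mask (0 exactly at an EoQ token) restarts the query
# encoder for each new query, while the retain variant used by the session encoder would
# instead compute h = mask * h_new + (1 - mask) * h_old to freeze the state between
# queries. A toy tanh cell stands in for the GRU; shapes are hypothetical.
import numpy as np

def toy_cell(h_prev, x, dim):
    # Stand-in for a GRU update; any recurrent update works for this illustration.
    return np.tanh(h_prev + x[:, None] * np.ones((1, dim)))

def scan_with_reset(tokens, eoq_symbol, dim=4):
    batch = tokens.shape[1]
    h = np.zeros((batch, dim))
    states = []
    for x in tokens:                                      # loop over time steps
        mask = (x != eoq_symbol).astype(float)[:, None]   # 0 exactly at an EoQ token
        h = toy_cell(mask * h, x, dim)                    # reset: zero the state first
        states.append(h)
    return np.stack(states)

tokens = np.array([[3, 5], [1, 0], [7, 2]])  # (steps=3, batch=2); 0 plays the EoQ role
print(scan_with_reset(tokens, eoq_symbol=0).shape)  # (3, 2, 4)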
def single_step(self, X, prev_hidden_query_states, prev_hidden_session, prev_hidden_decoder,
                reuse=True):
    """
    Performs a single step in the HRED. X can be a 2-D tensor (batch, vocab); this can be
    used for beam search.

    :param X: The input words for this step. Lists of ints, where the ints correspond
        to words. Shape: (batch_size)
    :param prev_hidden_query_states: All previous hidden states of the query encoder,
        initialized with zeros. Shape: (steps x batch_size x query_dim)
    :param prev_hidden_session: The previous hidden state of the session encoder,
        initialized with zeros. Shape: (2 x session_dim)
    :param prev_hidden_decoder: The previous hidden state of the decoder, initialized
        with zeros. Shape: (output_dim)
    :return: The softmax output and the updated hidden states.
    """
    # With the attention implementation, prev_hidden_query_states contains not only the
    # previous query encoder state but all previous states, so we take the last query
    # state here.
    prev_hidden_query = prev_hidden_query_states[-1, :, :]

    # Make embeddings for X
    embedder = layers.embedding_layer(X, vocab_dim=self.vocab_size,
                                      embedding_dim=self.embedding_dim, reuse=reuse)

    # Mask used to reset the query encoder when the symbol is the End-Of-Query (EoQ)
    # symbol, and to retain the state of the session encoder when the EoQ symbol has not
    # been seen yet.
    eoq_mask = tf.cast(tf.not_equal(X, self.eoq_symbol), tf.float32)

    query_encoder, hidden_query = tf.unstack(layers.gru_layer_with_reset(
        prev_hidden_query,  # h_reset_prev
        (embedder, eoq_mask),
        name='forward_query_encoder',
        x_dim=self.embedding_dim,
        y_dim=self.query_dim,
        reuse=reuse
    ))

    # This part does the same, yet for the session encoder. Here we need the possibility
    # to keep the current state, namely if we have not seen a full query yet. If we have,
    # the session encoder state is updated.
    session_encoder, hidden_session = tf.unstack(layers.gru_layer_with_retain(
        prev_hidden_session,  # h_retain_prev
        (query_encoder, eoq_mask),
        name='session_encoder',
        x_dim=self.query_dim,
        y_dim=self.session_dim,
        reuse=reuse
    ))

    # This part makes the decoder for a step. The decoder uses the word embeddings, the
    # reset/retain vector and the session encoder, so we give three variables to the
    # decoder GRU.
    # The decoder GRU is somewhat special, as it incorporates the session_encoder into
    # each hidden state update.
    hidden_decoder = layers.gru_layer_with_state_reset(
        prev_hidden_decoder,
        (embedder, eoq_mask, session_encoder),
        name='decoder',
        x_dim=self.embedding_dim,
        h_dim=self.session_dim,
        y_dim=self.decoder_dim,
        reuse=reuse
    )
    decoder = hidden_decoder
    flatten_decoder = tf.reshape(decoder, (-1, self.decoder_dim))

    # Attention layer: bring the stored query encoder states into
    # (batch_size x steps x query_dim) so the decoder can attend over all of them.
    num_of_atten_states = tf.shape(prev_hidden_query_states)[0]  # kept for shape debugging
    query_encoder_expanded = tf.transpose(prev_hidden_query_states, [1, 0, 2])
    flatten_decoder_with_attention = layers.attention_step(
        query_encoder_expanded, flatten_decoder,
        enc_dim=self.query_dim, dec_dim=self.decoder_dim, reuse=reuse)

    # After the decoder we add an additional output layer
    output = layers.output_layer(
        embedder,
        flatten_decoder_with_attention,
        x_dim=self.embedding_dim,
        h_dim=self.decoder_dim + self.query_dim,
        y_dim=self.output_dim,
        reuse=reuse
    )

    # We compute the output logits based on the output layer above
    logits = layers.logits_layer(
        output,
        x_dim=self.output_dim,
        y_dim=self.vocab_size,
        reuse=reuse
    )
    softmax = self.softmax(logits)

    return softmax, \
        tf.concat([prev_hidden_query_states, tf.expand_dims(hidden_query, 0)], 0), \
        hidden_session, hidden_decoder
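# A minimal NumPy sketch (an assumption, not the source implementation of
# layers.attention_step) of what a single attention step plausibly computes: score each
# stored query encoder state against the current decoder state with a hypothetical
# bilinear matrix W, softmax the scores, form a context vector, and concatenate it to the
# decoder state. The result has width dec_dim + enc_dim, matching the
# h_dim=self.decoder_dim + self.query_dim passed to the output layer above.
import numpy as np

def softmax_np(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

def attention_step_sketch(enc_states, dec_state, W):
    # enc_states: (batch, steps, enc_dim); dec_state: (batch, dec_dim);
    # W: (dec_dim, enc_dim) bilinear scoring matrix (hypothetical parameterization).
    scores = np.einsum('bd,de,bse->bs', dec_state, W, enc_states)  # (batch, steps)
    alpha = softmax_np(scores)                                     # attention weights
    context = np.einsum('bs,bse->be', alpha, enc_states)           # (batch, enc_dim)
    return np.concatenate([dec_state, context], axis=1)            # (batch, dec_dim + enc_dim)

enc = np.random.randn(2, 5, 3)  # batch=2, 5 stored query states, query_dim=3
dec = np.random.randn(2, 4)     # decoder_dim=4
W = np.random.randn(4, 3)
print(attention_step_sketch(enc, dec, W).shape)  # (2, 7)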
def inference(self, X, Y, sequence_max_length, attention=False):
    """
    Function to run the model.

    :param X: data batch [batch_size x max_seq]
    :param Y: target batch
    :param sequence_max_length: max_seq of the batch
    :param attention: whether to use the attention mechanism
    :return: logits [batch_size x max_seq x vocab_size]
    """
    with tf.variable_scope('X_embedder', reuse=tf.AUTO_REUSE):
        embedder = layers.get_embedding_layer(
            vocabulary_size=self.vocab_size,
            embedding_dims=self.embedding_dim,
            data=X,
            scope='X_embedder')

    # For attention, pass the embeddings through a bidirectional RNN to get the annotations
    if attention:
        with tf.variable_scope('gru_bidirectional', reuse=tf.AUTO_REUSE):
            self.annotations = layers.bidirectional_layer(
                embedder, self.query_dim, self.batch_size)

    # Create the query encoder state
    self.initial_query_state = self.query_encoder.compute_state(
        x=embedder)  # batch_size x query_dims

    # Create the session state
    self.initial_session_state = self.session_encoder.compute_state(
        x=self.initial_query_state)  # batch_size x session_dims

    # Create the initial decoder state
    self.initial_decoder_state = layers.decoder_initialise_layer(
        self.initial_session_state[0],
        self.decoder_dim)  # batch_size x decoder_dims

    # Run the decoder and retrieve outputs and states for all timesteps
    self.decoder_outputs = self.decoder_grucell.compute_prediction(  # batch_size x timesteps x output_size
        y=Y,
        state=self.initial_decoder_state,
        batch_size=self.batch_size,
        vocab_size=self.vocab_size)

    if attention:
        # For attention, calculate the context vector
        self.context = layers.get_context_attention(
            self.annotations,
            self.decoder_outputs,
            self.decoder_dim,
            self.query_dim,
            sequence_max_length,
            self.batch_size)  # batch_size x max_steps

        # Concatenate the context vector to the decoder state; in a GRU, states = outputs
        self.decoder_states_attention = tf.concat(
            [self.decoder_outputs, tf.expand_dims(self.context, 2)],
            axis=2)  # TODO: check this

        # Calculate the omega function w(d_n-1, w_n-1) for attention
        with tf.variable_scope('output_layer', reuse=tf.AUTO_REUSE):
            omega = layers.output_layer(
                embedding_dims=self.embedding_dim,
                vocabulary_size=self.vocab_size,
                num_hidden=self.decoder_dim + 1,
                state=self.decoder_states_attention,
                word=Y)
    else:
        with tf.variable_scope('output_layer', reuse=tf.AUTO_REUSE):
            omega = layers.output_layer(
                embedding_dims=self.embedding_dim,
                vocabulary_size=self.vocab_size,
                num_hidden=self.decoder_dim,
                state=self.decoder_outputs,
                word=Y)

    # Get the output word embedding matrix for the decoder output
    with tf.variable_scope('ov_embedder', reuse=tf.AUTO_REUSE):
        ov_embedder = tf.get_variable(
            name='Ov_embedder',
            shape=[self.vocab_size, self.embedding_dim],
            initializer=tf.random_normal_initializer(mean=0.0, stddev=1.0))

    # Dot product between omega and the embeddings of the vocabulary matrix
    logits = tf.einsum('bse,ve->bsv', omega, ov_embedder)

    return logits
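# The final projection above scores each decoder-side vector against every row of the
# output embedding matrix with a single einsum. A standalone NumPy illustration of that
# contraction, with toy sizes (batch b=2, steps s=3, embedding e=4, vocab v=5):
import numpy as np

omega = np.random.randn(2, 3, 4)     # (batch, steps, embedding_dim)
ov_embedder = np.random.randn(5, 4)  # (vocab_size, embedding_dim)

logits = np.einsum('bse,ve->bsv', omega, ov_embedder)
# Equivalent to a dot product of each omega vector with each vocabulary embedding:
assert np.allclose(logits, omega @ ov_embedder.T)
print(logits.shape)  # (2, 3, 5)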
def convolutional_neural_network(data):
    # NAMING CONVENTION: conv3s32n is a convolutional layer with filter size 3x3 and
    # number of filters = 32
    conv3s32n = layers.conv_layer(data, params.weights(depth=3), params.biases())
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(), params.biases())
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(), params.biases())

    # NAMING CONVENTION: pool2w2s is a pooling layer with 2x2 window size and stride = 2
    pool2w2s = layers.pool_layer(conv3s32n)

    conv3s32n = layers.conv_layer(pool2w2s, params.weights(), params.biases())
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(), params.biases())
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(), params.biases())

    pool2w2s = layers.pool_layer(conv3s32n)

    conv3s32n = layers.conv_layer(pool2w2s, params.weights(n_filters=128), params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))

    '''conv3s32n = layers.conv_layer(pool2w2s, params.weights(n_filters=64), params.biases(n_filters=64))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    pool2w2s = layers.pool_layer(conv3s32n)
    conv3s32n = layers.conv_layer(pool2w2s, params.weights(depth=64, n_filters=128), params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))'''

    # pool2w2s = layers.pool_layer(conv3s32n)
    # conv3s32n = layers.conv_layer(pool2w2s, params.weights(depth=128, n_filters=256), params.biases(n_filters=256))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=256, n_filters=256), params.biases(n_filters=256))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=256, n_filters=256), params.biases(n_filters=256))

    # NAMING CONVENTION: fully connected layers are just indexed
    fc1 = layers.full_layer(conv3s32n, params.fc_weights(conv3s32n, 1024), params.biases(1024), keep_prob)
    fc2 = layers.full_layer(fc1, params.fc_weights(fc1, 1024), params.biases(1024), keep_prob)
    fc3 = layers.full_layer(fc2, params.fc_weights(fc2, 1024), params.biases(1024), keep_prob)

    # The output layer takes the last fully connected layer
    output = layers.output_layer(fc3, params.fc_weights(fc3, n_classes), params.biases(n_classes))

    return output
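# A minimal sketch of what the conv_layer / pool_layer helpers plausibly look like in
# TensorFlow 1.x, inferred only from the naming convention above (3x3 filters, 2x2 max
# pooling with stride 2); the real layers module and params.weights / params.biases
# helpers may differ.
import tensorflow as tf

def conv_layer_sketch(x, weights, biases):
    # 3x3 convolution with stride 1, 'SAME' padding, and a ReLU activation
    conv = tf.nn.conv2d(x, weights, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(conv, biases))

def pool_layer_sketch(x):
    # 2x2 max pooling with stride 2
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')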