Example #1
import torch.nn as nn

# st_conv_block and output_layer are defined elsewhere in this repo.
class Baseline(nn.Module):
    def __init__(self, in_channels, out_channels_1, out_channels_2, KT_1, KT_2,
                 num_nodes, batch_size, frames, frames_0, num_generator):
        super(Baseline, self).__init__()
        self.dropout = nn.Dropout(0.1)
        self.st_1 = st_conv_block(in_channels, out_channels_1, out_channels_2,
                                  KT_1, num_nodes, batch_size, frames,
                                  frames_0)
        # st_2 receives the temporal lengths left after st_1: frames shrinks
        # by 4 * (KT_1 - 1) and frames_0 by 2 * (KT_1 - 1).
        self.st_2 = st_conv_block(out_channels_2, out_channels_2,
                                  out_channels_2, KT_2, num_nodes, batch_size,
                                  frames - 4 * (KT_1 - 1),
                                  frames_0 - 2 * (KT_1 - 1))
        self.output_layer = output_layer(out_channels_2,
                                         frames - 4 * (KT_1 + KT_2 - 2),
                                         num_nodes, num_generator)
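
The frame bookkeeping in this constructor is the part that is easy to get wrong: each st_conv_block shortens the frames axis by 4 * (KT - 1), which is why st_2 and output_layer receive progressively smaller frame counts. A minimal sketch of that arithmetic (the concrete numbers are hypothetical):

def frames_after_block(frames, KT):
    # Temporal length left after one st_conv_block, per the constructor above.
    return frames - 4 * (KT - 1)

frames, KT_1, KT_2 = 24, 3, 3                        # hypothetical values
after_st_1 = frames_after_block(frames, KT_1)        # 24 - 8 = 16
after_st_2 = frames_after_block(after_st_1, KT_2)    # 16 - 8 = 8
assert after_st_2 == frames - 4 * (KT_1 + KT_2 - 2)  # matches the output_layer argument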
Example #2
    def step_through_session(self, X, attention_mask, return_last_with_hidden_states=False, return_softmax=False, reuse=False):
        """
        Train for a batch of sessions in the HRED. X can be a 3-D tensor
        (steps, batch, vocab).

        :param X: The input sessions: lists of ints, where each int corresponds to a word.
        Shape: (max_length x batch_size)
        :return:
        """

        num_of_steps = tf.shape(X)[0]
        batch_size = tf.shape(X)[1]


        # Making embeddings for x
        embedder = layers.embedding_layer(X, vocab_dim=self.vocab_size, embedding_dim=self.embedding_dim, reuse=reuse)

        # Mask used to reset the query encoder when the symbol is the End-Of-Query (EoQ) symbol, and to retain
        # the state of the session encoder while the EoQ symbol has not yet been seen.
        eoq_mask = tf.expand_dims(tf.cast(tf.not_equal(X, self.eoq_symbol), tf.float32), 2)

        # eoq mask has size [MAX LEN x BATCH SIZE] --> we want to loop over batch size

        # BATCH_SIZE = 80
        # MAX_LEN = 50  # TODO: this shouldn't be as local

        # print((embedder, eoq_mask))
        # Computes the encoded query state. tf.scan repeatedly applies the gru_layer_with_reset
        # function to (embedder, eoq_mask) and initializes the GRU layer with a zero tensor.
        # In the query encoder we need the possibility to reset the GRU layer, namely after the EoQ symbol
        # has been reached.
        query_encoder_packed = tf.scan(
            lambda result_prev, x: layers.gru_layer_with_reset(
                result_prev[1],  # h_reset_prev
                x,
                name='forward_query_encoder',
                x_dim=self.embedding_dim,
                y_dim=self.query_dim,
                reuse=reuse
            ),
            (embedder, eoq_mask),  # elems; the carried state is packed into one (2, batch, dim) tensor below
            initializer=tf.zeros((2, batch_size, self.query_dim))
        )
        # print(tf.shape(query_encoder_packed))

        query_encoder, hidden_query = tf.unstack(query_encoder_packed, axis=1)
        # query_encoder = tf.nn.dropout(query_encoder, keep_prob=0.5)



        # This part does the same, but for the session encoder. Here we need the possibility to keep the current
        # state if we have not yet seen a full query; once we have, update the session encoder state.
        # session_encoder_packed = tf.scan(
        #     lambda result_prev, x: layers.gru_layer_with_retain(
        #         result_prev[1],  # h_retain_prev
        #         x,
        #         name='session_encoder',
        #         x_dim=self.query_dim,  # 2*
        #         y_dim=self.session_dim,
        #         reuse=reuse
        #     ),
        #     (query_encoder, eoq_mask),
        #     initializer=tf.zeros((2, batch_size, self.session_dim))
        # )
        #
        # session_encoder, hidden_session = tf.unstack(session_encoder_packed, axis=1)
        # session_encoder = layers.gnn_attention(session_encoder, attention_mask, query_encoder_gnn, self.session_dim,
        #                                        self.query_dim, reuse=reuse)

        # This part builds the decoder for a step. The decoder uses the word embeddings, the reset/retain vector
        # and the session encoder, so we give three variables to the decoder GRU. The decoder GRU is somewhat
        # special, as it incorporates the session_encoder into each hidden state update.
        # decoder = tf.scan(
        #     lambda result_prev, x: layers.gru_layer_with_state_reset(
        #         result_prev,
        #         x,
        #         name='decoder',
        #         x_dim=self.embedding_dim,
        #         h_dim=self.query_dim,
        #         y_dim=self.decoder_dim,
        #         reuse=reuse
        #     ),
        #     (embedder, eoq_mask, query_encoder),
        #     # scan does not accept multiple tensors so we need to pack and unpack
        #     initializer=tf.zeros((batch_size, self.decoder_dim))
        # )

        # After the decoder we would add an additional output layer. With the decoder scan above disabled,
        # the query encoder output stands in for the decoder here (note this assumes decoder_dim == query_dim).
        flatten_decoder = tf.reshape(query_encoder, (-1, self.decoder_dim))
        flatten_embedder = tf.reshape(embedder, (-1, self.embedding_dim))
        # flatten_session_encoder = tf.reshape(session_encoder, (-1, self.session_dim))

        # attention

        # expand to batch_size x num_of_steps x query_dim
        # query_encoder_T = tf.transpose(query_encoder, perm=[1, 0, 2])
        # query_decoder_T = tf.transpose(decoder, perm=[1, 0, 2])

        # expand to num_of_steps x batch_size x num_of_steps x query_dim
        # query_encoder_expanded = tf.tile(tf.expand_dims(query_encoder, 2), (1, 1, num_of_steps, 1))

        # query_encoder_expanded = query_encoder_expanded * tf.tile(tf.expand_dims(attention_mask, 3), (1, 1, 1, self.query_dim))  # 2*

        # flatten_decoder_with_attention = \
        #     layers.attention_session(query_encoder_expanded, flatten_decoder, enc_dim=self.query_dim, dec_dim=self.decoder_dim,
        #                              reuse=reuse)  # 2*

        output_layer = layers.output_layer(
            flatten_embedder,
            flatten_decoder,  #
            x_dim=self.embedding_dim,
            h_dim=self.decoder_dim,  # 2*
            y_dim=self.output_dim,
            reuse=reuse
        )

        # We compute the output logits based on the output layer above
        flatten_logits, self.l2_loss = layers.logits_layer(
            output_layer,
            self.l2_loss,
            x_dim=self.output_dim,
            y_dim=self.vocab_size,
            reuse=reuse
        )

        logits = tf.reshape(flatten_logits, (num_of_steps, batch_size, self.vocab_size))
        # logits = tf.Print(logits, [np.argmax(logits)], summarize=1500)

        # If we want the softmax back from this step, or just the logits for the loss function
        if return_softmax:
            output = self.softmax(logits)
        else:
            output = logits

        # If we want to continue decoding with single_step we need the hidden states of all GRU layers
        if return_last_with_hidden_states:
            # hidden_decoder = decoder  # there is no reset decoder output
            # Note for attention mechanism
            return output[-1, :, :], hidden_query[:, :, :] # , hidden_decoder[-1, :, :]
        else:
            return output
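
The pack/unpack idiom around tf.scan is the subtle part of this example: the carried state is a single (2, batch_size, dim) tensor whose slices are the encoder output and the (reset) hidden state. A self-contained TF 1.x sketch of the same idiom, with a toy recurrence standing in for the repo's gru_layer_with_reset:

import tensorflow as tf  # TensorFlow 1.x

steps, batch_size, dim = 5, 4, 8
x = tf.random_normal((steps, batch_size, dim))
mask = tf.ones((steps, batch_size, 1))  # would be 0.0 at EoQ positions

def step(packed_prev, elems):
    x_t, m_t = elems
    _, h_prev = tf.unstack(packed_prev)  # carry slice 1, as result_prev[1] does above
    h = tf.tanh(x_t + h_prev)            # toy recurrence in place of the GRU
    return tf.stack([h, h * m_t])        # pack (output, hidden reset at EoQ) into one tensor

packed = tf.scan(step, (x, mask), initializer=tf.zeros((2, batch_size, dim)))
outputs, hidden = tf.unstack(packed, axis=1)  # each: (steps, batch_size, dim)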
Example #3
    def single_step(self, X, prev_hidden_query_states, prev_hidden_session, prev_hidden_decoder, reuse=True):
        """
        Performs a step in the HRED. X can be a 2-D tensor (batch, vocab); this can be used
        for beam search.

        :param X: The input sessions. Lists of ints, ints correspond to words
        Shape: (max_length)
        :param prev_hidden_query_states: All previous hidden states of the query encoder. Initialized with zeros.
        Shape: (2 x query_dim)
        :param prev_hidden_session: The previous hidden state of the session encoder. Initialized with zeros.
        Shape: (2 x session_dim)
        :param prev_hidden_decoder: The previous hidden state of the decoder. Initialized with zeros.
        Shape: (output_dim)
        :return:
        """
        # Note that with the implementation of attention the object "prev_hidden_query_states" contains not only the
        # previous query encoded state but all previous states, therefore we need to get the last query state
        prev_hidden_query = prev_hidden_query_states[-1, :, :]
        # Making embeddings for x
        embedder = layers.embedding_layer(X, vocab_dim=self.vocab_size, embedding_dim=self.embedding_dim, reuse=reuse)

        # Mask used to reset the query encoder when the symbol is the End-Of-Query (EoQ) symbol, and to retain
        # the state of the session encoder while the EoQ symbol has not yet been seen.
        eoq_mask = tf.cast(tf.not_equal(X, self.eoq_symbol), tf.float32)

        query_encoder, hidden_query = tf.unstack(layers.gru_layer_with_reset(
            prev_hidden_query,  # h_reset_prev
            (embedder, eoq_mask),
            name='forward_query_encoder',
            x_dim=self.embedding_dim,
            y_dim=self.query_dim,
            reuse=reuse
        ))


        # This part does the same, but for the session encoder. Here we need the possibility to keep the current
        # state if we have not yet seen a full query; once we have, update the session encoder state.
        session_encoder, hidden_session = tf.unstack(layers.gru_layer_with_retain(
            prev_hidden_session,  # h_retain_prev
            (query_encoder, eoq_mask),
            name='session_encoder',
            x_dim=self.query_dim,
            y_dim=self.session_dim,
            reuse=reuse
        ))

        # This part builds the decoder for a step. The decoder uses the word embeddings, the reset/retain vector
        # and the session encoder, so we give three variables to the decoder GRU. The decoder GRU is somewhat
        # special, as it incorporates the session_encoder into each hidden state update.
        hidden_decoder = layers.gru_layer_with_state_reset(
            prev_hidden_decoder,
            (embedder, eoq_mask, session_encoder),
            name='decoder',
            x_dim=self.embedding_dim,
            h_dim=self.session_dim,
            y_dim=self.decoder_dim,
            reuse=reuse
        )

        decoder = hidden_decoder
        flatten_decoder = tf.reshape(decoder, (-1, self.decoder_dim))

        # add attention layer
        # expand to num_of_steps x batch_size x num_of_steps x query_dim
        num_of_atten_states = tf.shape(prev_hidden_query_states)[0]
        # tf.Print(num_of_atten_states, [num_of_atten_states], "INFO - single-step ")
        # tf.Print(flatten_decoder, [tf.shape(flatten_decoder)], "INFO - decoder.shape ")
        query_encoder_expanded = tf.transpose(prev_hidden_query_states, [1, 0, 2])

        flatten_decoder_with_attention = \
            layers.attention_step(query_encoder_expanded, flatten_decoder, enc_dim=self.query_dim, dec_dim=self.decoder_dim,
                                  reuse=reuse)

        # After the decoder we add an additional output layer
        output = layers.output_layer(
            embedder,
            flatten_decoder_with_attention,  #
            x_dim=self.embedding_dim,
            h_dim=self.decoder_dim + self.query_dim,  #
            y_dim=self.output_dim,
            reuse=reuse
        )

        # We compute the output logits based on the output layer above
        logits = layers.logits_layer(
            output,
            x_dim=self.output_dim,
            y_dim=self.vocab_size,
            reuse=reuse
        )

        softmax = self.softmax(logits)

        return softmax, tf.concat([prev_hidden_query_states, tf.expand_dims(hidden_query, 0)], 0), hidden_session, hidden_decoder
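
The tf.concat in the return value is what makes chained decoding (and the attention over past queries) work: each call appends the new query state to the stack it received, so the next call attends over all of them. A small runnable sketch of that growth, with hypothetical sizes:

import tensorflow as tf

batch, query_dim = 4, 16
prev_states = tf.zeros((3, batch, query_dim))  # states from 3 earlier steps
hidden_query = tf.ones((batch, query_dim))     # state produced by this step
grown = tf.concat([prev_states, tf.expand_dims(hidden_query, 0)], 0)
# grown: (4, batch, query_dim) -- exactly the second return value above

A beam search built on single_step would fork this stack, together with hidden_session and hidden_decoder, once per surviving hypothesis.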
Example #4
    def inference(self, X, Y, sequence_max_length, attention=False):
        """
        Runs the model.
        :param X: data batch [batch_size x max_seq]
        :param Y: target batch
        :param sequence_max_length: max_seq of the batch
        :param attention: whether to add the attention mechanism
        :return: logits [N, hidden_size] where N is the number of words (including EoQ) in the batch
        """

        with tf.variable_scope('X_embedder', reuse=tf.AUTO_REUSE):
            embedder = layers.get_embedding_layer(
                vocabulary_size=self.vocab_size,
                embedding_dims=self.embedding_dim,
                data=X,
                scope='X_embedder')

        # For attention, pass bidirectional RNN
        if attention:
            with tf.variable_scope('gru_bidirectional', reuse=tf.AUTO_REUSE):
                self.annotations = layers.bidirectional_layer(
                    embedder, self.query_dim, self.batch_size)
        # Create the query encoder state
        self.initial_query_state = self.query_encoder.compute_state(
            x=embedder)  # batch_size x query_dims
        # Create the session state
        self.initial_session_state = self.session_encoder.compute_state(
            x=self.initial_query_state)  # batch_size x session_dims
        # Create the initial decoder state
        self.initial_decoder_state = layers.decoder_initialise_layer(
            self.initial_session_state[0],
            self.decoder_dim)  # batch_size x decoder_dims

        # Run decoder and retrieve outputs and states for all timesteps
        self.decoder_outputs = self.decoder_grucell.compute_prediction(  # batch size x timesteps x output_size
            y=Y,
            state=self.initial_decoder_state,
            batch_size=self.batch_size,
            vocab_size=self.vocab_size)

        # For attention, calculate context vector
        if attention:
            self.context = layers.get_context_attention(
                self.annotations, self.decoder_outputs, self.decoder_dim,
                self.query_dim, sequence_max_length,
                self.batch_size)  # batch_size x max_steps
            # Concatenate the context vector to the decoder state (assuming that, in a GRU, states = outputs)
            self.decoder_states_attention = tf.concat(
                [self.decoder_outputs,
                 tf.expand_dims(self.context, 2)],
                axis=2)  # TODO: check this
            # Calculate the omega function w(d_n-1, w_n-1) for attention
            with tf.variable_scope('output_layer', reuse=tf.AUTO_REUSE):
                omega = layers.output_layer(
                    embedding_dims=self.embedding_dim,
                    vocabulary_size=self.vocab_size,
                    num_hidden=self.decoder_dim + 1,
                    state=self.decoder_states_attention,
                    word=Y)
        else:
            with tf.variable_scope('output_layer', reuse=tf.AUTO_REUSE):
                omega = layers.output_layer(embedding_dims=self.embedding_dim,
                                            vocabulary_size=self.vocab_size,
                                            num_hidden=self.decoder_dim,
                                            state=self.decoder_outputs,
                                            word=Y)

        # Get embeddings for decoder output

        with tf.variable_scope('ov_embedder', reuse=tf.AUTO_REUSE):
            ov_embedder = tf.get_variable(
                name='Ov_embedder',
                shape=[self.vocab_size, self.embedding_dim],
                initializer=tf.random_normal_initializer(mean=0.0, stddev=1.0))

        # Dot product between omega and embeddings of vocabulary matrix
        logits = tf.einsum('bse,ve->bsv', omega, ov_embedder)

        return logits
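
The closing einsum is a batched matmul against the transposed output embedding: the logit for word v at step s is the dot product of omega[b, s] with ov_embedder[v]. A runnable equivalence check with hypothetical sizes:

import tensorflow as tf

b, s, e, v = 2, 5, 8, 11  # hypothetical batch/steps/embedding/vocab sizes
omega = tf.random_normal((b, s, e))
ov = tf.random_normal((v, e))
logits_einsum = tf.einsum('bse,ve->bsv', omega, ov)
logits_dot = tf.tensordot(omega, ov, axes=[[2], [1]])  # same (b, s, v) result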
Example #5
def convolutional_neural_network(data):
    # NAMING CONVENTION: conv3s32n is a convolutional layer with filter size 3x3 and number of filters = 32
    conv3s32n = layers.conv_layer(data, params.weights(depth=3),
                                  params.biases())
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(), params.biases())
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(), params.biases())

    # NAMING CONVENTION: pool2w2s is a pool layer with 2x2 window size and stride = 2
    pool2w2s = layers.pool_layer(conv3s32n)

    conv3s32n = layers.conv_layer(pool2w2s, params.weights(), params.biases())
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(), params.biases())
    conv3s32n = layers.conv_layer(conv3s32n, params.weights(), params.biases())

    pool2w2s = layers.pool_layer(conv3s32n)

    conv3s32n = layers.conv_layer(pool2w2s, params.weights(n_filters=128),
                                  params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n,
                                  params.weights(depth=128, n_filters=128),
                                  params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n,
                                  params.weights(depth=128, n_filters=128),
                                  params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n,
                                  params.weights(depth=128, n_filters=128),
                                  params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n,
                                  params.weights(depth=128, n_filters=128),
                                  params.biases(n_filters=128))
    conv3s32n = layers.conv_layer(conv3s32n,
                                  params.weights(depth=128, n_filters=128),
                                  params.biases(n_filters=128))
    # conv3s32n = layers.conv_layer(pool2w2s, params.weights(n_filters=64), params.biases(n_filters=64))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=64, n_filters=64), params.biases(n_filters=64))
    #
    # pool2w2s = layers.pool_layer(conv3s32n)
    #
    # conv3s32n = layers.conv_layer(pool2w2s, params.weights(depth=64, n_filters=128), params.biases(n_filters=128))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))
    # conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=128, n_filters=128), params.biases(n_filters=128))

    #pool2w2s = layers.pool_layer(conv3s32n)

    #conv3s32n = layers.conv_layer(pool2w2s, params.weights(depth=128, n_filters=256), params.biases(n_filters=256))
    #conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=256, n_filters=256), params.biases(n_filters=256))
    #conv3s32n = layers.conv_layer(conv3s32n, params.weights(depth=256, n_filters=256), params.biases(n_filters=256))

    # NAMING CONVENTION: fully connected layers are just indexed
    # (keep_prob and n_classes are free variables here, presumably defined at module level)
    fc1 = layers.full_layer(conv3s32n, params.fc_weights(conv3s32n, 1024),
                            params.biases(1024), keep_prob)
    fc2 = layers.full_layer(fc1, params.fc_weights(fc1, 1024),
                            params.biases(1024), keep_prob)
    fc3 = layers.full_layer(fc2, params.fc_weights(fc2, 1024),
                            params.biases(1024), keep_prob)

    # Feed the last fully connected layer into the output layer (the original
    # passed fc1 here, leaving fc2 and fc3 unused)
    output = layers.output_layer(fc3, params.fc_weights(fc3, n_classes),
                                 params.biases(n_classes))

    return output
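
Only the two pool layers change the spatial resolution here, assuming layers.conv_layer uses 'SAME' padding (an assumption; the repo's implementation is not shown). A quick check of the resulting sizes with a hypothetical input:

def spatial_after(side, n_pools, window=2, stride=2):
    # 'SAME'-padded 3x3 convs keep the side length; each 2x2/stride-2 pool
    # shrinks it by the usual (side - window) // stride + 1 rule.
    for _ in range(n_pools):
        side = (side - window) // stride + 1
    return side

assert spatial_after(32, 2) == 8  # e.g. a 32x32 input -> 16x16 -> 8x8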