def _create_gates(self, inputs, memory):
        """Create input and forget gates for this step using `inputs` and `memory`.

        Args:
          inputs: Tensor input.
          memory: The current state of memory.

        Returns:
          input_gate: An LSTM-like input gate.
          forget_gate: An LSTM-like forget gate.
        """
        # We'll create the input and forget gates at once. Hence, calculate double
        # the gate size.
        num_gates = 2 * self._calculate_gate_size()
        batch_size = memory.get_shape().as_list()[0]

        memory = tf.tanh(memory)  # B x N x H * V

        inputs = tf.reshape(inputs, [batch_size, -1])  # B x In_size
        gate_inputs = linear(inputs,
                             num_gates,
                             use_bias=False,
                             scope='gate_in')  # B x num_gates
        gate_inputs = tf.expand_dims(gate_inputs, axis=1)  # B x 1 x num_gates

        memory_flattened = tf.reshape(memory,
                                      [-1, self._mem_size])  # [B * N, H * V]
        gate_memory = linear(memory_flattened,
                             num_gates,
                             use_bias=False,
                             scope='gate_mem')  # [B * N, num_gates]
        gate_memory = tf.reshape(
            gate_memory,
            [batch_size, self._mem_slots, num_gates])  # [B, N, num_gates]

        gates = tf.split(gate_memory + gate_inputs,
                         num_or_size_splits=2,
                         axis=2)
        input_gate, forget_gate = gates  # B x N x num_gates/2, B x N x num_gates/2

        input_gate = tf.sigmoid(input_gate + self._input_bias)
        forget_gate = tf.sigmoid(forget_gate + self._forget_bias)

        return input_gate, forget_gate
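
    # `_calculate_gate_size` is referenced above but not included in this
    # snippet. In the reference relational-memory implementation the gate size
    # depends on `gate_style`; the sketch below assumes that convention.
    def _calculate_gate_size(self):
        """Return the per-slot gate size implied by the gate style (a sketch)."""
        if self._gate_style == 'unit':
            return self._mem_size  # one gate value per element of a memory slot
        elif self._gate_style == 'memory':
            return 1  # a single gate value per memory slot
        else:
            return 0  # no gating
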
Example #2
    def _multihead_attention(self, memory):
        """Perform multi-head attention from 'Attention is All You Need'.

        Implementation of the attention mechanism from
        https://arxiv.org/abs/1706.03762.

        Args:
          memory: Memory tensor to perform attention on, with size [B, N, H*V].

        Returns:
          new_memory: New memory tensor.
        """

        qkv_size = 2 * self._key_size + self._head_size
        total_size = qkv_size * self._num_heads  # Denote as F.
        batch_size = memory.get_shape().as_list()[0]  # Denote as B
        memory_flattened = tf.reshape(memory,
                                      [-1, self._mem_size])  # [B * N, H * V]
        qkv = linear(memory_flattened,
                     total_size,
                     use_bias=False,
                     scope='lin_qkv')  # [B*N, F]
        qkv = tf.reshape(qkv, [batch_size, -1, total_size])  # [B, N, F]
        qkv = tf.contrib.layers.layer_norm(qkv, trainable=True)  # [B, N, F]

        # [B, N, F] -> [B, N, H, F/H]
        qkv_reshape = tf.reshape(qkv,
                                 [batch_size, -1, self._num_heads, qkv_size])

        # [B, N, H, F/H] -> [B, H, N, F/H]
        qkv_transpose = tf.transpose(qkv_reshape, [0, 2, 1, 3])
        q, k, v = tf.split(qkv_transpose,
                           [self._key_size, self._key_size, self._head_size],
                           -1)

        q *= qkv_size**-0.5
        dot_product = tf.matmul(q, k, transpose_b=True)  # [B, H, N, N]
        weights = tf.nn.softmax(dot_product)

        output = tf.matmul(weights, v)  # [B, H, N, V]

        # [B, H, N, V] -> [B, N, H, V]
        output_transpose = tf.transpose(output, [0, 2, 1, 3])

        # [B, N, H, V] -> [B, N, H * V]
        new_memory = tf.reshape(output_transpose,
                                [batch_size, -1, self._mem_size])
        return new_memory
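
    # `_attend_over_memory` is called by `_build` in the later snippets but is
    # not shown here. A minimal sketch, assuming the usual structure of the
    # relational-memory core (attention and a position-wise MLP, each with a
    # residual connection and layer norm); `self._num_blocks` is an assumed
    # attribute and 'attention_mlp_%d' is an illustrative scope name.
    def _attend_over_memory(self, memory):
        """Apply `self._num_blocks` rounds of attention over `memory` (a sketch)."""
        for i in range(self._num_blocks):
            # Attention sub-block with residual connection and layer norm.
            attended = self._multihead_attention(memory)  # [B, N, H * V]
            memory = tf.contrib.layers.layer_norm(memory + attended)

            # Position-wise MLP sub-block, also residual and layer-normed.
            flat = tf.reshape(memory, [-1, self._mem_size])  # [B * N, H * V]
            mlp_out = linear(flat,
                             self._mem_size,
                             use_bias=True,
                             scope='attention_mlp_%d' % i)  # [B * N, H * V]
            mlp_out = tf.reshape(mlp_out, tf.shape(memory))  # [B, N, H * V]
            memory = tf.contrib.layers.layer_norm(memory + mlp_out)
        return memory
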
    def _make_generator(self, z, phase_train):
        s_h, s_w = self.img_size, self.img_size
        s_h2, s_w2 = self._conv_out_size_same(s_h, 2), self._conv_out_size_same(s_w, 2)
        s_h4, s_w4 = self._conv_out_size_same(s_h2, 2), self._conv_out_size_same(s_w2, 2)
        #s_h8, s_w8 = self._conv_out_size_same(s_h4, 2), self._conv_out_size_same(s_w4, 2)
        #s_h16, s_w16 = self._conv_out_size_same(s_h8, 2), self._conv_out_size_same(s_w8, 2)
        # project `z` and reshape
        self.z_, self.h0_w, self.h0_b = ops.linear(z,
                                                   self.gf_dim * 8 * s_h4 * s_w4,
                                                   'g_h0_lin',
                                                   with_w=True)
        normalized_value = ops.batch_norm(self.z_,
                                          name='g_bn0',
                                          axes=[0],
                                          phase_train=phase_train)

        self.h0 = tf.reshape(normalized_value,
                             [-1, s_h4, s_w4, self.gf_dim * 8])

        h0 = ops.lrelu(self.h0)

        self.h1, self.h1_w, self.h1_b = ops.deconv2d(
            h0, [self.batch_size, s_h2, s_w2, self.gf_dim * 4],
            name='g_h1',
            with_w=True)
        h1 = ops.lrelu(
            ops.batch_norm(self.h1, name='g_bn1', phase_train=phase_train))

        # h2, self.h2_w, self.h2_b = ops.deconv2d(
        #     h1, [self.batch_size, s_h4, s_w4, self.gf_dim*2], name='g_h2', with_w=True)
        # h2 = tf.nn.relu(ops.batch_norm(h2, name='g_bn2'))
        #
        # h3, self.h3_w, self.h3_b = ops.deconv2d(
        #     h2, [self.batch_size, s_h2, s_w2, self.gf_dim*1], name='g_h3', with_w=True)
        # h3 = tf.nn.relu(ops.batch_norm(h3, name='g_bn3'))

        h2, self.h2_w, self.h2_b = ops.deconv2d(
            h1, [self.batch_size, s_h, s_w, self.c_dim],
            name='g_h4',
            with_w=True)
        h2_non_linear = ops.lrelu(h2, leak=0)
        return h2_non_linear
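
    # `_conv_out_size_same` is referenced above but not part of this snippet.
    # A minimal sketch under the usual DCGAN convention (output side length of
    # a SAME-padded strided convolution); it assumes `math` is imported.
    def _conv_out_size_same(self, size, stride):
        """Ceiling division: spatial size after a SAME conv with this stride."""
        return int(math.ceil(float(size) / float(stride)))
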
Example #4
    def _build(self, inputs, memory):
        """Adds relational memory to the TensorFlow graph.

        Args:
          inputs: Tensor input.
          memory: Memory output from the previous time step.

        Returns:
          output: This time step's output.
          next_memory: The next version of memory to use.
        """

        batch_size = memory.get_shape().as_list()[0]
        inputs = tf.reshape(inputs, [batch_size, -1])  # [B, In_size]
        inputs = linear(inputs,
                        self._mem_size,
                        use_bias=True,
                        scope='input_for_concat')  # [B, V * H]
        inputs_reshape = tf.expand_dims(inputs, 1)  # [B, 1, V * H]

        memory_plus_input = tf.concat([memory, inputs_reshape],
                                      axis=1)  # [B, N + 1, V * H]
        # Self-attention over the memory to compute the next memory state M(t+1).
        next_memory = self._attend_over_memory(
            memory_plus_input)  # [B, N + 1, V * H]

        n = inputs_reshape.get_shape().as_list()[1]
        next_memory = next_memory[:, :-n, :]  # [B, N, V * H], drop the extra slot added by the input

        if self._gate_style == 'unit' or self._gate_style == 'memory':
            self._input_gate, self._forget_gate = self._create_gates(
                inputs_reshape, memory)
            next_memory = self._input_gate * tf.tanh(next_memory)
            next_memory += self._forget_gate * memory

        # For the output, simply take the memory and flatten it to one dimension. This is not the
        # vocabulary dimension, because this output is subsequently passed through an MLP.
        # The output derives directly from the new memory; it could also be used to determine lambda.
        output = tf.reshape(next_memory, [batch_size, -1])
        return output, next_memory
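
    # A hedged usage sketch (not part of this snippet): the core above is
    # typically unrolled over a sequence, feeding each step's memory into the
    # next call. `core`, `initial_memory`, and `inputs_sequence` are
    # illustrative names.
    #
    #     memory = initial_memory                        # [B, N, V * H]
    #     outputs = []
    #     for t in range(seq_len):
    #         out, memory = core._build(inputs_sequence[:, t], memory)
    #         outputs.append(out)                        # each out is [B, N * V * H]
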
Example #5
    def _build(self, inputs, memory):
        """Adds relational memory to the TensorFlow graph.

        Args:
          inputs: Tensor input.
          memory: Memory output from the previous time step.

        Returns:
          output: This time step's output.
          next_memory: The next version of memory to use.
        """

        batch_size = memory.get_shape().as_list()[0]
        inputs = tf.reshape(inputs, [batch_size, -1])  # [B, In_size]
        inputs = linear(inputs,
                        self._mem_size,
                        use_bias=True,
                        scope='input_for_concat')  # [B, V * H]
        inputs_reshape = tf.expand_dims(inputs, 1)  # [B, 1, V * H]

        memory_plus_input = tf.concat([memory, inputs_reshape],
                                      axis=1)  # [B, N + 1, V * H]
        next_memory = self._attend_over_memory(
            memory_plus_input)  # [B, N + 1, V * H]

        n = inputs_reshape.get_shape().as_list()[1]
        next_memory = next_memory[:, :-n, :]  # [B, N, V * H]

        if self._gate_style == 'unit' or self._gate_style == 'memory':
            self._input_gate, self._forget_gate = self._create_gates(
                inputs_reshape, memory)
            next_memory = self._input_gate * tf.tanh(next_memory)
            next_memory += self._forget_gate * memory

        output = tf.reshape(next_memory, [batch_size, -1])
        return output, next_memory
Example #6
    def logits(self, x_onehot):
        batch_size = self.batch_size
        seq_len = self.seq_len
        vocab_size = self.vocab_size
        dis_emb_dim = self.dis_emb_dim
        num_rep = self.num_rep
        sn = self.sn

        # get the embedding dimension for each representation
        emb_dim_single = int(dis_emb_dim / num_rep)
        assert isinstance(emb_dim_single, int) and emb_dim_single > 0

        filter_sizes = [2, 3, 4, 5]
        num_filters = [300, 300, 300, 300]
        dropout_keep_prob = 0.75

        d_embeddings = tf.get_variable('d_emb', shape=[vocab_size, dis_emb_dim],
                                       initializer=create_linear_initializer(vocab_size))
        input_x_re = tf.reshape(x_onehot, [-1, vocab_size])
        emb_x_re = tf.matmul(input_x_re, d_embeddings)
        # batch_size x seq_len x dis_emb_dim
        emb_x = tf.reshape(emb_x_re, [batch_size, seq_len, dis_emb_dim])

        # batch_size x seq_len x dis_emb_dim x 1
        emb_x_expanded = tf.expand_dims(emb_x, -1)
        # print('shape of emb_x_expanded: {}'.format(
        #     emb_x_expanded.get_shape().as_list()))

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for filter_size, num_filter in zip(filter_sizes, num_filters):
            conv = conv2d(emb_x_expanded, num_filter, k_h=filter_size, k_w=emb_dim_single,
                          d_h=1, d_w=emb_dim_single, sn=sn, stddev=None, padding='VALID',
                          scope="conv-%s" % filter_size)  # batch_size x (seq_len-k_h+1) x num_rep x num_filter
            out = tf.nn.relu(conv, name="relu_new")
            pooled = tf.nn.max_pool(out, ksize=[1, seq_len - filter_size + 1, 1, 1],
                                    strides=[1, 1, 1, 1], padding='VALID',
                                    name="pool_new")  # batch_size x 1 x num_rep x num_filter
            pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = sum(num_filters)
        # batch_size x 1 x num_rep x num_filters_total
        h_pool = tf.concat(pooled_outputs, 3)
        # print('shape of h_pool: {}'.format(h_pool.get_shape().as_list()))
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

        # Add highway
        # (batch_size*num_rep) x num_filters_total
        h_highway = highway(h_pool_flat, h_pool_flat.get_shape()[1], 1, 0)

        # Add dropout
        h_drop = tf.nn.dropout(h_highway, dropout_keep_prob, name='dropout_new')

        # fc
        fc_out = linear(h_drop, output_size=100,
                        use_bias=True, sn=sn, scope='fc_new')
        logits = linear(fc_out, output_size=1,
                        use_bias=True, sn=sn, scope='logits_new')
        logits = tf.squeeze(logits, -1)  # batch_size*num_rep
        return logits
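
    # Hedged usage note (not part of this snippet): the returned logits have
    # shape [batch_size * num_rep]. One common way to turn them into a
    # per-sample score is to reshape them back per sample and average over the
    # representations; the names below are illustrative.
    #
    #     d_logits = self.logits(x_onehot)                      # [B * num_rep]
    #     d_logits = tf.reshape(d_logits, [batch_size, num_rep])
    #     d_score = tf.reduce_mean(d_logits, axis=1)            # [B]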