Example #1
  def get_constants(self, inputs, training=None):
    constants = []
    if self.implementation != 0 and 0 < self.dropout < 1:
      input_shape = K.int_shape(inputs)
      input_dim = input_shape[-1]
      ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
      ones = K.tile(ones, (1, int(input_dim)))

      def dropped_inputs():
        return K.dropout(ones, self.dropout)

      dp_mask = [
          K.in_train_phase(dropped_inputs, ones, training=training)
          for _ in range(3)
      ]
      constants.append(dp_mask)
    else:
      constants.append([K.cast_to_floatx(1.) for _ in range(3)])

    if 0 < self.recurrent_dropout < 1:
      ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
      ones = K.tile(ones, (1, self.units))

      def dropped_inputs():  # pylint: disable=function-redefined
        return K.dropout(ones, self.recurrent_dropout)

      rec_dp_mask = [
          K.in_train_phase(dropped_inputs, ones, training=training)
          for _ in range(3)
      ]
      constants.append(rec_dp_mask)
    else:
      constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    return constants
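The pattern above builds one dropout mask per gate from a (batch, 1) column of ones, tiled out to the input width, so the same units are dropped at every timestep. A minimal standalone sketch of that trick, with assumed shapes (batch=4, timesteps=10, input_dim=8) and a rate of 0.5:

import tensorflow as tf
from tensorflow.keras import backend as K

x = K.ones((4, 10, 8))                               # (batch, timesteps, input_dim)
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))   # (batch, 1)
ones = K.tile(ones, (1, 8))                          # (batch, input_dim)
# In training the mask is random; at inference it stays all ones.
mask = K.in_train_phase(lambda: K.dropout(ones, 0.5), ones, training=True)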
Example #2
 def call(self, inputs):
   if not isinstance(inputs, list):
     raise ValueError('A merge layer should be called on a list of inputs.')
   if self._reshape_required:
     reshaped_inputs = []
     input_ndims = list(map(K.ndim, inputs))
     if None not in input_ndims:
       # If ranks of all inputs are available,
       # we simply expand each of them at axis=1
       # until all of them have the same rank.
       max_ndim = max(input_ndims)
       for x in inputs:
         x_ndim = K.ndim(x)
         for _ in range(max_ndim - x_ndim):
           x = K.expand_dims(x, 1)
         reshaped_inputs.append(x)
       return self._merge_function(reshaped_inputs)
     else:
       # Transpose all inputs so that batch size is the last dimension.
       # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size)
       transposed = False
       for x in inputs:
         x_ndim = K.ndim(x)
         if x_ndim is None:
           x_shape = K.shape(x)
           batch_size = x_shape[0]
           new_shape = K.concatenate([x_shape[1:], K.expand_dims(batch_size)])
           x_transposed = K.reshape(x,
                                    K.stack([batch_size,
                                             K.prod(x_shape[1:])]))
           x_transposed = K.permute_dimensions(x_transposed, (1, 0))
           x_transposed = K.reshape(x_transposed, new_shape)
           reshaped_inputs.append(x_transposed)
           transposed = True
         elif x_ndim > 1:
           dims = list(range(1, x_ndim)) + [0]
           reshaped_inputs.append(K.permute_dimensions(x, dims))
           transposed = True
         else:
           # We don't transpose inputs if they are 1D vectors or scalars.
           reshaped_inputs.append(x)
       y = self._merge_function(reshaped_inputs)
       y_ndim = K.ndim(y)
       if transposed:
         # If inputs have been transposed, we have to transpose the output too.
         if y_ndim is None:
           y_shape = K.shape(y)
           y_ndim = K.shape(y_shape)[0]
           batch_size = y_shape[y_ndim - 1]
           new_shape = K.concatenate(
               [K.expand_dims(batch_size), y_shape[:y_ndim - 1]])
           y = K.reshape(y, (-1, batch_size))
           y = K.permute_dimensions(y, (1, 0))
           y = K.reshape(y, new_shape)
         elif y_ndim > 1:
           dims = [y_ndim - 1] + list(range(y_ndim - 1))
           y = K.permute_dimensions(y, dims)
       return y
   else:
     return self._merge_function(inputs)
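When all input ranks are known, the method only equalizes ranks: each input is expanded at axis=1 until it matches the largest rank, and broadcasting inside the merge function does the rest. A small sketch of that branch under assumed shapes:

import tensorflow as tf
from tensorflow.keras import backend as K

a = K.ones((2, 3, 4))            # rank 3
b = K.ones((2, 4))               # rank 2
while K.ndim(b) < K.ndim(a):
    b = K.expand_dims(b, 1)      # (2, 4) -> (2, 1, 4)
print(K.int_shape(a + b))        # (2, 3, 4), e.g. for an Add-style merge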
Example #3
 def _linear(self, x, kernel, bias):
     in_shape = x.shape
     if len(in_shape) > 2:
         x_shape = [int(dim) for dim in x.shape]
         x = K.reshape(x, (x_shape[0] * x_shape[1], x_shape[2]))
     x = K.dot(x, kernel)
     x = K.bias_add(x, bias)
     if len(in_shape) > 2:
         x = K.reshape(x, (x_shape[0], x_shape[1], int(kernel.shape[1])))
     return x
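Note that _linear assumes a fully defined static shape (int(dim) fails on None dimensions). A hedged sketch of the same fold-time-into-batch pattern with assumed shapes:

import tensorflow as tf
from tensorflow.keras import backend as K

x = K.ones((2, 5, 8))                        # (batch, timesteps, features)
kernel, bias = K.ones((8, 16)), K.zeros((16,))
flat = K.reshape(x, (2 * 5, 8))              # collapse batch and time together
out = K.bias_add(K.dot(flat, kernel), bias)  # (10, 16)
out = K.reshape(out, (2, 5, 16))             # restore (batch, timesteps, units)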
Example #4
 def _layer_norm(self, x, offset, scale):
     in_shape = x.shape
     if len(in_shape) > 2:
         x_shape = [int(dim) for dim in x.shape]
         x = K.reshape(x, (x_shape[0] * x_shape[1], x_shape[2]))
     mean, var = tf.nn.moments(x, [1], keep_dims=True)
     x = tf.nn.batch_normalization(x, mean, var, offset, scale, K.epsilon())
     if len(in_shape) > 2:
         x = K.reshape(x, x_shape)
     return x
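The keep_dims argument is the TF 1.x spelling; in TF 2 the same call takes keepdims. A minimal sketch of the normalization step itself, with assumed shapes:

import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.random.normal((4, 8))
mean, var = tf.nn.moments(x, axes=[1], keepdims=True)   # keep_dims in TF 1.x
offset, scale = tf.zeros((8,)), tf.ones((8,))
y = tf.nn.batch_normalization(x, mean, var, offset, scale, K.epsilon())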
Example #5
  def call(self, inputs, training=None, mask=None):
    kwargs = {}
    if has_arg(self.layer.call, 'training'):
      kwargs['training'] = training
    uses_learning_phase = False  # pylint: disable=redefined-outer-name

    input_shape = K.int_shape(inputs)
    if input_shape[0]:
      # batch size matters, use rnn-based implementation
      def step(x, _):
        global uses_learning_phase  # pylint: disable=global-variable-undefined
        output = self.layer.call(x, **kwargs)
        if hasattr(output, '_uses_learning_phase'):
          uses_learning_phase = (output._uses_learning_phase or
                                 uses_learning_phase)
        return output, []

      _, outputs, _ = K.rnn(
          step,
          inputs,
          initial_states=[],
          unroll=False)
      y = outputs
    else:
      # No batch size specified, therefore the layer will be able
      # to process batches of any size.
      # We can go with reshape-based implementation for performance.
      input_length = input_shape[1]
      if not input_length:
        input_length = K.shape(inputs)[1]
      # Shape: (num_samples * timesteps, ...). And track the
      # transformation in self._input_map.
      input_uid = tf_layers_util.object_list_uid(inputs)
      inputs = K.reshape(inputs, (-1,) + input_shape[2:])
      self._input_map[input_uid] = inputs
      # (num_samples * timesteps, ...)
      y = self.layer.call(inputs, **kwargs)
      if hasattr(y, '_uses_learning_phase'):
        uses_learning_phase = y._uses_learning_phase
      # Shape: (num_samples, timesteps, ...)
      output_shape = self.compute_output_shape(input_shape).as_list()
      y = K.reshape(y, (-1, input_length) + tuple(output_shape[2:]))

    # Apply activity regularizer if any:
    if (hasattr(self.layer, 'activity_regularizer') and
        self.layer.activity_regularizer is not None):
      regularization_loss = self.layer.activity_regularizer(y)
      self.add_loss(regularization_loss, inputs)

    if uses_learning_phase:
      y._uses_learning_phase = True
    return y
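The reshape-based branch is the interesting one for K.reshape: time is folded into the batch axis, the wrapped layer runs once over (num_samples * timesteps, ...), and the result is unfolded again. A minimal sketch of that path, assuming a Dense layer and static shapes:

import tensorflow as tf
from tensorflow.keras import backend as K

layer = tf.keras.layers.Dense(6)
x = K.ones((4, 10, 8))              # (batch, timesteps, features)
y = layer(K.reshape(x, (-1, 8)))    # (batch * timesteps, 6)
y = K.reshape(y, (-1, 10, 6))       # back to (batch, timesteps, 6)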
Example #7
def _time_distributed_dense(x,
                            w,
                            b=None,
                            dropout=None,
                            input_dim=None,
                            output_dim=None,
                            timesteps=None,
                            training=None):
  """Apply `y . w + b` for every temporal slice y of x.

  Arguments:
      x: input tensor.
      w: weight matrix.
      b: optional bias vector.
      dropout: float between 0 and 1; fraction of input units to drop
          (the same dropout mask is applied to every temporal slice).
      input_dim: integer; optional dimensionality of the input.
      output_dim: integer; optional dimensionality of the output.
      timesteps: integer; optional number of timesteps.
      training: training phase tensor or boolean.

  Returns:
      Output tensor.
  """
  if not input_dim:
    input_dim = K.shape(x)[2]
  if not timesteps:
    timesteps = K.shape(x)[1]
  if not output_dim:
    output_dim = K.shape(w)[1]

  if dropout is not None and 0. < dropout < 1.:
    # apply the same dropout pattern at every timestep
    ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
    dropout_matrix = K.dropout(ones, dropout)
    expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
    x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

  # collapse time dimension and batch dimension together
  x = K.reshape(x, (-1, input_dim))
  x = K.dot(x, w)
  if b is not None:
    x = K.bias_add(x, b)
  # reshape to 3D tensor
  if K.backend() == 'tensorflow':
    x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
    x.set_shape([None, None, output_dim])
  else:
    x = K.reshape(x, (-1, timesteps, output_dim))
  return x
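A hedged usage sketch, assuming the helper above and K = tensorflow.keras.backend are in scope:

from tensorflow.keras import backend as K

x = K.ones((2, 5, 8))    # (batch, timesteps, input_dim)
w = K.ones((8, 3))
b = K.zeros((3,))
y = _time_distributed_dense(x, w, b, input_dim=8, output_dim=3, timesteps=5)
# y has runtime shape (2, 5, 3)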
Example #8
 def call(self, inputs):
   # In case the target shape is not fully defined,
   # we need access to the shape of x.
   target_shape = self.target_shape
   if -1 in target_shape:
     # target shape not fully defined
     target_shape = self._compute_output_shape(inputs.get_shape())
     target_shape = target_shape.as_list()[1:]
   return K.reshape(inputs, (-1,) + tuple(target_shape))
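The key move is prepending -1 for the batch axis, so the layer keeps working for any batch size. A minimal sketch with an assumed, fully defined target shape:

import tensorflow as tf
from tensorflow.keras import backend as K

x = K.ones((2, 3, 4))
target_shape = (6, 2)                          # assumed Reshape target
y = K.reshape(x, (-1,) + tuple(target_shape))  # -> (2, 6, 2); batch inferred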
Example #9
    def _multihead_attention(self, memory):
        """Perform multi-head attention from 'Attention is All You Need'.
            Implementation of the attention mechanism from
            https://arxiv.org/abs/1706.03762.
            Args:
              memory: Memory tensor to perform attention on.
            Returns:
              new_memory: New memory tensor.
            """

        batch_size = int(memory.shape[0])

        qkv = self._linear(memory, self.kernel_qkv, self.bias_qkv)
        # qkv = self._layer_norm(qkv, self.offset_qkv, self.scale_qkv)

        mem_slots = memory.get_shape().as_list()[1]  # Denoted as N.

        # [B, N, F] -> [B, N, H, F/H]
        qkv_reshape = K.reshape(
            qkv, (batch_size, mem_slots, self.num_heads, self.qkv_size))

        # [B, N, H, F/H] -> [B, H, N, F/H]
        qkv_transpose = K.permute_dimensions(qkv_reshape, [0, 2, 1, 3])
        q, k, v = tf.split(qkv_transpose,
                           [self.key_size, self.key_size, self.value_size], -1)

        q *= self.qkv_size**-0.5
        dot_product = tf.matmul(q, k, transpose_b=True)  # [B, H, N, N]
        weights = K.softmax(dot_product)

        output = tf.matmul(weights, v)  # [B, H, N, V]

        # [B, H, N, V] -> [B, N, H, V]
        output_transpose = K.permute_dimensions(output, [0, 2, 1, 3])

        # [B, N, H, V] -> [B, N, H * V]
        new_memory = K.reshape(output_transpose,
                               (batch_size, mem_slots, self.mem_size))

        return new_memory
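The two reshapes around the attention are the standard head split and merge. A minimal sketch of the split step with assumed sizes (B=2, N=6 slots, H=4 heads, D=8 per head):

import tensorflow as tf
from tensorflow.keras import backend as K

B, N, H, D = 2, 6, 4, 8
qkv = K.ones((B, N, H * D))
qkv = K.reshape(qkv, (B, N, H, D))             # [B, N, F] -> [B, N, H, F/H]
qkv = K.permute_dimensions(qkv, (0, 2, 1, 3))  # -> [B, H, N, F/H]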
Example #10
    def compute_spectral_normal(self, training=True):
        # Spectrally Normalized Weight
        if self.spectral_normalization:
            # Get kernel tensor shape
            W_shape = self.kernel.shape.as_list()

            # Flatten the Tensor
            W_mat = K.reshape(self.kernel, [W_shape[-1], -1])  # [out_c, N]

            sigma, u, v = power_iteration(W_mat, self.u)

            if training:
                # Update estimated 1st singular vector
                self.u.assign(u)

            return self.kernel / sigma
        else:
            return self.kernel
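power_iteration is not defined in this snippet; a hypothetical sketch of what such a helper usually looks like, with W of shape [out_c, N] and the running vector u of shape [1, out_c] as in the code above:

import tensorflow as tf

def power_iteration(W, u, iters=1):
    # One round of power iteration estimating the largest singular value of W.
    v = None
    for _ in range(iters):
        v = tf.math.l2_normalize(tf.matmul(u, W))                     # [1, N]
        u = tf.math.l2_normalize(tf.matmul(v, W, transpose_b=True))   # [1, out_c]
    sigma = tf.matmul(tf.matmul(u, W), v, transpose_b=True)[0, 0]     # approx. top singular value
    return sigma, u, v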
Example #11
    def call(self, inputs, memory, training=None):
        batch_size = int(inputs.shape[0])

        memory = K.reshape(memory, (batch_size, self.mem_slots, self.mem_size))
        inputs = self._linear(inputs, self.kernel_in, self.bias_in)
        inputs_reshape = K.expand_dims(inputs, axis=1)

        memory_plus_input = K.concatenate([memory, inputs_reshape], axis=1)
        next_memory = self._attend_over_memory(memory_plus_input)

        n = inputs_reshape.get_shape().as_list()[1]
        next_memory = next_memory[:, :-n, :]

        input_gate, forget_gate = self._create_gates(inputs_reshape, memory)
        next_memory = input_gate * K.tanh(next_memory)
        next_memory += forget_gate * memory
        next_memory = K.batch_flatten(next_memory)

        return next_memory, (next_memory, )
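The input is appended to the memory as one extra slot, attended over, and then sliced off again. A minimal sketch of that append-and-slice pattern with assumed sizes:

import tensorflow as tf
from tensorflow.keras import backend as K

memory = K.ones((2, 4, 8))                           # (batch, mem_slots, mem_size)
inputs = K.ones((2, 1, 8))                           # projected input as one slot
combined = K.concatenate([memory, inputs], axis=1)   # (2, 5, 8)
next_memory = combined[:, :-1, :]                    # drop the input slot again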
Example #14
 def call(self, inputs):
   return K.reshape(inputs, (K.shape(inputs)[0],) + self.target_shape)
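Unlike Example #8, this variant reads the batch size dynamically with K.shape(inputs)[0] instead of using -1. A minimal sketch with assumed shapes:

import tensorflow as tf
from tensorflow.keras import backend as K

x = K.ones((2, 3, 4))
target_shape = (12,)                                 # flatten all but the batch axis
y = K.reshape(x, (K.shape(x)[0],) + target_shape)    # -> (2, 12)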