Example #1
    def generator(self, src_enc):
        G_h = K.bias_add(K.dot(src_enc, self.G_w1), self.G_b1)
        G_h_relu = tf.nn.relu(G_h)
        G_log_prob = K.bias_add(K.dot(G_h_relu, self.G_w2), self.G_b2)
        G_prob = tf.nn.sigmoid(G_log_prob)

        return G_prob
Example #2
def step_gru(cell_inputs, cell_state, kernel, recurrent_kernel, input_bias,
             recurrent_bias):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_state

    # inputs projected by all gate matrices at once
    matrix_x = K.dot(cell_inputs, kernel)
    matrix_x = K.bias_add(matrix_x, input_bias)

    x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=1)

    # hidden state projected by all gate matrices at once
    matrix_inner = K.dot(h_tm1, recurrent_kernel)
    matrix_inner = K.bias_add(matrix_inner, recurrent_bias)

    recurrent_z, recurrent_r, recurrent_h = array_ops.split(matrix_inner,
                                                            3,
                                                            axis=1)
    z = nn.sigmoid(x_z + recurrent_z)
    r = nn.sigmoid(x_r + recurrent_r)
    hh = nn.tanh(x_h + r * recurrent_h)

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]
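A minimal driver sketch (not from the original source; the shapes, random initializers, and the adapter function are assumptions, and step_gru's own imports are taken as given) showing how a step function of this shape is typically unrolled over a sequence with K.rnn:

import tensorflow as tf
from tensorflow.keras import backend as K

units, batch, timesteps, features = 4, 2, 5, 3
kernel = tf.random.normal((features, 3 * units))
recurrent_kernel = tf.random.normal((units, 3 * units))
input_bias = tf.zeros((3 * units,))
recurrent_bias = tf.zeros((3 * units,))

inputs = tf.random.normal((batch, timesteps, features))
init_states = [tf.zeros((batch, units))]

# Adapter: K.rnn passes a list of states, while step_gru takes a single tensor.
def step(cell_inputs, cell_states):
    return step_gru(cell_inputs, cell_states[0], kernel, recurrent_kernel,
                    input_bias, recurrent_bias)

last_output, all_outputs, new_states = K.rnn(step, inputs, init_states)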
Example #3
    def call(self, inputs, prev_projection, states, training=None):
        prev_output = states[0]

        dp_mask = self.get_dropout_mask_for_cell(inputs, training)
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
            prev_output, training)

        if dp_mask is not None:
            inputs = inputs * dp_mask
        output = K.dot(inputs, self.kernel)

        if self.use_recurrent:
            if rec_dp_mask is not None:
                prev_output = prev_output * rec_dp_mask
            output += K.dot(prev_output, self.recurrent_kernel)

        if self.use_feedback:
            if self.projection_activation is not None:
                prev_projection = self.projection_activation(prev_projection)
            output += K.dot(prev_projection, self.feedback_kernel)

        if self.bias is not None:
            output = K.bias_add(output, self.bias)

        if self.activation is not None:
            output = self.activation(output)

        projection = K.dot(output, self.projection_kernel)

        if self.projection_bias is not None:
            projection = K.bias_add(projection, self.projection_bias)

        return output, projection, [output]
Example #4
    def call(self, inputs, states):
        last_h = states[0]
        last_c = states[1]
        w_i, w_f, w_c, w_o = tf.split(self.w, num_or_size_splits=4, axis=1)
        b_i, b_f, b_c, b_o = tf.split(self.bias, num_or_size_splits=4, axis=0)
        # w x
        x_i = K.dot(inputs, w_i)
        x_f = K.dot(inputs, w_f)
        x_c = K.dot(inputs, w_c)
        x_o = K.dot(inputs, w_o)
        # w x + b
        x_i = K.bias_add(x_i, b_i)
        x_f = K.bias_add(x_f, b_f)
        x_c = K.bias_add(x_c, b_c)
        x_o = K.bias_add(x_o, b_o)

        u_i, u_f, u_c, u_o = tf.split(self.u, num_or_size_splits=4, axis=1)
        # gates: add the recurrent projection u·h to the input projection w·x + b
        i = self.recurrent_activation(x_i + K.dot(last_h, u_i))
        f = self.recurrent_activation(x_f + K.dot(last_h, u_f))  # computed but never used
        # the previous cell state is retained with weight (1 - i) rather than f
        c = (1 - i) * last_c + self.activation(x_c + K.dot(last_h, u_c))
        o = self.recurrent_activation(x_o + K.dot(last_h, u_o))

        # compute h
        h = o * self.activation(c)
        return h, (h, c)
Example #5
    def call(self, inputs, **kwargs):
        gate_outputs = []
        final_outputs = []

        # f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
        expert_outputs = tf.tensordot(a=inputs, b=self.expert_kernels, axes=1)
        # Add the bias term to the expert weights if necessary
        expert_outputs = K.bias_add(x=expert_outputs, bias=self.expert_bias)
        expert_outputs = self.expert_activation(expert_outputs)

        # g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
        for index, gate_kernel in enumerate(self.gate_kernels):
            gate_output = K.dot(x=inputs, y=gate_kernel)
            # Add the bias term to the gate weights if necessary
            gate_output = K.bias_add(x=gate_output, bias=self.gate_bias[index])
            gate_output = self.gate_activation(gate_output)
            gate_outputs.append(gate_output)

        # f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
        for gate_output in gate_outputs:
            expanded_gate_output = tf.expand_dims(gate_output, axis=1)
            weighted_expert_output = expert_outputs * K.repeat_elements(
                expanded_gate_output, self.units, axis=1)
            final_outputs.append(K.sum(weighted_expert_output, axis=2))
        return final_outputs
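Reading note (shapes inferred from the code above, not stated in the source): with inputs of shape (batch, input_dim), expert_kernels of shape (input_dim, units, num_experts), and one gate kernel per task, expert_outputs comes out as (batch, units, num_experts), each gate output as (batch, num_experts), and each entry of final_outputs as a (batch, units) tensor, one per gate/task, matching the multi-gate mixture-of-experts formulation the comments cite.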
Example #6
    def call(self, inputs, states, training=None):
        vh = states[0]

        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=2)
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(vh,
                                                               training,
                                                               count=2)

        if 0. < self.dropout < 1.:
            input1 = inputs * dp_mask[0]
            input2 = inputs * dp_mask[1]
        else:
            input1 = inputs
            input2 = inputs

        p11 = K.dot(input1, self.kernel[:, :self.units])
        p21 = K.dot(input2, self.kernel[:, self.units:])
        if self.use_bias:
            p11 = K.bias_add(p11, self.bias[:self.units])
            p21 = K.bias_add(p21, self.bias[self.units:])
        if 0. < self.recurrent_dropout < 1.:
            vh1 = vh * rec_dp_mask[0]
            vh2 = vh * rec_dp_mask[1]
        else:
            vh1 = vh
            vh2 = vh

        v1 = self.recurrent_activation(
            p11 + K.dot(vh1, self.recurrent_kernel[:, :self.units]))
        v2 = self.activation(
            p21 + K.dot(vh2 * v1, self.recurrent_kernel[:, self.units:]))
        vh = (1 - v1) * vh + v1 * v2
        return vh, [vh]
Example #7
    def call(self, inputs, states, training=None):
        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(h_tm1,
                                                               training,
                                                               count=4)

        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs
        # k_i, k_f, k_c, k_o = array_ops.split(
        #     self.kernel, num_or_size_splits=4, axis=1)
        x_i = K.dot(inputs_i, self.kernel_i)
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        x_o = K.dot(inputs_o, self.kernel_o)
        if self.use_bias:
            # b_i, b_f, b_c, b_o = array_ops.split(
            #     self.bias, num_or_size_splits=4, axis=0)
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1
        # x = (x_i, x_f, x_c, x_o)
        # h_tm1 = (h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o)
        # c, o = self._compute_carry_and_output(x, h_tm1, c_tm1)

        i = self.recurrent_activation(x_i +
                                      K.dot(h_tm1_i, self.recurrent_kernel_i))
        f = self.recurrent_activation(x_f +
                                      K.dot(h_tm1_f, self.recurrent_kernel_f))
        c = f * c_tm1 + i * self.activation(
            x_c + K.dot(h_tm1_c, self.recurrent_kernel_c))
        o = self.recurrent_activation(x_o +
                                      K.dot(h_tm1_o, self.recurrent_kernel_o))

        h = o * self.activation(c)
        return h, [h, c]
Example #8
    def call(self, inputs):
        outputs = []

        if self.data_format == 'channels_first':
            count = 0

            for c in range(self.input_spec.axes[1]):
                input = inputs[:, c:c + 1, ...]

                for d in range(self.depth_multiplier):
                    output = K.conv3d(input,
                                      self.depthwise_kernels[count],
                                      padding=self.padding,
                                      data_format=self.data_format,
                                      dilation_rate=self.dilation_rate)

                    if self.use_bias:
                        output = K.bias_add(output,
                                            self.biases[count],
                                            data_format=self.data_format)

                    outputs.append(output)
                    count += 1

            outputs = K.concatenate(outputs, axis=1)
        else:
            count = 0

            for c in range(self.input_spec.axes[4]):
                # channels_last: channels live on the last axis
                input = inputs[..., c:c + 1]

                for d in range(self.depth_multiplier):
                    output = K.conv3d(input,
                                      self.depthwise_kernels[count],
                                      padding=self.padding,
                                      data_format=self.data_format,
                                      dilation_rate=self.dilation_rate)

                    if self.use_bias:
                        output = K.bias_add(output,
                                            self.biases[count],
                                            data_format=self.data_format)

                    outputs.append(output)
                    count += 1

            outputs = K.concatenate(outputs, axis=4)

        outputs = K.conv3d(outputs,
                           self.pointwise_kernel,
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Example #9
def _preprocess_symbolic_input(x, data_format, mode):
    """Preprocesses a tensor encoding a batch of images.

  Arguments:
      x: Input tensor, 3D or 4D.
      data_format: Data format of the image tensor.
      mode: One of "caffe", "tf" or "torch".
          - caffe: will convert the images from RGB to BGR,
              then will zero-center each color channel with
              respect to the ImageNet dataset,
              without scaling.
          - tf: will scale pixels between -1 and 1,
              sample-wise.
          - torch: will scale pixels between 0 and 1 and then
              will normalize each channel with respect to the
              ImageNet dataset.

  Returns:
      Preprocessed tensor.
  """
    global _IMAGENET_MEAN

    if mode == 'tf':
        x /= 127.5
        x -= 1.
        return x

    if mode == 'torch':
        x /= 255.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        if data_format == 'channels_first':
            # 'RGB'->'BGR'
            if K.ndim(x) == 3:
                x = x[::-1, ...]
            else:
                x = x[:, ::-1, ...]
        else:
            # 'RGB'->'BGR'
            x = x[..., ::-1]
        mean = [103.939, 116.779, 123.68]
        std = None

    if _IMAGENET_MEAN is None:
        _IMAGENET_MEAN = constant_op.constant(-np.array(mean),
                                              dtype=K.floatx())

    # Zero-center by mean pixel
    if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
        x = K.bias_add(x, math_ops.cast(_IMAGENET_MEAN, K.dtype(x)),
                       data_format)
    else:
        x = K.bias_add(x, _IMAGENET_MEAN, data_format)
    if std is not None:
        x /= std
    return x
Example #11
 def call(self, inputs, **kwargs):
     input_shape = K.int_shape(inputs)
     d_model = input_shape[-1]
     step1 = self.activation(
         K.bias_add(K.dot(K.reshape(inputs, (-1, d_model)),
                          self.transition_weights['weights1']),
                    self.transition_weights['biases1'],
                    data_format='channels_last'))
     step2 = K.bias_add(K.dot(step1, self.transition_weights['weights2']),
                        self.transition_weights['biases2'],
                        data_format='channels_last')
     result = K.reshape(step2, (-1, ) + input_shape[-2:])
     return result
Example #12
    def call(self, inputs):
        h = K.bias_add(K.dot(inputs, self.fc_kernel), self.fc_bias)
        tanh_h = K.tanh(h)  # note: the activation applied is tanh, not relu

        self.mu = K.bias_add(K.dot(tanh_h, self.mu_kernel), self.mu_bias)
        self.logvar = K.bias_add(K.dot(tanh_h, self.sigma_kernel),
                                 self.sigma_bias)

        h_z = self.sample_z(self.mu, self.logvar)

        z = K.bias_add(K.dot(h_z, self.trans_kernel), self.trans_bias)
        z = K.tanh(z)

        return z
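The sample_z call presumably implements the usual reparameterization trick, z = mu + exp(logvar / 2) * eps with eps ~ N(0, I), given the mu/logvar pair produced here; the snippet does not show its body.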
Example #13
 def _compute_carry_and_output(self, x, h_tm1, c_tm1, b):
     """Computes carry and output using split kernels."""
     x_i, x_f, x_c, x_o = x
     h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1
     b_i2, b_f2, b_c2, b_o2 = b
     i = self.recurrent_activation(
         x_i + K.bias_add(K.dot(h_tm1_i, K.transpose(self.recurrent_kernel[:, :self.units])), b_i2))
     f = self.recurrent_activation(x_f + K.bias_add(K.dot(
         h_tm1_f, K.transpose(self.recurrent_kernel[:, self.units:self.units * 2])), b_f2))
     c = f * c_tm1 + i * self.activation(x_c + K.bias_add(K.dot(
         h_tm1_c, K.transpose(self.recurrent_kernel[:, self.units * 2:self.units * 3])), b_c2))
     o = self.recurrent_activation(
         x_o + K.bias_add(K.dot(h_tm1_o, K.transpose(self.recurrent_kernel[:, self.units * 3:])), b_o2))
     return c, o
Example #14
 def call(self, inputs, **kwargs):
     main_input, embedding_matrix = inputs
     input_shape_tensor = K.shape(main_input)
     last_input_dim = K.int_shape(main_input)[-1]
     emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
     projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                       self.embedding_weights['projection'])
     if self.add_biases:
         projected = K.bias_add(projected,
                                self.embedding_weights['biases'],
                                data_format='channels_last')
     if 0 < self.projection_dropout < 1:
         projected = K.in_train_phase(
             lambda: K.dropout(projected, self.projection_dropout),
             projected,
             training=kwargs.get('training'))
     attention = K.dot(projected, K.transpose(embedding_matrix))
     if self.scaled_attention:
         # scaled dot-product attention, described in
         # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
         sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
         attention = attention / sqrt_d
     result = K.reshape(
         self.activation(attention),
         (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
     return result
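This layer appears to implement tied-embedding output projection: hidden states are projected and then multiplied against the transposed embedding matrix to produce one score per vocabulary entry, optionally scaled by sqrt(d) as in the scaled dot-product attention the comment cites.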
Example #15
    def call(self, inputs):
        X = inputs[0]  # Node features (B x N x F)
        A = inputs[1]  # Adjacency matrix (B x N x N)

        X_dims = X.get_shape().as_list()
        B, N, F = X_dims

        merged = tf.matmul(K.dot(X, self.self_kernel),
                           tf.transpose(X, (0, 2, 1)))
        attention = tf.nn.tanh(merged)
        attention = K.reshape(attention, (-1, N, N))

        if self.use_bias:
            attention = K.bias_add(attention, self.bias)

        mask = -10e9 * (1.0 - A)
        attention += mask

        attention = tf.nn.softmax(attention)
        output = tf.matmul(attention, X)

        if self.return_attention:
            return (output, attention)
        else:
            return output
Example #16
    def call(self, inputs, training=None):
        def _l2normalize(v, eps=1e-12):
            return v / (K.sum(v**2)**0.5 + eps)

        def power_iteration(W, u):
            _u = u
            _v = _l2normalize(K.dot(_u, K.transpose(W)))
            _u = _l2normalize(K.dot(_v, W))
            return _u, _v

        if self.spectral_normalization:
            W_shape = self.kernel.shape.as_list()
            # Flatten the Tensor
            W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
            _u, _v = power_iteration(W_reshaped, self.u)
            # Calculate Sigma
            sigma = K.dot(_v, W_reshaped)
            sigma = K.dot(sigma, K.transpose(_u))
            # normalize it
            W_bar = W_reshaped / sigma
            # reshape weight tensor
            if training in {0, False}:
                W_bar = K.reshape(W_bar, W_shape)
            else:
                with tf.control_dependencies([self.u.assign(_u)]):
                    W_bar = K.reshape(W_bar, W_shape)

            # update weight
            self.kernel = W_bar

        if self.rank == 1:
            outputs = K.conv1d(inputs,
                               self.kernel,
                               strides=self.strides[0],
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate[0])
        if self.rank == 2:
            outputs = K.conv2d(inputs,
                               self.kernel,
                               strides=self.strides,
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate)
        if self.rank == 3:
            outputs = K.conv3d(inputs,
                               self.kernel,
                               strides=self.strides,
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate)

        if self.use_bias:
            outputs = K.bias_add(outputs,
                                 self.bias,
                                 data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
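A small self-contained check (an illustration under assumed shapes, not code from the source) that the power_iteration/sigma computation above estimates the largest singular value of the flattened kernel:

import numpy as np

W = np.random.randn(8, 4).astype("float32")   # stands in for W_reshaped
u = np.random.randn(1, 4).astype("float32")   # stands in for self.u

for _ in range(50):  # the layer runs one iteration; more steps here for accuracy
    v = u @ W.T
    v /= (np.sum(v ** 2) ** 0.5 + 1e-12)
    u = v @ W
    u /= (np.sum(u ** 2) ** 0.5 + 1e-12)

sigma = (v @ W @ u.T).item()
print(sigma, np.linalg.svd(W, compute_uv=False)[0])  # the two should be close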
Example #17
    def call(self, inputs, params=None):
        # guard against params being None before subscripting it
        if params is None or params.get(self.name + '/depthwise_kernel:0') is None:
            return super(layers.DepthwiseConv2D, self).call(inputs)
        else:
            depthwise_kernel = params.get(self.name + '/depthwise_kernel:0')
            bias = params.get(self.name + '/bias:0')

        outputs = backend.depthwise_conv2d(
            inputs,
            depthwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format)

        if self.use_bias:
            outputs = backend.bias_add(
                outputs,
                bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)

        return outputs
Example #18
 def call(self, inputs):
     output = K.dot(inputs, self.kernel * self.connections)
     if self.use_bias:
         output = K.bias_add(output, self.bias)
     if self.activation is not None:
         output = self.activation(output)
     return output
Example #19
    def call(self, inputs):
        if self.implementation == 1:
            output = K.local_conv(inputs, self.kernel, self.kernel_size,
                                  self.strides,
                                  (self.output_row, self.output_col),
                                  self.data_format)

        elif self.implementation == 2:
            output = local_conv_matmul(inputs, self.kernel, self.kernel_mask,
                                       self.compute_output_shape(inputs.shape))

        elif self.implementation == 3:
            output = local_conv_sparse_matmul(
                inputs, self.kernel, self.kernel_idxs, self.kernel_shape,
                self.compute_output_shape(inputs.shape))

        else:
            raise ValueError('Unrecognized implementation mode: %d.' %
                             self.implementation)

        if self.use_bias:
            output = K.bias_add(output,
                                self.bias,
                                data_format=self.data_format)

        output = self.activation(output)
        return output
Example #20
    def call(self, inputs):
        backend = K.backend()

        if backend == "theano":
            raise Exception(
                'This version of DeepCell only works with the tensorflow backend'
            )

        if self.data_format == 'channels_first':
            output = tf.tensordot(inputs, self.kernel, axes=[[1], [0]])
            output = tf.transpose(output, perm=[0, 3, 1, 2])
            # output = K.dot(inputs, self.kernel)

        elif self.data_format == 'channels_last':
            output = tf.tensordot(inputs, self.kernel, axes=[[3], [0]])

        if self.use_bias:
            output = K.bias_add(output,
                                self.bias,
                                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(output)

        return output
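Side note (inferred from the code, not from the source text): contracting the channel axis with tf.tensordot like this is equivalent to a 1x1 (pointwise) convolution applied at every spatial position; the contraction moves the new channel axis to the end, which is why the channels_first branch transposes the result back afterwards.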
Example #21
 def call(self, inputs):
   output = K.local_conv1d(inputs, self.kernel, self.kernel_size, self.strides)
   if self.use_bias:
     output = K.bias_add(output, self.bias)
   if self.activation is not None:
     output = self.activation(output)
   return output
Example #22
    def call(self, inputs):
        if self.padding == 'causal':
            inputs = array_ops.pad(inputs, self._compute_causal_padding())
        if self.data_format == 'channels_last':
            spatial_start_dim = 1
        else:
            spatial_start_dim = 2

        # Explicitly broadcast inputs and kernels to 4D.
        strides = self.strides * 2
        inputs = array_ops.expand_dims(inputs, spatial_start_dim)
        depthwise_kernel = array_ops.expand_dims(self.depthwise_kernel, 0)
        dilation_rate = (1, ) + self.dilation_rate

        outputs = backend.depthwise_conv2d(inputs,
                                           depthwise_kernel,
                                           strides=strides,
                                           padding=self.padding,
                                           dilation_rate=dilation_rate,
                                           data_format=self.data_format)

        if self.use_bias:
            outputs = backend.bias_add(outputs,
                                       self.bias,
                                       data_format=self.data_format)

        outputs = array_ops.squeeze(outputs, [spatial_start_dim])

        if self.activation is not None:
            return self.activation(outputs)

        return outputs
Example #23
    def _call_one_layer(self, inputs, flatten_memory, training, ws):
        dp_mask = self.get_dropout_mask_for_cell(
            inputs, training, count=1)
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
            flatten_memory, training, count=1)

        if 0 < self.dropout < 1:
            inputs = inputs * dp_mask[0]
        if 0 < self.recurrent_dropout < 1:
            flatten_memory = flatten_memory * rec_dp_mask[0]

        memory = array_ops.reshape(
            flatten_memory, shape=[-1, self.num_memory_slots, self.units])

        input_gate, forget_gate = self._input_and_forget_gates(inputs, memory, ws)
        hs, new_memory = self._attend_over_memory(inputs, memory, ws)

        next_memory = input_gate * new_memory + forget_gate * memory

        flatten_next_memory = array_ops.reshape(
            next_memory, shape=[-1, self.num_memory_slots * self.units])

        mus_and_log_sigmas = K.dot(hs, ws["random_kernel"])
        mus_and_log_sigmas = K.bias_add(mus_and_log_sigmas, ws["random_bias"])
        mus, log_sigmas = array_ops.split(mus_and_log_sigmas, 2, axis=-1)
        sigmas = K.log(1.0 + K.exp(log_sigmas + self.sigma_bias))
        zs = K.random_normal(shape=K.shape(mus)) * sigmas + mus

        return zs, mus, sigmas, hs, flatten_next_memory
Example #24
 def call(self, inputs, training=None):
     def _l2normalize(v, eps=1e-12):
         return v / (K.sum(v ** 2) ** 0.5 + eps)
     def power_iteration(W, u):
         _u = u
         _v = _l2normalize(K.dot(_u, K.transpose(W)))
         _u = _l2normalize(K.dot(_v, W))
         return _u, _v
     W_shape = self.kernel.shape.as_list()
     # Flatten the tensor
     W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
     _u, _v = power_iteration(W_reshaped, self.u)
     # Calculate sigma
     sigma = K.dot(_v, W_reshaped)
     sigma = K.dot(sigma, K.transpose(_u))
     # normalize the kernel by its spectral norm
     W_bar = W_reshaped / sigma
     # reshape weight tensor
     if training in {0, False}:
         W_bar = K.reshape(W_bar, W_shape)
     else:
         with tf.control_dependencies([self.u.assign(_u)]):
             W_bar = K.reshape(W_bar, W_shape)
     output = K.dot(inputs, W_bar)
     if self.use_bias:
         output = K.bias_add(output, self.bias, data_format='channels_last')
     if self.activation is not None:
         output = self.activation(output)
     return output 
Example #25
    def call(self, inputs, prev_states):
        output_fb = prev_states[0]
        recur_output = prev_states[1]
        if not self.cell.use_clock:
            input = K.dot(inputs * self.cell.in_dropout_mask,
                          self.cell.kern_1)
        else:
            input = K.dot(inputs * self.cell.in_dropout_mask * self.cell.clock_kernel,
                          self.cell.kern_1)

        if self.cell.use_out_fb:
            x = K.dot(_pad(self.cell.out_fb_kern * output_fb,
                           (self.cell.in_row, self.cell.in_col)),
                      inputs)
            input = x + input  # add the output-feedback term to the input projection

        if self.cell.use_recur:
            reservoir_output_1 = recur_output * self.cell.recur_dropout_mask
            reservoir_output_2 = K.dot(input, self.cell.kern_2)
            reservoir_output = reservoir_output_1 + reservoir_output_2
        else:
            reservoir_output = K.dot(input, self.cell.kern_2)

        output = K.dot(reservoir_output, self.cell.kern_3)

        return output, [output, reservoir_output]
Example #26
    def call(self, inputs):
        X = inputs[0]  # Node features (N x F)
        A = inputs[1]  # Adjacency matrix (N x N)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(
                attn_for_neighs)  # (N x N) via broadcasting

            # Add nonlinearity
            dense = LeakyReLU(alpha=0.2)(dense)

            # Mask values before activation (Vaswani et al., 2017)
            mask = -10e9 * (1.0 - A)
            dense += mask

            # Apply softmax to get attention coefficients
            dense = K.softmax(dense)  # (N x N)

            # Apply dropout to features and attention coefficients
            dropout_attn = Dropout(self.dropout_rate)(dense)  # (N x N)
            dropout_feat = Dropout(self.dropout_rate)(features)  # (N x F')

            # Linear combination with neighbors' features
            node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            if self.attn_heads_reduction == "concat":
                # If "concat", compute the activation here (Eq. 5)
                node_features = self.activation(node_features)

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (N x F')

        output = self.activation(output)
        return output
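For context (a standalone numpy illustration, not from the source): the additive mask -10e9 * (1.0 - A) pushes the pre-softmax scores of non-neighbors to a large negative value, so their attention coefficients come out numerically zero:

import numpy as np

A = np.array([[1., 1., 0.],
              [1., 1., 1.],
              [0., 1., 1.]], dtype="float32")   # toy adjacency matrix
scores = np.random.randn(3, 3).astype("float32")
masked = scores + (-10e9) * (1.0 - A)
attn = np.exp(masked) / np.exp(masked).sum(axis=-1, keepdims=True)
print(np.round(attn, 3))  # zeros exactly where A == 0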
Example #27
    def call(self, inputs):
        X = inputs[0]  # Node features (B x N x F)
        A = inputs[1]  # Adjacency matrix (B x N x N)

        X_dims = X.get_shape().as_list()
        B, N, F = X_dims

        outputs = []
        attentions = []
        for head in range(self.attn_heads):
            # W in the paper (F x F')
            kernel = self.kernels[head]

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (B x N x F')
            dropout_feat = Dropout(self.dropout_rate)(features)  # (B x N x F')

            neighbor_kernel = self.neighbor_kernels[head]
            attn_kernel = self.attn_kernels[head]

            neighbor_features = K.dot(X, neighbor_kernel)
            dropout_neighbor = Dropout(self.dropout_rate)(neighbor_features)

            merged = tf.matmul(K.dot(dropout_feat, attn_kernel),
                               tf.transpose(dropout_neighbor, (0, 2, 1)))

            attention = tf.nn.tanh(merged)
            attention = K.reshape(attention, (-1, N, N))

            mask = -10e9 * (1.0 - A)
            attention += mask

            attention = tf.nn.softmax(attention)
            dropout_attn = Dropout(self.dropout_rate)(attention)

            node_features = tf.matmul(dropout_attn, dropout_feat)

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            if self.return_attention:
                attentions.append(attention)
            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs, axis=-1)  # (B x N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (B x N x F')
            # If "average", compute the activation here (Eq. 6)

        output = self.activation(output)

        if self.return_attention:
            attentions = K.stack(attentions, axis=1)
            return (output, attentions)
        else:
            return output
Example #28
 def call(self, inputs):
     binary_kernel = binarize(self.kernel, H=self.H)
     output = K.dot(inputs, binary_kernel)
     if self.use_bias:
         output = K.bias_add(output, self.bias)
     if self.activation is not None:
         output = self.activation(output)
     return output
Example #29
    def call(self, inputs):
        input_shape = K.shape(inputs)
        batch_size = input_shape[0]
        if self.data_format == 'channels_first': #?
            h_axis, w_axis = 2, 3
        else:
            h_axis, w_axis = 1, 2

        height, width = input_shape[h_axis], input_shape[w_axis]
        kernel_h, kernel_w = self.kernel_size
        stride_h, stride_w = self.strides
        if self.output_padding is None:
            out_pad_h = out_pad_w = None
        else:
            out_pad_h, out_pad_w = self.output_padding

        # Infer the dynamic output shape:
        out_height = conv_utils.deconv_output_length(height,
                                                     kernel_h,
                                                     self.padding,
                                                     output_padding=out_pad_h,
                                                     stride=stride_h,
                                                     dilation=self.dilation_rate[0])
        out_width = conv_utils.deconv_output_length(width,
                                                    kernel_w,
                                                    self.padding,
                                                    output_padding=out_pad_w,
                                                    stride=stride_w,
                                                    dilation=self.dilation_rate[1])
        if self.data_format == 'channels_first':
            output_shape = (batch_size, self.filters, out_height, out_width)
        else:
            output_shape = (batch_size, out_height, out_width, self.filters)

        scaled_kernel = self.kernel * self.runtime_coeff
        kernel = Ke.transpose(scaled_kernel, [0, 1, 3, 2])  #?
        kernel = Ke.pad(kernel, [[1, 1], [1, 1], [0, 0], [0, 0]])
        fused_kernel = Ke.add_n([kernel[1:, 1:],
                                 kernel[:-1, 1:],
                                 kernel[1:, :-1],
                                 kernel[:-1, :-1]])  #?
        outputs = K.conv2d_transpose(inputs,
                                     fused_kernel,
                                     output_shape,
                                     self.strides,
                                     padding=self.padding,
                                     data_format=self.data_format,
                                     dilation_rate=self.dilation_rate)

        if self.use_bias:
            outputs = K.bias_add(outputs,
                                 self.bias,
                                 data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Example #30
  def call(self, inputs):
    output = K.local_conv(inputs, self.kernel, self.kernel_size, self.strides,
                          (self.output_row, self.output_col), self.data_format)

    if self.use_bias:
      output = K.bias_add(output, self.bias, data_format=self.data_format)

    output = self.activation(output)
    return output
Example #31
 def input_conv(self, x, w, b=None, padding='valid'):
   conv_out = backend.conv2d(x, w, strides=self.strides,
                             padding=padding,
                             data_format=self.data_format,
                             dilation_rate=self.dilation_rate)
   if b is not None:
     conv_out = backend.bias_add(conv_out, b,
                                 data_format=self.data_format)
   return conv_out
Example #32
 def input_conv_u(self, x, w, b=None, padding='same'):
   conv_out = K.conv2d(x, w, strides=self.strides,
                       padding=padding,
                       data_format='channels_last',
                       dilation_rate=self.dilation_rate)
   if b is not None:
     conv_out = K.bias_add(conv_out, b,
                           data_format='channels_last')
   return conv_out
Example #33
 def input_conv(self, x, w, b=None, padding='valid'):
   conv_out = K.conv2d(x, w, strides=self.strides,
                       padding=padding,
                       data_format=self.data_format,
                       dilation_rate=self.dilation_rate)
   if b is not None:
     conv_out = K.bias_add(conv_out, b,
                           data_format=self.data_format)
   return conv_out
Example #34
  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]

    # inputs projected by all gate matrices at once
    matrix_x = K.dot(cell_inputs, kernel)
    matrix_x = K.bias_add(matrix_x, input_bias)

    x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=1)

    # hidden state projected by all gate matrices at once
    matrix_inner = K.dot(h_tm1, recurrent_kernel)
    matrix_inner = K.bias_add(matrix_inner, recurrent_bias)

    recurrent_z, recurrent_r, recurrent_h = array_ops.split(matrix_inner, 3,
                                                            axis=1)
    z = recurrent_activation(x_z + recurrent_z)
    r = recurrent_activation(x_r + recurrent_r)
    hh = activation(x_h + r * recurrent_h)

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]
Example #35
  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    z = K.dot(cell_inputs, kernel)
    z += K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, bias)

    z0, z1, z2, z3 = array_ops.split(z, 4, axis=1)

    i = recurrent_activation(z0)
    f = recurrent_activation(z1)
    c = f * c_tm1 + i * activation(z2)
    o = recurrent_activation(z3)

    h = o * activation(c)
    return h, [h, c]
Example #36
  def call(self, inputs):
    if self.implementation == 1:
      output = K.local_conv(inputs, self.kernel, self.kernel_size, self.strides,
                            (self.output_length,), self.data_format)

    elif self.implementation == 2:
      output = local_conv_matmul(inputs, self.kernel, self.kernel_mask,
                                 self.compute_output_shape(inputs.shape))

    else:
      raise ValueError('Unrecognized implementation mode: %d.'
                       % self.implementation)

    if self.use_bias:
      output = K.bias_add(output, self.bias, data_format=self.data_format)

    output = self.activation(output)
    return output
Example #37
  def step(cell_inputs, cell_states):
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    # Only use the second half of the bias weights.
    _, real_bias = array_ops.split(bias, 2)

    z = K.dot(cell_inputs, kernel)
    z += K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, real_bias)

    z0 = z[:, :units]
    z1 = z[:, units:2 * units]
    z2 = z[:, 2 * units:3 * units]
    z3 = z[:, 3 * units:]

    i = recurrent_activation(z0)
    f = recurrent_activation(z1)
    c = f * c_tm1 + i * activation(z2)
    o = recurrent_activation(z3)

    h = o * activation(c)
    return h, [h, c]
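A note on the bias split in this last step function (a plausible reading, not stated in the snippet): weight layouts converted from CuDNN store separate input-side and recurrent-side bias vectors stacked together, and the step discards one half rather than summing both, presumably because the other half is zero in the layout it expects.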