Пример #1
0
    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        #final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(u_hat_vecs[:,:,:,0]) #shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            c = softmax(b, 1)
            # o = K.batch_dot(c, u_hat_vecs, [2, 2])
            o = tf.einsum('bin,binj->bij', c, u_hat_vecs)
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
            if i < self.routings - 1:
                o = K.l2_normalize(o, -1)
                # b = K.batch_dot(o, u_hat_vecs, [2, 3])
                b = tf.einsum('bij,binj->bin', o, u_hat_vecs)
                if K.backend() == 'theano':
                    b = K.sum(b, axis=1)

        return self.activation(o)
Пример #2
0
    def call(self, inputs):
        

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = softmax(b, 1)
            o = self.activation(keras.backend.batch_dot(c, hat_inputs, [2, 2]))
            if i < self.routings - 1:
                b = keras.backend.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)

        return o
Пример #3
0
def conv1d(inputs, filter_, data_format='channels_first'):
    filter_ = K.expand_dims(filter_)
    zero = K.constant(np.zeros(filter_.shape))
    filter_first_row = K.concatenate([filter_, zero], axis=0)
    filter_second_row = K.concatenate([zero, filter_], axis=0)
    full_filter = K.concatenate([filter_first_row, filter_second_row], axis=-1)
    return K.conv1d(inputs, full_filter, data_format=data_format)
    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(
            u_hat_vecs[:, :, :,
                       0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            b = K.permute_dimensions(
                b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs
    def call(self, u_ves):
        print(self.W_kernel.shape)
        print("*****", u_ves.shape)
        u_ves = tf.transpose(u_ves, perm=[0, 2, 1])

        print("*****", u_ves.shape)
        u_hat_vecs = K.conv1d(u_ves, self.W_kernel)
        print("*****", u_hat_vecs.shape)
        batch_size = tf.shape(u_ves)[0]
        input_num_capsule = tf.shape(u_ves)[1]
        u_hat_vecs = tf.reshape(u_hat_vecs,
                                (batch_size, input_num_capsule,
                                 self.out_num_capsule, self.out_dim_capusle))
        u_hat_vecs = tf.transpose(
            u_hat_vecs, perm=[0, 2, 1, 3]
        )  # finally shape = [N0ne,out_num_capsule,input_num_capsule,out_dim_capsule]

        # Dynamic routing
        b = tf.zeros_like(
            u_hat_vecs[:, :, :,
                       0])  #shape = [N0ne,out_num_capsule,input_num_capsule]
        for i in range(self.routings):
            c = softmax(b, 1)
            output = K.batch_dot(c, u_hat_vecs, [2, 2])
            output = self.activation(output)
            if i < self.routings - 1:
                #                 o = tf.nn.l2_normalize(o,-1)
                b = b + K.batch_dot(output, u_hat_vecs, [2, 3])
        pose = output
        print("pose is :", pose.shape)
        return pose
    def call(self, inputs):
        def _l2normalize(v, eps=1e-12):
            return v / (K.sum(v**2)**0.5 + eps)

        def power_iteration(W, u):
            _u = u
            _v = _l2normalize(K.dot(_u, K.transpose(W)))
            _u = _l2normalize(K.dot(_v, W))
            return _u, _v

        if self.spectral_normalization:
            W_shape = self.kernel.shape.as_list()
            #Flatten the Tensor
            W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
            _u, _v = power_iteration(W_reshaped, self.u)
            #Calculate Sigma
            sigma = K.dot(_v, W_reshaped)
            sigma = K.dot(sigma, K.transpose(_u))
            #normalize it
            W_bar = W_reshaped / sigma
            #reshape weight tensor
            if training in {0, False}:
                W_bar = K.reshape(W_bar, W_shape)
            else:
                with tf.control_dependencies([self.u.assign(_u)]):
                    W_bar = K.reshape(W_bar, W_shape)

            #update weitht
            self.kernel = W_bar

        if self.rank == 1:
            outputs = K.conv1d(inputs,
                               self.kernel,
                               strides=self.strides[0],
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate[0])
        if self.rank == 2:
            outputs = K.conv2d(inputs,
                               self.kernel,
                               strides=self.strides,
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate)
        if self.rank == 3:
            outputs = K.conv3d(inputs,
                               self.kernel,
                               strides=self.strides,
                               padding=self.padding,
                               data_format=self.data_format,
                               dilation_rate=self.dilation_rate)

        if self.use_bias:
            outputs = K.bias_add(outputs,
                                 self.bias,
                                 data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Пример #7
0
    def call(self, inputs):
        if self.share_weights:
            u_hat_vectors = K.conv1d(inputs, self.W)
        else:
            u_hat_vectors = K.local_conv1d(inputs, self.W, [1], [1])

        # u_hat_vectors : The spatially transformed input vectors (with local_conv_1d)

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        u_hat_vectors = K.reshape(u_hat_vectors,
                                  (batch_size, input_num_capsule,
                                   self.num_capsule, self.dim_capsule))

        u_hat_vectors = K.permute_dimensions(u_hat_vectors, (0, 2, 1, 3))
        routing_weights = K.zeros_like(u_hat_vectors[:, :, :, 0])

        for i in range(self.routings):
            capsule_weights = K.softmax(routing_weights, 1)
            outputs = K.batch_dot(capsule_weights, u_hat_vectors, [2, 2])
            if K.ndim(outputs) == 4:
                outputs = K.sum(outputs, axis=1)
            if i < self.routings - 1:
                outputs = K.l2_normalize(outputs, -1)
                routing_weights = K.batch_dot(outputs, u_hat_vectors, [2, 3])
                if K.ndim(routing_weights) == 4:
                    routing_weights = K.sum(routing_weights, axis=1)

        return self.activation(outputs)
    def call(self, inputs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.

        This change can improve the feature representation of Capsule.

        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to realize a standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = softmax(b, 1)
            o = self.activation(caps_batch_dot(c, hat_inputs))
            if i < self.routings - 1:
                b = caps_batch_dot(o, hat_inputs)
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)

        return o
Пример #9
0
    def call(self, x):
        X = [0] * 128
        for i in range(self.n_kernels):
            X[i] = softplus(
                K.conv1d(x[:, :, i:i + 1], self.kernels[i], padding='same'))
            K.bias_add(X[i], self.bias[i], data_format=self.data_format)
        X = K.concatenate(X, axis=2)

        return X
Пример #10
0
def tokenize(x, n, padding='same'):
    Y = K.argmax(x, axis=-1)
    if n == 1:
        return Y
    W = K.constant([[[4**k]] for k in range(n)])
    Y = K.cast(Y, dtype=tf.float32)
    W = K.cast(W, dtype=tf.float32)
    Y = K.expand_dims(Y, axis=-1)
    Y = K.conv1d(x=Y, kernel=W, strides=1, padding=padding)
    Y = K.squeeze(Y, axis=-1)
    return Y
Пример #11
0
    def call(self, x):
        # filters = K.zeros(shape=(N_filt, Filt_dim))

        # Get beginning and end frequencies of the filters.
        min_freq = 50.0
        min_band = 50.0
        filt_beg_freq = K.abs(self.filt_b1) + min_freq / self.freq_scale
        filt_end_freq = filt_beg_freq + (K.abs(self.filt_band) +
                                         min_band / self.freq_scale)

        # Filter window (hamming).
        n = np.linspace(0, self.Filt_dim, self.Filt_dim)
        window = 0.54 - 0.46 * np.cos(2 * math.pi * n / self.Filt_dim)
        # window = K.cast(window, "float32")
        # window = tf.Variable(window, name='sincnet_window', trainable=False)

        # TODO what is this?
        t_right_linspace = np.linspace(1, (self.Filt_dim - 1) / 2,
                                       int((self.Filt_dim - 1) / 2),
                                       dtype=np.float32)
        t_right = np.float32(t_right_linspace / self.fs)
        # t_right = tf.Variable(t_right, name='sincnet_t_right', trainable=False, dtype=tf.float32)

        # Compute the filters.
        output_list = []
        for i in range(self.N_filt):
            low_pass1 = 2 * filt_beg_freq[i] * sinc(
                filt_beg_freq[i] * self.freq_scale, t_right)
            low_pass2 = 2 * filt_end_freq[i] * sinc(
                filt_end_freq[i] * self.freq_scale, t_right)
            band_pass = (low_pass2 - low_pass1)
            band_pass = band_pass / K.max(band_pass)
            output_list.append(band_pass * window)
        filters = K.stack(output_list)  # (80, 251)
        filters = K.transpose(filters)  # (251, 80)
        filters = K.reshape(
            filters, (self.Filt_dim, 1, self.N_filt)
        )  # (251,1,80) in TF: (filter_width, in_channels, out_channels) in PyTorch (out_channels, in_channels, filter_width)
        '''
        Given an input tensor of shape [batch, in_width, in_channels] if data_format is "NWC", 
        or [batch, in_channels, in_width] if data_format is "NCW", and a filter / kernel tensor of shape [filter_width, in_channels, out_channels], 
        this op reshapes the arguments to pass them to conv2d to perform the equivalent convolution operation.
        Internally, this op reshapes the input tensors and invokes tf.nn.conv2d. For example, if data_format does not start with "NC", 
        a tensor of shape [batch, in_width, in_channels] is reshaped to [batch, 1, in_width, in_channels], and the filter is reshaped to 
        [1, filter_width, in_channels, out_channels]. The result is then reshaped back to [batch, out_width, out_channels] 
        (where out_width is a function of the stride and padding as in conv2d) and returned to the caller.
        '''

        out = K.conv1d(x, kernel=filters)

        return out
Пример #12
0
    def call(self, x):
        '''
        Given an input tensor of shape [batch, in_width, in_channels] if data_format is "NWC",
        or [batch, in_channels, in_width] if data_format is "NCW", and a filter / kernel tensor of shape [filter_width, in_channels, out_channels],
        this op reshapes the arguments to pass them to conv2d to perform the equivalent convolution operation.
        Internally, this op reshapes the input tensors and invokes tf.nn.conv2d. For example, if data_format does not start with "NC",
        a tensor of shape [batch, in_width, in_channels] is reshaped to [batch, 1, in_width, in_channels], and the filter is reshaped to
        [1, filter_width, in_channels, out_channels]. The result is then reshaped back to [batch, out_width, out_channels]
        (where out_width is a function of the stride and padding as in conv2d) and returned to the caller.
        '''

        # Do the convolution.
        out = K.conv1d(x, kernel=self.generate_filters())

        return out
Пример #13
0
def conv1d(inputs: tf.Tensor, filter_: tf.Tensor, data_format: str = 'channels_first') -> tf.Tensor:
    """ Convolve filter_(1D) with each row of inputs

    inputs: Tensor of shape [batch_size, number_of_channels, signal_length]
    filter_: Tensor of shape [filter_length]
    """
    filter_: tf.Tensor = K.expand_dims(filter_)
    filter_: tf.Tensor = K.expand_dims(filter_)
    debug_tensor(LOGGER, filter_, 'c1d.fil')
    zero: tf.Tensor = K.constant(np.zeros(filter_.shape))
    debug_tensor(LOGGER, zero, 'c1d.zero')
    filter_first_row = K.concatenate([filter_, zero], axis=1)
    debug_tensor(LOGGER, filter_first_row, 'c1d.ffr')
    filter_second_row = K.concatenate([zero, filter_], axis=1)
    debug_tensor(LOGGER, filter_second_row, 'c1d.fsr')
    full_filter: tf.Tensor = K.concatenate([filter_first_row, filter_second_row], axis=2)
    debug_tensor(LOGGER, full_filter, 'c1d.ff')
    return K.conv1d(inputs, full_filter, data_format=data_format)
    def call(self, inputs):
        if self.rank == 1:
            outputs = K.conv1d(
                inputs,
                self.kernel,
                strides=self.strides[0],
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate[0],
            )
        if self.rank == 2:
            kernel = self.kernel
            if self.standardization:
                kernel_mean = K.mean(kernel, axis=[0, 1, 2], keepdims=True)
                kernel = kernel - kernel_mean
                kernel_std = K.std(kernel, axis=[0, 1, 2], keepdims=True)
                kernel = kernel / (kernel_std + 1e-5)
            outputs = K.conv2d(
                inputs,
                kernel,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate,
            )
        if self.rank == 3:
            outputs = K.conv3d(
                inputs,
                self.kernel,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate,
            )

        if self.use_bias:
            outputs = K.bias_add(outputs, self.bias, data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Пример #15
0
    def call(self, x, **kwargs):
        debug_print("call")
        # filters = K.zeros(shape=(N_filt, Filt_dim))

        # Compute the filters.
        output_list = []
        for i in range(self.N_filt):
            low_pass1 = (
                2 * self.filt_beg_freq[i] *
                sinc(self.filt_beg_freq[i] * self.freq_scale, self.t_right))
            low_pass2 = (
                2 * self.filt_end_freq[i] *
                sinc(self.filt_end_freq[i] * self.freq_scale, self.t_right))
            band_pass = low_pass2 - low_pass1
            band_pass = band_pass / K.max(band_pass)
            output_list.append(band_pass * self.window)
        filters = K.stack(output_list)  # (80, 251)
        filters = K.transpose(filters)  # (251, 80)
        filters = K.reshape(
            filters, (self.Filt_dim, 1, self.N_filt)
        )  # (251,1,80) in TF: (filter_width, in_channels, out_channels) in
        # PyTorch (out_channels, in_channels, filter_width)
        """Given an input tensor of shape [batch, in_width, in_channels] if data_format is "NWC", or [batch, 
        in_channels, in_width] if data_format is "NCW", and a filter / kernel tensor of shape [filter_width, 
        in_channels, out_channels], this op reshapes the arguments to pass them to conv2d to perform the equivalent 
        convolution operation. Internally, this op reshapes the input tensors and invokes tf.nn.conv2d. For example, 
        if data_format does not start with "NC", a tensor of shape [batch, in_width, in_channels] is reshaped to [
        batch, 1, in_width, in_channels], and the filter is reshaped to [1, filter_width, in_channels, out_channels]. 
        The result is then reshaped back to [batch, out_width, out_channels] (where out_width is a function of the 
        stride and padding as in conv2d) and returned to the caller. """

        # Do the convolution.
        debug_print("call")
        debug_print("  x", x)
        debug_print("  filters", filters)
        out = K.conv1d(x, kernel=filters)
        debug_print("  out", out)

        return out
    def call(self, inputs):
        if self._is_causal:  # Apply causal padding to inputs for Conv1D.
            inputs = array_ops.pad(inputs, self._compute_causal_padding(inputs))
        self.U = self.calcU()
        kernel = self.W * self.U
        outputs = K.conv1d(
            inputs,
            kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)

        if self.use_bias:
            if self.data_format == 'channels_first':
                bias = array_ops.reshape(self.bias, (1, self.filters, 1))
                outputs += bias
            else:
                outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Пример #17
0
    def call(self, inputs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.

        This change can improve the feature representation of Capsule.

        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to realize a standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = softmax(b, 1)
            # o = self.activation(K.batch_dot(c, hat_inputs, [2, 2])) # [2, 2]
            o = tf.einsum('bin,binj->bij', c, hat_inputs)
            # print(2, o.shape)
            if i < self.routings - 1:
                o = K.l2_normalize(o, -1)
                # b = K.batch_dot(o, hat_inputs, [2, 3])
                b = tf.einsum('bij,binj->bin', o, hat_inputs)

        return o
Пример #18
0
    def attention(self,
                  pre_q,
                  pre_v,
                  pre_k,
                  out_seq_len: int,
                  d_model: int,
                  mask=None,
                  training=None):
        """
        Calculates the output of the attention once the affine transformations
        of the inputs are done. Here's the shapes of the arguments:
        :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
        :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
        :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
        :param out_seq_len: the length of the output sequence
        :param d_model: dimensionality of the model (by the paper)
        :param training: Passed by Keras. Should not be defined manually.
          Optional scalar tensor indicating if we're in training
          or inference phase.
        """
        # shaping Q and V into (batch_size, num_heads, seq_len, d_model//heads)
        q = K.permute_dimensions(pre_q, [0, 2, 1, 3])
        v = K.permute_dimensions(pre_v, [0, 2, 1, 3])

        if self.compression_window_size is None:
            k_transposed = K.permute_dimensions(pre_k, [0, 2, 3, 1])
        else:
            # Memory-compressed attention described in paper
            # "Generating Wikipedia by Summarizing Long Sequences"
            # (https://arxiv.org/pdf/1801.10198.pdf)
            # It compresses keys and values using 1D-convolution which reduces
            # the size of Q * K_transposed from roughly seq_len^2
            # to convoluted_seq_len^2. If we use strided convolution with
            # window size = 3 and stride = 3, memory requirements of such
            # memory-compressed attention will be 9 times smaller than
            # that of the original version.
            if self.use_masking:
                raise NotImplementedError(
                    "Masked memory-compressed attention has not "
                    "been implemented yet")
            k = K.permute_dimensions(pre_k, [0, 2, 1, 3])
            k, v = [
                K.reshape(
                    # Step 3: Return the result to its original dimensions
                    # (batch_size, num_heads, seq_len, d_model//heads)
                    K.bias_add(
                        # Step 3: ... and add bias
                        K.conv1d(
                            # Step 2: we "compress" K and V using strided conv
                            K.reshape(
                                # Step 1: we reshape K and V to
                                # (batch + num_heads,  seq_len, d_model//heads)
                                item,
                                (-1, K.int_shape(item)[-2],
                                 d_model // self.num_heads)),
                            kernel,
                            strides=self.compression_window_size,
                            padding='valid',
                            data_format='channels_last'),
                        bias,
                        data_format='channels_last'),
                    # new shape
                    K.concatenate(
                        [K.shape(item)[:2], [-1, d_model // self.num_heads]]))
                for item, kernel, bias in ((k, self.k_conv_kernel,
                                            self.k_conv_bias),
                                           (v, self.v_conv_kernel,
                                            self.v_conv_bias))
            ]
            k_transposed = K.permute_dimensions(k, [0, 1, 3, 2])
        # shaping K into (batch_size, num_heads, d_model//heads, seq_len)
        # for further matrix multiplication
        sqrt_d = K.constant(np.sqrt(d_model // self.num_heads),
                            dtype=K.floatx())
        q_shape = tf.shape(q)
        k_t_shape = tf.shape(k_transposed)
        v_shape = tf.shape(v)
        # before performing batch_dot all tensors are being converted to 3D
        # shape (batch_size * num_heads, rows, cols) to make sure batch_dot
        # performs identically on all backends
        attention_heads = K.reshape(
            K.batch_dot(
                self.apply_dropout_if_needed(K.softmax(
                    self.mask_attention_if_needed(K.batch_dot(
                        K.reshape(q, tf.stack((-1, q_shape[-2], q_shape[-1]))),
                        K.reshape(k_transposed,
                                  tf.stack(
                                      (-1, k_t_shape[-2], k_t_shape[-1])))) /
                                                  sqrt_d,
                                                  mask=mask)),
                                             training=training),
                K.reshape(v, tf.stack((-1, v_shape[-2], v_shape[-1])))),
            tf.stack((-1, self.num_heads, q_shape[-2], v_shape[-1])))
        attention_heads_merged = K.reshape(
            K.permute_dimensions(attention_heads, [0, 2, 1, 3]), (-1, d_model))
        if out_seq_len is None:
            output_shape = tf.stack([-1, tf.shape(pre_k)[1], d_model])
        else:
            output_shape = (-1, out_seq_len, d_model)
        attention_out = K.reshape(
            K.dot(attention_heads_merged, self.output_weights), output_shape)
        return attention_out
Пример #19
0
    def call(self, x):
        out = K.conv1d(x, kernel=self.filters)

        return out