Example #1
    def call(self, V, training):
        query = tf.matmul(V, self.wan_query) + self.wan_query_b
        query = self.bn1(query, training=training)
        query = activations.elu(query)

        key = tf.matmul(V, self.wan_key) + self.wan_key_b
        key = self.bn2(key, training=training)
        key = tf.transpose(key, [0, 2, 1])
        key = activations.elu(key)

        weights = tf.matmul(query, key) + self.wan_weights_b
        weights = self.bn3(weights, training=training)
        #weights = layers.ELU(weights)
        weights = tf.nn.softmax(weights, axis=1)

        m1 = tf.matmul(weights, V)
        m1 = self.bn4(m1, training=training)
        return activations.elu(m1)
Example #2
    def call(self, inputs, training):
        x = self.embedding(inputs)

        x1 = self.wan(x, training)
        #x1 = self.dp(x1)
        x2 = self.lan(x1, training)
        #x2 = self.dp(x2)
        x = self.can(x1, x2, training)
        #x = self.dp(x)

        for _ in range(self.SETTINGS.LAYERS):
            # MANN layer
            x1 = self.wan(x, training)
            x2 = self.lan(x1, training)
            x = self.can(x1, x2, training)

        #x = self.pooling(x)
        #x = self.conv1(x)
        x = self.conv2(x)
        x = self.dp(x)
        x = self.bn1(x, training=training)
        x = activations.elu(x)
        #x = self.bn3(x, training=training)
        x = self.conv3(x)
        x = self.dp(x)
        x = self.bn2(x, training=training)
        x = activations.elu(x)
        #x = self.bn4(x, training=training)
        #x = self.conv4(x)
        #x = self.bn5(x, training=training)
        x = self.pooling(x)
        x = self.flatten(x)
        x = self.dp(x)
        #x = self.Dense1(x)
        #x = self.dp(x)
        #x = self.Dense2(x)
        #x = self.dp(x)
        x = self.Dense3(x)
        x = self.dp(x)
        x = self.bn3(x, training=training)
        x = activations.elu(x)
        #x = self.dp(x)
        x = self.Dense4(x)
        return x
Example #3
def selu(x):
    """Scaled Exponential Linear Unit. (Klambauer et al., 2017)
    # Arguments
        x: A tensor or variable to compute the activation function for.
    # References
        - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
    """
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * elu(x, alpha)
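
As a quick sanity check (assuming elu here is tf.keras.activations.elu), the hand-rolled selu above is just scale * elu(x, alpha), so it should agree with the built-in activations.selu up to float precision:

import numpy as np
import tensorflow as tf
from tensorflow.keras import activations

x = tf.constant(np.linspace(-3.0, 3.0, 7), dtype=tf.float32)
manual = 1.0507009873554805 * activations.elu(x, alpha=1.6732632423543772)
np.testing.assert_allclose(manual.numpy(), activations.selu(x).numpy(),
                           rtol=1e-5, atol=1e-6)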
Example #4
    def call(self, m1, m2, training):
        query = tf.matmul(m1, self.q) + self.q_b
        query = self.bn1(query, training=training)
        query = activations.elu(query)

        key = tf.matmul(m2, self.k) + self.k_b
        key = self.bn2(key, training=training)
        key = activations.elu(key)

        weights = tf.matmul(query, tf.transpose(key,
                                                [0, 2, 1])) + self.weights_b
        weights = self.bn3(weights, training=training)
        #weights = tf.nn.relu(weights)
        weights = tf.nn.softmax(weights, axis=1)

        m3 = tf.matmul(
            weights,
            tf.matmul(m2, self.v2) +
            tf.matmul(weights, tf.matmul(m1, self.v1)) + self.m3_b)
        m3 = self.bn4(m3, training=training)
        return activations.elu(m3)
Example #5
    def call(self, m1, training):
        d = tf.zeros([
            self.SETTINGS.BATCH_SIZE, 1,
            self.SETTINGS.WINDOW_SIZE * self.SETTINGS.EMB_DIM
        ])
        s = math.floor(self.SETTINGS.WINDOW_SIZE / 2)
        pad = tf.zeros([self.SETTINGS.BATCH_SIZE, s, self.SETTINGS.EMB_DIM])
        m1 = tf.concat([pad, m1, pad], axis=1)

        for i in range(s, self.SETTINGS.MAX_LEN + s):
            t = tf.reshape(m1[:, i - s:i + s + 1], [
                self.SETTINGS.BATCH_SIZE, 1,
                self.SETTINGS.WINDOW_SIZE * self.SETTINGS.EMB_DIM
            ])
            d = tf.concat([d, t], axis=1)
        d = d[:, 1:]

        l = tf.matmul(d, self.f) + self.lan_key_b
        l = self.bn1(l, training=training)
        l = activations.elu(l)

        query_2 = tf.matmul(l, self.lan_query) + self.lan_query_b
        query_2 = self.bn2(query_2, training=training)
        query_2 = activations.elu(query_2)

        key_2 = tf.matmul(l, self.lan_key) + self.lan_key_b
        key_2 = self.bn3(key_2, training=training)
        key_2 = activations.elu(key_2)

        weights = tf.matmul(query_2, tf.transpose(
            key_2, [0, 2, 1])) + self.lan_weights_b
        weights = self.bn4(weights, training=training)
        #weights = layers.ELU(weights)
        weights = tf.nn.softmax(weights, axis=1)

        l = tf.matmul(weights, l)
        l = self.bn5(l, training=training)
        return activations.elu(l)
Example #6
    def call(self, inputs):
        mean, var, *adj = inputs
        #         assert len(adj) == 2

        mean = activations.elu(mean @ self.kernel_mean)
        var = activations.relu(var @ self.kernel_var)

        attention = tf.math.exp(-self.gamma * var)
        mean = tf.sparse.sparse_dense_matmul(adj[0], mean * attention)
        var = tf.sparse.sparse_dense_matmul(adj[1],
                                            var * attention * attention)

        if self.use_bias:
            mean += self.bias_mean
            var += self.bias_var

        return self.activation(mean), self.activation(var)
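
To make this propagation rule concrete, here is a small self-contained sketch (toy graph and values chosen purely for illustration, not taken from the layer above): the exp(-gamma * var) attention shrinks high-variance nodes before the sparse neighbourhood aggregation.

import tensorflow as tf

# 3-node toy graph; for simplicity the same adjacency is used for mean and var
adj = tf.sparse.SparseTensor(indices=[[0, 1], [1, 0], [1, 2], [2, 1]],
                             values=[1.0, 1.0, 1.0, 1.0],
                             dense_shape=[3, 3])
mean = tf.constant([[0.5], [1.0], [-0.2]])
var = tf.constant([[0.1], [2.0], [0.3]])
gamma = 1.0

attention = tf.math.exp(-gamma * var)  # close to 0 for the high-variance node
mean_agg = tf.sparse.sparse_dense_matmul(adj, mean * attention)
var_agg = tf.sparse.sparse_dense_matmul(adj, var * attention * attention)
print(mean_agg.numpy(), var_agg.numpy(), sep='\n')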
Example #7
def forward(inputs, weights):

    # First layer
    x, *adj = inputs
    h = x @ weights[0]

    mean = activations.elu(h)
    var = activations.relu(h)

    attention = tf.exp(-var)
    mean = tf.sparse.sparse_dense_matmul(adj[0], mean * attention)
    var = tf.sparse.sparse_dense_matmul(adj[1], var * attention * attention)
    mean = activations.elu(mean)
    var = activations.elu(var)

    # Middle layers
    i = 1
    while i < len(weights) - 2:

        mean = activations.elu(mean @ weights[i])
        var = activations.relu(var @ weights[i + 1])

        attention = tf.math.exp(-var)
        mean = tf.sparse.sparse_dense_matmul(adj[0], mean * attention)
        var = tf.sparse.sparse_dense_matmul(adj[1],
                                            var * attention * attention)

        mean = activations.elu(mean)
        var = activations.elu(var)
        i += 2

    # Output layer
    mean = activations.elu(mean @ weights[i])
    var = activations.relu(var @ weights[i + 1])

    attention = tf.math.exp(-var)
    mean = tf.sparse.sparse_dense_matmul(adj[0], mean * attention)
    var = tf.sparse.sparse_dense_matmul(adj[1], var * attention * attention)

    # Sampling layer: reparameterization trick, output = mean + sqrt(var) * standard-normal noise
    sample = tf.random.normal(tf.shape(var), 0, 1, dtype='float32')
    output = mean + tf.math.sqrt(var + 1e-8) * sample

    return output
Example #8
    def call(self, inputs):
        x, *adj = inputs
        # assert len(adj) == 2

        mean = tf.slice(x, [0, 0], [-1, self.dim])
        var = tf.slice(x, [0, self.dim], [-1, self.dim])

        mean = activations.elu(mean @ self.kernel_mean)
        var = activations.relu(var @ self.kernel_var)

        attention = tf.math.exp(-self.gamma*var)
        mean = tf.sparse.sparse_dense_matmul(adj[0], mean * attention)
        var = tf.sparse.sparse_dense_matmul(adj[1], var * attention * attention)

        if self.use_bias:
            mean += self.bias_mean
            var += self.bias_var

        sample = tf.random.normal(tf.shape(var), 0, 1, dtype=tf.float32)
        output = mean + tf.math.sqrt(var + 1e-8) * sample
        return self.activation(output)
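
The sampling step above is the standard reparameterization trick. A tiny standalone check (with made-up mean and var values) shows the samples end up with the requested moments:

import tensorflow as tf

mean = tf.constant([[2.0]])
var = tf.constant([[0.25]])
eps = tf.random.normal([100000, 1])
samples = mean + tf.math.sqrt(var + 1e-8) * eps
# prints roughly 2.0 and 0.5: the noise is scaled by the std and shifted by the mean
print(tf.reduce_mean(samples).numpy(), tf.math.reduce_std(samples).numpy())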
Example #9
    def call(self, inputs):
        x, *adj = inputs
        # assert len(adj) == 2

        h = x @ self.kernel

        if self.use_bias:
            h += self.bias

        mean = activations.elu(tf.slice(h, [0, 0], [-1, self.dim]))
        var = activations.relu(tf.slice(h, [0, self.dim], [-1, self.dim]))

        KL_divergence = 0.5 * tf.reduce_mean(tf.math.square(mean) + var - tf.math.log(1e-8 + var) - 1, axis=1)
        KL_divergence = tf.reduce_sum(KL_divergence)

        attention = tf.exp(-self.gamma*var)
        mean = tf.sparse.sparse_dense_matmul(adj[0], mean * attention)
        var = tf.sparse.sparse_dense_matmul(adj[1], var * attention * attention)

        output = tf.concat([mean, var], axis=1)

        return self.activation(output), KL_divergence
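
The closed-form KL term above, 0.5 * (mean^2 + var - log(var) - 1) per dimension, can be sanity-checked against a brute-force integral for a single 1-D Gaussian (the values below are arbitrary):

import numpy as np

mu, var = 0.7, 0.4
closed = 0.5 * (mu**2 + var - np.log(var) - 1.0)

x = np.linspace(-10.0, 10.0, 20001)
p = np.exp(-(x - mu)**2 / (2 * var)) / np.sqrt(2 * np.pi * var)
q = np.exp(-x**2 / 2) / np.sqrt(2 * np.pi)
numeric = np.sum(p * np.log(p / q)) * (x[1] - x[0])
print(closed, numeric)  # both come out to about 0.403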
    ### ReLU is faster than sigmoid and tanh (its gradient does not saturate, which avoids the vanishing-gradient problem).
    ## Drawback: it is "fragile" during training, because for negative inputs its derivative is zero, so those units stop learning (the "dead ReLU" problem; see the gradient check after this plot).
    x = tf.linspace(-5., 5., 100)  # build a continuous range of inputs
    x_ndarray = x.numpy()  # convert to an ndarray
    y_relu = activations.relu(x)

    plt.plot(x, y_relu, c='red', label='relu')  # draw the curve
    plt.ylim((-0.5, 1.2))
    plt.legend(loc='best')
    plt.show()
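
    ## A small check of the "dead ReLU" point above (reusing the tf /
    ## activations imports of this snippet): for a negative input the gradient
    ## of relu is exactly zero, while elu still passes a gradient through.
    x_neg = tf.constant([-2.0])
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(x_neg)
        y_r = activations.relu(x_neg)
        y_e = activations.elu(x_neg)
    print(tape.gradient(y_r, x_neg).numpy())  # [0.]    -> the unit is "dead"
    print(tape.gradient(y_e, x_neg).numpy())  # [~0.135] = exp(-2), still nonzero
    del tape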

    ## ELU was proposed to address ReLU's problems. Advantages: essentially all of ReLU's advantages, no dead-ReLU problem, and outputs with a mean close to 0 (roughly zero-centered; see the quick mean comparison after this plot). Disadvantages: slightly more computation, and it is not differentiable at the origin (for alpha != 1).
    x = tf.linspace(-5., 5., 100)  # build a continuous range of inputs
    x_ndarray = x.numpy()  # convert to an ndarray
    y_elu = activations.elu(x)

    plt.plot(x, y_elu, c='red', label='elu')  # draw the curve
    plt.ylim((-2, 5))
    plt.legend(loc='best')
    plt.show()
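
    ## Quick numeric illustration of the "mean close to 0" claim above
    ## (reusing the tf / activations imports of this snippet): on zero-mean
    ## Gaussian inputs, elu outputs have a mean much closer to zero than relu.
    z = tf.random.normal([100000])
    print(tf.reduce_mean(activations.relu(z)).numpy())  # roughly 0.40
    print(tf.reduce_mean(activations.elu(z)).numpy())   # roughly 0.16, closer to 0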

    ###selu
    ### SELU is essentially ELU multiplied by a lambda, and the key point is that this lambda is greater than 1. Earlier activations such as relu, prelu and elu all have a gentle slope on the negative half-axis, so when the variance of the activations grows too large they can shrink it and prevent exploding gradients, but the slope on the positive half-axis is simply set to 1. SELU's positive-half slope is greater than 1, so when the variance is too small it can amplify it, which also prevents vanishing gradients. The activation therefore has a fixed point: in a deep network every layer's output converges to mean 0 and variance 1 (see the depth sketch after this plot).
    x = tf.linspace(-5., 5., 100)  # build a continuous range of inputs
    x_ndarray = x.numpy()  # convert to an ndarray
    y_selu = activations.selu(x)

    plt.plot(x, y_selu, c='red', label='selu')  # draw the curve
    plt.ylim((-2, 5))
    plt.legend(loc='best')
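
    ## Rough sketch of the self-normalizing behaviour described above: with
    ## lecun-style weight scaling (stddev = sqrt(1 / fan_in), an assumption
    ## matching the SELU paper's setup), activations keep mean ~0 and std ~1
    ## even after many layers.
    h = tf.random.normal([1024, 256])
    for _ in range(20):
        w = tf.random.normal([256, 256], stddev=0.0625)  # sqrt(1 / 256)
        h = activations.selu(h @ w)
    print(tf.reduce_mean(h).numpy(), tf.math.reduce_std(h).numpy())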
#
# $$
# g(z) = \frac{e^{z} - e^{-z} }{e^{z} + e^{-z}}
# $$
#
# The figure below illustrates these functions.

# In[2]:

from tensorflow.keras import activations
import numpy as np
import matplotlib.pylab as plt

z = np.linspace(-7, 7, 500)
h_relu = activations.relu(z).numpy()
h_elu = activations.elu(z).numpy()
h_selu = activations.selu(z).numpy()
h_tanh = activations.tanh(z).numpy()

fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
fig.set_figheight(8)
fig.set_figwidth(10)

axes[0, 0].plot(z, h_relu)
axes[0, 0].set_xlabel(r'$z$')
axes[0, 0].set_ylabel(r'$g(z)$')
axes[0, 0].set_title('ReLU')

axes[0, 1].plot(z, h_elu)
axes[0, 1].set_xlabel(r'$z$')
axes[0, 1].set_ylabel(r'$g(z)$')