def layer_norm(x, w, b, e=1e-5):
    # Layer normalization: normalize x per sample, then apply the learned
    # scale (w) and shift (b). e is a small epsilon for numerical stability.
    sizes = x.get_output_shape()[1:]
    u = auto.mean(x, len(sizes), True)                   # mean, keeping reduced dims
    s = auto.mean(auto.square(x - u), len(sizes), True)  # variance, keeping reduced dims
    y = (x - u) / auto.sqrt(s + e)
    y = y * w + b
    return y
def gelu(self, x):
    # GELU, tanh approximation:
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
    # Note: uses math.sqrt, so the enclosing module needs `import math`.
    y = (auto.square(x) * x * 0.044715 + x) * math.sqrt(2 / math.pi)
    y = Activation("tanh")(y) + 1.0
    y = x * 0.5 * y
    return y
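
# For reference: a minimal NumPy sketch of the same two operations, independent of
# the `auto`/`Activation` framework used above (function names here are illustrative,
# and it assumes normalization is taken over the feature (last) axis).
# Layer norm: y = (x - mean) / sqrt(var + eps) * w + b, per sample.
# GELU: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
import numpy as np

def layer_norm_ref(x, w, b, eps=1e-5):
    u = x.mean(axis=-1, keepdims=True)
    s = ((x - u) ** 2).mean(axis=-1, keepdims=True)
    return (x - u) / np.sqrt(s + eps) * w + b

def gelu_ref(x):
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))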