Example #1
    def _step(self, xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1, u_i, u_f, u_o, u_c):

        i_t = hard_sigmoid(xi_t + T.dot(h_tm1, u_i))              # input gate
        f_t = hard_sigmoid(xf_t + T.dot(h_tm1, u_f))              # forget gate
        c_t = f_t * c_tm1 + i_t * tanh(xc_t + T.dot(h_tm1, u_c))  # new cell state
        o_t = hard_sigmoid(xo_t + T.dot(h_tm1, u_o))              # output gate
        h_t = o_t * tanh(c_t)                                     # new hidden state
        return h_t, c_t
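The hard_sigmoid used for the gates above is Keras's piecewise-linear approximation of the logistic sigmoid; the reference tests further down this page pin it down as clip(0.2 * x + 0.5, 0, 1). A minimal NumPy sketch for orientation (an editor's illustration, not part of the original example):

import numpy as np

def hard_sigmoid(x):
    # piecewise-linear sigmoid: 0 for x <= -2.5, 1 for x >= 2.5,
    # and 0.2 * x + 0.5 in between (same constants as the reference tests below)
    return np.clip(0.2 * x + 0.5, 0.0, 1.0)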
Example #2
    def propagate_gru(weight, inputs, states, units=128):
        kernel = K.variable(weight[0])  # shape=(input_dim, self.units * 3)
        recurrent_kernel = K.variable(weight[1])  # shape=(self.units, self.units * 3)
        bias = K.variable(weight[2])  # bias_shape = (3 * self.units,)
        # build weights
        # update gate
        kernel_z = kernel[:, :units]
        recurrent_kernel_z = recurrent_kernel[:, :units]
        # reset gate
        kernel_r = kernel[:, units:units * 2]
        recurrent_kernel_r = recurrent_kernel[:, units:units * 2]
        # new gate
        kernel_h = kernel[:, units * 2:]
        recurrent_kernel_h = recurrent_kernel[:, units * 2:]

        # assume use bias, not reset_after
        input_bias_z = bias[:units]
        input_bias_r = bias[units:units * 2]
        input_bias_h = bias[units * 2:]
        # bias for hidden state - just for compatibility with CuDNN

        # call
        inputs = K.variable(inputs)  # not sure
        states = K.variable(states)  # not sure
        h_tm1 = states  # previous memory

        # assume no dropout in this layer and self.implementation = 1 and not reset_after
        inputs_z = inputs
        inputs_r = inputs
        inputs_h = inputs

        x_z = K.bias_add(K.dot(inputs_z, kernel_z), input_bias_z)
        x_r = K.bias_add(K.dot(inputs_r, kernel_r), input_bias_r)
        x_h = K.bias_add(K.dot(inputs_h, kernel_h), input_bias_h)

        recurrent_z = K.dot(h_tm1, recurrent_kernel_z)
        recurrent_r = K.dot(h_tm1, recurrent_kernel_r)

        z = hard_sigmoid(x_z + recurrent_z)  # recurrent activation = 'hard_sigmoid'
        r = hard_sigmoid(x_r + recurrent_r)

        recurrent_h = K.dot(r * h_tm1, recurrent_kernel_h)
        hh = tanh(x_h + recurrent_h)  # activation = 'tanh'
        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh

        # debug output (shapes are all (100, 128); percentiles [0, 1, 25, 50, 75, 99, 100]):
        # print(r.shape, z.shape, h.shape, hh.shape)
        # [0.   0.   0.22  0.76 1.   1.   1. ]
        # [0.   0.   0.    0.33 1.   1.   1. ]
        # [-1.  -1.  -0.87 0.05 0.90 1.   1. ]
        # [-1.  -1.  -0.99 0.17 0.99 1.   1. ]
        # for w in [r, z, h, hh]:
        #     w = K.get_value(w)
        #     print(np.percentile(w, [0, 1, 25, 50, 75, 99, 100]))
        return {'r': r, 'z': z, 'h': h, 'hh': hh}
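A hypothetical usage sketch for the helper above; gru_layer, x_batch and h_prev are illustrative names (a trained Keras GRU layer with use_bias=True and reset_after=False, an input batch of shape (batch, input_dim), and the previous state of shape (batch, units)), not part of the original:

weights = gru_layer.get_weights()   # [kernel, recurrent_kernel, bias]
gates = propagate_gru(weights, x_batch, h_prev, units=128)
h_next = K.get_value(gates['h'])    # evaluate the symbolic result, as in the commented-out debug code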
Example #3
    def _step(self,
        xi_t, xf_t, xc_t, xo_t,
        h_tm1, c_tm1,
        u_i, u_f, u_o, u_c):

        i_t = hard_sigmoid(xi_t + T.dot(h_tm1, u_i))
        f_t = hard_sigmoid(xf_t + T.dot(h_tm1, u_f))
        c_t = f_t * c_tm1 + i_t * tanh(xc_t + T.dot(h_tm1, u_c))
        o_t = hard_sigmoid(xo_t + T.dot(h_tm1, u_o))
        h_t = o_t * tanh(c_t)
        return h_t, c_t
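A step function with this signature is typically driven by theano.scan, which passes the sequence slices, then the previous outputs, then the non-sequences to each call. A hedged driver sketch (all tensor names are illustrative placeholders, and it assumes the call happens inside the same class so self is available):

import theano

# x_i, x_f, x_c, x_o: precomputed per-gate input projections, one slice per timestep
# h0, c0:             initial hidden and cell states
# u_i, u_f, u_o, u_c: recurrent weight matrices
[h_seq, c_seq], updates = theano.scan(
    self._step,
    sequences=[x_i, x_f, x_c, x_o],
    outputs_info=[h0, c0],
    non_sequences=[u_i, u_f, u_o, u_c])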
Example #4
    def _split_and_apply_activations(self, controller_output):
        """ This takes the controller output, splits it in ntm_output, read and wright adressing data.
            It returns a triple of ntm_output, controller_instructions_read, controller_instructions_write.
            ntm_output is a tensor, controller_instructions_read and controller_instructions_write are lists containing
            the adressing instruction (k, beta, g, shift, gamma) and in case of write also the writing constructions,
            consisting of an erase and an add vector.

            As it is necesseary for stable results,
            k and add_vector is activated via tanh, erase_vector via sigmoid (this is critical!),
            shift via softmax,
            gamma is sigmoided, inversed and clipped (probably not ideal)
            g is sigmoided,
            beta is linear (probably not ideal!) """

        # splitting
        ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
            controller_output,
            np.asarray([self.output_dim,
                        self.read_heads * self.controller_read_head_emitting_dim,
                        self.write_heads * self.controller_write_head_emitting_dim]),
            axis=1)

        controller_instructions_read = tf.split(
            controller_instructions_read, self.read_heads, axis=1)
        controller_instructions_write = tf.split(
            controller_instructions_write, self.write_heads, axis=1)

        controller_instructions_read = [
            tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1) for
            single_head_data in controller_instructions_read]

        controller_instructions_write = [
            tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]), axis=1) for
            single_head_data in controller_instructions_write]

        # activation
        ntm_output = self.activation(ntm_output)
        controller_instructions_read = [(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma)) for
                                        (k, beta, g, shift, gamma) in controller_instructions_read]
        controller_instructions_write = [
            (tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma), hard_sigmoid(erase_vector), tanh(add_vector)) for
            (k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]

        return (ntm_output, controller_instructions_read, controller_instructions_write)
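For reference, the tf.split sizes above imply the per-head widths below; this is an inference sketch by the editor, assuming the attribute names used in the example:

# each read head emits k (m_depth), beta (1), g (1), shift (3) and gamma (1);
# each write head additionally emits erase_vector and add_vector (m_depth each)
controller_read_head_emitting_dim = m_depth + 1 + 1 + 3 + 1                 # = m_depth + 6
controller_write_head_emitting_dim = controller_read_head_emitting_dim + 2 * m_depth
controller_output_dim = (output_dim
                         + read_heads * controller_read_head_emitting_dim
                         + write_heads * controller_write_head_emitting_dim)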
Example #5
 def test_hard_sigmoid(self):
   def ref_hard_sigmoid(x):
     x = (x * 0.2) + 0.5
     z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
     return z
   hard_sigmoid = np.vectorize(ref_hard_sigmoid)
   x = backend.placeholder(ndim=2)
   f = backend.function([x], [activations.hard_sigmoid(x)])
   test_values = np.random.random((2, 5))
   result = f([test_values])[0]
   expected = hard_sigmoid(test_values)
   self.assertAllClose(result, expected, rtol=1e-05)
Example #6
            def step_backward(inputs, states):
                h_tm1 = states[0]  # previous memory state
                c_tm1 = states[1]  # previous carry state

                x_i = tf.tensordot(inputs, self.kernel_i_backward,axes=[[2],[0]])
                x_f = tf.tensordot(inputs, self.kernel_f_backward,axes=[[2],[0]])
                x_c = tf.tensordot(inputs, self.kernel_c_backward,axes=[[2],[0]])
                x_o = tf.tensordot(inputs, self.kernel_o_backward,axes=[[2],[0]])
                x_i = K.bias_add(x_i, self.bias_i_backward)
                x_f = K.bias_add(x_f, self.bias_f_backward)
                x_c = K.bias_add(x_c, self.bias_c_backward)
                x_o = K.bias_add(x_o, self.bias_o_backward)
                i = activations.hard_sigmoid(x_i + tf.tensordot(h_tm1,
                                                          self.recurrent_kernel_i_backward,axes=[[2],[0]]))
                f = activations.hard_sigmoid(x_f + tf.tensordot(h_tm1,
                                                          self.recurrent_kernel_f_backward,axes=[[2],[0]]))
                c = f * c_tm1 + i * activations.tanh(x_c + tf.tensordot(h_tm1,
                                                                self.recurrent_kernel_c_backward,axes=[[2],[0]]))
                o = activations.hard_sigmoid(x_o + tf.tensordot(h_tm1,
                                                          self.recurrent_kernel_o_backward,axes=[[2],[0]]))
                h = o * activations.tanh(c)

                return h, [h, c]
Example #7
def test_hard_sigmoid():
    """Test using a reference hard sigmoid implementation.
    """
    def ref_hard_sigmoid(x):
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z
    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.hard_sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05)
Example #8
def test_hard_sigmoid():
    '''
    Test using a reference hard sigmoid implementation
    '''
    def ref_hard_sigmoid(x):
        '''
        Reference hard sigmoid with slope and shift values from theano, see
        https://github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py
        '''
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z
    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x],  [activations.hard_sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05)
Example #9
def test_hard_sigmoid():
    '''
    Test using a reference hard sigmoid implementation
    '''
    def ref_hard_sigmoid(x):
        '''
        Reference hard sigmoid with slope and shift values from theano, see
        https://github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py
        '''
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z
    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x],  [activations.hard_sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05)
Example #10
# linear activation function
acttf = kact.linear(nettf)
# need to convert from TensorFlow tensors to numpy arrays before plotting
# eval() is called because TensorFlow tensors have no values until they are "run"
plt_act(nettf.eval(), acttf.eval(), 'linear activation function')

# relu activation function
acttf = kact.relu(nettf)
plt_act(nettf.eval(), acttf.eval(), 'rectified linear (relu)')

# sigmoid activation function
acttf = kact.sigmoid(nettf)
plt_act(nettf.eval(), acttf.eval(), 'sigmoid')

# hard sigmoid activation function
acttf = kact.hard_sigmoid(nettf)
plt_act(nettf.eval(), acttf.eval(), 'hard sigmoid')

# tanh activation function
acttf = kact.tanh(nettf)
plt_act(nettf.eval(), acttf.eval(), 'tanh')

# softsign activation function
acttf = kact.softsign(nettf)
plt_act(nettf.eval(), acttf.eval(), 'softsign')

# close the TensorFlow session
session.close()

# done
print('Done!')
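The snippet above assumes a TensorFlow 1.x graph-mode setup in which session, nettf and plt_act were created earlier; a minimal sketch of that setup (assumed by the editor, not taken from the original):

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import activations as kact

# InteractiveSession installs itself as the default session so tensor.eval() works
session = tf.InteractiveSession()

# a sweep of net input values to feed through each activation
nettf = tf.constant(np.linspace(-5.0, 5.0, 101), dtype=tf.float32)

def plt_act(net, act, title):
    # plot activation output against net input
    plt.figure()
    plt.plot(net, act)
    plt.title(title)
    plt.show()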