def _step(self, xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1, u_i, u_f, u_o, u_c):
    # input, forget and output gates use hard_sigmoid; the cell candidate and output use tanh
    i_t = hard_sigmoid(xi_t + T.dot(h_tm1, u_i))
    f_t = hard_sigmoid(xf_t + T.dot(h_tm1, u_f))
    c_t = f_t * c_tm1 + i_t * tanh(xc_t + T.dot(h_tm1, u_c))
    o_t = hard_sigmoid(xo_t + T.dot(h_tm1, u_o))
    h_t = o_t * tanh(c_t)
    return h_t, c_t

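# For reference: the hard_sigmoid used for the gates above is the Keras/Theano
# piecewise-linear approximation of the logistic sigmoid, clip(0.2 * x + 0.5, 0, 1)
# (the slope and shift match the reference tests further below). A minimal NumPy
# sketch, not part of the original snippet; hard_sigmoid_ref is a hypothetical name:
import numpy as np

def hard_sigmoid_ref(x):
    """Piecewise-linear sigmoid approximation: 0 for x <= -2.5, 1 for x >= 2.5."""
    return np.clip(0.2 * x + 0.5, 0.0, 1.0)

# hard_sigmoid_ref(np.array([-3.0, 0.0, 3.0])) -> array([0. , 0.5, 1. ])
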
def propagate_gru(weight, inputs, states, units=128):
    kernel = K.variable(weight[0])  # shape=(input_dim, self.units * 3)
    recurrent_kernel = K.variable(weight[1])  # shape=(self.units, self.units * 3)
    bias = K.variable(weight[2])  # bias_shape = (3 * self.units,)

    # build weights
    # update gate
    kernel_z = kernel[:, :units]
    recurrent_kernel_z = recurrent_kernel[:, :units]
    # reset gate
    kernel_r = kernel[:, units:units * 2]
    recurrent_kernel_r = recurrent_kernel[:, units:units * 2]
    # new gate
    kernel_h = kernel[:, units * 2:]
    recurrent_kernel_h = recurrent_kernel[:, units * 2:]

    # assume use bias, not reset_after
    input_bias_z = bias[:units]
    input_bias_r = bias[units:units * 2]
    input_bias_h = bias[units * 2:]
    # bias for hidden state - just for compatibility with CuDNN

    # call
    inputs = K.variable(inputs)  # not sure
    states = K.variable(states)  # not sure
    h_tm1 = states  # previous memory

    # assume no dropout in this layer and self.implementation = 1 and not reset_after
    inputs_z = inputs
    inputs_r = inputs
    inputs_h = inputs

    x_z = K.bias_add(K.dot(inputs_z, kernel_z), input_bias_z)
    x_r = K.bias_add(K.dot(inputs_r, kernel_r), input_bias_r)
    x_h = K.bias_add(K.dot(inputs_h, kernel_h), input_bias_h)

    recurrent_z = K.dot(h_tm1, recurrent_kernel_z)
    recurrent_r = K.dot(h_tm1, recurrent_kernel_r)

    z = hard_sigmoid(x_z + recurrent_z)  # recurrent activation = 'hard_sigmoid'
    r = hard_sigmoid(x_r + recurrent_r)

    recurrent_h = K.dot(r * h_tm1, recurrent_kernel_h)
    hh = tanh(x_h + recurrent_h)  # activation = 'tanh'

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh

    # print(r.shape, z.shape, h.shape, hh.shape)
    # (100, 128) (100, 128) (100, 128) (100, 128)
    # observed percentiles [0, 1, 25, 50, 75, 99, 100] for r, z, h, hh:
    # [ 0.    0.    0.22  0.76  1.    1.    1.  ]
    # [ 0.    0.    0.    0.33  1.    1.    1.  ]
    # [-1.   -1.   -0.87  0.05  0.90  1.    1.  ]
    # [-1.   -1.   -0.99  0.17  0.99  1.    1.  ]
    # for w in [r, z, h, hh]:
    #     w = K.get_value(w)
    #     print(np.percentile(w, [0, 1, 25, 50, 75, 99, 100]))

    return {'r': r, 'z': z, 'h': h, 'hh': hh}

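# A small self-contained sketch exercising propagate_gru with random weights. The
# shapes are assumptions that follow the shape comments inside the function, and it
# presumes K, hard_sigmoid and tanh are imported from Keras as in the original context:
import numpy as np

input_dim, units, batch = 32, 128, 100
weights = [np.random.randn(input_dim, units * 3).astype('float32'),  # kernel
           np.random.randn(units, units * 3).astype('float32'),      # recurrent_kernel
           np.zeros(3 * units, dtype='float32')]                     # bias
x_batch = np.random.randn(batch, input_dim).astype('float32')
h_prev = np.zeros((batch, units), dtype='float32')

gates = propagate_gru(weights, x_batch, h_prev, units=units)
print(K.get_value(gates['h']).shape)  # expected: (100, 128)
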
def _split_and_apply_activations(self, controller_output):
    """Takes the controller output and splits it into ntm_output, read addressing
    data and write addressing data.

    Returns a triple of ntm_output, controller_instructions_read,
    controller_instructions_write. ntm_output is a tensor;
    controller_instructions_read and controller_instructions_write are lists
    containing, per head, the addressing instructions (k, beta, g, shift, gamma)
    and, in the write case, also the write instructions consisting of an erase
    and an add vector.

    As is necessary for stable results, k and add_vector are activated via tanh
    and erase_vector via a hard sigmoid (this is critical!); shift goes through a
    softmax, g is sigmoided, gamma is mapped to [1, 10] via a sigmoid (probably
    not ideal), and beta is hard-sigmoided and shifted by 0.5 (probably not ideal!).
    """
    # splitting
    ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
        controller_output,
        np.asarray([self.output_dim,
                    self.read_heads * self.controller_read_head_emitting_dim,
                    self.write_heads * self.controller_write_head_emitting_dim]),
        axis=1)

    controller_instructions_read = tf.split(
        controller_instructions_read, self.read_heads, axis=1)
    controller_instructions_write = tf.split(
        controller_instructions_write, self.write_heads, axis=1)

    controller_instructions_read = [
        tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1)
        for single_head_data in controller_instructions_read]
    controller_instructions_write = [
        tf.split(single_head_data,
                 np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]),
                 axis=1)
        for single_head_data in controller_instructions_write]

    # activation
    ntm_output = self.activation(ntm_output)
    controller_instructions_read = [
        (tanh(k), hard_sigmoid(beta) + 0.5, sigmoid(g), softmax(shift), 1 + 9 * sigmoid(gamma))
        for (k, beta, g, shift, gamma) in controller_instructions_read]
    controller_instructions_write = [
        (tanh(k), hard_sigmoid(beta) + 0.5, sigmoid(g), softmax(shift), 1 + 9 * sigmoid(gamma),
         hard_sigmoid(erase_vector), tanh(add_vector))
        for (k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]

    return (ntm_output, controller_instructions_read, controller_instructions_write)

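# Worked example of the size bookkeeping behind the splits above. Only the formulas
# follow from the split sizes in the code; the concrete numbers are assumptions for
# illustration. Per read head the controller emits m_depth + 1 + 1 + 3 + 1 values
# (k, beta, g, shift, gamma); a write head additionally emits erase and add vectors
# of length m_depth each.
m_depth, output_dim, read_heads, write_heads = 20, 8, 1, 1
controller_read_head_emitting_dim = m_depth + 1 + 1 + 3 + 1                            # 26
controller_write_head_emitting_dim = controller_read_head_emitting_dim + 2 * m_depth   # 66
controller_output_dim = (output_dim
                         + read_heads * controller_read_head_emitting_dim
                         + write_heads * controller_write_head_emitting_dim)
print(controller_output_dim)  # 100 for this example
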
def test_hard_sigmoid(self):
    def ref_hard_sigmoid(x):
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z

    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = backend.placeholder(ndim=2)
    f = backend.function([x], [activations.hard_sigmoid(x)])
    test_values = np.random.random((2, 5))
    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    self.assertAllClose(result, expected, rtol=1e-05)

def step_backward(inputs, states):
    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    x_i = tf.tensordot(inputs, self.kernel_i_backward, axes=[[2], [0]])
    x_f = tf.tensordot(inputs, self.kernel_f_backward, axes=[[2], [0]])
    x_c = tf.tensordot(inputs, self.kernel_c_backward, axes=[[2], [0]])
    x_o = tf.tensordot(inputs, self.kernel_o_backward, axes=[[2], [0]])

    x_i = K.bias_add(x_i, self.bias_i_backward)
    x_f = K.bias_add(x_f, self.bias_f_backward)
    x_c = K.bias_add(x_c, self.bias_c_backward)
    x_o = K.bias_add(x_o, self.bias_o_backward)

    i = activations.hard_sigmoid(
        x_i + tf.tensordot(h_tm1, self.recurrent_kernel_i_backward, axes=[[2], [0]]))
    f = activations.hard_sigmoid(
        x_f + tf.tensordot(h_tm1, self.recurrent_kernel_f_backward, axes=[[2], [0]]))
    c = f * c_tm1 + i * activations.tanh(
        x_c + tf.tensordot(h_tm1, self.recurrent_kernel_c_backward, axes=[[2], [0]]))
    o = activations.hard_sigmoid(
        x_o + tf.tensordot(h_tm1, self.recurrent_kernel_o_backward, axes=[[2], [0]]))
    h = o * activations.tanh(c)
    return h, [h, c]

def test_hard_sigmoid():
    """Test using a reference hard sigmoid implementation."""
    def ref_hard_sigmoid(x):
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z

    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.hard_sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05)

def test_hard_sigmoid():
    """Test using a reference hard sigmoid implementation."""
    def ref_hard_sigmoid(x):
        """Reference hard sigmoid with slope and shift values from Theano, see
        https://github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py
        """
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z

    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.hard_sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05)

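# The two tests above call get_standard_values(), which is not included in these
# snippets; a plausible stand-in is sketched below (an assumption, not copied from
# the Keras test suite) so the tests can run as shown:
def get_standard_values():
    """A small fixed set of floats reused across the activation tests."""
    return np.array([[0, 0.1, 0.5, 0.9, 1.0]], dtype=K.floatx())
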
# linear activation function
acttf = kact.linear(nettf)
# need to convert from TensorFlow tensors to numpy arrays before plotting
# eval() is called because TensorFlow tensors have no values until they are "run"
plt_act(nettf.eval(), acttf.eval(), 'linear activation function')

# relu activation function
acttf = kact.relu(nettf)
plt_act(nettf.eval(), acttf.eval(), 'rectified linear (relu)')

# sigmoid activation function
acttf = kact.sigmoid(nettf)
plt_act(nettf.eval(), acttf.eval(), 'sigmoid')

# hard sigmoid activation function
acttf = kact.hard_sigmoid(nettf)
plt_act(nettf.eval(), acttf.eval(), 'hard sigmoid')

# tanh activation function
acttf = kact.tanh(nettf)
plt_act(nettf.eval(), acttf.eval(), 'tanh')

# softsign activation function
acttf = kact.softsign(nettf)
plt_act(nettf.eval(), acttf.eval(), 'softsign')

# close the TensorFlow session
session.close()

# done
print('Done!')
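
# The plotting snippet above assumes nettf, plt_act and session already exist.
# A minimal TF1-style setup sketch that would have to run before it (all names,
# imports and sample values here are assumptions, not part of the original):
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf
from keras import activations as kact

tf.disable_eager_execution()                  # .eval() needs graph mode plus a session
session = tf.InteractiveSession()             # registered as the default session
nettf = tf.constant(np.linspace(-5.0, 5.0, 101), dtype=tf.float32)  # sample net inputs

def plt_act(net, act, title):
    """Plot activation output against net input."""
    plt.plot(net, act)
    plt.title(title)
    plt.grid(True)
    plt.show()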