def test_gradient(self): val = np.random.random((4, 2)) xth = KTH.variable(val) xtf = KTF.variable(val) expth = xth * KTH.exp(xth) exptf = xtf * KTF.exp(xtf) lossth = KTH.sum(expth) losstf = KTF.sum(exptf) zero_lossth = KTH.stop_gradient(lossth) zero_losstf = KTF.stop_gradient(losstf) gradth = KTH.gradients(lossth, [expth]) gradtf = KTF.gradients(losstf, [exptf]) zero_gradth = KTH.gradients(lossth + zero_lossth, [expth]) zero_gradtf = KTF.gradients(losstf + zero_losstf, [exptf]) zth = KTH.eval(gradth[0]) ztf = KTF.eval(gradtf[0]) zero_zth = KTH.eval(zero_gradth[0]) zero_ztf = KTF.eval(zero_gradtf[0]) assert zth.shape == ztf.shape assert zero_zth.shape == zero_ztf.shape assert_allclose(zth, ztf, atol=1e-05) assert_allclose(zero_zth, zero_ztf, atol=1e-05) assert_allclose(zero_zth, zth, atol=1e-05) assert_allclose(zero_ztf, ztf, atol=1e-05)
def get_initial_states(self, X): # build an all-zero tensor of shape (samples, output_dim) initial_state = K.zeros_like(X) # (samples, timesteps, input_dim) initial_state = K.sum(initial_state, axis=1) # (samples, input_dim) reducer = K.zeros((self.output_dim, self.output_dim)) initial_state = K.dot(initial_state, reducer) # (samples, output_dim) initial_states = [initial_state for _ in range(len(self.states))] return initial_states
def get_initial_states(self, x): # build an all-zero tensor of shape (samples, output_dim) initial_state = K.zeros_like(x) # (samples, timesteps, input_dim) initial_state = K.sum(initial_state, axis=1) # (samples, input_dim) reducer = K.zeros((self.input_dim, self.output_dim)) initial_state = K.dot(initial_state, reducer) # (samples, output_dim) initial_states = [initial_state for _ in range(len(self.states))] if self.feed_layer is not None: initial_states[1] = self.feed_layer return initial_states