def test_loss_updates_one_layer_positive_relu(self):
    """Check loss and weights after one training step with action 0.

    Builds a 4-input, 2-output ReLU ``HiddenLayer`` (``param_init_range=0``,
    ``alpha=0``) wrapped in a ``QNetwork`` with ``discount=1`` and
    ``learning_rate=1``, runs a single compiled update with all-ones
    features, ``action=0`` and ``reward=1``, and asserts that the returned
    loss is 0.5 and that every row of the layer's weight matrix is
    ``[1, 0]``.
    """
    n_vis = 4
    n_hid = 2
    hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h',
                               activation='relu', param_init_range=0,
                               alpha=0)
    mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)

    # Symbolic inputs get their own names so they are not later rebound to
    # the concrete values fed to the compiled function.
    features_var = T.dvector('features')
    action_var = T.lscalar('action')
    reward_var = T.dscalar('reward')
    next_features_var = T.dvector('next_features')

    loss, updates = mlp.get_loss_and_updates(
        features_var, action_var, reward_var, next_features_var)
    train = theano.function(
        [features_var, action_var, reward_var, next_features_var],
        outputs=loss,
        updates=updates,
        mode='FAST_COMPILE')

    features = [1, 1, 1, 1]
    action = 0
    reward = 1
    next_features = [1, 1, 1, 1]

    # The weights must be read after the training call, because the call
    # applies `updates` as a side effect.
    actual_loss = train(features, action, reward, next_features)
    actual_weights = mlp.layers[0].W.eval().tolist()

    expected_loss = 0.5
    # NOTE(review): column 0 presumably corresponds to the chosen action
    # (action=0) -- confirm against QNetwork.get_loss_and_updates.
    expected_weights = [[1, 0], [1, 0], [1, 0], [1, 0]]

    self.assertEqual(actual_loss, expected_loss)
    # assertSequenceEqual reports the differing rows on failure, unlike
    # assertTrue(np.array_equal(...)), and matches the sibling tests.
    self.assertSequenceEqual(actual_weights, expected_weights)
def test_loss_updates_one_layer_positive_features_with_negative_weights_relu(self):
    """Check loss and weights after one step when all weights start at -1.

    The layer's ``W`` is explicitly set to a 4x2 matrix of -1 before the
    network is built. After one compiled training call with all-ones
    features, ``action=0`` and ``reward=1``, the test asserts a loss of 0.5
    and a weight matrix that is still all -1.
    """
    # NOTE(review): a method with this exact name is defined more than once
    # in the visible source; if both live in the same class, only the last
    # definition will run -- confirm and deduplicate.
    n_vis, n_hid = 4, 2
    layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h',
                        activation='relu', param_init_range=0, alpha=0)
    layer.W.set_value(-np.ones((n_vis, n_hid)))
    mlp = QNetwork([layer], discount=1, learning_rate=1)

    # Symbolic placeholders for the compiled training function.
    sym_features = T.dvector('features')
    sym_action = T.lscalar('action')
    sym_reward = T.dscalar('reward')
    sym_next_features = T.dvector('next_features')
    sym_inputs = [sym_features, sym_action, sym_reward, sym_next_features]

    loss, updates = mlp.get_loss_and_updates(*sym_inputs)
    train = theano.function(sym_inputs, outputs=loss, updates=updates,
                            mode='FAST_COMPILE')

    ones = [1, 1, 1, 1]
    actual_loss = train(ones, 0, 1, ones)
    # Read the weights only after the update has been applied.
    actual_weights = mlp.layers[0].W.eval().tolist()

    self.assertEqual(actual_loss, 0.5)
    self.assertSequenceEqual(actual_weights, [[-1, -1] for _ in range(4)])
def test_loss_updates_one_layer_positive_features_with_negative_weights_relu(
        self):
    """Run one training step on a ReLU layer whose weights are all -1.

    Sets the 4x2 weight matrix of the hidden layer to -1, performs a single
    compiled update (all-ones features and next features, ``action=0``,
    ``reward=1``) and asserts that the loss equals 0.5 and the weights are
    unchanged at -1.
    """
    # NOTE(review): this method name appears more than once in the visible
    # source; within a single class a later definition replaces an earlier
    # one -- confirm whether the duplicate is intentional.
    n_vis = 4
    n_hid = 2
    negative_weights = -np.ones((n_vis, n_hid))

    hidden_layer = HiddenLayer(
        n_vis=n_vis,
        n_hid=n_hid,
        layer_name='h',
        activation='relu',
        param_init_range=0,
        alpha=0,
    )
    hidden_layer.W.set_value(negative_weights)
    mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)

    # Symbolic graph inputs, kept separate from the concrete values below.
    features_t = T.dvector('features')
    action_t = T.lscalar('action')
    reward_t = T.dscalar('reward')
    next_features_t = T.dvector('next_features')

    loss, updates = mlp.get_loss_and_updates(
        features_t, action_t, reward_t, next_features_t)
    train = theano.function(
        [features_t, action_t, reward_t, next_features_t],
        outputs=loss,
        updates=updates,
        mode='FAST_COMPILE',
    )

    sample = dict(features=[1, 1, 1, 1], action=0, reward=1,
                  next_features=[1, 1, 1, 1])
    actual_loss = train(sample['features'], sample['action'],
                        sample['reward'], sample['next_features'])
    # Weights are evaluated after the call so the update is reflected.
    actual_weights = mlp.layers[0].W.eval().tolist()

    expected_loss = 0.5
    expected_weights = [[-1, -1], [-1, -1], [-1, -1], [-1, -1]]
    self.assertEqual(actual_loss, expected_loss)
    self.assertSequenceEqual(actual_weights, expected_weights)
def test_loss_updates_one_layer_positive_diff_action_relu(self):
    """Check loss and weights after one training step with action 1.

    Builds a 4-input, 2-output ReLU ``HiddenLayer`` (``param_init_range=0``,
    ``alpha=0``) wrapped in a ``QNetwork`` with ``discount=1`` and
    ``learning_rate=1``, runs a single compiled update with all-ones
    features, ``action=1`` and ``reward=1``, and asserts that the returned
    loss is 0.5 and that every row of the layer's weight matrix is
    ``[0, 1]``.
    """
    n_vis = 4
    n_hid = 2
    hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h',
                               activation='relu', param_init_range=0,
                               alpha=0)
    mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)

    # Symbolic inputs get their own names so they are not later rebound to
    # the concrete values fed to the compiled function.
    features_var = T.dvector('features')
    action_var = T.lscalar('action')
    reward_var = T.dscalar('reward')
    next_features_var = T.dvector('next_features')

    loss, updates = mlp.get_loss_and_updates(
        features_var, action_var, reward_var, next_features_var)
    train = theano.function(
        [features_var, action_var, reward_var, next_features_var],
        outputs=loss,
        updates=updates,
        mode='FAST_COMPILE')

    features = [1, 1, 1, 1]
    action = 1
    reward = 1
    next_features = [1, 1, 1, 1]

    # The weights must be read after the training call, because the call
    # applies `updates` as a side effect.
    actual_loss = train(features, action, reward, next_features)
    actual_weights = mlp.layers[0].W.eval().tolist()

    expected_loss = 0.5
    # NOTE(review): column 1 presumably corresponds to the chosen action
    # (action=1) -- confirm against QNetwork.get_loss_and_updates.
    expected_weights = [[0, 1], [0, 1], [0, 1], [0, 1]]

    self.assertEqual(actual_loss, expected_loss)
    # assertSequenceEqual reports the differing rows on failure, unlike
    # assertTrue(np.array_equal(...)), and matches the sibling tests.
    self.assertSequenceEqual(actual_weights, expected_weights)