def test_multi_env(self):
    """Check the extra feature column over several steps for two environments.

    The wrapped agent receives a 2x2 feature tensor; the state recorded by
    ``self.test_agent`` is expected to carry a third column that grows by
    1e-3 per call to ``act``.
    """
    # NOTE(review): the hard-coded feature values assume the RNG is seeded
    # outside this method (e.g. in setUp) - confirm.
    def check_features(expected_rows):
        tt.assert_allclose(self.test_agent.last_state.features,
                           torch.tensor(expected_rows),
                           atol=1e-04)

    state = State(torch.randn(2, 2))

    self.agent.act(state, 0)
    check_features([[0.3923, -0.2236, 0.],
                    [-0.3195, -1.2050, 0.]])

    self.agent.act(state, 0)
    check_features([[0.3923, -0.2236, 1e-3],
                    [-0.3195, -1.2050, 1e-3]])

    # Second State argument is [1., 0.]: on the following step the extra
    # column of the second row is expected to restart at 0.
    flagged_state = State(state.features, torch.tensor([1., 0.]))
    self.agent.act(flagged_state, 0)
    check_features([[0.3923, -0.2236, 2e-3],
                    [-0.3195, -1.2050, 2e-3]])

    self.agent.act(state, 0)
    check_features([[0.3923, -0.2236, 3e-3],
                    [-0.3195, -1.2050, 0.]])

    self.agent.act(state, 0)
    check_features([[0.3923, -0.2236, 4e-3],
                    [-0.3195, -1.2050, 1e-3]])
def testScattering(self):
    """Check Scattering against stored reference output and GPU vs CPU.

    Loads ``x`` and the reference ``S`` from ``test/test_data.pt``, compares
    the jit=True GPU result with ``S``, then compares the jit=False result
    computed on GPU with the one computed on CPU.
    """
    data = torch.load('test/test_data.pt')
    x = data['x']
    S = data['S']

    # Reference check with the jit implementation on the GPU.
    scat = Scattering(128, 128, 4, pre_pad=False, jit=True)
    scat.cuda()
    x = x.cuda()
    S = S.cuda()
    tt.assert_allclose(S.cpu(), scat(x).cpu(), atol=1e-6)

    # Same input through the non-jit implementation, first on GPU ...
    scat = Scattering(128, 128, 4, pre_pad=False, jit=False)
    x = x.cuda()
    scat.cuda()
    Sg = scat(x)

    # ... then on CPU.
    x = x.cpu()
    scat.cpu()
    Sc = scat(x)

    # There are huge round-off errors with fftw, numpy fft, cufft... and the
    # kernels of periodization. We do not wish to play with that as it is
    # meaningless, hence the loose tolerance.
    tt.assert_allclose(Sg.cpu(), Sc.cpu(), atol=1e-1)
def test_rollout(self):
    """Store two batches of three states and check the resulting advantages.

    Expected advantages are derived with ``self._compute_expected_advantages``
    from hand-written returns, next states and rollout lengths.
    """
    buffer = NStepAdvantageBuffer(self.v, self.features, 2, 3,
                                  discount_factor=0.5)
    actions = torch.ones((3))
    all_states = State(torch.arange(0, 12).unsqueeze(1))

    step_rewards = [torch.zeros(3), torch.ones(3)]
    for step, rewards in enumerate(step_rewards):
        first = 3 * step
        buffer.store(all_states[first:first + 3], actions, rewards)

    sampled_states, _, advantages = buffer.advantages(all_states[6:9])

    expected_states = State(torch.arange(0, 6).unsqueeze(1))
    # Both stored steps are expected to bootstrap from states 6..8.
    bootstrap = torch.arange(6, 9)
    expected_next_states = State(
        torch.cat((bootstrap, torch.arange(6, 9))).unsqueeze(1))
    expected_returns = torch.tensor([0.5, 0.5, 0.5, 1, 1, 1]).float()
    expected_lengths = torch.tensor([2., 2, 2, 1, 1, 1])

    self.assert_states_equal(sampled_states, expected_states)
    expected_advantages = self._compute_expected_advantages(
        expected_states, expected_returns, expected_next_states,
        expected_lengths)
    tt.assert_allclose(advantages, expected_advantages)
def test_reset(self):
    """Check that the extra feature column restarts after a DONE state.

    The state recorded by ``self.test_agent`` is expected to hold the four
    input features plus a fifth value that grows by 1e-3 per step and is
    back at 0 on the step after ``State(features, DONE)`` was passed.
    """
    # NOTE(review): the hard-coded feature values assume the RNG is seeded
    # outside this method (e.g. in setUp) - confirm.
    base = [0.3923, -0.2236, -0.3195, -1.2050]

    def check_last(extra):
        tt.assert_allclose(self.test_agent.last_state.features,
                           torch.tensor([base + [extra]]),
                           atol=1e-04)

    state = State(torch.randn(1, 4))

    self.agent.act(state, 0)
    check_last(0.0000)

    self.agent.act(state, 0)
    check_last(1e-3)

    self.agent.act(State(state.features, DONE), 0)
    check_last(2e-3)

    # First step after the DONE state: the extra value starts over.
    self.agent.act(State(state.features), 0)
    check_last(0.0000)

    self.agent.act(state, 0)
    check_last(1e-3)
def test_target(self):
    """Check that a FixedTarget(3) policy target only changes on the third step.

    The same initial action graph is back-propagated three times (hence
    ``retain_graph=True``); the target output is expected to remain zeros
    after the first two steps.
    """
    self.policy = DeterministicPolicy(
        self.model,
        self.optimizer,
        self.space,
        target=FixedTarget(3),
    )

    # Initial action for an all-ones state.
    state = State(torch.ones(1, STATE_DIM))
    action = self.policy(state)
    tt.assert_equal(action, torch.zeros(1, ACTION_DIM))

    # First and second update: the target output must still be zeros.
    for _ in range(2):
        action.sum().backward(retain_graph=True)
        self.policy.step()
        tt.assert_equal(self.policy.target(state),
                        torch.zeros(1, ACTION_DIM))

    # Third update: the output is expected to have moved. Note that this
    # final check goes through ``policy.eval`` rather than ``policy.target``.
    action.sum().backward(retain_graph=True)
    self.policy.step()
    expected = torch.tensor([[-0.595883, -0.595883, -0.595883]])
    tt.assert_allclose(self.policy.eval(state), expected, atol=1e-4)
def test_target(self):
    """Check that a FixedTarget(3) policy target only changes on the third step.

    Each update runs a fresh forward pass, back-propagates the summed
    output and calls ``policy.step()``.
    """
    self.policy = DeterministicPolicy(
        self.model, self.optimizer, self.space, target=FixedTarget(3))
    state = State(torch.ones(1, STATE_DIM))

    def update_once():
        self.policy(state).sum().backward()
        self.policy.step()

    # First and second update: the target output must still be zeros.
    for _ in range(2):
        update_once()
        tt.assert_equal(self.policy.target(state),
                        torch.zeros(1, ACTION_DIM))

    # Third update: the target should now have been refreshed.
    update_once()
    expected = torch.tensor([[-0.574482, -0.574482, -0.574482]])
    tt.assert_allclose(self.policy.target(state), expected, atol=1e-4)
def test_multi_env(self):
    """Check the extra observation column over several steps for two environments.

    The wrapped agent receives a StateArray of two 2-element observations;
    the state recorded by ``self.test_agent`` is expected to carry a third
    column that grows by 1e-3 per call to ``act``.
    """
    # NOTE(review): the hard-coded observation values assume the RNG is
    # seeded outside this method (e.g. in setUp) - confirm.
    def check_observation(expected_rows):
        tt.assert_allclose(self.test_agent.last_state.observation,
                           torch.tensor(expected_rows),
                           atol=1e-04)

    state = StateArray(torch.randn(2, 2), (2, ))

    self.agent.act(state)
    check_observation([[0.3923, -0.2236, 0.],
                       [-0.3195, -1.2050, 0.]])

    self.agent.act(state)
    check_observation([[0.3923, -0.2236, 1e-3],
                       [-0.3195, -1.2050, 1e-3]])

    # Mark only the second environment as done; its extra column is
    # expected to restart at 0 on the following step.
    second_done = StateArray(state.observation, (2, ),
                             done=torch.tensor([False, True]))
    self.agent.act(second_done)
    check_observation([[0.3923, -0.2236, 2e-3],
                       [-0.3195, -1.2050, 2e-3]])

    self.agent.act(state)
    check_observation([[0.3923, -0.2236, 3e-3],
                       [-0.3195, -1.2050, 0.]])

    self.agent.act(state)
    check_observation([[0.3923, -0.2236, 4e-3],
                       [-0.3195, -1.2050, 1e-3]])
def test_rollout_with_nones(self):
    """Check NStepBatchBuffer samples when some stored states are flagged with 0.

    Entries 5, 7 and 9 of the second State argument are zeroed before four
    batches of three states are stored.
    """
    buffer = NStepBatchBuffer(3, 3, discount_factor=0.5)

    done = torch.ones(12)
    for idx in (5, 7, 9):
        done[idx] = 0
    all_states = State(torch.arange(0, 12), done)
    actions = torch.ones((3))

    step_rewards = [
        torch.zeros(3),
        torch.ones(3),
        2 * torch.ones(3),
        4 * torch.ones(3),
    ]
    for step, rewards in enumerate(step_rewards):
        first = 3 * step
        buffer.store(all_states[first:first + 3], actions, rewards)

    states, actions, returns, next_states, lengths = buffer.sample(-1)

    expected_states = State(torch.arange(0, 9), done[0:9])
    expected_next_done = torch.zeros(9)
    for idx in (5, 7, 8):
        expected_next_done[idx] = 1
    expect_next_states = State(
        torch.tensor([9, 7, 5, 9, 7, 11, 9, 10, 11]), expected_next_done)
    expected_returns = torch.tensor([1, 0.5, 0, 2, 1, 2, 2, 2, 2]).float()
    expected_lengths = torch.tensor([3, 2, 1, 2, 1, 2, 1, 1, 1]).float()

    self.assert_states_equal(states, expected_states)
    self.assert_states_equal(next_states, expect_next_states)
    tt.assert_equal(lengths, expected_lengths)
    tt.assert_allclose(returns, expected_returns)
def test_rollout(self):
    """Store four batches of three states in an NStepBuffer and check a sample of six.

    After the four stores the buffer is expected to report a length of 6.
    """
    buffer = NStepBuffer(2, discount_factor=0.5)
    actions = torch.ones((3))
    all_states = State(torch.arange(0, 12))

    step_rewards = [
        torch.zeros(3),
        torch.ones(3),
        2 * torch.ones(3),
        4 * torch.ones(3),
    ]
    for step, rewards in enumerate(step_rewards):
        first = 3 * step
        buffer.store(all_states[first:first + 3], actions, rewards)

    self.assertEqual(len(buffer), 6)

    states, actions, returns, next_states, lengths = buffer.sample(6)

    expected_states = State(torch.arange(0, 6))
    expected_next_states = State(torch.arange(6, 12))
    expected_returns = torch.tensor([2, 2, 2, 4, 4, 4]).float()
    expected_lengths = torch.tensor([2, 2, 2, 2, 2, 2])

    self.assert_states_equal(states, expected_states)
    self.assert_states_equal(next_states, expected_next_states)
    tt.assert_allclose(returns, expected_returns)
    tt.assert_equal(lengths, expected_lengths)
def _compare_models():
    """Compare parameters and gradients of the reference and data-parallel models.

    Walks both models' ``named_parameters()`` in lockstep. For the first
    pair it prints a slice of the weights and gradients; for every pair it
    asserts that the weights (and the gradients, when both exist) are close.
    """
    # NOTE(review): the original indentation was lost; this layout assumes
    # only the debug prints are guarded by ``i_layer == 0`` - confirm.
    rtol = 2e-2
    atol = 1e-7
    paired = zip(trainer.reference_model.named_parameters(),
                 trainer.dataparallel_model.named_parameters())
    for i_layer, ((ref_name, ref_param), (dp_name, dp_param)) in enumerate(paired):
        if i_layer == 0:
            print(ref_name, dp_name)
            print("Weights:")
            print(ref_param.data[0, 0, ...])
            print(dp_param.data[0, 0, ...])
            print("Grads:")
            for param in (ref_param, dp_param):
                if param.grad is None:
                    print("None")
                else:
                    print(param.grad[0, 0, ...])
            print("")

        tt.assert_allclose(ref_param.data, dp_param.data,
                           rtol=rtol, atol=atol)
        if ref_param.grad is not None and dp_param.grad is not None:
            tt.assert_allclose(ref_param.grad, dp_param.grad, rtol=rtol)
def test_rollout_with_nones(self):
    """Check NStepAdvantageBuffer advantages when some states are flagged with 0.

    Entries 5, 7 and 9 of the second State argument are zeroed before three
    batches of three states are stored; expected advantages come from
    ``self._compute_expected_advantages``.
    """
    buffer = NStepAdvantageBuffer(self.v, self.features, 3, 3,
                                  discount_factor=0.5)

    done = torch.ones(12)
    for idx in (5, 7, 9):
        done[idx] = 0
    all_states = State(torch.arange(0, 12).unsqueeze(1), done)
    actions = torch.ones((3))

    step_rewards = [torch.zeros(3), torch.ones(3), 2 * torch.ones(3)]
    for step, rewards in enumerate(step_rewards):
        first = 3 * step
        buffer.store(all_states[first:first + 3], actions, rewards)

    states, actions, advantages = buffer.advantages(all_states[9:12])

    expected_states = State(torch.arange(0, 9).unsqueeze(1), done[0:9])
    expected_next_done = torch.zeros(9)
    for idx in (5, 7, 8):
        expected_next_done[idx] = 1
    expected_next_states = State(
        torch.tensor([9, 7, 5, 9, 7, 11, 9, 10, 11]).unsqueeze(1),
        expected_next_done)
    expected_returns = torch.tensor([1, 0.5, 0, 2, 1, 2, 2, 2, 2]).float()
    expected_lengths = torch.tensor([3, 2, 1, 2, 1, 2, 1, 1, 1]).float()

    self.assert_states_equal(states, expected_states)
    expected_advantages = self._compute_expected_advantages(
        expected_states, expected_returns, expected_next_states,
        expected_lengths)
    tt.assert_allclose(advantages, expected_advantages)
def test_reset(self):
    """Check that the extra observation value restarts after a done state.

    The state recorded by ``self.test_agent`` is expected to hold the four
    input values plus a fifth that grows by 1e-3 per step and is back at 0
    on the step after a state with ``done=True`` was passed.
    """
    # NOTE(review): the hard-coded observation values assume the RNG is
    # seeded outside this method (e.g. in setUp) - confirm.
    base = [0.3923, -0.2236, -0.3195, -1.2050]

    def check_last(extra):
        tt.assert_allclose(self.test_agent.last_state.observation,
                           torch.tensor(base + [extra]),
                           atol=1e-04)

    state = State(torch.randn(4))

    self.agent.act(state)
    check_last(0.0000)

    self.agent.act(state)
    check_last(1e-3)

    self.agent.act(State(state.observation, done=True))
    check_last(2e-3)

    # First step after the done state: the extra value starts over.
    self.agent.act(State(state.observation))
    check_last(0.0000)

    self.agent.act(state)
    check_last(1e-3)
def test_forward_mean(self):
    """Compare the MSE forward pass against the PyTorch implementation.

    Runs 100 trials, each on a fresh pair of tensors from
    ``random_tensor_2d(10)``.
    """
    for _ in range(100):
        prediction = random_tensor_2d(10)
        target = random_tensor_2d(10)

        # Fresh loss objects for every trial, as in the reference setup.
        reference_loss = torch.nn.MSELoss()
        candidate_loss = mytorch.nn.LossMSE()

        expected = reference_loss(prediction, target)
        actual = candidate_loss(prediction, target)
        tt.assert_allclose(expected, actual, rtol=1e-06)
def test_scaling(self):
    """Check action and log-probability for a Box with per-dimension bounds.

    Rebuilds ``self.space`` and ``self.policy`` with asymmetric bounds and
    compares the policy output for a random state with stored values.
    """
    # NOTE(review): the expected values assume the RNG is seeded outside
    # this method (e.g. in setUp) - confirm.
    lower = np.array([-10, -5, 100])
    upper = np.array([10, -2, 200])
    self.space = Box(lower, upper)
    self.policy = SoftDeterministicPolicy(self.model, self.optimizer,
                                          self.space)

    state = State(torch.randn(1, STATE_DIM))
    action, log_prob = self.policy(state)

    expected_action = torch.tensor([[-3.09055, -4.752777, 188.98222]])
    expected_log_prob = torch.tensor([-0.397002])
    tt.assert_allclose(action, expected_action)
    tt.assert_allclose(log_prob, expected_log_prob, rtol=1e-4)
def testModulus(self):
    """Compare sl.Modulus with a direct sqrt-of-sum-of-squares, with and without jit.

    Requires CUDA: the input is allocated as a ``torch.cuda.FloatTensor``.
    """
    for use_jit in (True, False):
        modulus = sl.Modulus(jit=use_jit)
        x = torch.cuda.FloatTensor(100, 10, 4, 2).copy_(
            torch.rand(100, 10, 4, 2))
        y = modulus(x)

        # Reference: norm over the last axis of x.
        squared_norm = torch.sum(x * x, 3)
        expected = torch.squeeze(torch.sqrt(squared_norm))
        # Only component 0 of the last axis of the output is compared.
        actual = y[..., 0]
        tt.assert_allclose(expected.cpu(), actual.cpu(), atol=1e-6)
def test_rollout_with_nones(self):
    """Check NStepBuffer samples when some stored states are flagged with 0.

    Fifteen states are stored in five batches of three (indices 0-2, 3-5,
    6-8, 9-11, 12-14); entries 5, 7 and 9 of the second State argument are
    zeroed beforehand.
    """
    buffer = NStepBuffer(3, discount_factor=0.5)

    done = torch.ones(15)
    for idx in (9, 7, 5):
        done[idx] = 0
    all_states = State(torch.arange(0, 15), done)
    actions = torch.ones((3))

    step_rewards = [
        torch.zeros(3),
        torch.ones(3),
        2 * torch.ones(3),
        4 * torch.ones(3),
        8 * torch.ones(3),
    ]
    for step, rewards in enumerate(step_rewards):
        first = 3 * step
        buffer.store(all_states[first:first + 3], actions, rewards)

    states, actions, returns, next_states, lengths = buffer.sample(6)

    expected_states = State(torch.arange(0, 6),
                            torch.tensor([1, 1, 1, 1, 1, 0]))
    expected_next_states = State(torch.tensor([9, 7, 5, 9, 7, 5]),
                                 torch.zeros(6))
    expected_returns = torch.tensor([3, 2, 1, 4, 2, 0]).float()
    expected_lengths = torch.tensor([3, 2, 1, 2, 1, 0])

    self.assert_states_equal(states, expected_states)
    self.assert_states_equal(next_states, expected_next_states)
    tt.assert_allclose(returns, expected_returns)
    tt.assert_equal(lengths, expected_lengths)
def testCublas(self):
    """Compare sl.cdgmm with an explicit complex multiplication, with and without jit.

    The last axis of each tensor holds (real, imaginary) parts. The filter's
    imaginary part is zeroed before the comparison. Requires CUDA.
    """
    for jit in [True, False]:
        x = torch.rand(100, 128, 128, 2).cuda()
        kernel = torch.rand(128, 128, 2).cuda()
        kernel[..., 1] = 0

        # Reference product (a + ib)(c + id) = (ac - bd) + i(bc + ad).
        # The (128, 128) kernel planes broadcast over the batch axis of x,
        # which gives the same per-sample arithmetic as an explicit loop.
        real = x[..., 0] * kernel[..., 0] - x[..., 1] * kernel[..., 1]
        imag = x[..., 1] * kernel[..., 0] + x[..., 0] * kernel[..., 1]
        y = torch.stack((real, imag), dim=-1)

        z = sl.cdgmm(x, kernel, jit=jit)
        tt.assert_allclose(y.cpu(), z.cpu(), atol=1e-6)
def testFFTUnormalized(self):
    """Round-trip a tensor through sl.Fft on GPU, then on CPU.

    A forward transform followed by a 'C2R' transform, divided by 17 * 3,
    is expected to reproduce component 0 of the input's last axis.
    """
    # Random 0/1 tensor as input.
    x = torch.FloatTensor(25, 17, 3, 2).bernoulli_(0.5)
    for on_gpu in (True, False):
        x = x.cuda() if on_gpu else x.cpu()
        # Zero component 1 of the last axis in place.
        x.narrow(3, 1, 1).fill_(0)

        fft = sl.Fft()
        spectrum = fft(x)
        recovered = fft(spectrum, direction='C2R')
        recovered /= 17 * 3  # FFTs are unnormalized

        tt.assert_allclose(x.select(3, 0).cpu(), recovered.cpu(), atol=1e-6)
def test_rollout(self):
    """Store three batches of three states in an NStepBatchBuffer and check the sample."""
    buffer = NStepBatchBuffer(2, 3, discount_factor=0.5)
    actions = torch.ones((3))
    all_states = State(torch.arange(0, 12))

    step_rewards = [torch.zeros(3), torch.ones(3), 4 * torch.ones(3)]
    for step, rewards in enumerate(step_rewards):
        first = 3 * step
        buffer.store(all_states[first:first + 3], actions, rewards)

    states, _, returns, next_states, lengths = buffer.sample(-1)

    expected_states = State(torch.arange(0, 6))
    # Both sampled steps are expected to end in states 6..8.
    tail = torch.arange(6, 9)
    expect_next_states = State(torch.cat((tail, torch.arange(6, 9))))
    expected_returns = torch.tensor([0.5, 0.5, 0.5, 1, 1, 1]).float()
    expected_lengths = torch.tensor([2, 2, 2, 1, 1, 1]).long()

    self.assert_states_equal(states, expected_states)
    self.assert_states_equal(next_states, expect_next_states)
    tt.assert_allclose(returns, expected_returns)
    tt.assert_equal(lengths, expected_lengths)
def testPeriodization(self):
    """Compare sl.Periodize(k=16) with an explicit average, with and without jit.

    The reference averages the 16 x 16 samples of the 128 x 128 input taken
    at stride 8 for each of the 8 x 8 output positions. The result is
    checked for both a CUDA input and a CPU input.
    """
    out_size = 8
    factor = 16
    for use_jit in (True, False):
        x = torch.rand(100, 1, 128, 128, 2).cuda().double()
        expected = torch.zeros(100, 1, 8, 8, 2).cuda().double()

        # Accumulation order is kept as i, j, m, n so rounding is unchanged.
        for row in range(out_size):
            for col in range(out_size):
                for m in range(factor):
                    for n in range(factor):
                        expected[..., row, col, :] += x[..., row + m * 8,
                                                        col + n * 8, :]
        expected = expected / (16 * 16)

        periodize = sl.Periodize(jit=use_jit)

        on_gpu = periodize(x, k=16)
        tt.assert_allclose(expected.cpu(), on_gpu.cpu(), atol=1e-8)

        on_cpu = periodize(x.cpu(), k=16)
        tt.assert_allclose(expected.cpu(), on_cpu, atol=1e-8)
def test_multi_rollout(self):
    """Run two consecutive rollouts through one NStepAdvantageBuffer.

    Each rollout stores two batches of two states and then requests the
    advantages; expected values come from
    ``self._compute_expected_advantages``.
    """
    buffer = NStepAdvantageBuffer(self.v, self.features, 2, 2,
                                  discount_factor=0.5)
    raw_states = StateArray(
        torch.arange(0, 12).unsqueeze(1).float(), (12, ))
    actions = torch.ones((2))

    # --- first rollout: states 0..3, bootstrapping from states 4..5 ---
    buffer.store(raw_states[0:2], actions, torch.ones(2))
    buffer.store(raw_states[2:4], actions, torch.ones(2))
    # NOTE(review): ``actions`` is rebound to the buffer's output here and
    # reused for the stores of the second rollout - kept as in the original.
    states, actions, advantages = buffer.advantages(raw_states[4:6])

    expected_states = StateArray(
        torch.arange(0, 4).unsqueeze(1).float(), (4, ))
    expected_returns = torch.tensor([1.5, 1.5, 1, 1])
    expected_next_states = StateArray(
        torch.tensor([4., 5, 4, 5]).unsqueeze(1), (4, ))
    expected_lengths = torch.tensor([2., 2, 1, 1])

    self.assert_states_equal(states, expected_states)
    first_expected = self._compute_expected_advantages(
        expected_states, expected_returns, expected_next_states,
        expected_lengths)
    tt.assert_allclose(advantages, first_expected)

    # --- second rollout: states 4..7, bootstrapping from states 8..9 ---
    buffer.store(raw_states[4:6], actions, torch.ones(2))
    buffer.store(raw_states[6:8], actions, torch.ones(2))
    states, actions, advantages = buffer.advantages(raw_states[8:10])

    expected_states = StateArray(
        torch.arange(4, 8).unsqueeze(1).float(), (4, ))
    self.assert_states_equal(states, expected_states)
    second_expected = self._compute_expected_advantages(
        expected_states,
        torch.tensor([1.5, 1.5, 1, 1]),
        StateArray(torch.tensor([8, 9, 8, 9]).unsqueeze(1).float(), (4, )),
        torch.tensor([2., 2, 1, 1]))
    tt.assert_allclose(advantages, second_expected)
def test_multi_rollout(self):
    """Sample an NStepBuffer twice, storing more transitions in between."""
    buffer = NStepBuffer(2, discount_factor=0.5)
    raw_states = State(torch.arange(12))
    expected_lengths = torch.tensor([2, 2, 2, 2])
    actions = torch.ones(2)

    # --- first sample: four stored batches of two states ---
    first_rewards = [
        torch.ones(2),
        torch.ones(2),
        torch.ones(2),
        torch.ones(2) * 2,
    ]
    for step, rewards in enumerate(first_rewards):
        first = 2 * step
        buffer.store(raw_states[first:first + 2], actions, rewards)

    # NOTE(review): ``actions`` is rebound to the sampled actions here and
    # reused for the following stores - kept as in the original.
    states, actions, returns, next_states, lengths = buffer.sample(4)
    self.assert_states_equal(states, State(torch.arange(0, 4)))
    self.assert_states_equal(next_states, State(torch.arange(4, 8)))
    tt.assert_allclose(returns, torch.tensor([1.5, 1.5, 2, 2]))
    tt.assert_equal(lengths, expected_lengths)

    # --- second sample: two more batches stored ---
    buffer.store(raw_states[8:10], actions, torch.ones(2))
    buffer.store(raw_states[10:12], actions, torch.ones(2))

    states, actions, returns, next_states, lengths = buffer.sample(4)
    self.assert_states_equal(states, State(torch.arange(4, 8)))
    self.assert_states_equal(next_states, State(torch.arange(8, 12)))
    tt.assert_allclose(returns, torch.tensor([2.5, 2.5, 1.5, 1.5]))
    tt.assert_equal(lengths, expected_lengths)
def test_multi_rollout(self):
    """Sample an NStepBatchBuffer twice, storing more transitions in between.

    Both samples are expected to yield the same returns and lengths.
    """
    buffer = NStepBatchBuffer(2, 2, discount_factor=0.5)
    raw_states = State(torch.arange(0, 12))
    actions = torch.ones((2))

    # --- first sample: states 0..5 stored in three batches of two ---
    for first in (0, 2, 4):
        buffer.store(raw_states[first:first + 2], actions, torch.ones(2))

    # NOTE(review): ``actions`` is rebound to the sampled actions here and
    # reused for the following stores - kept as in the original.
    states, actions, returns, next_states, lengths = buffer.sample(-1)
    self.assert_states_equal(states, State(torch.arange(0, 4)))
    self.assert_states_equal(next_states,
                             State(torch.tensor([4, 5, 4, 5])))
    tt.assert_allclose(returns, torch.tensor([1.5, 1.5, 1, 1]))
    tt.assert_equal(lengths, torch.tensor([2, 2, 1, 1]))

    # --- second sample: states 6..9 stored in two batches of two ---
    buffer.store(raw_states[6:8], actions, torch.ones(2))
    buffer.store(raw_states[8:10], actions, torch.ones(2))

    states, actions, returns, next_states, lengths = buffer.sample(-1)
    self.assert_states_equal(states, State(torch.arange(4, 8)))
    self.assert_states_equal(next_states,
                             State(torch.tensor([8, 9, 8, 9])))
    tt.assert_allclose(returns, torch.tensor([1.5, 1.5, 1, 1]))
    tt.assert_equal(lengths, torch.tensor([2, 2, 1, 1]))
def test_single_env(self):
    """Check the extra observation value over three steps of one environment.

    The state recorded by ``self.test_agent`` is expected to hold the four
    input values plus a fifth that grows by 1e-3 per call to ``act``.
    """
    # NOTE(review): the hard-coded observation values assume the RNG is
    # seeded outside this method (e.g. in setUp) - confirm.
    state = State(torch.randn(4))
    for extra in (0., 1e-3, 2e-3):
        self.agent.act(state)
        expected = torch.tensor(
            [0.3923, -0.2236, -0.3195, -1.2050, extra])
        tt.assert_allclose(self.test_agent.last_state.observation,
                           expected, atol=1e-04)
def test_with_not_allclose_tensors(self):
    """assert_allclose with default tolerances must reject these two tensors."""
    first = torch.tensor([1, 2, 23.6579, 0])
    # Differs from ``first`` only in the third element.
    second = torch.tensor([1, 2, 23.65789, 0])
    with self.assertRaisesRegex(AssertionError, 'Not equal to tolerance'):
        tt.assert_allclose(first, second)
def test_atol_param_with_allclose_tensors(self):
    """Differences of exactly 1 must pass with atol=1 and rtol=0."""
    zeros = torch.tensor([0, 0, 0])
    offsets = torch.tensor([-1, 0, 1])
    tt.assert_allclose(zeros, offsets, atol=1, rtol=0)
def test_atol_param_with_not_allclose_tensors(self):
    """A difference of 1.001 must fail with atol=1 and rtol=0."""
    zero = torch.tensor([0])
    just_outside = torch.tensor([-1.001])
    with self.assertRaisesRegex(AssertionError, 'Not equal to tolerance'):
        tt.assert_allclose(zero, just_outside, atol=1, rtol=0)
def test_with_allclose_tensors(self):
    """assert_allclose with default tolerances must accept these two tensors."""
    first = torch.tensor([1, 2, 23.65799, 0])
    # Differs from ``first`` only slightly in the third element.
    second = torch.tensor([1, 2, 23.657989, 0])
    tt.assert_allclose(first, second)