Example #1
 def test_multi_env(self):
     state = State(torch.randn(2, 2))
     self.agent.act(state, 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor([[0.3923, -0.2236, 0.],
                                      [-0.3195, -1.2050, 0.]]),
                        atol=1e-04)
     self.agent.act(state, 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor([[0.3923, -0.2236, 1e-3],
                                      [-0.3195, -1.2050, 1e-3]]),
                        atol=1e-04)
     self.agent.act(State(state.features, torch.tensor([1., 0.])), 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor([[0.3923, -0.2236, 2e-3],
                                      [-0.3195, -1.2050, 2e-3]]),
                        atol=1e-04)
     self.agent.act(state, 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor([[0.3923, -0.2236, 3e-3],
                                      [-0.3195, -1.2050, 0.]]),
                        atol=1e-04)
     self.agent.act(state, 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor([[0.3923, -0.2236, 4e-3],
                                      [-0.3195, -1.2050, 1e-3]]),
                        atol=1e-04)
Example #2
    def testScattering(self):
        data = torch.load('test/test_data.pt')
        x = data['x']
        S = data['S']
        scat = Scattering(128, 128, 4, pre_pad=False, jit=True)
        scat.cuda()
        x = x.cuda()
        S = S.cuda()
        tt.assert_allclose(S.cpu(), scat(x).cpu(), atol=1e-6)

        scat = Scattering(128, 128, 4, pre_pad=False, jit=False)
        Sg = []
        Sc = []
        for gpu in [True, False]:
            if gpu:
                x = x.cuda()
                scat.cuda()
                Sg = scat(x)
            else:
                x = x.cpu()
                scat.cpu()
                Sc = scat(x)
        """there are huge round off errors with fftw, numpy fft, cufft...
        and the kernels of periodization. We do not wish to play with that as it is meaningless."""
        tt.assert_allclose(Sg.cpu(), Sc.cpu(), atol=1e-1)
Example #3
    def test_rollout(self):
        buffer = NStepAdvantageBuffer(self.v,
                                      self.features,
                                      2,
                                      3,
                                      discount_factor=0.5)
        actions = torch.ones((3))
        states = State(torch.arange(0, 12).unsqueeze(1))
        buffer.store(states[0:3], actions, torch.zeros(3))
        buffer.store(states[3:6], actions, torch.ones(3))
        states, _, advantages = buffer.advantages(states[6:9])

        expected_states = State(torch.arange(0, 6).unsqueeze(1))
        expected_next_states = State(
            torch.cat((torch.arange(6, 9), torch.arange(6, 9))).unsqueeze(1))
        expected_returns = torch.tensor([0.5, 0.5, 0.5, 1, 1, 1]).float()
        expected_lengths = torch.tensor([2., 2, 2, 1, 1, 1])

        self.assert_states_equal(states, expected_states)
        tt.assert_allclose(
            advantages,
            self._compute_expected_advantages(expected_states,
                                              expected_returns,
                                              expected_next_states,
                                              expected_lengths))
Example #4
 def test_reset(self):
     state = State(torch.randn(1, 4))
     self.agent.act(state, 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor(
                            [[0.3923, -0.2236, -0.3195, -1.2050, 0.0000]]),
                        atol=1e-04)
     self.agent.act(state, 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor(
                            [[0.3923, -0.2236, -0.3195, -1.2050, 1e-3]]),
                        atol=1e-04)
     self.agent.act(State(state.features, DONE), 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor(
                            [[0.3923, -0.2236, -0.3195, -1.2050, 2e-3]]),
                        atol=1e-04)
     self.agent.act(State(state.features), 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor(
                            [[0.3923, -0.2236, -0.3195, -1.2050, 0.0000]]),
                        atol=1e-04)
     self.agent.act(state, 0)
     tt.assert_allclose(self.test_agent.last_state.features,
                        torch.tensor(
                            [[0.3923, -0.2236, -0.3195, -1.2050, 1e-3]]),
                        atol=1e-04)
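Example #5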
    def test_target(self):
        self.policy = DeterministicPolicy(
            self.model,
            self.optimizer,
            self.space,
            target=FixedTarget(3)
        )

        # choose initial action
        state = State(torch.ones(1, STATE_DIM))
        action = self.policy(state)
        tt.assert_equal(action, torch.zeros(1, ACTION_DIM))

        # run update step, make sure target network doesn't change
        action.sum().backward(retain_graph=True)
        self.policy.step()
        tt.assert_equal(self.policy.target(state), torch.zeros(1, ACTION_DIM))

        # again...
        action.sum().backward(retain_graph=True)
        self.policy.step()
        tt.assert_equal(self.policy.target(state), torch.zeros(1, ACTION_DIM))

        # third time, target should be updated
        action.sum().backward(retain_graph=True)
        self.policy.step()
        tt.assert_allclose(
            self.policy.eval(state),
            torch.tensor([[-0.595883, -0.595883, -0.595883]]),
            atol=1e-4,
        )
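Example #6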
    def test_target(self):
        self.policy = DeterministicPolicy(self.model,
                                          self.optimizer,
                                          self.space,
                                          target=FixedTarget(3))
        state = State(torch.ones(1, STATE_DIM))

        # run update step, make sure target network doesn't change
        self.policy(state).sum().backward()
        self.policy.step()
        tt.assert_equal(self.policy.target(state), torch.zeros(1, ACTION_DIM))

        # again...
        self.policy(state).sum().backward()
        self.policy.step()
        tt.assert_equal(self.policy.target(state), torch.zeros(1, ACTION_DIM))

        # third time, target should be updated
        self.policy(state).sum().backward()
        self.policy.step()
        tt.assert_allclose(
            self.policy.target(state),
            torch.tensor([[-0.574482, -0.574482, -0.574482]]),
            atol=1e-4,
        )
Example #7
 def test_multi_env(self):
     state = StateArray(torch.randn(2, 2), (2, ))
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor([[0.3923, -0.2236, 0.],
                                      [-0.3195, -1.2050, 0.]]),
                        atol=1e-04)
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor([[0.3923, -0.2236, 1e-3],
                                      [-0.3195, -1.2050, 1e-3]]),
                        atol=1e-04)
     self.agent.act(
         StateArray(state.observation, (2, ),
                    done=torch.tensor([False, True])))
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor([[0.3923, -0.2236, 2e-3],
                                      [-0.3195, -1.2050, 2e-3]]),
                        atol=1e-04)
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor([[0.3923, -0.2236, 3e-3],
                                      [-0.3195, -1.2050, 0.]]),
                        atol=1e-04)
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor([[0.3923, -0.2236, 4e-3],
                                      [-0.3195, -1.2050, 1e-3]]),
                        atol=1e-04)
Example #8
    def test_rollout_with_nones(self):
        buffer = NStepBatchBuffer(3, 3, discount_factor=0.5)
        done = torch.ones(12)
        done[5] = 0
        done[7] = 0
        done[9] = 0
        states = State(torch.arange(0, 12), done)
        actions = torch.ones((3))
        buffer.store(states[0:3], actions, torch.zeros(3))
        buffer.store(states[3:6], actions, torch.ones(3))
        buffer.store(states[6:9], actions, 2 * torch.ones(3))
        buffer.store(states[9:12], actions, 4 * torch.ones(3))
        states, actions, returns, next_states, lengths = buffer.sample(-1)

        expected_states = State(torch.arange(0, 9), done[0:9])
        expected_next_done = torch.zeros(9)
        expected_next_done[5] = 1
        expected_next_done[7] = 1
        expected_next_done[8] = 1
        expect_next_states = State(
            torch.tensor([9, 7, 5, 9, 7, 11, 9, 10, 11]), expected_next_done)
        expected_returns = torch.tensor([1, 0.5, 0, 2, 1, 2, 2, 2, 2]).float()
        expected_lengths = torch.tensor([3, 2, 1, 2, 1, 2, 1, 1, 1]).float()

        self.assert_states_equal(states, expected_states)
        self.assert_states_equal(next_states, expect_next_states)
        tt.assert_equal(lengths, expected_lengths)
        tt.assert_allclose(returns, expected_returns)
Example #9
    def test_rollout(self):
        buffer = NStepBuffer(2, discount_factor=0.5)
        actions = torch.ones((3))
        states = State(torch.arange(0, 12))
        buffer.store(states[0:3], actions, torch.zeros(3))
        buffer.store(states[3:6], actions, torch.ones(3))
        buffer.store(states[6:9], actions, 2 * torch.ones(3))
        buffer.store(states[9:12], actions, 4 * torch.ones(3))
        self.assertEqual(len(buffer), 6)

        states, actions, returns, next_states, lengths = buffer.sample(6)
        expected_states = State(torch.arange(0, 6))
        expected_next_states = State(torch.arange(6, 12))
        expected_returns = torch.tensor([2, 2, 2, 4, 4, 4]).float()
        expected_lengths = torch.tensor([2, 2, 2, 2, 2, 2])
        self.assert_states_equal(states, expected_states)
        self.assert_states_equal(next_states, expected_next_states)
        tt.assert_allclose(returns, expected_returns)
        tt.assert_equal(lengths, expected_lengths)
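Example #10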
        def _compare_models():
            for i_layer, (ref_np, dp_np) in enumerate(
                    zip(trainer.reference_model.named_parameters(),
                        trainer.dataparallel_model.named_parameters())):

                if i_layer == 0:
                    print(ref_np[0], dp_np[0])
                    print("Weights:")
                    print(ref_np[1].data[0, 0, ...])
                    print(dp_np[1].data[0, 0, ...])
                    print("Grads:")
                    if ref_np[1].grad is not None:
                        print(ref_np[1].grad[0, 0, ...])
                    else:
                        print("None")
                    if dp_np[1].grad is not None:
                        print(dp_np[1].grad[0, 0, ...])
                    else:
                        print("None")
                    print("")

                rtol = 2e-2
                atol = 1e-7
                tt.assert_allclose(ref_np[1].data,
                                   dp_np[1].data,
                                   rtol=rtol,
                                   atol=atol)
                if ref_np[1].grad is not None and dp_np[1].grad is not None:
                    tt.assert_allclose(ref_np[1].grad,
                                       dp_np[1].grad,
                                       rtol=rtol)
Example #11
    def test_rollout_with_nones(self):
        buffer = NStepAdvantageBuffer(self.v,
                                      self.features,
                                      3,
                                      3,
                                      discount_factor=0.5)
        done = torch.ones(12)
        done[5] = 0
        done[7] = 0
        done[9] = 0
        states = State(torch.arange(0, 12).unsqueeze(1), done)
        actions = torch.ones((3))
        buffer.store(states[0:3], actions, torch.zeros(3))
        buffer.store(states[3:6], actions, torch.ones(3))
        buffer.store(states[6:9], actions, 2 * torch.ones(3))
        states, actions, advantages = buffer.advantages(states[9:12])

        expected_states = State(torch.arange(0, 9).unsqueeze(1), done[0:9])
        expected_next_done = torch.zeros(9)
        expected_next_done[5] = 1
        expected_next_done[7] = 1
        expected_next_done[8] = 1
        expected_next_states = State(
            torch.tensor([9, 7, 5, 9, 7, 11, 9, 10, 11]).unsqueeze(1),
            expected_next_done)
        expected_returns = torch.tensor([1, 0.5, 0, 2, 1, 2, 2, 2, 2]).float()
        expected_lengths = torch.tensor([3, 2, 1, 2, 1, 2, 1, 1, 1]).float()

        self.assert_states_equal(states, expected_states)
        tt.assert_allclose(
            advantages,
            self._compute_expected_advantages(expected_states,
                                              expected_returns,
                                              expected_next_states,
                                              expected_lengths))
Example #12
 def test_reset(self):
     state = State(torch.randn(4))
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor(
                            [0.3923, -0.2236, -0.3195, -1.2050, 0.0000]),
                        atol=1e-04)
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor(
                            [0.3923, -0.2236, -0.3195, -1.2050, 1e-3]),
                        atol=1e-04)
     self.agent.act(State(state.observation, done=True))
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor(
                            [0.3923, -0.2236, -0.3195, -1.2050, 2e-3]),
                        atol=1e-04)
     self.agent.act(State(state.observation))
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor(
                            [0.3923, -0.2236, -0.3195, -1.2050, 0.0000]),
                        atol=1e-04)
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation,
                        torch.tensor(
                            [0.3923, -0.2236, -0.3195, -1.2050, 1e-3]),
                        atol=1e-04)
Example #13
 def test_forward_mean(self):
     """ Compare forward pass to pytorch implementation """
     for i in range(100):
         x = random_tensor_2d(10)
         y = random_tensor_2d(10)
         torch_mse = torch.nn.MSELoss()
         mytorch_mse = mytorch.nn.LossMSE()
         l1, l2 = torch_mse(x, y), mytorch_mse(x, y)
         tt.assert_allclose(l1, l2, rtol=1e-06)
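Example #14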
 def test_scaling(self):
     self.space = Box(np.array([-10, -5, 100]), np.array([10, -2, 200]))
     self.policy = SoftDeterministicPolicy(self.model, self.optimizer,
                                           self.space)
     state = State(torch.randn(1, STATE_DIM))
     action, log_prob = self.policy(state)
     tt.assert_allclose(action,
                        torch.tensor([[-3.09055, -4.752777, 188.98222]]))
     tt.assert_allclose(log_prob, torch.tensor([-0.397002]), rtol=1e-4)
Example #15
    def testModulus(self):
        for jit in [True, False]:
            modulus = sl.Modulus(jit=jit)
            x = torch.cuda.FloatTensor(100, 10, 4, 2).copy_(torch.rand(100, 10, 4, 2))
            y = modulus(x)
            u = torch.squeeze(torch.sqrt(torch.sum(x * x, 3)))
            v = y[..., 0]

            tt.assert_allclose(u.cpu(), v.cpu(), atol=1e-6)
Example #16
    def test_rollout_with_nones(self):
        buffer = NStepBuffer(3, discount_factor=0.5)
        done = torch.ones(15)
        # [
        #     0, 1, 2,
        #     3, 4, 5,
        #     6, 7, 8,
        #     9, 10, 11,
        #     12, 13, 14
        # ]
        done[9] = 0
        done[7] = 0
        done[5] = 0
        states = State(torch.arange(0, 15), done)
        actions = torch.ones((3))
        buffer.store(states[0:3], actions, torch.zeros(3))
        buffer.store(states[3:6], actions, torch.ones(3))
        buffer.store(states[6:9], actions, 2 * torch.ones(3))
        buffer.store(states[9:12], actions, 4 * torch.ones(3))
        buffer.store(states[12:15], actions, 8 * torch.ones(3))
        states, actions, returns, next_states, lengths = buffer.sample(6)

        expected_states = State(torch.arange(0, 6),
                                torch.tensor([1, 1, 1, 1, 1, 0]))
        expected_next_states = State(torch.tensor([9, 7, 5, 9, 7, 5]),
                                     torch.zeros(6))
        expected_returns = torch.tensor([3, 2, 1, 4, 2, 0]).float()
        expected_lengths = torch.tensor([3, 2, 1, 2, 1, 0])

        self.assert_states_equal(states, expected_states)
        self.assert_states_equal(next_states, expected_next_states)
        tt.assert_allclose(returns, expected_returns)
        tt.assert_equal(lengths, expected_lengths)
Example #17
    def testCublas(self):
        for jit in [True, False]:
            x = torch.rand(100, 128, 128, 2).cuda()
            filter = torch.rand(128, 128, 2).cuda()
            filter[..., 1] = 0
            y = torch.ones(100, 128, 128, 2).cuda()
            z = torch.Tensor(100, 128, 128, 2).cuda()

            for i in range(100):
                # Complex multiplication of x[i] by the filter (real/imag stored in the last dim).
                y[i, :, :, 0] = x[i, :, :, 0] * filter[:, :, 0] - x[i, :, :, 1] * filter[:, :, 1]
                y[i, :, :, 1] = x[i, :, :, 1] * filter[:, :, 0] + x[i, :, :, 0] * filter[:, :, 1]
            z = sl.cdgmm(x, filter, jit=jit)

            tt.assert_allclose(y.cpu(), z.cpu(), atol=1e-6)
Example #18
    def testFFTUnormalized(self):
        # Check for a random tensor:
        x = torch.FloatTensor(25, 17, 3, 2).bernoulli_(0.5)
        for gpu in [True, False]:

            if gpu:
                x = x.cuda()
            else:
                x = x.cpu()
            x.narrow(3, 1, 1).fill_(0)

            fft = sl.Fft()
            y = fft(x)
            z = fft(y, direction='C2R')

            z /= 17 * 3  # FFTs are unnormalized

            tt.assert_allclose(x.select(3, 0).cpu(), z.cpu(), atol=1e-6)
Example #19
    def test_rollout(self):
        buffer = NStepBatchBuffer(2, 3, discount_factor=0.5)
        actions = torch.ones((3))
        states = State(torch.arange(0, 12))
        buffer.store(states[0:3], actions, torch.zeros(3))
        buffer.store(states[3:6], actions, torch.ones(3))
        buffer.store(states[6:9], actions, 4 * torch.ones(3))
        states, _, returns, next_states, lengths = buffer.sample(-1)

        expected_states = State(torch.arange(0, 6))
        expect_next_states = State(
            torch.cat((torch.arange(6, 9), torch.arange(6, 9))))
        expected_returns = torch.tensor([0.5, 0.5, 0.5, 1, 1, 1]).float()
        expected_lengths = torch.tensor([2, 2, 2, 1, 1, 1]).long()

        self.assert_states_equal(states, expected_states)
        self.assert_states_equal(next_states, expect_next_states)
        tt.assert_allclose(returns, expected_returns)
        tt.assert_equal(lengths, expected_lengths)
Example #20
    def testPeriodization(self):
        for jit in [True, False]:
            x = torch.rand(100, 1, 128, 128, 2).cuda().double()
            y = torch.zeros(100, 1, 8, 8, 2).cuda().double()

            for i in range(8):
                for j in range(8):
                    for m in range(16):
                        for n in range(16):
                            y[..., i, j, :] += x[..., i + m * 8, j + n * 8, :]

            y = y / (16 * 16)

            periodize = sl.Periodize(jit=jit)

            z = periodize(x, k=16)
            tt.assert_allclose(y.cpu(), z.cpu(), atol=1e-8)

            z = periodize(x.cpu(), k=16)
            tt.assert_allclose(y.cpu(), z, atol=1e-8)
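Example #21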
    def test_multi_rollout(self):
        buffer = NStepAdvantageBuffer(self.v,
                                      self.features,
                                      2,
                                      2,
                                      discount_factor=0.5)
        raw_states = StateArray(
            torch.arange(0, 12).unsqueeze(1).float(), (12, ))
        actions = torch.ones((2))
        buffer.store(raw_states[0:2], actions, torch.ones(2))
        buffer.store(raw_states[2:4], actions, torch.ones(2))

        states, actions, advantages = buffer.advantages(raw_states[4:6])
        expected_states = StateArray(
            torch.arange(0, 4).unsqueeze(1).float(), (4, ))
        expected_returns = torch.tensor([1.5, 1.5, 1, 1])
        expected_next_states = StateArray(
            torch.tensor([4., 5, 4, 5]).unsqueeze(1), (4, ))
        expected_lengths = torch.tensor([2., 2, 1, 1])
        self.assert_states_equal(states, expected_states)
        tt.assert_allclose(
            advantages,
            self._compute_expected_advantages(expected_states,
                                              expected_returns,
                                              expected_next_states,
                                              expected_lengths))

        buffer.store(raw_states[4:6], actions, torch.ones(2))
        buffer.store(raw_states[6:8], actions, torch.ones(2))

        states, actions, advantages = buffer.advantages(raw_states[8:10])
        expected_states = StateArray(
            torch.arange(4, 8).unsqueeze(1).float(), (4, ))
        self.assert_states_equal(states, expected_states)
        tt.assert_allclose(
            advantages,
            self._compute_expected_advantages(
                expected_states, torch.tensor([1.5, 1.5, 1, 1]),
                StateArray(
                    torch.tensor([8, 9, 8, 9]).unsqueeze(1).float(), (4, )),
                torch.tensor([2., 2, 1, 1])))
Example #22
    def test_multi_rollout(self):
        buffer = NStepBuffer(2, discount_factor=0.5)
        raw_states = State(torch.arange(12))
        expected_lengths = torch.tensor([2, 2, 2, 2])
        actions = torch.ones(2)
        buffer.store(raw_states[0:2], actions, torch.ones(2))
        buffer.store(raw_states[2:4], actions, torch.ones(2))
        buffer.store(raw_states[4:6], actions, torch.ones(2))
        buffer.store(raw_states[6:8], actions, torch.ones(2) * 2)

        states, actions, returns, next_states, lengths = buffer.sample(4)
        self.assert_states_equal(states, State(torch.arange(0, 4)))
        self.assert_states_equal(next_states, State(torch.arange(4, 8)))
        tt.assert_allclose(returns, torch.tensor([1.5, 1.5, 2, 2]))
        tt.assert_equal(lengths, expected_lengths)

        buffer.store(raw_states[8:10], actions, torch.ones(2))
        buffer.store(raw_states[10:12], actions, torch.ones(2))

        states, actions, returns, next_states, lengths = buffer.sample(4)
        self.assert_states_equal(states, State(torch.arange(4, 8)))
        self.assert_states_equal(next_states, State(torch.arange(8, 12)))
        tt.assert_allclose(returns, torch.tensor([2.5, 2.5, 1.5, 1.5]))
        tt.assert_equal(lengths, expected_lengths)
Example #23
    def test_multi_rollout(self):
        buffer = NStepBatchBuffer(2, 2, discount_factor=0.5)
        raw_states = State(torch.arange(0, 12))
        actions = torch.ones((2))
        buffer.store(raw_states[0:2], actions, torch.ones(2))
        buffer.store(raw_states[2:4], actions, torch.ones(2))
        buffer.store(raw_states[4:6], actions, torch.ones(2))

        states, actions, returns, next_states, lengths = buffer.sample(-1)
        self.assert_states_equal(states, State(torch.arange(0, 4)))
        self.assert_states_equal(next_states, State(torch.tensor([4, 5, 4,
                                                                  5])))
        tt.assert_allclose(returns, torch.tensor([1.5, 1.5, 1, 1]))
        tt.assert_equal(lengths, torch.tensor([2, 2, 1, 1]))

        buffer.store(raw_states[6:8], actions, torch.ones(2))
        buffer.store(raw_states[8:10], actions, torch.ones(2))

        states, actions, returns, next_states, lengths = buffer.sample(-1)
        self.assert_states_equal(states, State(torch.arange(4, 8)))
        self.assert_states_equal(next_states, State(torch.tensor([8, 9, 8,
                                                                  9])))
        tt.assert_allclose(returns, torch.tensor([1.5, 1.5, 1, 1]))
        tt.assert_equal(lengths, torch.tensor([2, 2, 1, 1]))
Example #24
 def test_single_env(self):
     state = State(torch.randn(4))
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor(
         [0.3923, -0.2236, -0.3195, -1.2050, 0.]), atol=1e-04)
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor(
         [0.3923, -0.2236, -0.3195, -1.2050, 1e-3]), atol=1e-04)
     self.agent.act(state)
     tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor(
         [0.3923, -0.2236, -0.3195, -1.2050, 2e-3]), atol=1e-04)
Example #25
    def test_with_not_allclose_tensors(self):
        a = torch.tensor([1, 2, 23.6579, 0])
        b = torch.tensor([1, 2, 23.65789, 0])

        with self.assertRaisesRegex(AssertionError, 'Not equal to tolerance'):
            tt.assert_allclose(a, b)
Example #26
 def test_atol_param_with_allclose_tensors(self):
     a = torch.tensor([0, 0, 0])
     b = torch.tensor([-1, 0, 1])
     tt.assert_allclose(a, b, atol=1, rtol=0)
Example #27
    def test_atol_param_with_not_allclose_tensors(self):
        a = torch.tensor([0])
        b = torch.tensor([-1.001])

        with self.assertRaisesRegex(AssertionError, 'Not equal to tolerance'):
            tt.assert_allclose(a, b, atol=1, rtol=0)
Example #28
 def test_with_allclose_tensors(self):
     a = torch.tensor([1, 2, 23.65799, 0])
     b = torch.tensor([1, 2, 23.657989, 0])
     tt.assert_allclose(a, b)
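For reference, below is a minimal, self-contained sketch of the assertion pattern shared by the examples above. The snippets never show their imports, so the `tt` helpers are reproduced here as hypothetical stand-ins built on `numpy.testing` rather than any particular project's module; the tolerance rule (raise AssertionError with a "Not equal to tolerance" message when |actual - expected| exceeds atol + rtol * |expected|) is the behavior Examples #25-#28 exercise.

    import numpy.testing as npt
    import torch


    def _to_numpy(t):
        # Move to CPU and drop autograd history before comparing.
        return t.detach().cpu().numpy()


    def assert_allclose(actual, expected, rtol=1e-7, atol=0):
        # Element-wise tolerance check, mirroring the tt.assert_allclose calls above.
        npt.assert_allclose(_to_numpy(actual), _to_numpy(expected), rtol=rtol, atol=atol)


    def assert_equal(actual, expected):
        # Exact element-wise equality, mirroring the tt.assert_equal calls above.
        npt.assert_array_equal(_to_numpy(actual), _to_numpy(expected))


    # Usage mirroring Examples #26 and #27: a difference of 1.0 is accepted
    # with atol=1, rtol=0 and rejected once it exceeds the absolute tolerance.
    a = torch.tensor([0.0, 0.0, 0.0])
    b = torch.tensor([-1.0, 0.0, 1.0])
    assert_allclose(a, b, atol=1, rtol=0)
    try:
        assert_allclose(a, b, atol=0.5, rtol=0)
    except AssertionError as err:
        assert 'Not equal to tolerance' in str(err)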