Example #1
def test_independent_normal():
    loc = torch.as_tensor([[0.3, 0.7], [0.2, 0.4]])
    scale = torch.as_tensor([[0.1, 0.2], [0.3, 0.8]])
    dist = torch.distributions.Independent(
        torch.distributions.Normal(loc, scale), 1)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
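For the distributions exercised in this and the later mode tests, the mode coincides with `loc`, and for a transformed distribution the tests expect the transform applied to the base mode. A minimal illustrative sketch of such a helper (an assumption, not necessarily pfrl's actual `mode_of_distribution`):

import torch

def mode_of_distribution_sketch(dist):
    # Hypothetical helper for illustration only.
    if isinstance(dist, torch.distributions.Independent):
        return mode_of_distribution_sketch(dist.base_dist)
    if isinstance(dist, (torch.distributions.Normal,
                         torch.distributions.MultivariateNormal)):
        return dist.mean
    if isinstance(dist, torch.distributions.TransformedDistribution):
        x = mode_of_distribution_sketch(dist.base_dist)
        for transform in dist.transforms:
            x = transform(x)
        return x
    raise NotImplementedError(type(dist))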
Example #2
    def test_evaluate_actions_as_quantiles(self):
        sample_actions = torch.randint(
            self.action_size, size=(self.batch_size,))
        z = self.av.evaluate_actions_as_quantiles(sample_actions)
        self.assertIsInstance(z, torch.Tensor)
        for b in range(self.batch_size):
            torch_assert_allclose(
                z[b], self.quantiles[b, :, sample_actions[b]])
Example #3
def test_lambda():
    model = nn.Sequential(
        nn.ReLU(),
        Lambda(lambda x: x + 1),
        nn.ReLU(),
    )
    x = torch.rand(3, 2)
    # Since x is all positive, the ReLU layers have no effect
    y = model(x)
    torch_assert_allclose(y, x + 1)
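`Lambda` here is presumably a thin nn.Module wrapper around an arbitrary callable so it can sit inside nn.Sequential; a minimal sketch under that assumption (not necessarily pfrl's exact implementation):

import torch.nn as nn

class LambdaSketch(nn.Module):
    """Wrap a plain callable as a module so it fits into nn.Sequential."""

    def __init__(self, f):
        super().__init__()
        self.f = f

    def forward(self, x):
        return self.f(x)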
Example #4
def test_cosine_basis_functions(batch_size, m, n_basis_functions):
    x = torch.rand(batch_size, m, dtype=torch.float)
    y = iqn.cosine_basis_functions(x, n_basis_functions=n_basis_functions)
    assert y.shape == (batch_size, m, n_basis_functions)

    for i in range(batch_size):
        for j in range(m):
            for k in range(n_basis_functions):
                torch_assert_allclose(
                    y[i, j, k], torch.cos(x[i, j] * (k + 1) * np.pi), atol=1e-5,
                )
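The nested loops spell out the property being checked: the k-th cosine feature of a scalar x is cos((k + 1) * pi * x) for k = 0, ..., n_basis_functions - 1. A vectorized reference computation (an illustrative sketch, not pfrl's iqn.cosine_basis_functions itself):

import numpy as np
import torch

def cosine_basis_functions_reference(x, n_basis_functions=64):
    # x: (...,) tensor; returns a (..., n_basis_functions) tensor
    i = torch.arange(1, n_basis_functions + 1, dtype=x.dtype)
    return torch.cos(x.unsqueeze(-1) * i * np.pi)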
Example #5
    def _test(self, device):
        # A must be symmetric and positive-definite
        random_mat = torch.normal(0, 1, size=(self.n, self.n))
        random_mat = random_mat.to(device)
        A = torch.matmul(random_mat, random_mat.T)
        x_ans = torch.normal(0, 1, size=(self.n, ))
        x_ans = x_ans.to(device)
        b = torch.matmul(A, x_ans)

        def A_product_func(vec):
            assert vec.shape == b.shape
            return torch.matmul(A, vec)

        x = pfrl.utils.conjugate_gradient(A_product_func, b)
        torch_assert_allclose(x, x_ans, rtol=1e-1)
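For context, conjugate gradient solves A x = b for a symmetric positive-definite A using only matrix-vector products, which is why the test passes A_product_func instead of A itself. A textbook-style sketch of the iteration (illustrative only; pfrl.utils.conjugate_gradient has its own signature and stopping criterion):

import torch

def conjugate_gradient_sketch(A_product_func, b, n_iters=10, eps=1e-10):
    x = torch.zeros_like(b)
    r = b.clone()   # residual b - A x, with x initialized to zero
    p = r.clone()   # search direction
    rs_old = torch.dot(r, r)
    for _ in range(n_iters):
        Ap = A_product_func(p)
        alpha = rs_old / (torch.dot(p, Ap) + eps)
        x = x + alpha * p
        r = r - alpha * Ap
        rs_new = torch.dot(r, r)
        if rs_new.sqrt() < eps:
            break
        p = r + (rs_new / rs_old) * p
        rs_old = rs_new
    return x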
Example #6
def test_compute_eltwise_huber_quantile_loss(batch_size, N, N_prime):
    # Overestimation is penalized proportionally to 1-tau
    # Underestimation is penalized proportionally to tau
    y = torch.randn(batch_size, N, dtype=torch.float, requires_grad=True)
    t = torch.randn(batch_size, N_prime, dtype=torch.float)
    tau = torch.rand(batch_size, N, dtype=torch.float)

    loss = iqn.compute_eltwise_huber_quantile_loss(y, t, tau)
    y_b, t_b = torch.broadcast_tensors(
        y.reshape(batch_size, N, 1),
        t.reshape(batch_size, 1, N_prime),
    )
    assert loss.shape == (batch_size, N, N_prime)
    huber_loss = nn.functional.smooth_l1_loss(y_b, t_b, reduction="none")
    assert huber_loss.shape == (batch_size, N, N_prime)

    for i in range(batch_size):
        for j in range(N):
            for k in range(N_prime):
                # loss is always positive
                scalar_loss = loss[i, j, k]
                scalar_grad = torch.autograd.grad([scalar_loss], [y],
                                                  retain_graph=True)[0][i, j]
                assert float(scalar_loss) > 0
                if y[i, j] > t[i, k]:
                    # y over-estimates t
                    # loss equals huber loss scaled by (1-tau)
                    correct_scalar_loss = (1 - tau[i, j]) * huber_loss[i, j, k]
                else:
                    # y under-estimates t
                    # loss equals huber loss scaled by tau
                    correct_scalar_loss = tau[i, j] * huber_loss[i, j, k]
                correct_scalar_grad = torch.autograd.grad(
                    [correct_scalar_loss], [y], retain_graph=True)[0][i, j]
                torch_assert_allclose(
                    scalar_loss,
                    correct_scalar_loss,
                    atol=1e-5,
                )
                torch_assert_allclose(
                    scalar_grad,
                    correct_scalar_grad,
                    atol=1e-5,
                )
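The loop verifies the quantile Huber loss element by element: with delta = t - y, the expected loss is |tau - 1{delta < 0}| * Huber(y, t), so overestimation (y > t) is weighted by 1 - tau and underestimation by tau. A vectorized sketch of the same reference computation (illustrative only):

import torch
import torch.nn.functional as F

def eltwise_huber_quantile_loss_reference(y, t, tau):
    # y: (B, N), t: (B, N'), tau: (B, N); returns (B, N, N')
    y_b, t_b = torch.broadcast_tensors(y.unsqueeze(2), t.unsqueeze(1))
    huber = F.smooth_l1_loss(y_b, t_b, reduction="none")
    delta = t_b - y_b
    return torch.abs(tau.unsqueeze(2) - (delta < 0).float()) * huber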
Example #7
    def test_getitem(self):
        n_batch = 7
        ndim_action = 3
        mu = np.random.randn(n_batch, ndim_action).astype(np.float32)
        mat = np.broadcast_to(
            np.eye(ndim_action, dtype=np.float32)[None],
            (n_batch, ndim_action, ndim_action),
        )
        v = np.random.randn(n_batch).astype(np.float32)
        min_action, max_action = -1, 1
        qout = action_value.QuadraticActionValue(
            torch.tensor(mu),
            torch.tensor(mat),
            torch.tensor(v),
            min_action,
            max_action,
        )
        sliced = qout[:3]
        torch_assert_allclose(sliced.mu, mu[:3])
        torch_assert_allclose(sliced.mat, mat[:3])
        torch_assert_allclose(sliced.v, v[:3])
        torch_assert_allclose(sliced.min_action[0], min_action)
        torch_assert_allclose(sliced.max_action[0], max_action)
Example #8
    def test_soft_copy_param_scalar(self):
        a = nn.Module()
        a.p = nn.Parameter(torch.as_tensor(0.5))
        b = nn.Module()
        b.p = nn.Parameter(torch.as_tensor(1.0))

        # a = (1 - tau) * a + tau * b
        copy_param.soft_copy_param(target_link=a, source_link=b, tau=0.1)

        torch_assert_allclose(a.p, torch.full_like(a.p, 0.55))
        torch_assert_allclose(b.p, torch.full_like(b.p, 1.0))

        copy_param.soft_copy_param(target_link=a, source_link=b, tau=0.1)

        torch_assert_allclose(a.p, torch.full_like(a.p, 0.595))
        torch_assert_allclose(b.p, torch.full_like(b.p, 1.0))
Example #9
    def test_soft_copy_param(self):
        a = nn.Linear(1, 5)
        b = nn.Linear(1, 5)

        with torch.no_grad():
            a.weight.fill_(0.5)
            b.weight.fill_(1)

        # a = (1 - tau) * a + tau * b
        copy_param.soft_copy_param(target_link=a, source_link=b, tau=0.1)

        torch_assert_allclose(a.weight, torch.full_like(a.weight, 0.55))
        torch_assert_allclose(b.weight, torch.full_like(b.weight, 1.0))

        copy_param.soft_copy_param(target_link=a, source_link=b, tau=0.1)

        torch_assert_allclose(a.weight, torch.full_like(a.weight, 0.595))
        torch_assert_allclose(b.weight, torch.full_like(b.weight, 1.0))
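Both soft-copy tests verify Polyak averaging of parameters, target <- (1 - tau) * target + tau * source, the usual way target networks are tracked in DQN/DDPG-style agents (0.5 -> 0.55 -> 0.595 for tau = 0.1). A minimal sketch of such an update (not pfrl's copy_param module itself):

import torch

def soft_copy_param_sketch(target_link, source_link, tau):
    # In-place Polyak update of every parameter of target_link.
    with torch.no_grad():
        for tgt, src in zip(target_link.parameters(),
                            source_link.parameters()):
            tgt.mul_(1.0 - tau).add_(src, alpha=tau)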
Example #10
def test_branched(batch_size):
    link1 = nn.Linear(2, 3)
    link2 = nn.Linear(2, 5)
    link3 = nn.Sequential(
        nn.Linear(2, 7),
        nn.Tanh(),
    )
    plink = Branched(link1, link2, link3)
    x = torch.zeros(batch_size, 2, dtype=torch.float)
    pout = plink(x)
    assert isinstance(pout, tuple)
    assert len(pout) == 3
    out1 = link1(x)
    out2 = link2(x)
    out3 = link3(x)
    torch_assert_allclose(pout[0], out1)
    torch_assert_allclose(pout[1], out2)
    torch_assert_allclose(pout[2], out3)
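Branched applies each child module to the same input and returns the results as a tuple, which is what the three comparisons above rely on. A minimal sketch under that assumption (pfrl.nn.Branched may differ in details):

import torch.nn as nn

class BranchedSketch(nn.Module):
    """Apply every child module to the same input and return a tuple."""

    def __init__(self, *modules):
        super().__init__()
        self.child_modules = nn.ModuleList(modules)

    def forward(self, x):
        return tuple(m(x) for m in self.child_modules)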
Example #11
    def test_copy_grad(self):
        def set_random_grad(link):
            link.zero_grad()
            x = np.random.normal(size=(1, 1)).astype(np.float32)
            y = link(torch.from_numpy(x)) * np.random.normal()
            torch.sum(y).backward()

        # When source is not None and target is None
        a = nn.Linear(1, 5)
        b = nn.Linear(1, 5)
        set_random_grad(a)
        b.zero_grad()
        assert a.weight.grad is not None
        assert a.bias.grad is not None
        assert b.weight.grad is None
        assert b.bias.grad is None
        copy_param.copy_grad(target_link=b, source_link=a)
        torch_assert_allclose(a.weight.grad, b.weight.grad)
        torch_assert_allclose(a.bias.grad, b.bias.grad)
        assert a.weight.grad is not b.weight.grad
        assert a.bias.grad is not b.bias.grad

        # When both are not None
        a = nn.Linear(1, 5)
        b = nn.Linear(1, 5)
        set_random_grad(a)
        set_random_grad(b)
        assert a.weight.grad is not None
        assert a.bias.grad is not None
        assert b.weight.grad is not None
        assert b.bias.grad is not None
        copy_param.copy_grad(target_link=b, source_link=a)
        torch_assert_allclose(a.weight.grad, b.weight.grad)
        torch_assert_allclose(a.bias.grad, b.bias.grad)
        assert a.weight.grad is not b.weight.grad
        assert a.bias.grad is not b.bias.grad

        # When source is None and target is not None
        a = nn.Linear(1, 5)
        b = nn.Linear(1, 5)
        a.zero_grad()
        set_random_grad(b)
        assert a.weight.grad is None
        assert a.bias.grad is None
        assert b.weight.grad is not None
        assert b.bias.grad is not None
        copy_param.copy_grad(target_link=b, source_link=a)
        assert a.weight.grad is None
        assert a.bias.grad is None
        assert b.weight.grad is None
        assert b.bias.grad is None

        # When both are None
        a = nn.Linear(1, 5)
        b = nn.Linear(1, 5)
        a.zero_grad()
        b.zero_grad()
        assert a.weight.grad is None
        assert a.bias.grad is None
        assert b.weight.grad is None
        assert b.bias.grad is None
        copy_param.copy_grad(target_link=b, source_link=a)
        assert a.weight.grad is None
        assert a.bias.grad is None
        assert b.weight.grad is None
        assert b.bias.grad is None
Example #12
def test_ppo_dataset_recurrent_and_non_recurrent_equivalence(
        use_obs_normalizer, gamma, lambd, max_recurrent_sequence_len):
    """Test equivalence between recurrent and non-recurrent datasets.

    When the same feed-forward model is used, the values of
    log_prob, v_pred, next_v_pred obtained by both recurrent and
    non-recurrent dataset creation functions should be the same.
    """
    episodes = make_random_episodes()
    if use_obs_normalizer:
        obs_normalizer = pfrl.nn.EmpiricalNormalization(2, clip_threshold=5)
        obs_normalizer.experience(torch.rand(10, 2))
    else:
        obs_normalizer = None

    def phi(obs):
        return (obs * 0.5).astype(np.float32)

    device = torch.device("cpu")

    obs_size = 2
    n_actions = 3

    non_recurrent_model = pfrl.nn.Branched(
        nn.Sequential(
            nn.Linear(obs_size, n_actions),
            SoftmaxCategoricalHead(),
        ),
        nn.Linear(obs_size, 1),
    )
    recurrent_model = RecurrentSequential(non_recurrent_model)

    dataset = pfrl.agents.ppo._make_dataset(
        episodes=copy.deepcopy(episodes),
        model=non_recurrent_model,
        phi=phi,
        batch_states=batch_states,
        obs_normalizer=obs_normalizer,
        gamma=gamma,
        lambd=lambd,
        device=device,
    )

    dataset_recurrent = pfrl.agents.ppo._make_dataset_recurrent(
        episodes=copy.deepcopy(episodes),
        model=recurrent_model,
        phi=phi,
        batch_states=batch_states,
        obs_normalizer=obs_normalizer,
        gamma=gamma,
        lambd=lambd,
        max_recurrent_sequence_len=max_recurrent_sequence_len,
        device=device,
    )

    assert "log_prob" not in episodes[0][0]
    assert "log_prob" in dataset[0]
    assert "log_prob" in dataset_recurrent[0][0]
    # They are not just shallow copies
    assert dataset[0]["log_prob"] is not dataset_recurrent[0][0]["log_prob"]

    states = [tr["state"] for tr in dataset]
    recurrent_states = [
        tr["state"] for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(states, recurrent_states)

    actions = [tr["action"] for tr in dataset]
    recurrent_actions = [
        tr["action"] for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(actions, recurrent_actions)

    rewards = [tr["reward"] for tr in dataset]
    recurrent_rewards = [
        tr["reward"] for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(rewards, recurrent_rewards)

    nonterminals = [tr["nonterminal"] for tr in dataset]
    recurrent_nonterminals = [
        tr["nonterminal"]
        for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(nonterminals, recurrent_nonterminals)

    log_probs = [tr["log_prob"] for tr in dataset]
    recurrent_log_probs = [
        tr["log_prob"]
        for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(log_probs, recurrent_log_probs)

    vs_pred = [tr["v_pred"] for tr in dataset]
    recurrent_vs_pred = [
        tr["v_pred"] for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(vs_pred, recurrent_vs_pred)

    next_vs_pred = [tr["next_v_pred"] for tr in dataset]
    recurrent_next_vs_pred = [
        tr["next_v_pred"]
        for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(next_vs_pred, recurrent_next_vs_pred)

    advs = [tr["adv"] for tr in dataset]
    recurrent_advs = [
        tr["adv"] for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(advs, recurrent_advs)

    vs_teacher = [tr["v_teacher"] for tr in dataset]
    recurrent_vs_teacher = [
        tr["v_teacher"]
        for tr in itertools.chain.from_iterable(dataset_recurrent)
    ]
    torch_assert_allclose(vs_teacher, recurrent_vs_teacher)
Example #13
    def _test_non_lstm(self, gpu, name):
        in_size = 2
        out_size = 3
        device = "cuda:{}".format(gpu) if gpu >= 0 else "cpu"
        seqs_x = [
            torch.rand(4, in_size, device=device),
            torch.rand(1, in_size, device=device),
            torch.rand(3, in_size, device=device),
        ]
        seqs_x = torch.nn.utils.rnn.pack_sequence(seqs_x, enforce_sorted=False)
        self.assertTrue(name in ("GRU", "RNN"))
        cls = getattr(nn, name)
        link = cls(num_layers=1, input_size=in_size, hidden_size=out_size)
        link.to(device)

        # Forward twice: with None and non-None recurrent states
        y0, h0 = link(seqs_x, None)
        y1, h1 = link(seqs_x, h0)
        y0, _ = torch.nn.utils.rnn.pad_packed_sequence(y0, batch_first=True)
        y1, _ = torch.nn.utils.rnn.pad_packed_sequence(y1, batch_first=True)
        self.assertEqual(h0.shape, (1, 3, out_size))
        self.assertEqual(h1.shape, (1, 3, out_size))
        self.assertEqual(y0.shape, (3, 4, out_size))
        self.assertEqual(y1.shape, (3, 4, out_size))

        # Masked at 0
        rs0_mask0 = mask_recurrent_state_at(h0, 0)
        y1m0, _ = link(seqs_x, rs0_mask0)
        y1m0, _ = torch.nn.utils.rnn.pad_packed_sequence(y1m0,
                                                         batch_first=True)
        torch_assert_allclose(y1m0[0], y0[0])
        torch_assert_allclose(y1m0[1], y1[1])
        torch_assert_allclose(y1m0[2], y1[2])

        # Masked at (1, 2)
        rs0_mask12 = mask_recurrent_state_at(h0, (1, 2))
        y1m12, _ = link(seqs_x, rs0_mask12)
        y1m12, _ = torch.nn.utils.rnn.pad_packed_sequence(y1m12,
                                                          batch_first=True)
        torch_assert_allclose(y1m12[0], y1[0])
        torch_assert_allclose(y1m12[1], y0[1])
        torch_assert_allclose(y1m12[2], y0[2])

        # Get at 1 and concat with None
        rs0_get1 = get_recurrent_state_at(h0, 1, detach=False)
        assert rs0_get1.requires_grad
        torch_assert_allclose(rs0_get1, h0[:, 1])
        concat_rs_get1 = concatenate_recurrent_states([None, rs0_get1, None])
        y1g1, _ = link(seqs_x, concat_rs_get1)
        y1g1, _ = torch.nn.utils.rnn.pad_packed_sequence(y1g1,
                                                         batch_first=True)
        torch_assert_allclose(y1g1[0], y0[0])
        torch_assert_allclose(y1g1[1], y1[1])
        torch_assert_allclose(y1g1[2], y0[2])

        # Get at 1 with detach=True
        rs0_get1_detach = get_recurrent_state_at(h0, 1, detach=True)
        assert not rs0_get1_detach.requires_grad
        torch_assert_allclose(rs0_get1_detach, h0[:, 1])
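The masking checked above amounts to zeroing out the recurrent state of selected sequences in the batch before resuming the forward pass. A hedged sketch of such a helper, assuming pfrl's mask_recurrent_state_at behaves this way (plain tensors for GRU/RNN, tuples such as (h, c) for LSTM handled recursively):

import torch

def mask_recurrent_state_at_sketch(recurrent_state, indices):
    # Illustrative only; the real pfrl helper may differ in details.
    if recurrent_state is None:
        return None
    if isinstance(recurrent_state, tuple):
        # e.g. LSTM states are (h, c); branched models nest tuples further
        return tuple(mask_recurrent_state_at_sketch(s, indices)
                     for s in recurrent_state)
    # recurrent_state: (num_layers, batch, hidden); zero the given batch entries
    idx = torch.as_tensor(indices, device=recurrent_state.device)
    mask = torch.ones_like(recurrent_state)
    mask[:, idx] = 0
    return recurrent_state * mask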
Example #14
    def test_getitem(self):
        sliced = self.av[:10]
        torch_assert_allclose(sliced.q_values, self.q_values[:10])
        torch_assert_allclose(sliced.quantiles, self.quantiles[:10])
        self.assertEqual(sliced.n_actions, self.action_size)
        self.assertIs(sliced.q_values_formatter, self.av.q_values_formatter)
Example #15
def test_normal():
    loc = torch.as_tensor([0.3, 0.5])
    scale = torch.as_tensor([0.1, 0.9])
    dist = torch.distributions.Normal(loc, scale)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
Example #16
def test_torch_assert_allclose():
    x = [torch.zeros(2), torch.ones(2)]
    y = [[0, 0], [1, 1]]
    torch_assert_allclose(x, y)
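This test suggests torch_assert_allclose accepts nested lists mixing tensors and plain Python numbers. A hedged sketch of such a helper, assuming it recursively converts its arguments to NumPy and delegates to numpy.testing.assert_allclose:

import numpy as np
import torch

def _to_numpy_recursive(x):
    if isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    if isinstance(x, (list, tuple)):
        return [_to_numpy_recursive(v) for v in x]
    return x

def torch_assert_allclose_sketch(actual, desired, **kwargs):
    np.testing.assert_allclose(_to_numpy_recursive(actual),
                               _to_numpy_recursive(desired), **kwargs)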
Example #17
    def test_hessian_vector_product_nonzero(vec):
        hvp = compute_hessian_vector_product(y, params, vec)
        hessian = compute_hessian(y, params)
        self.assertGreater(np.count_nonzero(hvp.numpy()), 0)
        self.assertGreater(np.count_nonzero(hessian.numpy()), 0)
        torch_assert_allclose(hvp, torch.matmul(hessian, vec), atol=1e-3)
Example #18
def test_multivariate_normal():
    loc = torch.as_tensor([0.3, 0.7])
    cov = torch.as_tensor([[0.1, 0.0], [0.0, 0.9]])
    dist = torch.distributions.MultivariateNormal(loc, cov)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
Example #19
    def _test_forward_with_modified_recurrent_state(self, gpu):
        in_size = 2
        out0_size = 2
        out1_size = 3
        par = RecurrentBranched(
            nn.GRU(num_layers=1, input_size=in_size, hidden_size=out0_size),
            RecurrentSequential(
                nn.LSTM(num_layers=1, input_size=in_size, hidden_size=out1_size),
            ),
        )
        if gpu >= 0:
            device = torch.device("cuda:{}".format(gpu))
            par.to(device)
        else:
            device = torch.device("cpu")
        seqs_x = [
            torch.rand(2, in_size, device=device),
            torch.rand(2, in_size, device=device),
        ]
        packed_x = nn.utils.rnn.pack_sequence(seqs_x, enforce_sorted=False)
        x_t0 = torch.stack((seqs_x[0][0], seqs_x[1][0]))
        x_t1 = torch.stack((seqs_x[0][1], seqs_x[1][1]))

        (gru_out, lstm_out), (gru_rs, (lstm_rs,)) = par(packed_x, None)

        # Check that the n-step forward and two one-step forwards agree
        def no_mask_forward_twice():
            _, rs = one_step_forward(par, x_t0, None)
            return one_step_forward(par, x_t1, rs)

        (
            (nomask_gru_out, nomask_lstm_out),
            (nomask_gru_rs, (nomask_lstm_rs,)),
        ) = no_mask_forward_twice()

        # GRU
        torch_assert_allclose(gru_out.data[2:], nomask_gru_out, atol=1e-5)
        torch_assert_allclose(gru_rs, nomask_gru_rs)

        # LSTM
        torch_assert_allclose(lstm_out.data[2:], nomask_lstm_out, atol=1e-5)
        torch_assert_allclose(lstm_rs[0], nomask_lstm_rs[0], atol=1e-5)
        torch_assert_allclose(lstm_rs[1], nomask_lstm_rs[1], atol=1e-5)

        # 1st-only mask forward twice: only 2nd should be the same
        def mask0_forward_twice():
            _, rs = one_step_forward(par, x_t0, None)
            rs = mask_recurrent_state_at(rs, 0)
            return one_step_forward(par, x_t1, rs)

        (
            (mask0_gru_out, mask0_lstm_out),
            (mask0_gru_rs, (mask0_lstm_rs,)),
        ) = mask0_forward_twice()

        # GRU
        with self.assertRaises(AssertionError):
            torch_assert_allclose(gru_out.data[2], mask0_gru_out[0], atol=1e-5)
        torch_assert_allclose(gru_out.data[3], mask0_gru_out[1], atol=1e-5)

        # LSTM
        with self.assertRaises(AssertionError):
            torch_assert_allclose(lstm_out.data[2], mask0_lstm_out[0], atol=1e-5)
        torch_assert_allclose(lstm_out.data[3], mask0_lstm_out[1], atol=1e-5)

        # 2nd-only mask forward twice: only 1st should be the same
        def mask1_forward_twice():
            _, rs = one_step_forward(par, x_t0, None)
            rs = mask_recurrent_state_at(rs, 1)
            return one_step_forward(par, x_t1, rs)

        (
            (mask1_gru_out, mask1_lstm_out),
            (mask1_gru_rs, (mask1_lstm_rs,)),
        ) = mask1_forward_twice()

        # GRU
        torch_assert_allclose(gru_out.data[2], mask1_gru_out[0], atol=1e-5)
        with self.assertRaises(AssertionError):
            torch_assert_allclose(gru_out.data[3], mask1_gru_out[1], atol=1e-5)

        # LSTM
        torch_assert_allclose(lstm_out.data[2], mask1_lstm_out[0], atol=1e-5)
        with self.assertRaises(AssertionError):
            torch_assert_allclose(lstm_out.data[3], mask1_lstm_out[1], atol=1e-5)

        # both 1st and 2nd mask forward twice: both should be different
        def mask01_forward_twice():
            _, rs = one_step_forward(par, x_t0, None)
            rs = mask_recurrent_state_at(rs, [0, 1])
            return one_step_forward(par, x_t1, rs)

        (
            (mask01_gru_out, mask01_lstm_out),
            (mask01_gru_rs, (mask01_lstm_rs,)),
        ) = mask01_forward_twice()

        # GRU
        with self.assertRaises(AssertionError):
            torch_assert_allclose(gru_out.data[2], mask01_gru_out[0], atol=1e-5)
        with self.assertRaises(AssertionError):
            torch_assert_allclose(gru_out.data[3], mask01_gru_out[1], atol=1e-5)

        # LSTM
        with self.assertRaises(AssertionError):
            torch_assert_allclose(lstm_out.data[2], mask01_lstm_out[0], atol=1e-5)
        with self.assertRaises(AssertionError):
            torch_assert_allclose(lstm_out.data[3], mask01_lstm_out[1], atol=1e-5)

        # get and concat recurrent states and resume forward
        def get_and_concat_rs_forward():
            _, rs = one_step_forward(par, x_t0, None)
            rs0 = get_recurrent_state_at(rs, 0, detach=True)
            rs1 = get_recurrent_state_at(rs, 1, detach=True)
            concat_rs = concatenate_recurrent_states([rs0, rs1])
            return one_step_forward(par, x_t1, concat_rs)

        (
            (getcon_gru_out, getcon_lstm_out),
            (getcon_gru_rs, (getcon_lstm_rs,)),
        ) = get_and_concat_rs_forward()

        # GRU
        torch_assert_allclose(gru_out.data[2], getcon_gru_out[0], atol=1e-5)
        torch_assert_allclose(gru_out.data[3], getcon_gru_out[1], atol=1e-5)

        # LSTM
        torch_assert_allclose(lstm_out.data[2], getcon_lstm_out[0], atol=1e-5)
        torch_assert_allclose(lstm_out.data[3], getcon_lstm_out[1], atol=1e-5)
Example #20
def test_transform():
    base_dist = torch.distributions.Normal(loc=2, scale=1)
    dist = torch.distributions.TransformedDistribution(
        base_dist, [torch.distributions.transforms.TanhTransform()])
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode.tolist(), math.tanh(2))
Example #21
    def test_q_values(self):
        self.assertIsInstance(self.av.q_values, torch.Tensor)
        torch_assert_allclose(self.av.q_values, self.q_values)
Example #22
    def _test_forward(self, gpu):
        in_size = 2
        out_size = 6

        rseq = RecurrentSequential(
            nn.Linear(in_size, 3),
            nn.ELU(),
            nn.LSTM(num_layers=1, input_size=3, hidden_size=4),
            nn.Linear(4, 5),
            nn.RNN(num_layers=1, input_size=5, hidden_size=out_size),
            nn.Tanh(),
        )

        if gpu >= 0:
            device = torch.device("cuda:{}".format(gpu))
            rseq.to(device)
        else:
            device = torch.device("cpu")

        assert len(rseq.recurrent_children) == 2
        assert rseq.recurrent_children[0] is rseq[2]
        assert rseq.recurrent_children[1] is rseq[4]

        linear1 = rseq[0]
        lstm = rseq[2]
        linear2 = rseq[3]
        rnn = rseq[4]

        seqs_x = [
            torch.rand(4, in_size, requires_grad=True, device=device),
            torch.rand(1, in_size, requires_grad=True, device=device),
            torch.rand(3, in_size, requires_grad=True, device=device),
        ]

        packed_x = nn.utils.rnn.pack_sequence(seqs_x, enforce_sorted=False)

        out, _ = rseq(packed_x, None)
        self.assertEqual(out.data.shape, (8, out_size))

        # Check if the output matches that of step-by-step execution
        def manual_forward(seqs_x):
            seqs_y = []
            for seq_x in seqs_x:
                lstm_st = None
                rnn_st = None
                seq_y = []
                for i in range(len(seq_x)):
                    h = seq_x[i:i + 1]
                    h = linear1(h)
                    h = F.elu(h)
                    h, lstm_st = _step_lstm(lstm, h, lstm_st)
                    h = linear2(h)
                    h, rnn_st = _step_rnn_tanh(rnn, h, rnn_st)
                    y = F.tanh(h)
                    seq_y.append(y[0])
                seqs_y.append(torch.stack(seq_y))
            return nn.utils.rnn.pack_sequence(seqs_y, enforce_sorted=False)

        manual_out = manual_forward(seqs_x)
        torch_assert_allclose(out.data, manual_out.data, atol=1e-4)

        # Finally, check the gradient (wrt input)
        grads = torch.autograd.grad([torch.sum(out.data)], seqs_x)
        manual_grads = torch.autograd.grad([torch.sum(manual_out.data)],
                                           seqs_x)
        assert len(grads) == len(manual_grads) == 3
        for grad, manual_grad in zip(grads, manual_grads):
            torch_assert_allclose(grad, manual_grad, atol=1e-4)
Example #23
def test_torch_assert_allclose_fail():
    with pytest.raises(AssertionError):
        x = [torch.zeros(2), torch.ones(2)]
        y = [[0, 0], [1, 0]]
        torch_assert_allclose(x, y)