Exemplo n.º 1
0
Arquivo: acer.py Projeto: pfnet/pfrl
 def sync_parameters(self):
     """Synchronise the local model with the shared models.

     Two steps, in this order:

     1. ``copy_param.copy_param`` is called with the local ``self.model``
        as target and ``self.shared_model`` as source.
     2. ``copy_param.soft_copy_param`` is called with
        ``self.shared_average_model`` as target and the local model as
        source, using ``tau = 1 - self.trust_region_alpha``.
     """
     # Step 1: local model <- shared model.
     copy_param.copy_param(source_link=self.shared_model, target_link=self.model)

     # Step 2: soft update of the shared average model from the local model.
     # NOTE(review): presumably target = (1 - tau) * target + tau * source,
     # so the average keeps a `trust_region_alpha` share of its old value
     # -- confirm against copy_param.soft_copy_param.
     averaging_rate = 1 - self.trust_region_alpha
     copy_param.soft_copy_param(
         source_link=self.model,
         target_link=self.shared_average_model,
         tau=averaging_rate,
     )
Exemplo n.º 2
0
    def test_soft_copy_param_shape_check(self):
        """soft_copy_param must raise AssertionError on mismatched shapes.

        Two linear layers with different input sizes (2 vs. 1) are used,
        and the check is exercised with each layer in the target role.
        """
        two_in = nn.Linear(2, 5)
        one_in = nn.Linear(1, 5)

        # Same mismatch, both directions: (target, source).
        mismatched_pairs = ((two_in, one_in), (one_in, two_in))
        for target, source in mismatched_pairs:
            with self.assertRaises(AssertionError):
                copy_param.soft_copy_param(target, source, 0.1)
Exemplo n.º 3
0
    def test_soft_copy_param_scalar(self):
        """Check soft_copy_param on modules holding a single scalar parameter.

        Starting from target = 0.5 and source = 1.0 with tau = 0.1, two
        consecutive updates are expected to give target = 0.55 and then
        0.595, while the source stays at 1.0.
        """
        target = nn.Module()
        target.p = nn.Parameter(torch.as_tensor(0.5))
        source = nn.Module()
        source.p = nn.Parameter(torch.as_tensor(1.0))

        # Each round: target = (1 - tau) * target + tau * source
        for expected_target in (0.55, 0.595):
            copy_param.soft_copy_param(target_link=target, source_link=source, tau=0.1)

            torch_assert_allclose(target.p, torch.full_like(target.p, expected_target))
            # The source must never be modified by the update.
            torch_assert_allclose(source.p, torch.full_like(source.p, 1.0))
Exemplo n.º 4
0
    def test_soft_copy_param(self):
        """Check soft_copy_param on the weights of two linear layers.

        The target weights start at 0.5 and the source weights at 1. With
        tau = 0.1, two consecutive updates are expected to give target
        weights of 0.55 and then 0.595, while the source weights stay at 1.0.
        """
        target = nn.Linear(1, 5)
        source = nn.Linear(1, 5)

        # Set known weight values; done under no_grad because the weights
        # are parameters and are modified in place.
        with torch.no_grad():
            target.weight.fill_(0.5)
            source.weight.fill_(1)

        # Each round: target = (1 - tau) * target + tau * source
        for expected_weight in (0.55, 0.595):
            copy_param.soft_copy_param(target_link=target, source_link=source, tau=0.1)

            torch_assert_allclose(
                target.weight, torch.full_like(target.weight, expected_weight)
            )
            # The source must never be modified by the update.
            torch_assert_allclose(source.weight, torch.full_like(source.weight, 1.0))