Example No. 1
    def setUp(self):
        super().setUp()
        self._input_spec = TensorSpec((1, ))
        self._epochs = 10
        self._batch_size = 100
        self._latent_dim = 2
        self._loss_f = math_ops.square
Example No. 2
def normalize_along_batch_dims(x, mean, variance, variance_epsilon):
    """Normalizes a tensor by ``mean`` and ``variance``, which are expected to have
    the same tensor spec with the inner dims of ``x``.

    Args:
        x (Tensor): a tensor of (``[D1, D2, ..] + shape``), where ``D1``, ``D2``, ..
            are arbitrary leading batch dims (can be empty).
        mean (Tensor): a tensor of ``shape``
        variance (Tensor): a tensor of ``shape``
        variance_epsilon (float): A small float number to avoid dividing by 0.
    Returns:
        Normalized tensor.
    """
    spec = TensorSpec.from_tensor(mean)
    assert spec == TensorSpec.from_tensor(variance), \
        "The specs of mean and variance must be equal!"

    bs = BatchSquash(get_outer_rank(x, spec))
    x = bs.flatten(x)

    variance_epsilon = torch.as_tensor(variance_epsilon).to(variance.dtype)
    inv = torch.rsqrt(variance + variance_epsilon)
    x = (x - mean.to(x.dtype)) * inv.to(x.dtype)

    x = bs.unflatten(x)
    return x
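
As a quick sanity check of the contract above, a minimal usage sketch (the shapes, values, and epsilon are made up for illustration; ``torch`` and this function are assumed available as in the examples here):

import torch

# x has one leading batch dim (B=32); mean/variance match the inner shape (4,)
x = torch.randn(32, 4)
mean = x.mean(dim=0)
variance = x.var(dim=0, unbiased=False)
y = normalize_along_batch_dims(x, mean, variance, variance_epsilon=1e-6)
# for this simple case the result equals the plain elementwise normalization
expected = (x - mean) * torch.rsqrt(variance + 1e-6)
assert torch.allclose(y, expected)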
Example No. 3
    def __init__(self,
                 observation_spec,
                 action_spec,
                 skill_spec,
                 env,
                 config: TrainerConfig,
                 num_steps_per_skill=5,
                 rl_algorithm_cls=SacAlgorithm,
                 rl_mini_batch_size=128,
                 rl_mini_batch_length=2,
                 rl_replay_buffer_length=20000,
                 disc_mini_batch_size=64,
                 disc_mini_batch_length=4,
                 disc_replay_buffer_length=20000,
                 gamma=0.99,
                 optimizer=None,
                 debug_summaries=False,
                 name="SkillGenerator"):
        """
        """
        self._num_steps_per_skill = num_steps_per_skill
        self._observation_spec = observation_spec
        self._action_spec = action_spec
        self._skill_spec = skill_spec

        rl, discriminator = self._create_subalgorithms(
            rl_algorithm_cls, debug_summaries, env, config,
            rl_mini_batch_length, rl_mini_batch_size, rl_replay_buffer_length,
            disc_mini_batch_size, disc_mini_batch_length,
            disc_replay_buffer_length)

        discriminator.set_high_rl(rl)

        train_state_spec = SkillGeneratorState(
            discriminator=discriminator.train_state_spec,  # for discriminator
            skill=self._skill_spec)  # inputs to lower-level

        rollout_state_spec = train_state_spec._replace(
            rl=rl.train_state_spec,  # higher-level policy rollout
            rl_reward=TensorSpec(()),  # higher-level policy replay
            rl_discount=TensorSpec(()),  # higher-level policy replay
            steps=TensorSpec((), dtype='int64'))

        predict_state_spec = train_state_spec._replace(
            rl=rl.predict_state_spec,  # higher-level policy prediction
            steps=TensorSpec((), dtype='int64'),
            discriminator=discriminator.predict_state_spec)

        super().__init__(train_state_spec=train_state_spec,
                         rollout_state_spec=rollout_state_spec,
                         predict_state_spec=predict_state_spec,
                         optimizer=optimizer,
                         name=name)

        self._gamma = gamma
        self._discriminator = discriminator
        self._rl = rl
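        # with period=1, ``Periodically`` trains the higher-level policy from
        # its replay buffer on every call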
        self._rl_train = common.Periodically(self._rl.train_from_replay_buffer,
                                             period=1,
                                             name="periodic_higher_level")
Example No. 4
def get_subtrajectory_spec(num_steps_per_skill, observation_spec, action_spec):
    observation_traj_spec = TensorSpec(shape=(num_steps_per_skill, ) +
                                       observation_spec.shape)
    action_traj_spec = TensorSpec(shape=(num_steps_per_skill, ) +
                                  action_spec.shape)
    return SubTrajectory(observation=observation_traj_spec,
                         prev_action=action_traj_spec)
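
A minimal hedged sketch of the resulting specs (the concrete shapes are illustrative assumptions):

obs_spec = TensorSpec((8, ))
act_spec = TensorSpec((2, ))
traj_spec = get_subtrajectory_spec(5, obs_spec, act_spec)
assert traj_spec.observation.shape == (5, 8)   # num_steps_per_skill leads
assert traj_spec.prev_action.shape == (5, 2)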
Example No. 5
    def test_param_network(self, batch_size=1):
        input_spec = TensorSpec((3, 32, 32), torch.float32)
        conv_layer_params = ((16, (2, 2), 1, (1, 0)), (15, 2, (1, 2), 1, 2))
        fc_layer_params = ((128, True), )
        last_layer_size = 10
        last_activation = math_ops.identity
        network = ParamNetwork(input_spec,
                               conv_layer_params=conv_layer_params,
                               fc_layer_params=fc_layer_params,
                               last_layer_param=(last_layer_size, True),
                               last_activation=last_activation)
        self.assertLen(network._fc_layers, 2)

        # test non-parallel forward
        image = input_spec.zeros(outer_dims=(batch_size, ))
        output, _ = network(image)
        output_shape = (batch_size, last_layer_size)
        self.assertEqual(output_shape[1:], network.output_spec.shape)
        self.assertEqual(output_shape, tuple(output.size()))

        # test parallel forward
        replica = 2
        image = input_spec.zeros(outer_dims=(batch_size, ))
        replica_image = input_spec.zeros(outer_dims=(batch_size, replica))
        params = torch.randn(replica, network.param_length)
        network.set_parameters(params)
        output, _ = network(image)
        replica_output, _ = network(replica_image)
        self.assertEqual(output.shape, replica_output.shape)

        output_shape = (batch_size, replica, last_layer_size)
        self.assertEqual(output_shape[1:], network.output_spec.shape)
        self.assertEqual(output_shape, tuple(output.size()))
Example No. 6
    def test_data_buffer(self):
        dim = 20
        capacity = 256
        data_spec = (TensorSpec(shape=()), TensorSpec(shape=(dim // 3 - 1, )),
                     TensorSpec(shape=(dim - dim // 3, )))

        data_buffer = DataBuffer(data_spec=data_spec, capacity=capacity)

        def _get_batch(batch_size):
            x = torch.randn(batch_size, dim, requires_grad=True)
            x = (x[:, 0], x[:, 1:dim // 3], x[..., dim // 3:])
            return x

        data_buffer.add_batch(_get_batch(100))
        self.assertEqual(int(data_buffer.current_size), 100)
        batch = _get_batch(1000)
        # test that the created batch has gradients
        self.assertTrue(batch[0].requires_grad)
        data_buffer.add_batch(batch)
        ret = data_buffer.get_batch(2)
        # test that DataBuffer detaches gradients of inputs
        self.assertFalse(ret[0].requires_grad)
        self.assertEqual(int(data_buffer.current_size), capacity)
        ret = data_buffer.get_batch_by_indices(torch.arange(capacity))
        self.assertEqual(ret[0], batch[0][-capacity:])
        self.assertEqual(ret[1], batch[1][-capacity:])
        self.assertEqual(ret[2], batch[2][-capacity:])
        batch = _get_batch(100)
        data_buffer.add_batch(batch)
        ret = data_buffer.get_batch_by_indices(
            torch.arange(data_buffer.current_size - 100,
                         data_buffer.current_size))
        self.assertEqual(ret[0], batch[0])
        self.assertEqual(ret[1], batch[1])
        self.assertEqual(ret[2], batch[2][-capacity:])

        # Test checkpoint working
        with tempfile.TemporaryDirectory() as checkpoint_directory:
            checkpoint = Checkpointer(checkpoint_directory,
                                      data_buffer=data_buffer)
            checkpoint.save(10)
            data_buffer = DataBuffer(data_spec=data_spec, capacity=capacity)
            checkpoint = Checkpointer(checkpoint_directory,
                                      data_buffer=data_buffer)
            global_step = checkpoint.load()
            self.assertEqual(global_step, 10)

        ret = data_buffer.get_batch_by_indices(
            torch.arange(data_buffer.current_size - 100,
                         data_buffer.current_size))
        self.assertEqual(ret[0], batch[0])
        self.assertEqual(ret[1], batch[1])
        self.assertEqual(ret[2], batch[2][-capacity:])

        data_buffer.clear()
        self.assertEqual(int(data_buffer.current_size), 0)
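
The index-based checks above rely on DataBuffer behaving as a FIFO ring buffer: once more than ``capacity`` entries have been added, only the most recent ``capacity`` remain, oldest first. A minimal hedged sketch with a single scalar spec (an assumption; the test uses a tuple spec):

buf = DataBuffer(data_spec=TensorSpec(shape=()), capacity=4)
buf.add_batch(torch.arange(6, dtype=torch.float32))
assert int(buf.current_size) == 4
# only the last `capacity` entries survive, in insertion order
ret = buf.get_batch_by_indices(torch.arange(4))
assert torch.equal(ret, torch.tensor([2., 3., 4., 5.]))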
Example No. 7
    def test_noinit_copy_works(self):
        # pass a TensorSpec to prevent assertion error in Network
        network1 = NoInitNetwork(TensorSpec([2]), 1)
        network2 = network1.copy()

        self.assertNotEqual(network1, network2)
        self.assertEqual(TensorSpec([2]), network2.param1)
        self.assertEqual(1, network2.param2)
        self.assertEqual(2, network2.kwarg1)
        self.assertEqual(3, network2.kwarg2)
Example No. 8
    def setUp(self):
        self._input_tensor_spec = TensorSpec((10, ))
        self._time_step = TimeStep(
            step_type=StepType.MID,
            reward=0,
            discount=1,
            observation=self._input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._hidden_size = 100
Example No. 9
    def setUp(self):
        input_tensor_spec = TensorSpec((10, ))
        self._time_step = TimeStep(
            step_type=torch.tensor(StepType.MID, dtype=torch.int32),
            reward=0,
            discount=1,
            observation=input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._encoding_net = EncodingNetwork(
            input_tensor_spec=input_tensor_spec)
Example No. 10
    def test_parallel_image_encoding_network(self, same_padding,
                                             flatten_output):
        input_spec = TensorSpec((3, 80, 80), torch.float32)

        replica = 2
        network = ParallelImageEncodingNetwork(
            input_channels=input_spec.shape[0],
            input_size=input_spec.shape[1:3],
            n=replica,
            conv_layer_params=((16, (5, 3), 2, (1, 1)), (15, 3, (2, 2), 0)),
            same_padding=same_padding,
            flatten_output=flatten_output)

        self.assertLen(list(network.parameters()), 4)

        batch_size = 3
        # 1) shared input case
        img = input_spec.zeros(outer_dims=(batch_size, ))
        output, _ = network(img)

        if same_padding:
            output_shape = (batch_size, replica, 15, 20, 20)
        else:
            output_shape = (batch_size, replica, 15, 19, 19)

        if flatten_output:
            self.assertEqual((*output_shape[1:2], np.prod(output_shape[2:])),
                             network.output_spec.shape)
            self.assertEqual((*output_shape[0:2], np.prod(output_shape[2:])),
                             tuple(output.size()))
        else:
            self.assertEqual(output_shape[1:], network.output_spec.shape)
            self.assertEqual(output_shape, tuple(output.size()))

        # 2) non-shared input case
        img = input_spec.zeros(outer_dims=(
            batch_size,
            replica,
        ))
        output, _ = network(img)

        if same_padding:
            output_shape = (batch_size, replica, 15, 20, 20)
        else:
            output_shape = (batch_size, replica, 15, 19, 19)

        if flatten_output:
            self.assertEqual((*output_shape[1:2], np.prod(output_shape[2:])),
                             network.output_spec.shape)
            self.assertEqual((*output_shape[0:2], np.prod(output_shape[2:])),
                             tuple(output.size()))
        else:
            self.assertEqual(output_shape[1:], network.output_spec.shape)
            self.assertEqual(output_shape, tuple(output.size()))
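
The expected spatial sizes follow from the usual convolution arithmetic, out = (in + 2 * padding - kernel) // stride + 1. A worked check of the ``same_padding=False`` branch:

# layer 1: 80x80 input, kernel (5, 3), stride 2, padding (1, 1)
h1 = (80 + 2 * 1 - 5) // 2 + 1   # 39
w1 = (80 + 2 * 1 - 3) // 2 + 1   # 40
# layer 2: kernel 3, stride (2, 2), padding 0
h2 = (h1 - 3) // 2 + 1           # 19
w2 = (w1 - 3) // 2 + 1           # 19 -> (15, 19, 19) per replica

With ``same_padding=True`` the spatial size is only divided by the strides (80 -> 40 -> 20), consistent with the (15, 20, 20) expectation above.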
Example No. 11
    def setUp(self):
        self._input_spec = [
            TensorSpec((3, 20, 20), torch.float32),
            TensorSpec((1, 20, 20), torch.float32)
        ]
        self._image = zero_tensor_from_nested_spec(self._input_spec,
                                                   batch_size=1)
        self._conv_layer_params = ((8, 3, 1), (16, 3, 2, 1))
        self._fc_layer_params = (100, )
        self._input_preprocessors = [torch.tanh, None]
        self._preprocessing_combiner = NestConcat(dim=1)
Example No. 12
    def test_encoding_network_img(self):
        input_spec = TensorSpec((3, 80, 80), torch.float32)
        img = input_spec.zeros(outer_dims=(1, ))
        network = EncodingNetwork(input_tensor_spec=input_spec,
                                  conv_layer_params=((16, (5, 3), 2, (1, 1)),
                                                     (15, 3, (2, 2), 0)))

        self.assertLen(list(network.parameters()), 4)

        output, _ = network(img)
        output_spec = network._img_encoding_net.output_spec
        self.assertEqual(output.shape[-1], np.prod(output_spec.shape))
Example No. 13
    def __init__(self, dim=2):
        super().__init__(input_tensor_spec=[
            TensorSpec(shape=(dim, )),
            TensorSpec(shape=(dim, ))
        ],
                         name="Net")
        self.fc1 = nn.Linear(dim, dim, bias=False)
        self.fc2 = nn.Linear(dim, dim, bias=False)
        w = torch.tensor([[1, 2], [1, 1]], dtype=torch.float32)
        u = torch.zeros((dim, dim), dtype=torch.float32)
        self.fc1.weight = nn.Parameter(w.t())
        self.fc2.weight = nn.Parameter(u.t())
Example No. 14
    def test_continuous_skill_loss(self):
        skill_spec = TensorSpec((4, ))
        alg = DIAYNAlgorithm(skill_spec=skill_spec,
                             encoding_net=self._encoding_net)
        skill = state = skill_spec.zeros(outer_dims=(1, ))

        alg_step = alg.train_step(
            self._time_step._replace(
                observation=[self._time_step.observation, skill]), state)

        # the discriminator should predict a zero skill vector
        self.assertTensorClose(torch.sum(alg_step.info.loss),
                               torch.as_tensor(0))
Example No. 15
def get_low_rl_input_spec(observation_spec, action_spec, num_steps_per_skill,
                          skill_spec):
    assert observation_spec.ndim == 1 and action_spec.ndim == 1
    concat_observation_spec = TensorSpec(
        (num_steps_per_skill * observation_spec.shape[0], ))
    concat_action_spec = TensorSpec(
        (num_steps_per_skill * action_spec.shape[0], ))
    traj_spec = SubTrajectory(observation=concat_observation_spec,
                              prev_action=concat_action_spec)
    step_spec = BoundedTensorSpec(shape=(),
                                  maximum=num_steps_per_skill,
                                  dtype='int64')
    return alf.nest.flatten(traj_spec) + [step_spec, skill_spec]
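
A hedged illustration of the returned flat list (the shapes are made up; the ordering assumes ``SubTrajectory`` declares ``observation`` before ``prev_action``, matching its usage above):

specs = get_low_rl_input_spec(observation_spec=TensorSpec((8, )),
                              action_spec=TensorSpec((2, )),
                              num_steps_per_skill=5,
                              skill_spec=TensorSpec((4, )))
# [concat observations, concat prev_actions, step counter, skill]
assert specs[0].shape == (40, )
assert specs[1].shape == (10, )
assert specs[2].shape == ()      # int64 step counter
assert specs[3].shape == (4, )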
Example No. 16
    def test_encoding_network_preprocessing_combiner(self):
        input_spec = dict(a=TensorSpec((3, 80, 80)),
                          b=[TensorSpec((80, 80)),
                             TensorSpec(())])
        imgs = common.zero_tensor_from_nested_spec(input_spec, batch_size=1)
        network = EncodingNetwork(input_tensor_spec=input_spec,
                                  preprocessing_combiner=NestSum(average=True),
                                  conv_layer_params=((1, 2, 2, 0), ))

        self.assertEqual(network._processed_input_tensor_spec,
                         TensorSpec((3, 80, 80)))

        output, _ = network(imgs)
        self.assertTensorEqual(output, torch.zeros((40 * 40, )))
Example No. 17
class TestNestSelectiveConcat(parameterized.TestCase, alf.test.TestCase):
    @parameterized.parameters(
        (NTuple(a=dict(x=1, y=0), b=0), torch.zeros((2, 3))),
        (NTuple(a=dict(x=0, y=1), b=0), torch.zeros((2, 4))),
        (NTuple(a=dict(x=0, y=0), b=1), torch.zeros((2, 10))),
        (NTuple(a=dict(x=1, y=1), b=0), torch.zeros((2, 7))),
        (NTuple(a=dict(x=1, y=0), b=1), torch.zeros((2, 13))),
        (NTuple(a=dict(x=0, y=1), b=1), torch.zeros((2, 14))),
        (NTuple(a=dict(x=1, y=1), b=1), torch.zeros((2, 17))),
        (None, torch.zeros((2, 17))),
    )
    def test_nest_selective_concat_tensors(self, mask, expected):
        ntuple = NTuple(
            a=dict(x=torch.zeros((2, 3)), y=torch.zeros((2, 4))),
            b=torch.zeros((2, 10)))
        ret = NestConcat(mask)(ntuple)
        self.assertTensorEqual(ret, expected)

    @parameterized.parameters(
        (NTuple(a=dict(x=1, y=0), b=0), TensorSpec((2, 3))),
        (NTuple(a=dict(x=0, y=1), b=0), TensorSpec((2, 4))),
        (NTuple(a=dict(x=0, y=0), b=1), TensorSpec((2, 10))),
        (NTuple(a=dict(x=1, y=1), b=0), TensorSpec((2, 7))),
        (NTuple(a=dict(x=1, y=0), b=1), TensorSpec((2, 13))),
        (NTuple(a=dict(x=0, y=1), b=1), TensorSpec((2, 14))),
        (NTuple(a=dict(x=1, y=1), b=1), TensorSpec((2, 17))),
        (None, TensorSpec((2, 17))),
    )
    def test_nest_selective_concat_specs(self, mask, expected):
        ntuple = NTuple(
            a=dict(x=TensorSpec((2, 3)), y=TensorSpec((2, 4))),
            b=TensorSpec((2, 10)))
        ret = NestConcat(mask)(ntuple)
        self.assertEqual(ret, expected)
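
Both tests exercise the same mask semantics: entries marked 1 are kept and concatenated along the last dim, entries marked 0 are dropped, and ``mask=None`` concatenates the whole nest. A small sketch mirroring one parameterized case:

ntuple = NTuple(a=dict(x=torch.zeros((2, 3)), y=torch.zeros((2, 4))),
                b=torch.zeros((2, 10)))
out = NestConcat(NTuple(a=dict(x=1, y=0), b=1))(ntuple)
assert out.shape == (2, 13)   # 3 from a.x plus 10 from b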
Example No. 18
    def test_parallel_q_network(self):
        input_spec = TensorSpec([10])
        inputs = input_spec.zeros(outer_dims=(1, ))

        network_ctor, state = self._init(None)

        q_net = network_ctor(input_spec, self._action_spec)
        n = 5
        parallel_q_net = q_net.make_parallel(n)

        q_value, _ = parallel_q_net(inputs, state)

        # (batch_size, n, num_actions)
        self.assertEqual(q_value.shape, (1, n, self._num_actions))
Example No. 19
    def test_uniform_projection_net(self):
        """A zero-weight net generates uniform actions."""
        input_spec = TensorSpec((10, ), torch.float32)
        embedding = input_spec.ones(outer_dims=(1, ))

        net = CategoricalProjectionNetwork(input_size=input_spec.shape[0],
                                           action_spec=BoundedTensorSpec(
                                               (1, ), minimum=0, maximum=4),
                                           logits_init_output_factor=0)
        dist, _ = net(embedding)
        self.assertTrue(isinstance(net.output_spec, DistributionSpec))
        self.assertEqual(dist.batch_shape, (1, ))
        self.assertEqual(dist.base_dist.batch_shape, (1, 1))
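        # with zero-initialized logits, the softmax is uniform over the
        # maximum - minimum + 1 = 5 actions, so every prob is 1 / 5 = 0.2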
        self.assertTrue(torch.all(dist.base_dist.probs == 0.2))
Example No. 20
    def test_continuous_action(self):
        action_spec = TensorSpec((4, ))
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))

        alg_step = alg.train_step(
            self._time_step._replace(prev_action=action_spec.zeros(
                outer_dims=(1, ))), state)

        # the inverse net should predict a zero action vector
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(0))
Example No. 21
    def test_close_uniform_projection_net(self):
        """A random-weight net generates close-uniform actions on average."""
        input_spec = TensorSpec((10, ), torch.float32)
        embeddings = input_spec.ones(outer_dims=(100, ))

        net = CategoricalProjectionNetwork(input_size=input_spec.shape[0],
                                           action_spec=BoundedTensorSpec(
                                               (3, 2), minimum=0, maximum=4),
                                           logits_init_output_factor=1.0)
        dists, _ = net(embeddings)
        self.assertEqual(dists.batch_shape, (100, ))
        self.assertEqual(dists.base_dist.batch_shape, (100, 3, 2))
        self.assertTrue(dists.base_dist.probs.std() > 0)
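        # each row of probs sums to 1 over the 5 classes, so the mean over
        # all entries is 1 / 5 = 0.2 regardless of the random weights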
        self.assertTrue(
            torch.isclose(dists.base_dist.probs.mean(), torch.as_tensor(0.2)))
Example No. 22
class ICMAlgorithmTest(alf.test.TestCase):
    def setUp(self):
        self._input_tensor_spec = TensorSpec((10, ))
        self._time_step = TimeStep(
            step_type=StepType.MID,
            reward=0,
            discount=1,
            observation=self._input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._hidden_size = 100

    def test_discrete_action(self):
        action_spec = BoundedTensorSpec((),
                                        dtype=torch.int64,
                                        minimum=0,
                                        maximum=3)
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))

        alg_step = alg.train_step(
            self._time_step._replace(prev_action=action_spec.zeros(
                outer_dims=(1, ))), state)

        # the inverse net should predict a uniform distribution
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(
                math.log(action_spec.maximum - action_spec.minimum + 1)),
            epsilon=1e-4)

    def test_continuous_action(self):
        action_spec = TensorSpec((4, ))
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))

        alg_step = alg.train_step(
            self._time_step._replace(prev_action=action_spec.zeros(
                outer_dims=(1, ))), state)

        # the inverse net should predict a zero action vector
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(0))
Example No. 23
    def __init__(self, dim=2):
        super().__init__(
            input_tensor_spec=TensorSpec(shape=(dim, )), name="Net")

        self.fc = nn.Linear(3, dim, bias=False)
        w = torch.tensor([[1, 2], [-1, 1], [1, 1]], dtype=torch.float32)
        self.fc.weight = nn.Parameter(w.t())
Example No. 24
    def test_encoding_network_input_preprocessor(self):
        input_spec = TensorSpec((1, ))
        inputs = common.zero_tensor_from_nested_spec(input_spec, batch_size=1)
        network = EncodingNetwork(input_tensor_spec=input_spec,
                                  input_preprocessors=torch.tanh)
        output, _ = network(inputs)
        self.assertEqual(output.size()[1], 1)
Example No. 25
    def __init__(self,
                 dynamics_network: DynamicsNetwork,
                 n: int,
                 name="ParallelDynamicsNetwork"):
        """
        It create a parallelized version of ``DynamicsNetwork``.

        Args:
            dynamics_network (DynamicsNetwork): non-parallelized dynamics network
            n (int): make ``n`` replicas from ``dynamics_network`` with different
                initializations.
            name (str):
        """
        super().__init__(input_tensor_spec=dynamics_network.input_tensor_spec,
                         name=name)
        self._joint_encoder = dynamics_network._joint_encoder.make_parallel(n)
        self._prob = dynamics_network._prob
        if self._prob:
            self._projection_net = \
                dynamics_network._projection_net.make_parallel(n)
        else:
            self._projection_net = None

        self._output_spec = TensorSpec((n, ) +
                                       dynamics_network.output_spec.shape)
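
A hedged usage sketch, assuming ``dnet`` is an already constructed DynamicsNetwork:

pnet = ParallelDynamicsNetwork(dnet, n=5)
# each of the 5 replicas contributes one row of the output
assert pnet.output_spec.shape == (5, ) + dnet.output_spec.shape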
Example No. 26
    def __init__(self, config: TrainerConfig):
        """Create a SLTrainer

        Args:
            config (TrainerConfig): configuration used to construct this trainer
        """
        super().__init__(config)

        assert config.num_iterations > 0, \
            "Must provide num_iterations for training!"

        self._num_epochs = config.num_iterations
        self._trainer_progress.set_termination_criterion(self._num_epochs)

        trainset, testset = self._create_dataset()
        input_tensor_spec = TensorSpec(shape=trainset.dataset[0][0].shape)
        if hasattr(trainset.dataset, 'classes'):
            output_dim = len(trainset.dataset.classes)
        else:
            output_dim = len(trainset.dataset[0][1])

        self._algorithm = config.algorithm_ctor(
            input_tensor_spec=input_tensor_spec,
            last_layer_param=(output_dim, True),
            last_activation=math_ops.identity,
            config=config)

        self._algorithm.set_data_loader(trainset, testset)
Example No. 27
    def _extract_spec(obj):
        # ``from_dim`` is not a parameter of this inner function; it is
        # expected to be captured from the enclosing scope.
        if isinstance(obj, torch.Tensor):
            return TensorSpec.from_tensor(obj, from_dim)
        elif isinstance(obj, td.Distribution):
            return DistributionSpec.from_distribution(obj, from_dim)
        else:
            raise ValueError("Unsupported value type: %s" % type(obj))
Example No. 28
    def test_compute_jac_diag(self, hidden_layers=(2, ), input_size=5):
        """
        Check that the diagonal of input-output Jacobian computed by
        the direct (autograd-free) approach is consistent with the one
        computed by calling autograd.
        """
        batch_size = 2
        spec = TensorSpec((input_size, ))
        mlp = ReluMLP(spec, hidden_layers=hidden_layers)

        # compute jac diag using direct approach
        x = torch.randn(batch_size, input_size, requires_grad=True)
        x1 = x.detach().clone()
        x1.requires_grad = True
        jac_diag = mlp.compute_jac_diag(x1)

        # compute jac using autograd
        y, _ = mlp(x)
        jac = jacobian(y, x)
        jac_diag2 = []
        for i in range(batch_size):
            jac_diag2.append(torch.diag(jac[i, :, i, :]))
        jac_diag2 = torch.stack(jac_diag2, dim=0)

        self.assertArrayEqual(jac_diag, jac_diag2, 1e-6)
Example No. 29
    def test_discrete_actor_distribution(self, lstm_hidden_size):
        action_spec = TensorSpec((), torch.int32)
        network_ctor, state = self._init(lstm_hidden_size)

        # action_spec is not bounded
        self.assertRaises(AssertionError,
                          network_ctor,
                          self._input_spec,
                          action_spec,
                          conv_layer_params=self._conv_layer_params)

        action_spec = BoundedTensorSpec((), torch.int32)
        actor_dist_net = network_ctor(
            self._input_spec,
            action_spec,
            input_preprocessors=self._input_preprocessors,
            preprocessing_combiner=self._preprocessing_combiner,
            conv_layer_params=self._conv_layer_params)

        act_dist, _ = actor_dist_net(self._image, state)
        actions = act_dist.sample((100, ))

        self.assertTrue(
            isinstance(actor_dist_net.output_spec, DistributionSpec))

        # (num_samples, batch_size)
        self.assertEqual(actions.shape, (100, 1))

        self.assertTrue(
            torch.all(actions >= torch.as_tensor(action_spec.minimum)))
        self.assertTrue(
            torch.all(actions <= torch.as_tensor(action_spec.maximum)))
Example No. 30
    def test_encoding_network_nonimg(self, last_layer_size, last_activation,
                                     output_tensor_spec):
        input_spec = TensorSpec((100, ), torch.float32)
        embedding = input_spec.zeros(outer_dims=(1, ))

        if (last_layer_size is None and last_activation is not None) or (
                last_activation is None and last_layer_size is not None):
            with self.assertRaises(AssertionError):
                network = EncodingNetwork(
                    input_tensor_spec=input_spec,
                    output_tensor_spec=output_tensor_spec,
                    fc_layer_params=(30, 40, 50),
                    activation=torch.tanh,
                    last_layer_size=last_layer_size,
                    last_activation=last_activation)
        else:
            network = EncodingNetwork(input_tensor_spec=input_spec,
                                      output_tensor_spec=output_tensor_spec,
                                      fc_layer_params=(30, 40, 50),
                                      activation=torch.tanh,
                                      last_layer_size=last_layer_size,
                                      last_activation=last_activation)

            num_layers = 3 if last_layer_size is None else 4
            self.assertLen(list(network.parameters()), num_layers * 2)

            if last_activation is None:
                self.assertEqual(network._fc_layers[-1]._activation,
                                 torch.tanh)
            else:
                self.assertEqual(network._fc_layers[-1]._activation,
                                 last_activation)

            output, _ = network(embedding)

            if output_tensor_spec is None:
                if last_layer_size is None:
                    self.assertEqual(output.size()[1], 50)
                else:
                    self.assertEqual(output.size()[1], last_layer_size)
                self.assertEqual(network.output_spec.shape,
                                 tuple(output.size()[1:]))
            else:
                self.assertEqual(tuple(output.size()[1:]),
                                 output_tensor_spec.shape)
                self.assertEqual(network.output_spec.shape,
                                 output_tensor_spec.shape)