def setUp(self):
    """Prepare the fixtures shared by the tests in this case."""
    super().setUp()
    # A tiny 1-d input spec and a squared-error loss keep each training
    # problem in this test case trivial and fast.
    self._input_spec = TensorSpec((1, ))
    self._loss_f = math_ops.square
    self._latent_dim = 2
    self._batch_size = 100
    self._epochs = 10
def normalize_along_batch_dims(x, mean, variance, variance_epsilon):
    """Normalize ``x`` by ``mean`` and ``variance`` over its batch dims.

    ``mean`` and ``variance`` must share one tensor spec, which matches the
    inner (non-batch) dims of ``x``.

    Args:
        x (Tensor): a tensor of ``[D1, D2, ..] + shape``, where ``D1``,
            ``D2``, .. are arbitrary leading batch dims (can be empty).
        mean (Tensor): a tensor of ``shape``.
        variance (Tensor): a tensor of ``shape``.
        variance_epsilon (float): a small float number to avoid dividing
            by 0.

    Returns:
        Normalized tensor with the same shape as ``x``.
    """
    mean_spec = TensorSpec.from_tensor(mean)
    assert mean_spec == TensorSpec.from_tensor(variance), \
        "The specs of mean and variance must be equal!"
    # Collapse all leading batch dims into one so the arithmetic below
    # broadcasts against the (inner-shaped) mean/variance.
    squasher = BatchSquash(get_outer_rank(x, mean_spec))
    flat = squasher.flatten(x)
    eps = torch.as_tensor(variance_epsilon).to(variance.dtype)
    inv_std = torch.rsqrt(variance + eps)
    normalized = (flat - mean.to(flat.dtype)) * inv_std.to(flat.dtype)
    return squasher.unflatten(normalized)
def __init__(self,
             observation_spec,
             action_spec,
             skill_spec,
             env,
             config: TrainerConfig,
             num_steps_per_skill=5,
             rl_algorithm_cls=SacAlgorithm,
             rl_mini_batch_size=128,
             rl_mini_batch_length=2,
             rl_replay_buffer_length=20000,
             disc_mini_batch_size=64,
             disc_mini_batch_length=4,
             disc_replay_buffer_length=20000,
             gamma=0.99,
             optimizer=None,
             debug_summaries=False,
             name="SkillGenerator"):
    """Construct a skill generator with a higher-level RL sub-algorithm
    and a skill discriminator.

    Args:
        observation_spec (TensorSpec): spec of the environment observation.
        action_spec (TensorSpec): spec of the environment action.
        skill_spec (TensorSpec): spec of the skill output to the
            lower-level policy.
        env: environment passed through to the sub-algorithm constructors.
        config (TrainerConfig): trainer config passed to the
            sub-algorithms.
        num_steps_per_skill (int): number of environment steps each skill
            lasts.
        rl_algorithm_cls: class of the higher-level RL sub-algorithm.
        rl_mini_batch_size (int): mini-batch size for the RL sub-algorithm.
        rl_mini_batch_length (int): mini-batch length for the RL
            sub-algorithm.
        rl_replay_buffer_length (int): replay buffer length for the RL
            sub-algorithm.
        disc_mini_batch_size (int): mini-batch size for the discriminator.
        disc_mini_batch_length (int): mini-batch length for the
            discriminator.
        disc_replay_buffer_length (int): replay buffer length for the
            discriminator.
        gamma (float): discount factor, stored as ``self._gamma``.
        optimizer: optimizer for this algorithm; forwarded to the base
            class.
        debug_summaries (bool): whether to output debug summaries.
        name (str): name of this algorithm.
    """
    self._num_steps_per_skill = num_steps_per_skill
    self._observation_spec = observation_spec
    self._action_spec = action_spec
    self._skill_spec = skill_spec
    rl, discriminator = self._create_subalgorithms(
        rl_algorithm_cls, debug_summaries, env, config,
        rl_mini_batch_length, rl_mini_batch_size, rl_replay_buffer_length,
        disc_mini_batch_size, disc_mini_batch_length,
        disc_replay_buffer_length)
    # the discriminator needs a handle to the higher-level RL algorithm
    discriminator.set_high_rl(rl)
    train_state_spec = SkillGeneratorState(
        discriminator=discriminator.train_state_spec,  # for discriminator
        skill=self._skill_spec)  # inputs to lower-level
    rollout_state_spec = train_state_spec._replace(
        rl=rl.train_state_spec,  # higher-level policy rollout
        rl_reward=TensorSpec(()),  # higher-level policy replay
        rl_discount=TensorSpec(()),  # higher-level policy replay
        steps=TensorSpec((), dtype='int64'))
    predict_state_spec = train_state_spec._replace(
        rl=rl.predict_state_spec,  # higher-level policy prediction
        steps=TensorSpec((), dtype='int64'),
        discriminator=discriminator.predict_state_spec)
    super().__init__(train_state_spec=train_state_spec,
                     rollout_state_spec=rollout_state_spec,
                     predict_state_spec=predict_state_spec,
                     optimizer=optimizer,
                     name=name)
    self._gamma = gamma
    self._discriminator = discriminator
    self._rl = rl
    # train the higher-level policy from its replay buffer every call
    self._rl_train = common.Periodically(self._rl.train_from_replay_buffer,
                                         period=1,
                                         name="periodic_higher_level")
def get_subtrajectory_spec(num_steps_per_skill, observation_spec,
                           action_spec):
    """Build the spec of a sub-trajectory of ``num_steps_per_skill`` steps."""

    def _with_time_dim(spec):
        # Prepend the time dimension to the per-step shape.
        return TensorSpec(shape=(num_steps_per_skill, ) + spec.shape)

    return SubTrajectory(observation=_with_time_dim(observation_spec),
                         prev_action=_with_time_dim(action_spec))
def test_param_network(self, batch_size=1):
    """ParamNetwork output shapes in non-parallel and parallel modes."""
    input_spec = TensorSpec((3, 32, 32), torch.float32)
    last_layer_size = 10
    network = ParamNetwork(
        input_spec,
        conv_layer_params=((16, (2, 2), 1, (1, 0)), (15, 2, (1, 2), 1, 2)),
        fc_layer_params=((128, True), ),
        last_layer_param=(last_layer_size, True),
        last_activation=math_ops.identity)
    # one hidden fc layer plus the last layer
    self.assertLen(network._fc_layers, 2)

    # test non-parallel forward
    img = input_spec.zeros(outer_dims=(batch_size, ))
    out, _ = network(img)
    expected = (batch_size, last_layer_size)
    self.assertEqual(expected[1:], network.output_spec.shape)
    self.assertEqual(expected, tuple(out.size()))

    # test parallel forward
    replica = 2
    img = input_spec.zeros(outer_dims=(batch_size, ))
    replica_img = input_spec.zeros(outer_dims=(batch_size, replica))
    network.set_parameters(torch.randn(replica, network.param_length))
    out, _ = network(img)
    replica_out, _ = network(replica_img)
    # a shared input and a per-replica input yield the same output shape
    self.assertEqual(out.shape, replica_out.shape)
    expected = (batch_size, replica, last_layer_size)
    self.assertEqual(expected[1:], network.output_spec.shape)
    self.assertEqual(expected, tuple(out.size()))
def test_data_buffer(self):
    """Exercise DataBuffer add/get/overwrite/checkpoint/clear behaviors."""
    dim = 20
    capacity = 256
    data_spec = (TensorSpec(shape=()), TensorSpec(shape=(dim // 3 - 1, )),
                 TensorSpec(shape=(dim - dim // 3, )))

    data_buffer = DataBuffer(data_spec=data_spec, capacity=capacity)

    def _get_batch(batch_size):
        # Split one random (batch, dim) tensor into the three spec fields.
        x = torch.randn(batch_size, dim, requires_grad=True)
        x = (x[:, 0], x[:, 1:dim // 3], x[..., dim // 3:])
        return x

    data_buffer.add_batch(_get_batch(100))
    self.assertEqual(int(data_buffer.current_size), 100)
    batch = _get_batch(1000)
    # test that the created batch has gradients
    self.assertTrue(batch[0].requires_grad)
    data_buffer.add_batch(batch)
    ret = data_buffer.get_batch(2)
    # test that DataBuffer detaches gradients of inputs
    self.assertFalse(ret[0].requires_grad)
    self.assertEqual(int(data_buffer.current_size), capacity)
    ret = data_buffer.get_batch_by_indices(torch.arange(capacity))
    # only the last ``capacity`` entries of the 1000-sample batch survive
    self.assertEqual(ret[0], batch[0][-capacity:])
    self.assertEqual(ret[1], batch[1][-capacity:])
    self.assertEqual(ret[2], batch[2][-capacity:])

    batch = _get_batch(100)
    data_buffer.add_batch(batch)
    ret = data_buffer.get_batch_by_indices(
        torch.arange(data_buffer.current_size - 100,
                     data_buffer.current_size))
    self.assertEqual(ret[0], batch[0])
    self.assertEqual(ret[1], batch[1])
    # FIX: compare against the whole 100-sample batch. The original
    # ``batch[2][-capacity:]`` slice was a copy-paste leftover from the
    # 1000-sample case above: with only 100 rows it is a no-op, and it
    # was inconsistent with the two assertions directly above.
    self.assertEqual(ret[2], batch[2])

    # Test checkpoint working
    with tempfile.TemporaryDirectory() as checkpoint_directory:
        checkpoint = Checkpointer(checkpoint_directory,
                                  data_buffer=data_buffer)
        checkpoint.save(10)
        data_buffer = DataBuffer(data_spec=data_spec, capacity=capacity)
        checkpoint = Checkpointer(checkpoint_directory,
                                  data_buffer=data_buffer)
        global_step = checkpoint.load()
        self.assertEqual(global_step, 10)

    # the restored buffer must contain the same most-recent 100 samples
    ret = data_buffer.get_batch_by_indices(
        torch.arange(data_buffer.current_size - 100,
                     data_buffer.current_size))
    self.assertEqual(ret[0], batch[0])
    self.assertEqual(ret[1], batch[1])
    # FIX: same no-op slice removed here (see above).
    self.assertEqual(ret[2], batch[2])

    data_buffer.clear()
    self.assertEqual(int(data_buffer.current_size), 0)
def test_noinit_copy_works(self):
    """``copy()`` rebuilds the net with the original ctor arguments."""
    # pass a TensorSpec to prevent assertion error in Network
    original = NoInitNetwork(TensorSpec([2]), 1)
    clone = original.copy()
    # the copy is a distinct object ...
    self.assertNotEqual(original, clone)
    # ... constructed with the same positional and keyword arguments
    self.assertEqual(TensorSpec([2]), clone.param1)
    self.assertEqual(1, clone.param2)
    self.assertEqual(2, clone.kwarg1)
    self.assertEqual(3, clone.kwarg2)
def setUp(self):
    """Create a 10-dim observation spec and a mid-episode time step."""
    self._hidden_size = 100
    self._input_tensor_spec = TensorSpec((10, ))
    # A single MID step with a zero observation of batch size 1;
    # prev_action/env_id are filled in by the individual tests as needed.
    self._time_step = TimeStep(step_type=StepType.MID,
                               reward=0,
                               discount=1,
                               observation=self._input_tensor_spec.zeros(
                                   outer_dims=(1, )),
                               prev_action=None,
                               env_id=None)
def setUp(self):
    """Build an encoding net over a 10-dim observation, plus a time step."""
    obs_spec = TensorSpec((10, ))
    self._encoding_net = EncodingNetwork(input_tensor_spec=obs_spec)
    # One mid-episode step with an all-zero observation of batch size 1.
    self._time_step = TimeStep(
        step_type=torch.tensor(StepType.MID, dtype=torch.int32),
        reward=0,
        discount=1,
        observation=obs_spec.zeros(outer_dims=(1, )),
        prev_action=None,
        env_id=None)
def test_parallel_image_encoding_network(self, same_padding,
                                         flatten_output):
    """Output shapes for shared and per-replica image inputs."""
    input_spec = TensorSpec((3, 80, 80), torch.float32)
    replica = 2
    network = ParallelImageEncodingNetwork(
        input_channels=input_spec.shape[0],
        input_size=input_spec.shape[1:3],
        n=replica,
        conv_layer_params=((16, (5, 3), 2, (1, 1)), (15, 3, (2, 2), 0)),
        same_padding=same_padding,
        flatten_output=flatten_output)
    # two conv layers -> two weights + two biases
    self.assertLen(list(network.parameters()), 4)

    batch_size = 3
    if same_padding:
        expected = (batch_size, replica, 15, 20, 20)
    else:
        expected = (batch_size, replica, 15, 19, 19)

    def _check_output(output):
        # The spec excludes the batch dim; the tensor includes it.
        if flatten_output:
            self.assertEqual((*expected[1:2], np.prod(expected[2:])),
                             network.output_spec.shape)
            self.assertEqual((*expected[0:2], np.prod(expected[2:])),
                             tuple(output.size()))
        else:
            self.assertEqual(expected[1:], network.output_spec.shape)
            self.assertEqual(expected, tuple(output.size()))

    # 1) shared input case: one image broadcast to all replicas
    output, _ = network(input_spec.zeros(outer_dims=(batch_size, )))
    _check_output(output)

    # 2) non-shared input case: one image per replica
    output, _ = network(
        input_spec.zeros(outer_dims=(batch_size, replica)))
    _check_output(output)
def setUp(self):
    """Shared fixtures: a two-image input nest and network hyperparams."""
    self._input_spec = [
        TensorSpec((3, 20, 20), torch.float32),
        TensorSpec((1, 20, 20), torch.float32)
    ]
    self._image = zero_tensor_from_nested_spec(self._input_spec,
                                               batch_size=1)
    # tanh-preprocess the first image only, then concat along channels
    self._input_preprocessors = [torch.tanh, None]
    self._preprocessing_combiner = NestConcat(dim=1)
    self._conv_layer_params = ((8, 3, 1), (16, 3, 2, 1))
    self._fc_layer_params = (100, )
def test_encoding_network_img(self):
    """The conv stack's flattened output matches its inner output spec."""
    img_spec = TensorSpec((3, 80, 80), torch.float32)
    network = EncodingNetwork(input_tensor_spec=img_spec,
                              conv_layer_params=((16, (5, 3), 2, (1, 1)),
                                                 (15, 3, (2, 2), 0)))
    # two conv layers -> two weights + two biases
    self.assertLen(list(network.parameters()), 4)
    batch = img_spec.zeros(outer_dims=(1, ))
    output, _ = network(batch)
    inner_spec = network._img_encoding_net.output_spec
    self.assertEqual(output.shape[-1], np.prod(inner_spec.shape))
def __init__(self, dim=2):
    """A two-layer bias-free linear net with fixed fc1 and zero fc2."""
    super().__init__(input_tensor_spec=[
        TensorSpec(shape=(dim, )),
        TensorSpec(shape=(dim, ))
    ],
                     name="Net")
    self.fc1 = nn.Linear(dim, dim, bias=False)
    self.fc2 = nn.Linear(dim, dim, bias=False)
    # NOTE(review): the hard-coded 2x2 tensor assumes dim == 2 — confirm.
    first = torch.tensor([[1, 2], [1, 1]], dtype=torch.float32)
    second = torch.zeros((dim, dim), dtype=torch.float32)
    # nn.Linear stores weight as (out_features, in_features), hence .t()
    self.fc1.weight = nn.Parameter(first.t())
    self.fc2.weight = nn.Parameter(second.t())
def test_continuous_skill_loss(self):
    """With zero inputs the skill-prediction loss should be zero."""
    skill_spec = TensorSpec((4, ))
    alg = DIAYNAlgorithm(skill_spec=skill_spec,
                         encoding_net=self._encoding_net)
    skill = state = skill_spec.zeros(outer_dims=(1, ))
    step = self._time_step._replace(
        observation=[self._time_step.observation, skill])
    alg_step = alg.train_step(step, state)
    # the discriminator should predict a zero skill vector
    self.assertTensorClose(torch.sum(alg_step.info.loss),
                           torch.as_tensor(0))
def get_low_rl_input_spec(observation_spec, action_spec,
                          num_steps_per_skill, skill_spec):
    """Build the flattened list of input specs for the lower-level policy.

    The lower-level policy sees the concatenated sub-trajectory of
    observations and previous actions, the step counter within the current
    skill, and the skill itself.

    Args:
        observation_spec (TensorSpec): rank-1 observation spec.
        action_spec (TensorSpec): rank-1 action spec.
        num_steps_per_skill (int): number of steps each skill lasts.
        skill_spec (TensorSpec): spec of the skill vector.

    Returns:
        list: flattened ``SubTrajectory`` specs followed by the step spec
        and ``skill_spec``.
    """
    assert observation_spec.ndim == 1 and action_spec.ndim == 1
    concat_observation_spec = TensorSpec(
        (num_steps_per_skill * observation_spec.shape[0], ))
    concat_action_spec = TensorSpec(
        (num_steps_per_skill * action_spec.shape[0], ))
    traj_spec = SubTrajectory(observation=concat_observation_spec,
                              prev_action=concat_action_spec)
    # FIX: removed a duplicated ``step_spec =`` from the original chained
    # assignment (``step_spec = step_spec = ...``) — a harmless typo.
    step_spec = BoundedTensorSpec(shape=(),
                                  maximum=num_steps_per_skill,
                                  dtype='int64')
    return alf.nest.flatten(traj_spec) + [step_spec, skill_spec]
def test_encoding_network_preprocessing_combiner(self):
    """NestSum(average=True) collapses the input nest to one image."""
    input_spec = dict(a=TensorSpec((3, 80, 80)),
                      b=[TensorSpec((80, 80)),
                         TensorSpec(())])
    network = EncodingNetwork(input_tensor_spec=input_spec,
                              preprocessing_combiner=NestSum(average=True),
                              conv_layer_params=((1, 2, 2, 0), ))
    # averaging with broadcasting yields a single (3, 80, 80) spec
    self.assertEqual(network._processed_input_tensor_spec,
                     TensorSpec((3, 80, 80)))
    imgs = common.zero_tensor_from_nested_spec(input_spec, batch_size=1)
    output, _ = network(imgs)
    # the all-zero input should map to an all-zero flattened output
    self.assertTensorEqual(output, torch.zeros((40 * 40, )))
class TestNestSelectiveConcat(parameterized.TestCase, alf.test.TestCase):
    """Tests for ``NestConcat`` with a selective mask.

    The mask mirrors the structure of the input nest: entries marked ``1``
    are concatenated into the result and entries marked ``0`` are dropped.
    A ``None`` mask selects every entry.
    """

    @parameterized.parameters(
        (NTuple(a=dict(x=1, y=0), b=0), torch.zeros((2, 3))),
        (NTuple(a=dict(x=0, y=1), b=0), torch.zeros((2, 4))),
        (NTuple(a=dict(x=0, y=0), b=1), torch.zeros((2, 10))),
        (NTuple(a=dict(x=1, y=1), b=0), torch.zeros((2, 7))),
        (NTuple(a=dict(x=1, y=0), b=1), torch.zeros((2, 13))),
        (NTuple(a=dict(x=0, y=1), b=1), torch.zeros((2, 14))),
        (NTuple(a=dict(x=1, y=1), b=1), torch.zeros((2, 17))),
        (None, torch.zeros((2, 17))),
    )
    def test_nest_selective_concat_tensors(self, mask, expected):
        """Concatenating the tensors selected by ``mask`` yields
        ``expected`` (widths 3, 4 and 10 sum per the selected entries)."""
        ntuple = NTuple(
            a=dict(x=torch.zeros((2, 3)), y=torch.zeros((2, 4))),
            b=torch.zeros((2, 10)))
        ret = NestConcat(mask)(ntuple)
        self.assertTensorEqual(ret, expected)

    @parameterized.parameters(
        (NTuple(a=dict(x=1, y=0), b=0), TensorSpec((2, 3))),
        (NTuple(a=dict(x=0, y=1), b=0), TensorSpec((2, 4))),
        (NTuple(a=dict(x=0, y=0), b=1), TensorSpec((2, 10))),
        (NTuple(a=dict(x=1, y=1), b=0), TensorSpec((2, 7))),
        (NTuple(a=dict(x=1, y=0), b=1), TensorSpec((2, 13))),
        (NTuple(a=dict(x=0, y=1), b=1), TensorSpec((2, 14))),
        (NTuple(a=dict(x=1, y=1), b=1), TensorSpec((2, 17))),
        (None, TensorSpec((2, 17))),
    )
    def test_nest_selective_concat_specs(self, mask, expected):
        """``NestConcat`` applied to specs produces the concatenated
        spec, mirroring the tensor case above."""
        ntuple = NTuple(
            a=dict(x=TensorSpec((2, 3)), y=TensorSpec((2, 4))),
            b=TensorSpec((2, 10)))
        ret = NestConcat(mask)(ntuple)
        self.assertEqual(ret, expected)
def test_parallel_q_network(self):
    """``make_parallel`` adds a replica dim to the Q-value output."""
    input_spec = TensorSpec([10])
    observations = input_spec.zeros(outer_dims=(1, ))
    network_ctor, state = self._init(None)
    base_net = network_ctor(input_spec, self._action_spec)
    replicas = 5
    parallel_net = base_net.make_parallel(replicas)
    q_value, _ = parallel_net(observations, state)
    # (batch_size, n, num_actions)
    self.assertEqual(q_value.shape, (1, replicas, self._num_actions))
def test_uniform_projection_net(self):
    """A zero-weight net generates uniform actions."""
    input_spec = TensorSpec((10, ), torch.float32)
    net = CategoricalProjectionNetwork(
        input_size=input_spec.shape[0],
        action_spec=BoundedTensorSpec((1, ), minimum=0, maximum=4),
        logits_init_output_factor=0)
    embedding = input_spec.ones(outer_dims=(1, ))
    dist, _ = net(embedding)
    self.assertTrue(isinstance(net.output_spec, DistributionSpec))
    self.assertEqual(dist.batch_shape, (1, ))
    self.assertEqual(dist.base_dist.batch_shape, (1, 1))
    # zero logits -> equal probability 1/5 over the five actions
    self.assertTrue(torch.all(dist.base_dist.probs == 0.2))
def test_continuous_action(self):
    """On all-zero inputs the inverse-model loss should be zero."""
    action_spec = TensorSpec((4, ))
    alg = ICMAlgorithm(action_spec=action_spec,
                       observation_spec=self._input_tensor_spec,
                       hidden_size=self._hidden_size)
    zero_state = self._input_tensor_spec.zeros(outer_dims=(1, ))
    zero_action = action_spec.zeros(outer_dims=(1, ))
    alg_step = alg.train_step(
        self._time_step._replace(prev_action=zero_action), zero_state)
    # the inverse net should predict a zero action vector
    self.assertTensorClose(
        torch.sum(alg_step.info.loss.extra['inverse_loss']),
        torch.as_tensor(0))
def test_close_uniform_projection_net(self):
    """A random-weight net generates close-uniform actions on average."""
    input_spec = TensorSpec((10, ), torch.float32)
    net = CategoricalProjectionNetwork(
        input_size=input_spec.shape[0],
        action_spec=BoundedTensorSpec((3, 2), minimum=0, maximum=4),
        logits_init_output_factor=1.0)
    embeddings = input_spec.ones(outer_dims=(100, ))
    dists, _ = net(embeddings)
    self.assertEqual(dists.batch_shape, (100, ))
    self.assertEqual(dists.base_dist.batch_shape, (100, 3, 2))
    # individual probabilities vary ...
    self.assertTrue(dists.base_dist.probs.std() > 0)
    # ... but their mean stays close to the uniform value 1/5
    self.assertTrue(
        torch.isclose(dists.base_dist.probs.mean(),
                      torch.as_tensor(0.2)))
class ICMAlgorithmTest(alf.test.TestCase):
    """Tests for the inverse-model loss of ``ICMAlgorithm``."""

    def setUp(self):
        self._input_tensor_spec = TensorSpec((10, ))
        # one mid-episode step with a zero observation of batch size 1
        self._time_step = TimeStep(
            step_type=StepType.MID,
            reward=0,
            discount=1,
            observation=self._input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._hidden_size = 100

    def _run_train_step(self, action_spec):
        """Build an ICM algorithm and run one train step with all-zero
        observation and action; return the resulting step."""
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))
        return alg.train_step(
            self._time_step._replace(
                prev_action=action_spec.zeros(outer_dims=(1, ))), state)

    def test_discrete_action(self):
        action_spec = BoundedTensorSpec((),
                                        dtype=torch.int64,
                                        minimum=0,
                                        maximum=3)
        alg_step = self._run_train_step(action_spec)
        # the inverse net should predict a uniform distribution
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(
                math.log(action_spec.maximum - action_spec.minimum + 1)),
            epsilon=1e-4)

    def test_continuous_action(self):
        action_spec = TensorSpec((4, ))
        alg_step = self._run_train_step(action_spec)
        # the inverse net should predict a zero action vector
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(0))
def __init__(self, dim=2):
    """A single bias-free linear layer with fixed 3 -> dim weights."""
    super().__init__(input_tensor_spec=TensorSpec(shape=(dim, )),
                     name="Net")
    # NOTE(review): the layer's input size (3) differs from the declared
    # input spec (dim,) — presumably intentional for this test; confirm.
    self.fc = nn.Linear(3, dim, bias=False)
    weight = torch.tensor([[1, 2], [-1, 1], [1, 1]], dtype=torch.float32)
    # nn.Linear stores weight as (out_features, in_features), hence .t()
    self.fc.weight = nn.Parameter(weight.t())
def test_encoding_network_input_preprocessor(self):
    """A callable input preprocessor is applied before encoding."""
    scalar_spec = TensorSpec((1, ))
    network = EncodingNetwork(input_tensor_spec=scalar_spec,
                              input_preprocessors=torch.tanh)
    inputs = common.zero_tensor_from_nested_spec(scalar_spec, batch_size=1)
    output, _ = network(inputs)
    # the preprocessor keeps the 1-dim feature width
    self.assertEqual(output.size()[1], 1)
def __init__(self,
             dynamics_network: DynamicsNetwork,
             n: int,
             name="ParallelDynamicsNetwork"):
    """Create a parallelized version of ``DynamicsNetwork``.

    Args:
        dynamics_network (DynamicsNetwork): the non-parallelized dynamics
            network to replicate.
        n (int): make ``n`` replicas from ``dynamics_network`` with
            different initializations.
        name (str): name of this network.
    """
    super().__init__(input_tensor_spec=dynamics_network.input_tensor_spec,
                     name=name)
    self._joint_encoder = dynamics_network._joint_encoder.make_parallel(n)
    self._prob = dynamics_network._prob
    # only probabilistic dynamics networks carry a projection net
    self._projection_net = (
        dynamics_network._projection_net.make_parallel(n)
        if self._prob else None)
    # prepend the replica dim to the per-network output shape
    self._output_spec = TensorSpec(
        (n, ) + dynamics_network.output_spec.shape)
def __init__(self, config: TrainerConfig):
    """Create an SLTrainer.

    Args:
        config (TrainerConfig): configuration used to construct this
            trainer.
    """
    super().__init__(config)
    assert config.num_iterations > 0, \
        "Must provide num_iterations for training!"
    self._num_epochs = config.num_iterations
    self._trainer_progress.set_termination_criterion(self._num_epochs)

    trainset, testset = self._create_dataset()
    sample = trainset.dataset[0]
    input_tensor_spec = TensorSpec(shape=sample[0].shape)
    # Classification datasets expose ``classes``; otherwise infer the
    # output dimension from the length of one target sample.
    if hasattr(trainset.dataset, 'classes'):
        output_dim = len(trainset.dataset.classes)
    else:
        output_dim = len(sample[1])
    self._algorithm = config.algorithm_ctor(
        input_tensor_spec=input_tensor_spec,
        last_layer_param=(output_dim, True),
        last_activation=math_ops.identity,
        config=config)
    self._algorithm.set_data_loader(trainset, testset)
def _extract_spec(obj):
    """Return the spec describing ``obj``.

    NOTE(review): this relies on ``from_dim`` from an enclosing scope
    (apparently a nested helper); presumably the leading ``from_dim``
    dims of ``obj`` are treated as batch dims — confirm against the
    outer function.

    Args:
        obj (torch.Tensor | td.Distribution): the value to describe.

    Returns:
        TensorSpec | DistributionSpec: the extracted spec.

    Raises:
        ValueError: if ``obj`` is neither a Tensor nor a Distribution.
    """
    if isinstance(obj, torch.Tensor):
        return TensorSpec.from_tensor(obj, from_dim)
    elif isinstance(obj, td.Distribution):
        return DistributionSpec.from_distribution(obj, from_dim)
    else:
        raise ValueError("Unsupported value type: %s" % type(obj))
def test_compute_jac_diag(self, hidden_layers=(2, ), input_size=5):
    """
    Check that the diagonal of the input-output Jacobian computed by the
    direct (autograd-free) approach is consistent with the one computed
    by calling autograd.
    """
    batch_size = 2
    spec = TensorSpec((input_size, ))
    mlp = ReluMLP(spec, hidden_layers=hidden_layers)

    inputs = torch.randn(batch_size, input_size, requires_grad=True)
    # direct approach, run on an independent copy of the inputs
    inputs_copy = inputs.detach().clone()
    inputs_copy.requires_grad = True
    direct_diag = mlp.compute_jac_diag(inputs_copy)

    # autograd approach: full Jacobian, then per-sample diagonal
    outputs, _ = mlp(inputs)
    full_jac = jacobian(outputs, inputs)
    autograd_diag = torch.stack(
        [torch.diag(full_jac[i, :, i, :]) for i in range(batch_size)],
        dim=0)

    self.assertArrayEqual(direct_diag, autograd_diag, 1e-6)
def test_discrete_actor_distribution(self, lstm_hidden_size):
    """Unbounded discrete specs are rejected; bounded ones sample in
    range."""
    network_ctor, state = self._init(lstm_hidden_size)

    # a plain (unbounded) TensorSpec action spec must be rejected
    unbounded_spec = TensorSpec((), torch.int32)
    self.assertRaises(AssertionError,
                      network_ctor,
                      self._input_spec,
                      unbounded_spec,
                      conv_layer_params=self._conv_layer_params)

    action_spec = BoundedTensorSpec((), torch.int32)
    actor_dist_net = network_ctor(
        self._input_spec,
        action_spec,
        input_preprocessors=self._input_preprocessors,
        preprocessing_combiner=self._preprocessing_combiner,
        conv_layer_params=self._conv_layer_params)
    self.assertTrue(
        isinstance(actor_dist_net.output_spec, DistributionSpec))

    act_dist, _ = actor_dist_net(self._image, state)
    samples = act_dist.sample((100, ))
    # (num_samples, batch_size)
    self.assertEqual(samples.shape, (100, 1))
    # every sampled action stays inside the spec's bounds
    self.assertTrue(
        torch.all(samples >= torch.as_tensor(action_spec.minimum)))
    self.assertTrue(
        torch.all(samples <= torch.as_tensor(action_spec.maximum)))
def test_encoding_network_nonimg(self, last_layer_size, last_activation,
                                 output_tensor_spec):
    """``last_layer_size`` and ``last_activation`` must be both set or
    both None; otherwise construction asserts."""
    input_spec = TensorSpec((100, ), torch.float32)
    embedding = input_spec.zeros(outer_dims=(1, ))

    net_kwargs = dict(input_tensor_spec=input_spec,
                      output_tensor_spec=output_tensor_spec,
                      fc_layer_params=(30, 40, 50),
                      activation=torch.tanh,
                      last_layer_size=last_layer_size,
                      last_activation=last_activation)
    # guard clause: exactly one of the two being None is an error
    if (last_layer_size is None) != (last_activation is None):
        with self.assertRaises(AssertionError):
            network = EncodingNetwork(**net_kwargs)
        return

    network = EncodingNetwork(**net_kwargs)
    # three hidden fc layers, plus one more when a last layer is given
    num_layers = 3 if last_layer_size is None else 4
    self.assertLen(list(network.parameters()), num_layers * 2)
    if last_activation is None:
        self.assertEqual(network._fc_layers[-1]._activation, torch.tanh)
    else:
        self.assertEqual(network._fc_layers[-1]._activation,
                         last_activation)

    output, _ = network(embedding)
    if output_tensor_spec is None:
        expected_width = (50 if last_layer_size is None
                          else last_layer_size)
        self.assertEqual(output.size()[1], expected_width)
        self.assertEqual(network.output_spec.shape,
                         tuple(output.size()[1:]))
    else:
        self.assertEqual(tuple(output.size()[1:]),
                         output_tensor_spec.shape)
        self.assertEqual(network.output_spec.shape,
                         output_tensor_spec.shape)