Example #1
    def test_sac_algorithm_init(self):
        observation_spec = BoundedTensorSpec((10, ))
        discrete_action_spec = BoundedTensorSpec((), dtype='int64')
        continuous_action_spec = [
            BoundedTensorSpec((3, )),
            BoundedTensorSpec((10, ))
        ]

        universal_q_network = partial(
            QNetwork, preprocessing_combiner=NestConcat())
        critic_network = partial(
            CriticNetwork, action_preprocessing_combiner=NestConcat())

        # q_network instead of critic_network is needed
        self.assertRaises(
            AssertionError,
            SacAlgorithm,
            observation_spec=observation_spec,
            action_spec=discrete_action_spec,
            q_network_cls=None)

        sac = SacAlgorithm(
            observation_spec=observation_spec,
            action_spec=discrete_action_spec,
            q_network_cls=QNetwork)
        self.assertEqual(sac._act_type, SacActionType.Discrete)
        self.assertEqual(sac.train_state_spec.actor, ())
        self.assertEqual(sac.train_state_spec.action.actor_network, ())

        # critic_network instead of q_network is needed
        self.assertRaises(
            AssertionError,
            SacAlgorithm,
            observation_spec=observation_spec,
            action_spec=continuous_action_spec,
            critic_network_cls=None)

        sac = SacAlgorithm(
            observation_spec=observation_spec,
            action_spec=continuous_action_spec,
            critic_network_cls=critic_network)
        self.assertEqual(sac._act_type, SacActionType.Continuous)
        self.assertEqual(sac.train_state_spec.action.critic, ())

        # action_spec order is incorrect
        self.assertRaises(
            AssertionError,
            SacAlgorithm,
            observation_spec=observation_spec,
            action_spec=(continuous_action_spec, discrete_action_spec),
            q_network_cls=universal_q_network)

        sac = SacAlgorithm(
            observation_spec=observation_spec,
            action_spec=(discrete_action_spec, continuous_action_spec),
            q_network_cls=universal_q_network)
        self.assertEqual(sac._act_type, SacActionType.Mixed)
        self.assertEqual(sac.train_state_spec.actor, ())
Example #2
    def test_value_distribution(self, lstm_hidden_size):
        input_spec1 = TensorSpec((3, 20, 20))
        input_spec2 = TensorSpec((100, ))
        conv_layer_params = ((8, 3, 1), (16, 3, 2, 1))
        embedding_dim = 100

        image = input_spec1.zeros(outer_dims=(1, ))
        vector = input_spec2.zeros(outer_dims=(1, ))

        network_ctor, state = self._init(lstm_hidden_size)

        value_net = network_ctor(
            input_tensor_spec=[input_spec1, input_spec2],
            input_preprocessors=[
                EmbeddingPreprocessor(
                    input_spec1,
                    embedding_dim=embedding_dim,
                    conv_layer_params=conv_layer_params), None
            ],
            preprocessing_combiner=NestConcat())

        value, state = value_net([image, vector], state)

        self.assertEqual(value_net._processed_input_tensor_spec.shape[0], 200)
        self.assertEqual(value_net.output_spec, TensorSpec(()))
        # (batch_size,)
        self.assertEqual(value.shape, (1, ))
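
The 200 asserted above is simply the concatenation width: the image is embedded to 100 dimensions, the 100-dim vector passes through unchanged, and NestConcat() joins them along the last dimension. A minimal plain-torch sketch of that combination step (shapes taken from the example, not ALF's internal code):

import torch

embedded_image = torch.zeros((1, 100))  # output of the EmbeddingPreprocessor
raw_vector = torch.zeros((1, 100))      # passed through unchanged (its preprocessor is None)
combined = torch.cat([embedded_image, raw_vector], dim=-1)
assert combined.shape == (1, 200)       # matches _processed_input_tensor_spec.shape[0] == 200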
Example #3
    def __init__(self,
                 train_state_spec,
                 action_spec,
                 feature_spec,
                 hidden_size=256,
                 num_replicas=1,
                 dynamics_network: DynamicsNetwork = None,
                 name="DynamicsLearningAlgorithm"):
        """Create a DynamicsLearningAlgorithm.

        Args:
            train_state_spec (nested TensorSpec): state spec for training.
            action_spec (nested TensorSpec): agent's action spec. Nested
                action specs are not supported.
            feature_spec (nested TensorSpec): spec of the feature to be
                predicted. Nested feature specs are not supported.
            hidden_size (int|tuple): size of hidden layer(s)
            num_replicas (int): number of replicas of the dynamics network.
            dynamics_network (Network): network for predicting the change of
                the next feature based on the previous feature and action.
                It should accept input with spec of the format
                [feature_spec, encoded_action_spec] and output a tensor of the
                shape feature_spec. For the discrete action case,
                encoded_action is a one-hot representation of the action (a
                sketch follows this example); for continuous actions, the
                encoded action is the original action.
            name (str): name of this algorithm.
        """
        super().__init__(train_state_spec=train_state_spec, name=name)

        flat_action_spec = nest.flatten(action_spec)
        assert len(flat_action_spec) == 1, "doesn't support nested action_spec"

        flat_feature_spec = nest.flatten(feature_spec)
        assert len(
            flat_feature_spec) == 1, "doesn't support nested feature_spec"

        action_spec = flat_action_spec[0]

        if action_spec.is_discrete:
            self._num_actions = action_spec.maximum - action_spec.minimum + 1
        else:
            self._num_actions = action_spec.shape[-1]

        self._action_spec = action_spec
        self._feature_spec = feature_spec
        self._num_replicas = num_replicas

        if isinstance(hidden_size, int):
            hidden_size = (hidden_size, )

        if dynamics_network is None:
            encoded_action_spec = TensorSpec((self._num_actions, ),
                                             dtype=torch.float32)
            dynamics_network = DynamicsNetwork(
                name="dynamics_net",
                input_tensor_spec=(feature_spec, encoded_action_spec),
                preprocessing_combiner=NestConcat(),
                fc_layer_params=hidden_size,
                output_tensor_spec=flat_feature_spec[0])

        if num_replicas > 1:
            self._dynamics_network = dynamics_network.make_parallel(
                num_replicas)
        else:
            self._dynamics_network = dynamics_network
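
A minimal plain-torch sketch (not ALF's exact code) of the action encoding described in the docstring above: a discrete action is one-hot encoded to a (num_actions,) vector before NestConcat() concatenates it with the feature.

import torch
import torch.nn.functional as F

num_actions = 5                  # action_spec.maximum - action_spec.minimum + 1 (hypothetical value)
actions = torch.tensor([2, 0])   # a batch of discrete actions
encoded = F.one_hot(actions, num_actions).to(torch.float32)
assert encoded.shape == (2, 5)   # matches encoded_action_spec = TensorSpec((num_actions, ))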
Example #4
    def setUp(self):
        self._input_spec = [
            TensorSpec((3, 20, 20), torch.float32),
            TensorSpec((1, 20, 20), torch.float32)
        ]
        self._image = zero_tensor_from_nested_spec(self._input_spec,
                                                   batch_size=1)
        self._conv_layer_params = ((8, 3, 1), (16, 3, 2, 1))
        self._fc_layer_params = (100, )
        self._input_preprocessors = [torch.tanh, None]
        self._preprocessing_combiner = NestConcat(dim=1)
Example #5
    def test_mixed_actions(self, net_ctor):
        obs_spec = TensorSpec((20, ))
        action_spec = dict(x=BoundedTensorSpec((), dtype='int64'),
                           y=BoundedTensorSpec((3, )))

        input_preprocessors = dict(x=EmbeddingPreprocessor(action_spec['x'],
                                                           embedding_dim=10),
                                   y=None)

        net_ctor = functools.partial(
            net_ctor, action_input_processors=input_preprocessors)

        # doesn't support mixed actions
        self.assertRaises(AssertionError, net_ctor, (obs_spec, action_spec))

        # ... unless a combiner is specified
        net_ctor((obs_spec, action_spec),
                 action_preprocessing_combiner=NestConcat())
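
What the action_preprocessing_combiner resolves in the test above, sketched in plain torch with a hypothetical batch size: the embedded discrete action x (10 dims) and the raw continuous action y (3 dims) are merged into a single 13-dim action tensor.

import torch

embedded_x = torch.zeros((4, 10))  # EmbeddingPreprocessor(embedding_dim=10) output for x
raw_y = torch.zeros((4, 3))        # y is passed through (its preprocessor is None)
combined_action = torch.cat([embedded_x, raw_y], dim=-1)
assert combined_action.shape == (4, 13)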
Example #6
    def test_encoding_network_nested_input(self, lstm):
        input_spec = dict(a=TensorSpec((3, 80, 80)),
                          b=[
                              TensorSpec((80, )),
                              BoundedTensorSpec((), dtype="int64"),
                              dict(x=TensorSpec((100, )),
                                   y=TensorSpec((200, )))
                          ])
        imgs = common.zero_tensor_from_nested_spec(input_spec, batch_size=1)
        input_preprocessors = dict(
            a=EmbeddingPreprocessor(input_spec["a"],
                                    conv_layer_params=((1, 2, 2, 0), ),
                                    embedding_dim=100),
            b=[
                EmbeddingPreprocessor(input_spec["b"][0], embedding_dim=50),
                EmbeddingPreprocessor(input_spec["b"][1], embedding_dim=50),
                dict(x=None, y=torch.relu)
            ])

        if lstm:
            network_ctor = functools.partial(LSTMEncodingNetwork,
                                             hidden_size=(100, ))
        else:
            network_ctor = EncodingNetwork

        network = network_ctor(input_tensor_spec=input_spec,
                               input_preprocessors=input_preprocessors,
                               preprocessing_combiner=NestConcat())
        output, _ = network(imgs, state=[(torch.zeros((1, 100)), ) * 2])

        if lstm:
            self.assertEqual(network.output_spec, TensorSpec((100, )))
            self.assertEqual(output.size()[-1], 100)
        else:
            self.assertEqual(len(list(network.parameters())), 4 + 2 + 1)
            self.assertEqual(network.output_spec, TensorSpec((500, )))
            self.assertEqual(output.size()[-1], 500)
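
Where the 500 in the non-LSTM assertions comes from: it is the sum of the preprocessed widths that NestConcat() concatenates. A small sketch using the sizes from the example (not ALF code):

widths = dict(
    a=100,   # EmbeddingPreprocessor(embedding_dim=100) on the image
    b0=50,   # EmbeddingPreprocessor(embedding_dim=50)
    b1=50,   # EmbeddingPreprocessor(embedding_dim=50)
    x=100,   # passed through (preprocessor is None)
    y=200)   # torch.relu keeps the shape
assert sum(widths.values()) == 500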
Example #7
    def __init__(self,
                 observation_spec,
                 action_spec,
                 skill_spec,
                 config: TrainerConfig,
                 skill_discriminator_ctor=EncodingNetwork,
                 skill_encoder_ctor=None,
                 observation_transformer=math_ops.identity,
                 optimizer=None,
                 sparse_reward=False,
                 debug_summaries=False,
                 num_steps_per_skill=3,
                 skill_type="state_difference",
                 name="Discriminator"):
        """If ``sparse_reward=True``, then the discriminator will only predict
        at the skill switching steps.
        """
        if skill_spec.is_discrete:
            assert isinstance(skill_spec, BoundedTensorSpec)
            skill_dim = skill_spec.maximum - skill_spec.minimum + 1
        else:
            assert len(
                skill_spec.shape) == 1, "Only 1D skill vector is supported"
            skill_dim = skill_spec.shape[0]

        supported_skill_types = [
            "state_concatenation",
            "state_difference",
            "state",
            "action",
            "action_difference",
            "state_action",
            "action_concatenation",
        ]
        assert skill_type in supported_skill_types, (
            "Skill type must be in: %s" % supported_skill_types)

        self._skill_type = skill_type

        subtrajectory_spec = get_subtrajectory_spec(num_steps_per_skill,
                                                    observation_spec,
                                                    action_spec)

        if skill_type == "state_concatenation":
            discriminator_spec = flatten(subtrajectory_spec.observation)
        elif skill_type == "action_concatenation":
            discriminator_spec = flatten(subtrajectory_spec.prev_action)
        else:
            discriminator_spec = get_discriminator_spec(
                skill_type, observation_spec, action_spec)

        input_preprocessors, preprocessing_combiner = None, None
        if is_action_skill(skill_type):
            # first project
            input_preprocessors = (None, None)
            preprocessing_combiner = NestConcat()
            discriminator_spec = (observation_spec, discriminator_spec)

        skill_encoder = None
        if skill_encoder_ctor is not None:
            step_spec = BoundedTensorSpec((),
                                          maximum=num_steps_per_skill,
                                          dtype='int64')
            skill_encoder = skill_encoder_ctor(
                input_preprocessors=(None,
                                     EmbeddingPreprocessor(
                                         input_tensor_spec=step_spec,
                                         embedding_dim=skill_dim)),
                preprocessing_combiner=NestConcat(),
                input_tensor_spec=(skill_spec, step_spec))
            if input_preprocessors is None:
                input_preprocessors = (None, )
                discriminator_spec = (discriminator_spec, )
            input_preprocessors = input_preprocessors + (EmbeddingPreprocessor(
                input_tensor_spec=step_spec, embedding_dim=skill_dim), )
            discriminator_spec = discriminator_spec + (step_spec, )
            skill_dim = skill_encoder.output_spec.shape[0]

        skill_disc_inputs = dict(input_preprocessors=input_preprocessors,
                                 preprocessing_combiner=preprocessing_combiner,
                                 input_tensor_spec=discriminator_spec)

        if skill_discriminator_ctor.__name__ == "EncodingNetwork":
            skill_disc_inputs["last_layer_size"] = skill_dim
        else:  # ActorDistributionNetwork
            skill_disc_inputs["action_spec"] = skill_spec

        skill_discriminator = skill_discriminator_ctor(**skill_disc_inputs)

        train_state_spec = DiscriminatorState(
            first_observation=observation_spec,
            # prev untransformed observation diff for pred
            untrans_observation=observation_spec,
            subtrajectory=subtrajectory_spec)

        super().__init__(train_state_spec=train_state_spec,
                         predict_state_spec=DiscriminatorState(
                             subtrajectory=subtrajectory_spec,
                             first_observation=observation_spec),
                         config=config,
                         optimizer=optimizer,
                         debug_summaries=debug_summaries,
                         name=name)

        self._skill_discriminator = skill_discriminator
        self._skill_encoder = skill_encoder
        # exp observation won't be automatically transformed when it's sampled
        # from the replay buffer. We will do this manually.
        self._observation_transformer = observation_transformer
        self._num_steps_per_skill = num_steps_per_skill
        self._sparse_reward = sparse_reward
        self._skill_dim = skill_dim
        self._high_rl = None
Example #8
    def __init__(self,
                 x_spec,
                 y_spec,
                 model=None,
                 fc_layers=(256, ),
                 sampler='buffer',
                 buffer_size=65536,
                 optimizer: torch.optim.Optimizer = None,
                 estimator_type='DV',
                 averager: EMAverager = None,
                 name="MIEstimator"):
        """

        Args:
            x_spec (nested TensorSpec): spec of ``x``
            y_spec (nested TensorSpec): spec of ``y``
            model (Network): can be called as ``model([x, y])`` and returns a Tensor
                with ``shape=[batch_size, 1]``. If None, a default MLP with
                ``fc_layers`` will be created.
            fc_layers (tuple[int]): size of hidden layers. Only used if model is
                None.
            sampler (str): type of sampler used to get samples from marginal
                distribution, should be one of ``['buffer', 'double_buffer',
                'shuffle', 'shift']``.
            buffer_size (int): capacity of buffer for storing y for sampler
                'buffer' and 'double_buffer'.
            optimizer (torch.optim.Optimizer): optimizer
            estimator_type (str): one of 'ML', 'DV', 'KLD' or 'JSD'
            averager (EMAverager): averager used to maintain a moving average
                of :math:`exp(T)`. Only used for the 'DV' estimator (a sketch
                of the DV estimate follows this example). If None,
                a ScalarAdaptiveAverager will be created.
            name (str): name of this estimator
        """
        assert estimator_type in ['ML', 'DV', 'KLD', 'JSD'
                                  ], "Wrong estimator_type %s" % estimator_type
        super().__init__(train_state_spec=(), optimizer=optimizer, name=name)
        self._x_spec = x_spec
        self._y_spec = y_spec
        if model is None:
            if estimator_type == 'ML':
                model = EncodingNetwork(
                    name="MIEstimator",
                    input_tensor_spec=x_spec,
                    fc_layer_params=fc_layers,
                    preprocessing_combiner=NestConcat(dim=-1))
            else:
                model = EncodingNetwork(
                    name="MIEstimator",
                    input_tensor_spec=[x_spec, y_spec],
                    preprocessing_combiner=NestConcat(dim=-1),
                    fc_layer_params=fc_layers,
                    last_layer_size=1,
                    last_activation=math_ops.identity)
        self._model = model
        self._type = estimator_type
        if sampler == 'buffer':
            self._y_buffer = DataBuffer(y_spec, capacity=buffer_size)
            self._sampler = self._buffer_sampler
        elif sampler == 'double_buffer':
            self._x_buffer = DataBuffer(x_spec, capacity=buffer_size)
            self._y_buffer = DataBuffer(y_spec, capacity=buffer_size)
            self._sampler = self._double_buffer_sampler
        elif sampler == 'shuffle':
            self._sampler = self._shuffle_sampler
        elif sampler == 'shift':
            self._sampler = self._shift_sampler
        else:
            raise TypeError("Wrong type for sampler %s" % sampler)

        if estimator_type == 'DV':
            if averager is None:
                averager = ScalarAdaptiveAverager()
            self._mean_averager = averager
        if estimator_type == 'ML':
            assert isinstance(
                y_spec,
                alf.TensorSpec), ("Currently, 'ML' does "
                                  "not support nested y_spec: %s" % y_spec)
            assert y_spec.is_continuous, ("Currently, 'ML' does "
                                          "not support discreted y_spec: %s" %
                                          y_spec)
            hidden_size = self._model.output_spec.shape[-1]
            self._delta_loc_layer = alf.layers.FC(
                hidden_size,
                y_spec.shape[-1],
                kernel_initializer=torch.nn.init.zeros_,
                bias_init_value=0.0)
            self._delta_scale_layer = alf.layers.FC(
                hidden_size,
                y_spec.shape[-1],
                kernel_initializer=torch.nn.init.zeros_,
                bias_init_value=math.log(math.e - 1))
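
A rough sketch of the Donsker-Varadhan (DV) estimate referenced in the docstring above, in plain torch and assuming the critic scores T(x, y) have already been computed; it shows why a running average of exp(T) over marginal samples is maintained:

import torch

t_joint = torch.randn(64)     # T(x, y) on samples drawn from the joint p(x, y)
t_marginal = torch.randn(64)  # T(x, y) on samples drawn from the product p(x)p(y)
# DV lower bound on mutual information: E_p[T] - log E_q[exp(T)]
mi_dv = t_joint.mean() - torch.log(torch.exp(t_marginal).mean())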
Example #9
    def test_sac_algorithm_mixed(self, use_parallel_network):
        num_env = 1
        config = TrainerConfig(
            root_dir="dummy",
            unroll_length=1,
            mini_batch_length=2,
            mini_batch_size=64,
            initial_collect_steps=500,
            whole_replay_buffer_training=False,
            clear_replay_buffer=False,
            num_envs=num_env,
        )
        env_class = MixedPolicyUnittestEnv

        steps_per_episode = 13
        env = env_class(num_env, steps_per_episode)

        eval_env = env_class(100, steps_per_episode)

        obs_spec = env._observation_spec
        action_spec = env._action_spec

        fc_layer_params = (10, 10, 10)

        continuous_projection_net_ctor = partial(
            alf.networks.NormalProjectionNetwork,
            state_dependent_std=True,
            scale_distribution=True,
            std_transform=clipped_exp)

        actor_network = partial(
            ActorDistributionNetwork,
            fc_layer_params=fc_layer_params,
            continuous_projection_net_ctor=continuous_projection_net_ctor)

        q_network = partial(QNetwork,
                            preprocessing_combiner=NestConcat(),
                            fc_layer_params=fc_layer_params)

        alg2 = SacAlgorithm(observation_spec=obs_spec,
                            action_spec=action_spec,
                            actor_network_cls=actor_network,
                            q_network_cls=q_network,
                            use_parallel_network=use_parallel_network,
                            env=env,
                            config=config,
                            actor_optimizer=alf.optimizers.Adam(lr=1e-2),
                            critic_optimizer=alf.optimizers.Adam(lr=1e-2),
                            alpha_optimizer=alf.optimizers.Adam(lr=1e-2),
                            debug_summaries=False,
                            name="MySAC")

        eval_env.reset()
        for i in range(700):
            alg2.train_iter()
            if i < config.initial_collect_steps:
                continue

            eval_env.reset()
            eval_time_step = unroll(eval_env, alg2, steps_per_episode - 1)
            logging.log_every_n_seconds(
                logging.INFO,
                "%d reward=%f" % (i, float(eval_time_step.reward.mean())),
                n_seconds=1)

        self.assertAlmostEqual(1.0,
                               float(eval_time_step.reward.mean()),
                               delta=0.2)
Example #10
    def test_nest_selective_concat_specs(self, mask, expected):
        ntuple = NTuple(
            a=dict(x=TensorSpec((2, 3)), y=TensorSpec((2, 4))),
            b=TensorSpec((2, 10)))
        ret = NestConcat(mask)(ntuple)
        self.assertEqual(ret, expected)
Example #11
    def test_nest_concat_specs(self):
        ntuple = NTuple(
            a=dict(x=TensorSpec((2, 3)), y=TensorSpec((2, 4))),
            b=TensorSpec((2, 10)))
        ret = NestConcat()(ntuple)
        self.assertEqual(ret, TensorSpec((2, 17)))
Example #12
    def test_nest_concat_tensors(self):
        ntuple = NTuple(
            a=dict(x=torch.zeros((2, 3)), y=torch.zeros((2, 4))),
            b=torch.zeros((2, 10)))
        ret = NestConcat()(ntuple)
        self.assertTensorEqual(ret, torch.zeros((2, 17)))
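
For the tensor case above, NestConcat() is essentially flatten-then-concatenate along the last dimension; a minimal plain-torch equivalent (using a dict instead of NTuple):

import torch

nested = dict(a=dict(x=torch.zeros((2, 3)), y=torch.zeros((2, 4))),
              b=torch.zeros((2, 10)))
flat = [nested['a']['x'], nested['a']['y'], nested['b']]
ret = torch.cat(flat, dim=-1)
assert ret.shape == (2, 17)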
Example #13
    def __init__(self,
                 action_spec,
                 observation_spec=None,
                 hidden_size=256,
                 reward_adapt_speed=8.0,
                 encoding_net: EncodingNetwork = None,
                 forward_net: EncodingNetwork = None,
                 inverse_net: EncodingNetwork = None,
                 activation=torch.relu_,
                 optimizer=None,
                 name="ICMAlgorithm"):
        """Create an ICMAlgorithm.

        Args:
            action_spec (nested TensorSpec): agent's action spec
            observation_spec (nested TensorSpec): agent's observation spec. If
                not None, then a normalizer will be used to normalize the
                observation.
            hidden_size (int or tuple[int]): size of hidden layer(s)
            reward_adapt_speed (float): how fast to adapt the reward normalizer.
                roughly speaking, the statistics for the normalization are
                calculated mostly based on the most recent T/speed samples,
                where T is the total number of samples.
            encoding_net (Network): network for encoding observation into a
                latent feature. Its input is the same as the input of this algorithm.
            forward_net (Network): network for predicting next feature based on
                previous feature and action. It should accept input with spec
                [feature_spec, encoded_action_spec] and output a tensor of shape
                feature_spec. For discrete actions, encoded_action is a one-hot
                representation of the action; for continuous actions, the encoded
                action is the same as the original action (a usage sketch follows
                this example).
            inverse_net (Network): network for predicting previous action given
                the previous feature and current feature. It should accept input
                with spec [feature_spec, feature_spec] and output tensor of
                shape (num_actions,).
            activation (torch.nn.functional): activation used when constructing
                the default forward net and inverse net (i.e., when they are
                not provided).
            optimizer (torch.optim.Optimizer): The optimizer for training
            name (str):
        """
        if encoding_net is not None:
            feature_spec = encoding_net.output_spec
        else:
            feature_spec = observation_spec

        super(ICMAlgorithm, self).__init__(
            train_state_spec=feature_spec,
            predict_state_spec=(),
            optimizer=optimizer,
            name=name)

        flat_action_spec = alf.nest.flatten(action_spec)
        assert len(
            flat_action_spec) == 1, "ICM doesn't support nested action_spec"

        flat_feature_spec = alf.nest.flatten(feature_spec)
        assert len(
            flat_feature_spec) == 1, "ICM doesn't support nested feature_spec"

        action_spec = flat_action_spec[0]

        if action_spec.is_discrete:
            self._num_actions = int(action_spec.maximum - action_spec.minimum +
                                    1)
        else:
            self._num_actions = action_spec.shape[-1]

        self._action_spec = action_spec
        self._observation_normalizer = None
        if observation_spec is not None:
            self._observation_normalizer = AdaptiveNormalizer(
                tensor_spec=observation_spec)

        feature_dim = flat_feature_spec[0].shape[-1]

        self._encoding_net = encoding_net

        if isinstance(hidden_size, int):
            hidden_size = (hidden_size, )

        if forward_net is None:
            encoded_action_spec = TensorSpec((self._num_actions, ),
                                             dtype=torch.float32)
            forward_net = EncodingNetwork(
                name="forward_net",
                input_tensor_spec=[feature_spec, encoded_action_spec],
                preprocessing_combiner=NestConcat(),
                fc_layer_params=hidden_size,
                activation=activation,
                last_layer_size=feature_dim,
                last_activation=math_ops.identity)

        self._forward_net = forward_net

        if inverse_net is None:
            inverse_net = EncodingNetwork(
                name="inverse_net",
                input_tensor_spec=[feature_spec, feature_spec],
                preprocessing_combiner=NestConcat(),
                fc_layer_params=hidden_size,
                activation=activation,
                last_layer_size=self._num_actions,
                last_activation=math_ops.identity,
                last_kernel_initializer=torch.nn.init.zeros_)

        self._inverse_net = inverse_net

        self._reward_normalizer = ScalarAdaptiveNormalizer(
            speed=reward_adapt_speed)
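
A rough usage sketch (not ALF's exact implementation) of the forward net described in the docstring above: in ICM the intrinsic reward is the prediction error between the predicted and actual next feature. ALF networks return an (output, state) pair, as in the other examples.

def icm_intrinsic_reward(forward_net, feature, encoded_action, next_feature):
    # forward_net combines [feature, encoded_action] via NestConcat() internally
    pred_next_feature, _ = forward_net([feature, encoded_action])
    # per-sample squared prediction error used as the intrinsic reward
    return 0.5 * (pred_next_feature - next_feature).pow(2).sum(dim=-1)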
Example #14
    def test_conditional_vae(self):
        """Test for one dimensional Gaussion, conditioned on a Bernoulli variable.
        """
        prior_input_spec = BoundedTensorSpec((), 'int64')

        z_prior_network = EncodingNetwork(
            TensorSpec(
                (prior_input_spec.maximum - prior_input_spec.minimum + 1, )),
            fc_layer_params=(10, ) * 2,
            last_layer_size=2 * self._latent_dim,
            last_activation=math_ops.identity)
        preprocess_network = EncodingNetwork(
            input_tensor_spec=(
                z_prior_network.input_tensor_spec,
                self._input_spec,
                z_prior_network.output_spec,
            ),
            preprocessing_combiner=NestConcat(),
            fc_layer_params=(10, ) * 2,
            last_layer_size=self._latent_dim,
            last_activation=math_ops.identity)

        encoder = vae.VariationalAutoEncoder(
            self._latent_dim,
            preprocess_network=preprocess_network,
            z_prior_network=z_prior_network)
        decoding_layers = FC(self._latent_dim, 1)

        optimizer = torch.optim.Adam(
            list(encoder.parameters()) + list(decoding_layers.parameters()),
            lr=0.1)

        x_train = self._input_spec.randn(outer_dims=(10000, ))
        y_train = x_train.clone()
        y_train[:5000] = y_train[:5000] + 1.0
        pr_train = torch.cat([
            prior_input_spec.zeros(outer_dims=(5000, )),
            prior_input_spec.ones(outer_dims=(5000, ))
        ],
                             dim=0)

        x_test = self._input_spec.randn(outer_dims=(100, ))
        y_test = x_test.clone()
        y_test[:50] = y_test[:50] + 1.0
        pr_test = torch.cat([
            prior_input_spec.zeros(outer_dims=(50, )),
            prior_input_spec.ones(outer_dims=(50, ))
        ],
                            dim=0)
        pr_test = torch.nn.functional.one_hot(
            pr_test,
            int(z_prior_network.input_tensor_spec.shape[0])).to(torch.float32)

        for _ in range(self._epochs):
            idx = torch.randperm(x_train.shape[0])
            x_train = x_train[idx]
            y_train = y_train[idx]
            pr_train = pr_train[idx]
            for i in range(0, x_train.shape[0], self._batch_size):
                optimizer.zero_grad()
                batch = x_train[i:i + self._batch_size]
                y_batch = y_train[i:i + self._batch_size]
                pr_batch = torch.nn.functional.one_hot(
                    pr_train[i:i + self._batch_size],
                    int(z_prior_network.input_tensor_spec.shape[0])).to(
                        torch.float32)
                alg_step = encoder.train_step([pr_batch, batch])
                outputs = decoding_layers(alg_step.output)
                loss = torch.mean(100 * self._loss_f(y_batch - outputs) +
                                  alg_step.info.loss)
                loss.backward()
                optimizer.step()

        y_hat_test = decoding_layers(
            encoder.train_step([pr_test, x_test]).output)
        reconstruction_loss = float(
            torch.mean(self._loss_f(y_test - y_hat_test)))
        print("reconstruction_loss:", reconstruction_loss)
        self.assertLess(reconstruction_loss, 0.05)
Example #15
    def __init__(self,
                 input_tensor_spec,
                 memory_size,
                 core_size,
                 num_prememory_layers,
                 num_memory_layers,
                 num_attention_heads,
                 d_ff,
                 centralized_memory=True,
                 input_preprocessors=None,
                 name="TransformerNetwork"):
        """
        Args:
            input_tensor_spec (nested TensorSpec): the (nested) tensor spec of
                the input. If ``input_tensor_spec`` is not nested, it should
                represent a rank-2 tensor of shape ``[input_size, d_model]``, where
                ``input_size`` is the length of the input sequence, and ``d_model``
                is the dimension of embedding.
            memory_size (int): size of memory.
            core_size (int): size of core (i.e. number of embeddings of core)
            num_prememory_layers (int): number of ``TransformerBlock``
                calculations without using memory
            num_memory_layers (int): number of ``TransformerBlock``
                calculations using memory
            num_attention_heads (int): number of attention heads for each
                ``TransformerBlock``
            d_ff (int): the size of the hidden layer of the feedforward network
                in each ``TransformerBlock``
            centralized_memory (bool): if False, there will be a separate memory
                for each memory layer. If True, there will be a single memory
                shared by all the memory layers, updated using the last core
                embeddings.
            input_preprocessors (nested Network|nn.Module): a nest of
                preprocessor networks, each of which will be applied to the
                corresponding input. If not None, then it must have the same
                structure as ``input_tensor_spec``. If any element is None, then
                it will be treated as math_ops.identity. This arg is helpful if
                you want to have separate preprocessings for different inputs by
                configuring a gin file without changing the code. For example,
                embedding a discrete input before concatenating it to another
                continuous vector. The output_spec of each input preprocessor i
                should be [input_size_i, d_model]. The result of all the preprocessors
                will be concatenated as a Tensor of shape ``[batch_size, input_size, d_model]``,
                where ``input_size = sum_i input_size_i``.
        """
        preprocessing_combiner = None
        if input_preprocessors is not None:
            preprocessing_combiner = NestConcat(dim=-2)
        super().__init__(input_tensor_spec,
                         input_preprocessors,
                         preprocessing_combiner=preprocessing_combiner,
                         name=name)

        assert self._processed_input_tensor_spec.ndim == 2

        input_size, d_model = self._processed_input_tensor_spec.shape
        if centralized_memory:
            self._memories = [FIFOMemory(d_model, memory_size)]
        else:
            self._memories = [
                FIFOMemory(d_model, memory_size)
                for _ in range(num_memory_layers)
            ]
        self._centralized_memory = centralized_memory

        self._core_size = core_size
        self._core_embedding = nn.Parameter(torch.Tensor(
            1, core_size, d_model))
        nn.init.uniform_(self._core_embedding, -0.1, 0.1)

        self._state_spec = [mem.state_spec for mem in self._memories]
        self._num_memory_layers = num_memory_layers
        self._num_prememory_layers = num_prememory_layers

        self._transformers = nn.ModuleList()

        for i in range(num_prememory_layers):
            self._transformers.append(
                alf.layers.TransformerBlock(d_model=d_model,
                                            num_heads=num_attention_heads,
                                            memory_size=input_size + core_size,
                                            positional_encoding='abs'))

        for i in range(num_memory_layers):
            self._transformers.append(
                alf.layers.TransformerBlock(d_model=d_model,
                                            num_heads=num_attention_heads,
                                            memory_size=memory_size +
                                            input_size + core_size,
                                            positional_encoding='abs'))
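
A minimal plain-torch sketch (hypothetical sizes) of the concatenation performed by NestConcat(dim=-2) above: each preprocessor yields a [batch_size, input_size_i, d_model] tensor, and the results are joined along the sequence dimension so that input_size = sum_i input_size_i.

import torch

d_model = 16
a = torch.zeros((4, 3, d_model))   # preprocessor output with input_size_0 = 3
b = torch.zeros((4, 5, d_model))   # preprocessor output with input_size_1 = 5
combined = torch.cat([a, b], dim=-2)
assert combined.shape == (4, 8, d_model)   # input_size = 3 + 5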
Example #16
    def test_input_preprocessor(self, lstm, preproc):
        def _check_with_shared_param(net1, net2, shared_subnet=None):
            net1_params = set(net1.parameters())
            net2_params = set(net2.parameters())
            # check that net1 and net2 share parameters with shared_subnet
            if shared_subnet is not None:
                shared_params = set(shared_subnet.parameters())
                for p in shared_params:
                    self.assertTrue((p in net1_params) and (p in net2_params))

            # apart from those, net1 and net2 do not share parameters
            for p1, p2 in zip(net1_params, net2_params):
                if shared_subnet is None or p1 not in shared_params:
                    self.assertTrue(p1 is not p2)

        # 1) test input_preprocessor copy and each copy has its own parameters
        input_preprocessor = preproc
        input_preprocessor_copy = input_preprocessor.copy()

        if not preproc._singleton_instance:
            _check_with_shared_param(input_preprocessor,
                                     input_preprocessor_copy)
        elif preproc._singleton_instance:
            _check_with_shared_param(input_preprocessor,
                                     input_preprocessor_copy,
                                     input_preprocessor)

        if lstm:
            network_ctor = functools.partial(LSTMEncodingNetwork,
                                             hidden_size=(1, ),
                                             post_fc_layer_params=(2, 2))
        else:
            network_ctor = functools.partial(EncodingNetwork,
                                             fc_layer_params=(10, 10))

        net = network_ctor(
            input_tensor_spec=[
                TestInputpreprocessor.input_spec,
                TestInputpreprocessor.input_spec
            ],
            input_preprocessors=[input_preprocessor, torch.relu],
            preprocessing_combiner=NestConcat(dim=1))

        # 2) test copied network has its own parameters, including
        # parameters from input preprocessors
        copied_net = net.copy()
        if not preproc._singleton_instance:
            _check_with_shared_param(net, copied_net)
        else:
            _check_with_shared_param(net, copied_net, input_preprocessor)

        # 3) test that each replica of the NaiveParallelNetwork has its own
        # parameters, including parameters from input preprocessors
        replicas = 2
        p_net = alf.networks.network.NaiveParallelNetwork(net, replicas)
        if not preproc._singleton_instance:
            _check_with_shared_param(p_net._networks[0], p_net._networks[1])
        else:
            _check_with_shared_param(p_net._networks[0], p_net._networks[1],
                                     input_preprocessor)

        # 4) test network forward
        batch_size = 6
        batch = TestInputpreprocessor.input_spec.zeros(
            outer_dims=(batch_size, ))

        if lstm:
            state = [(torch.zeros((batch_size, 1)), ) * 2]
            p_state = [(torch.zeros((batch_size, replicas, 1)), ) * 2]
        else:
            state = ()
            p_state = ()

        net([batch, batch], state)
        p_net([batch, batch], p_state)
Example #17
    def test_nest_selective_concat_tensors(self, mask, expected):
        ntuple = NTuple(
            a=dict(x=torch.zeros((2, 3)), y=torch.zeros((2, 4))),
            b=torch.zeros((2, 10)))
        ret = NestConcat(mask)(ntuple)
        self.assertTensorEqual(ret, expected)