Example #1
    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        use_state: bool,
        use_action: bool,
        use_next_state: bool,
        use_done: bool,
        **kwargs,
    ):
        """Builds reward MLP.

        Args:
          observation_space: The observation space.
          action_space: The action space.
          use_state: Should the current state be included as an input to the MLP?
          use_action: Should the current action be included as an input to the MLP?
          use_next_state: Should the next state be included as an input to the MLP?
          use_done: Should the "done" flag be included as an input to the MLP?
          kwargs: Passed straight through to `build_mlp`.
        """
        super().__init__()
        combined_size = 0

        self.use_state = use_state
        if self.use_state:
            combined_size += preprocessing.get_flattened_obs_dim(
                observation_space)

        self.use_action = use_action
        if self.use_action:
            combined_size += preprocessing.get_flattened_obs_dim(action_space)

        self.use_next_state = use_next_state
        if self.use_next_state:
            combined_size += preprocessing.get_flattened_obs_dim(
                observation_space)

        self.use_done = use_done
        if self.use_done:
            combined_size += 1

        full_build_mlp_kwargs = {
            "hid_sizes": (32, 32),
        }
        full_build_mlp_kwargs.update(kwargs)
        full_build_mlp_kwargs.update({
            # we do not want these overridden
            "in_size": combined_size,
            "out_size": 1,
            "squeeze_output": True,
        })

        self.mlp = networks.build_mlp(**full_build_mlp_kwargs)
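
For reference, the `get_flattened_obs_dim` helper used throughout these examples (here via the `preprocessing` module, presumably `stable_baselines3.common.preprocessing`) returns the length of a space's flattened representation. A minimal illustration under that assumption, using standard gym spaces:

from gym import spaces
from stable_baselines3.common.preprocessing import get_flattened_obs_dim

# Box spaces flatten to the product of their shape.
print(get_flattened_obs_dim(spaces.Box(low=0.0, high=1.0, shape=(3, 4))))  # 12
# Discrete spaces flatten to a one-hot vector of length n.
print(get_flattened_obs_dim(spaces.Discrete(5)))  # 5
# MultiBinary spaces keep their length.
print(get_flattened_obs_dim(spaces.MultiBinary(6)))  # 6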
Example #2
    def __init__(
        self, action_space: gym.Space, observation_space: gym.Space, **mlp_kwargs
    ):
        super().__init__()

        in_size = preprocessing.get_flattened_obs_dim(
            observation_space
        ) + preprocessing.get_flattened_obs_dim(action_space)
        self.mlp = build_mlp(
            **{"in_size": in_size, "out_size": 1, **mlp_kwargs}
        )
Example #3
 def __init__(self, observation_space: gym.Space):
     super(FlattenBatchNormExtractor,
           self).__init__(observation_space,
                          get_flattened_obs_dim(observation_space))
     self.flatten = nn.Flatten()
     self.batch_norm = nn.BatchNorm1d(self._features_dim)
     self.dropout = nn.Dropout(0.5)
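
The snippet shows only the constructor. A plausible forward pass, assuming the layers are meant to be applied in the order they are defined (flatten, then batch norm, then dropout) and that `torch` is imported as `th`; the original forward method is not shown, so this is only a sketch:

 def forward(self, observations: th.Tensor) -> th.Tensor:
     # Flatten the batch of observations, normalize it, then apply dropout.
     return self.dropout(self.batch_norm(self.flatten(observations)))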
Example #4
 def _initialize(self):
     self.dense1 = tf.keras.layers.Dense(
         400,
         input_shape=(get_flattened_obs_dim(TaskEnv.observation_space) +
                      get_action_dim(TaskEnv.action_space), ),
         activation='relu')
     self.dense2 = tf.keras.layers.Dense(300, activation='relu')
     self.dense3 = tf.keras.layers.Dense(1)
Example #5
    def __init__(self):
        super().__init__()

        self._initialize()

        # Initialize parameters by calling the model once on a dummy batch.
        self.call(
            tf.constant(
                np.zeros(shape=(1, get_flattened_obs_dim(TaskEnv.observation_space)))))

        self.loss_function_id = ACTOR_LOSS
Example #6
    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        *,
        base_reward_net: Optional[nn.Module] = None,
        potential_net: Optional[nn.Module] = None,
        **kwargs,
    ):
        """Builds a simple shaped reward network.

        Args:
          observation_space: The observation space.
          action_space: The action space.
          base_reward_net: Network responsible for computing "base" reward.
          potential_net: Network responsible for computing a potential
            function that will be used to provide additional potential-based
            shaping, in addition to the reward produced by `base_reward_net`.
          kwargs: Passed through to `RewardNetShaped`.
        """
        super().__init__(
            observation_space,
            action_space,
            **kwargs,
        )

        if base_reward_net is None:
            self._base_reward_net = BasicRewardMLP(
                observation_space=self.observation_space,
                action_space=self.action_space,
                use_state=self.use_state,
                use_action=self.use_action,
                use_next_state=self.use_next_state,
                use_done=self.use_done,
                hid_sizes=(32, 32),
            )
        else:
            self._base_reward_net = base_reward_net

        if potential_net is None:
            potential_in_size = preprocessing.get_flattened_obs_dim(
                self.observation_space)
            self._potential_net = networks.build_mlp(
                in_size=potential_in_size,
                hid_sizes=(32, 32),
                squeeze_output=True,
                flatten_input=True,
            )
        else:
            self._potential_net = potential_net
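
For context, potential-based shaping adds the discounted potential of the next state minus the potential of the current state to the base reward, which leaves the set of optimal policies unchanged. A hypothetical helper illustrating the arithmetic only; the actual combination is handled by the parent `RewardNetShaped` class and may differ in detail:

def shaped_reward_sketch(base_reward, potential_s, potential_next_s, discount_factor):
    # r'(s, a, s') = r(s, a, s') + discount_factor * phi(s') - phi(s)
    return base_reward + discount_factor * potential_next_s - potential_s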
Example #7
 def __init__(self, observation_space: gym.Space):
     super().__init__(observation_space, get_flattened_obs_dim(observation_space))
     self.total_available_modules = 8
     _pns = []
     for i in range(self.total_available_modules):
         alignment_matrix = nn.Linear(26, 26)
         if common.args.pns_init:
             # The first 8 inputs are assumed to be the global observation (a
             # reasonable prior), so initialize that block to the identity;
             # this may make learning faster.
             with th.no_grad():
                 alignment_matrix.weight[:, :8] = 0.
                 alignment_matrix.weight[:8, :] = 0.
                 # Use a separate index so the outer loop variable `i` is not shadowed.
                 for j in range(8):
                     alignment_matrix.weight[j, j] = 1.
         _pns.append(alignment_matrix)
     self.pns = nn.ModuleList(_pns)
     self.robot_id_2_idx = {}
Example #8
def _setup_airl_undiscounted_shaped_reward_net(venv):
    potential_in_size = preprocessing.get_flattened_obs_dim(
        venv.observation_space)
    potential_net = networks.build_mlp(
        in_size=potential_in_size,
        hid_sizes=(32, 32),
        squeeze_output=True,
    )
    reward_net = reward_nets.BasicShapedRewardNet(
        venv.observation_space,
        venv.action_space,
        discount_factor=1.0,
        use_next_state=True,
        use_done=True,
        potential_net=potential_net,
    )
    return discrim_nets.DiscrimNetAIRL(reward_net)
Example #9
    def __init__(self,
                 observation_space: gym.spaces.Dict,
                 cnn_output_dim: int = 256):
        # TODO: the features dim is not known before iterating over all the items,
        # so pass a placeholder for now (it is overwritten below). This is dirty!
        super(CombinedExtractor, self).__init__(observation_space,
                                                features_dim=1)

        extractors = {}

        total_concat_size = 0
        for key, subspace in observation_space.spaces.items():
            if is_image_space(subspace):
                extractors[key] = NatureCNN(subspace,
                                            features_dim=cnn_output_dim)
                total_concat_size += cnn_output_dim
            else:
                # The observation key is a vector, flatten it if needed
                extractors[key] = nn.Flatten()
                total_concat_size += get_flattened_obs_dim(subspace)

        self.extractors = nn.ModuleDict(extractors)

        # Update the features dim manually
        self._features_dim = total_concat_size
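
The constructor above only builds the per-key extractors; the matching forward pass would run each extractor on its sub-observation and concatenate the results along the feature dimension. A minimal sketch, assuming `torch` is imported as `th` and that observations arrive as a dict of batched tensors keyed like the observation space:

    def forward(self, observations) -> th.Tensor:
        encoded_tensors = []
        for key, extractor in self.extractors.items():
            # Each extractor maps its sub-observation to a flat feature tensor.
            encoded_tensors.append(extractor(observations[key]))
        # Concatenate into a single batch of vectors of size self._features_dim.
        return th.cat(encoded_tensors, dim=1)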
Example #10
 def __init__(self, observation_space: gym.Space):
     super(FlattenExtractor,
           self).__init__(observation_space,
                          get_flattened_obs_dim(observation_space))
     self.flatten = nn.Flatten()
Example #11
 def __init__(self, pg_agent_config: PolicyGradientAgentConfig, observation_space: gym.Space,
              node_net: bool = False, at_net: bool = False):
     super(FlattenExtractor, self).__init__(pg_agent_config, observation_space,
                                            get_flattened_obs_dim(observation_space), at_net=at_net,
                                            node_net=node_net)
     self.flatten = nn.Flatten()