Пример #1
0
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 true_obs_shape=(4, ),
                 action_embed_size=2,
                 **kw):
        """Set up the DQN base model plus a separate action-embedding network.

        Args:
            obs_space: Observation space handed to ``DQNTorchModel``.
            action_space: Action space shared by the base model and the
                embedding network.
            num_outputs: Output size requested for the base model.
            model_config: Model config, reused as-is for the embedding
                network.
            name: Model name; the embedding network is named ``name`` with an
                ``"_action_embed"`` suffix.
            true_obs_shape: Shape of the ``Box(-1, 1)`` space the embedding
                network is built on.
            action_embed_size: Number of outputs of the embedding network.
            **kw: Extra keyword arguments forwarded to ``DQNTorchModel``.
        """
        DQNTorchModel.__init__(
            self, obs_space, action_space, num_outputs, model_config, name,
            **kw)

        # The embedding network gets its own bounded input space instead of
        # the ``obs_space`` passed to the base model.
        embed_input_space = Box(-1, 1, shape=true_obs_shape)
        embed_model_name = name + "_action_embed"
        self.action_embed_model = TorchFC(
            embed_input_space,
            action_space,
            action_embed_size,
            model_config,
            embed_model_name,
        )
Пример #2
0
    def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
        action_embed_size=2,  # Dimensionality of sentence embeddings  TODO don't make this hard-coded
        **kw):
        """Build the DQN base model, a "true_obs" preprocessor and an embedder.

        Args:
            obs_space: Observation space; must expose ``original_space`` with
                a ``"true_obs"`` entry.
            action_space: Action space shared by the base model and the
                embedding network.
            num_outputs: Output size requested for the base model.
            model_config: Model config, reused as-is for the embedding
                network.
            name: Model name; the embedding network is named ``name`` with an
                ``"_action_embed"`` suffix.
            action_embed_size: Number of outputs of the embedding network.
            **kw: Extra keyword arguments forwarded to ``DQNTorchModel``.
        """
        DQNTorchModel.__init__(
            self, obs_space, action_space, num_outputs, model_config, name,
            **kw)

        # Only the "true_obs" entry of the original space is preprocessed.
        true_obs_space = obs_space.original_space["true_obs"]
        self.true_obs_preprocessor = DictFlatteningPreprocessor(true_obs_space)

        # The embedding network is built on a Box bounded to [-10, 10] whose
        # shape is taken from the preprocessor.
        embed_input_space = Box(-10, 10, self.true_obs_preprocessor.shape)
        embed_model_name = name + "_action_embed"
        self.action_embed_model = TorchFC(
            embed_input_space,
            action_space,
            action_embed_size,
            model_config,
            embed_model_name,
        )
Пример #3
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build a DQN model preceded by a small convolutional filter.

        The base ``DQNTorchModel`` is constructed on a flat float space of
        size ``obs_space.shape[0] * obs_space.shape[1] * 4`` rather than on
        ``obs_space`` itself; ``self.convfilter`` is the conv stack defined
        here.

        Args:
            obs_space: Observation space; only ``shape[0]`` and ``shape[1]``
                are read.
            action_space: Action space forwarded to ``DQNTorchModel``.
            num_outputs: Output size forwarded to ``DQNTorchModel``.
            model_config: Model config forwarded to ``DQNTorchModel``.
            name: Model name forwarded to ``DQNTorchModel``.
        """
        # Initialise the nn.Module machinery first. Running it after the base
        # constructor would reset the parameter/buffer/sub-module registries
        # and drop anything DQNTorchModel had registered.
        nn.Module.__init__(self)

        flat_size = obs_space.shape[0] * obs_space.shape[1] * 4

        # ``np.float`` was removed in NumPy 1.24; it was an alias of the
        # builtin ``float``, i.e. a float64 dtype.
        DQNTorchModel.__init__(
            self,
            Space(shape=(flat_size, ), dtype=np.float64),
            action_space, num_outputs, model_config, name)

        # NOTE(review): with kernel_size=3 and padding=2 each conv grows both
        # spatial dims by 2, so the flattened conv output only has
        # ``flat_size`` features if the tensor fed to ``convfilter`` is 4
        # smaller than obs_space.shape[0] / shape[1] per dim - confirm
        # against forward().
        self.convfilter = nn.Sequential(
            nn.Conv2d(21, 8, kernel_size=3, padding=2),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.Conv2d(8, 4, kernel_size=3, padding=2),
            nn.BatchNorm2d(4),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(flat_size, 32),
            nn.ReLU(),
        )
Пример #4
0
    def __init__(self, obs_space: Any, action_space: spaces.Space, num_outputs: int, model_config: Dict, name: str,
                 maze_model_composer_config: ConfigType, spaces_config_dump_file: str, state_dict_dump_file: str,
                 dueling: bool = True, num_atoms: int = 1, **kwargs):
        """Initialise both parent classes and pick the advantage/value networks.

        :param obs_space: Observation space; its ``original_space`` attribute is passed to
            ``MazeRLlibBaseModel``, while ``obs_space`` itself goes to ``DQNTorchModel``.
        :param action_space: Action space; must be a ``spaces.Discrete``.
        :param num_outputs: Ignored; overwritten with ``action_space.n``.
        :param model_config: Model config passed to both parent constructors.
        :param name: Model name; ``'_maze_wrapper'`` is appended for ``DQNTorchModel``.
        :param maze_model_composer_config: Forwarded to ``MazeRLlibBaseModel``.
        :param spaces_config_dump_file: Forwarded to ``MazeRLlibBaseModel``.
        :param state_dict_dump_file: Forwarded to ``MazeRLlibBaseModel``.
        :param dueling: Must be ``True``.
        :param num_atoms: Must be ``1``.
        :param kwargs: Extra keyword arguments forwarded to ``DQNTorchModel``.
        :raises AssertionError: If the action space is not discrete, ``dueling`` is not ``True``,
            ``num_atoms`` is not ``1``, or the composed model does not have exactly one critic network.
        """
        # The incoming value is deliberately discarded; it is recomputed from the action space below.
        unused(num_outputs)

        org_obs_space = obs_space.original_space

        assert isinstance(action_space, spaces.Discrete), f'Only discrete spaces supported but got {action_space}'

        num_outputs = action_space.n
        # MazeRLlibBaseModel receives the action space wrapped in a Dict under the key 'action'.
        org_action_space = spaces.Dict({'action': action_space})

        assert dueling is True, 'Only dueling==True is supported at this point'
        assert num_atoms == 1, 'Only num_atoms == 1 is suported at this point'

        DQNTorchModel.__init__(self, obs_space=obs_space,
                               action_space=action_space,
                               model_config=model_config,
                               num_outputs=num_outputs,
                               name=name + '_maze_wrapper', dueling=dueling, num_atoms=num_atoms, **kwargs)
        # Seed the Python, NumPy and torch global RNGs with a fixed value before the Maze model is built.
        # NOTE(review): presumably for reproducible weight initialisation; this resets process-wide RNG
        # state on every construction - confirm that is intended.
        import random
        import numpy as np
        random.seed(42)
        np.random.seed(42)
        torch.manual_seed(42)

        MazeRLlibBaseModel.__init__(self, observation_space=org_obs_space, action_space=org_action_space,
                                    model_config=model_config, maze_model_composer_config=maze_model_composer_config,
                                    spaces_config_dump_file=spaces_config_dump_file,
                                    state_dict_dump_file=state_dict_dump_file)

        # The first policy network is kept as the private advantage module; the public
        # ``advantage_module`` attribute is set to a pass-through.
        # NOTE(review): presumably this overrides the module DQNTorchModel created - confirm.
        self._advantage_module: nn.Module = list(self.model_composer.policy.networks.values())[0]
        self.advantage_module = nn.Identity()

        # The critic must exist and contain exactly one network
        assert self.model_composer.critic is not None and len(self.model_composer.critic.networks) == 1
        self._value_module: nn.Module = list(self.model_composer.critic.networks.values())[0]

        # Placeholders, initialised empty here
        self._value_module_input = None
        self._model_maze_input: Optional[Dict[str, torch.Tensor]] = None
Пример #5
0
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name, **kwargs):
     """Build a DQN model with a two-layer 3D-convolutional network.

     Args:
         obs_space: Observation space forwarded to ``DQNTorchModel``.
         action_space: Action space forwarded to ``DQNTorchModel``.
         num_outputs: Output size of the base model and of the final
             linear layer.
         model_config: Model config; ``model_config["custom_options"]``
             must provide ``"shape"`` and ``"num_stack"``.
         name: Model name forwarded to ``DQNTorchModel``.
         **kwargs: Extra keyword arguments forwarded to ``DQNTorchModel``.
     """
     nn.Module.__init__(self)
     DQNTorchModel.__init__(
         self, obs_space, action_space, num_outputs, model_config, name,
         **kwargs)

     custom_options = model_config["custom_options"]
     self.shape = custom_options["shape"]
     self.num_stack = custom_options["num_stack"]
     # The third entry of the configured shape is used as the channel count.
     in_channels = self.shape[2]

     # Layers are created in network order and registered under fixed
     # names, so state-dict keys stay "conv_1", "conv_2", "output", ...
     named_layers = (
         ("conv_1",
          nn.Conv3d(in_channels, 16, kernel_size=3, stride=2, padding=2)),
         ("relu_1", nn.ReLU()),
         ("conv_2", nn.Conv3d(16, 16, kernel_size=3, stride=2)),
         ("relu_2", nn.ReLU()),
         ("flatten", Flatten()),
         # The flattened feature count is fixed at 2400.
         ("output", nn.Linear(in_features=2400, out_features=num_outputs)),
     )
     self.module = nn.Sequential()
     for layer_name, layer in named_layers:
         self.module.add_module(layer_name, layer)
Пример #6
0
 def forward(self, input_dict, state, seq_lens):
     """Run ``self.net`` on the observation, then the base forward pass.

     The caller's ``input_dict`` is not modified: the ``"obs"`` entry is
     replaced on a copy, and that copy is passed to
     ``DQNTorchModel.forward`` together with ``state`` and ``seq_lens``.
     """
     patched_inputs = input_dict.copy()
     raw_obs = patched_inputs["obs"]
     patched_inputs["obs"] = self.net(raw_obs)
     return DQNTorchModel.forward(self, patched_inputs, state, seq_lens)
Пример #7
0
 def with_updates(self, *args, **kwargs):
     """Delegate to ``DQNTorchModel.with_updates`` and return its result.

     All positional and keyword arguments are forwarded unchanged. The
     delegate's return value is passed back to the caller instead of being
     discarded, so a result produced by the base implementation is not
     lost; callers that ignored the previous ``None`` are unaffected.
     """
     return DQNTorchModel.with_updates(self, *args, **kwargs)