def _build_for_dqn(self) -> Dict[str, Any]:
    return {
        'name': os.path.join(self.folder, 'DeepQAgent'),
        'env_spec': self.env_spec,
        'env_wrappers': self.env_wrappers,
        'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth),
                                     n_actions=6,
                                     output_activation=None,
                                     opt='adam',
                                     learning_rate=0.000105),
        'gamma': 0.99,
        'final_reward': None,
        # Use eps_initial > 1 here so only random actions are used for the first steps, which makes filling
        # the replay buffer more efficient. It also avoids decaying eps while not training.
        # Alternative: 'eps': EpsilonGreedy(eps_initial=1.2, decay=0.000025, eps_min=0.01,
        #                                   decay_schedule='compound'),
        'eps': EpsilonGreedy(eps_initial=1.1,
                             decay=0.00001,
                             eps_min=0.01,
                             decay_schedule='linear'),
        'replay_buffer': ContinuousBuffer(buffer_size=10000),
        'replay_buffer_samples': 32
    }
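The eps_initial > 1 trick described in the comment above can be illustrated with a short, standalone sketch. This is not the library's EpsilonGreedy implementation, just a hypothetical linear-decay loop reusing the same parameter names; with eps_initial=1.1 and decay=0.00001, roughly the first 10,000 steps are forced to be random, which conveniently matches the 10,000-sample replay buffer.

import random

def sketch_linear_eps(eps_initial: float = 1.1, decay: float = 0.00001, eps_min: float = 0.01,
                      n_steps: int = 30000) -> int:
    """Illustrative only: count how many of the first n_steps are forced-random under linear decay."""
    eps = eps_initial
    n_random = 0
    for _ in range(n_steps):
        if random.random() < eps:  # always True while eps >= 1, so early steps only fill the buffer
            n_random += 1
        eps = max(eps_min, eps - decay)  # 'linear' decay schedule
    return n_random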
Example #2
def _build_for_dqn(self) -> Dict[str, Any]:
    """This isn't tuned."""
    return {
        'name': os.path.join(self.folder, 'DeepQAgent'),
        'env_spec': self.env_spec,
        'model_architecture': DenseNN(observation_shape=(2,),
                                      n_actions=3,
                                      opt='adam',
                                      learning_rate=0.001,
                                      unit_scale=12,
                                      dueling=False),
        'gamma': 0.99,
        'final_reward': 650,
        'replay_buffer_samples': 32,
        'eps': EpsilonGreedy(eps_initial=0.1,
                             decay=0.002,
                             eps_min=0.002,
                             actions_pool=list(range(3))),
        'replay_buffer': ContinuousBuffer(buffer_size=200)
    }
Example #3
def _load_models_and_buffer(self) -> None:
    self._action_model = keras.models.load_model(f"{self._fn}/action_model")
    self._target_model = keras.models.load_model(f"{self._fn}/target_model")
    self.replay_buffer = ContinuousBuffer.load(f"{self._fn}/replay_buffer.joblib")

def _build_for_dqn(self) -> Dict[str, Any]:
    return {
        'name': os.path.join(self.folder, 'DeepQAgent'),
        'env_spec': self.env_spec,
        'env_wrappers': self.env_wrappers,
        'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth),
                                     n_actions=6,
                                     output_activation=None,
                                     opt='adam',
                                     learning_rate=0.00008),
        'gamma': 0.99,
        'final_reward': None,
        'eps': EpsilonGreedy(eps_initial=2,
                             decay=0.000025,
                             eps_min=0.01,
                             decay_schedule='linear'),
        'replay_buffer': ContinuousBuffer(buffer_size=40000),
        'replay_buffer_samples': 32
    }
def _build_mock_config(base_config: PongConfig) -> MagicMock:
    config = base_config.build()
    config['eps'] = EpsilonGreedy(eps_initial=0.5, decay=0.0001, eps_min=0.01, decay_schedule='linear',
                                  actions_pool=list(range(3)))
    config['replay_buffer'] = ContinuousBuffer(buffer_size=10)
    config['replay_buffer_samples'] = 2
    mock_config = MagicMock()
    mock_config.gpu_memory = 2048
    mock_config.build.return_value = config

    return mock_config
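A quick note on the MagicMock pattern above: only gpu_memory and build() are configured explicitly, so the mock can be handed to code that expects a real PongConfig, and any other attribute access returns another MagicMock instead of raising. A minimal standalone illustration, using only unittest.mock (the small dict below is a stand-in, not the real config):

from unittest.mock import MagicMock

mock_config = MagicMock()
mock_config.gpu_memory = 2048
mock_config.build.return_value = {'replay_buffer_samples': 2}

assert mock_config.build() == {'replay_buffer_samples': 2}
assert mock_config.gpu_memory == 2048
assert mock_config.anything_else is not None  # unset attributes come back as MagicMocks, not errors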
def _build_for_dqn(self, dueling: bool = False) -> Dict[str, Any]:
    if self.using_simple_obs and self.using_smm_obs:
        model_config = self._build_with_splitter_conv_and_dense_model(dueling)
    elif self.using_simple_obs:
        model_config = self._build_with_dense_model(dueling)
    elif self.using_smm_obs:
        model_config = self._build_with_splitter_conv_model(dueling)
    else:
        raise NotImplementedError()

    config = {'name': os.path.join(self.folder, 'DeepQAgent'),
              'env_spec': self.env_spec,
              'gamma': 0.992,
              'final_reward': 0,
              'replay_buffer_samples': 32,
              'eps': EpsilonGreedy(eps_initial=0.5, decay=0.00001, eps_min=0.01, actions_pool=list(range(19))),
              'replay_buffer': ContinuousBuffer(buffer_size=10000)}

    config.update(model_config)

    return config
# Note: this snippet is truncated. It shows only the tail of the agent constructor call;
# the opening `agent = ...(` and the start of the env_wrappers list are not shown.
        partial(FrameBufferWrapper,
                obs_shape=(115,),
                buffer_length=2,
                buffer_function='stack')
    ],
    model_architecture=DenserNN(observation_shape=(115, 2),
                                n_actions=19,
                                dueling=False,
                                output_activation=None,
                                opt='adam',
                                learning_rate=0.00009),
    eps=EpsilonPolicy(eps_initial=0.75,
                      decay=0.000001,
                      eps_min=0.01,
                      policy=bot),
    replay_buffer=ContinuousBuffer(buffer_size=8000),
    training_history=TrainingHistory(plotting_on=True,
                                     plot_every=10,
                                     agent_name=name))

copy_pretrained_model_weights(from_model=pretrained_mod, to_model=agent._action_model)
copy_pretrained_model_weights(from_model=pretrained_mod, to_model=agent._target_model)

agent.train(n_episodes=N_EPISODES,
            render=False,
            checkpoint_every=100,
            max_episode_steps=3000)
agent.save()