def _build_for_dqn(self) -> Dict[str, Any]:
    return {
        'name': os.path.join(self.folder, 'DeepQAgent'),
        'env_spec': self.env_spec,
        'env_wrappers': self.env_wrappers,
        'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth), n_actions=6,
                                     output_activation=None, opt='adam', learning_rate=0.000105),
        'gamma': 0.99,
        'final_reward': None,
        # Use eps_initial > 1 here so that only random actions are used for the first steps, which makes
        # filling the replay buffer more efficient. It also avoids decaying eps while not yet training.
        # Alternative: 'eps': EpsilonGreedy(eps_initial=1.2, decay=0.000025, eps_min=0.01,
        #                                   decay_schedule='compound'),
        'eps': EpsilonGreedy(eps_initial=1.1, decay=0.00001, eps_min=0.01, decay_schedule='linear'),
        'replay_buffer': ContinuousBuffer(buffer_size=10000),
        'replay_buffer_samples': 32
    }
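# A minimal, standalone sketch of how the two decay schedules mentioned above typically behave.
# These helpers are illustrative assumptions, not the library's EpsilonGreedy implementation:
# 'linear' is taken to subtract a fixed amount per step and 'compound' to multiply by
# (1 - decay) per step. With eps_initial > 1, epsilon stays >= 1 for a while, so every action
# in that window is random.
def linear_eps(step: int, eps_initial: float, decay: float, eps_min: float) -> float:
    return max(eps_min, eps_initial - decay * step)


def compound_eps(step: int, eps_initial: float, decay: float, eps_min: float) -> float:
    return max(eps_min, eps_initial * (1.0 - decay) ** step)


# With the linear settings above, epsilon only falls below 1.0 after
# (1.1 - 1.0) / 0.00001 = 10,000 steps of purely random actions.
for step in (0, 10_000, 100_000):
    print(step,
          round(linear_eps(step, eps_initial=1.1, decay=0.00001, eps_min=0.01), 4),
          round(compound_eps(step, eps_initial=1.2, decay=0.000025, eps_min=0.01), 4))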
def _build_for_dqn(self) -> Dict[str, Any]: """This isn't tuned.""" return { 'name': os.path.join(self.folder, 'DeepQAgent'), 'env_spec': self.env_spec, 'model_architecture': DenseNN(observation_shape=(2, ), n_actions=3, opt='adam', learning_rate=0.001, unit_scale=12, dueling=False), 'gamma': 0.99, 'final_reward': 650, 'replay_buffer_samples': 32, 'eps': EpsilonGreedy(eps_initial=0.1, decay=0.002, eps_min=0.002, actions_pool=list(range(3))), 'replay_buffer': ContinuousBuffer(buffer_size=200) }
def _load_models_and_buffer(self) -> None:
    self._action_model = keras.models.load_model(f"{self._fn}/action_model")
    self._target_model = keras.models.load_model(f"{self._fn}/target_model")
    self.replay_buffer = ContinuousBuffer.load(f"{self._fn}/replay_buffer.joblib")
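# Hedged sketch of a save-side counterpart to the loader above. keras models can be written with
# Model.save(); for the buffer this falls back to joblib.dump, which pairs with the .joblib file
# the loader reads. Whether ContinuousBuffer exposes its own save() method (or expects a
# particular serialised form) is an assumption not confirmed here.
import joblib


def _save_models_and_buffer(self) -> None:
    self._action_model.save(f"{self._fn}/action_model")
    self._target_model.save(f"{self._fn}/target_model")
    joblib.dump(self.replay_buffer, f"{self._fn}/replay_buffer.joblib")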
def _build_for_dqn(self) -> Dict[str, Any]:
    return {
        'name': os.path.join(self.folder, 'DeepQAgent'),
        'env_spec': self.env_spec,
        'env_wrappers': self.env_wrappers,
        'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth), n_actions=6,
                                     output_activation=None, opt='adam', learning_rate=0.00008),
        'gamma': 0.99,
        'final_reward': None,
        'eps': EpsilonGreedy(eps_initial=2, decay=0.000025, eps_min=0.01, decay_schedule='linear'),
        'replay_buffer': ContinuousBuffer(buffer_size=40000),
        'replay_buffer_samples': 32
    }
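# Worked arithmetic for the schedule above (an observation about the numbers, not a documented
# guarantee): with eps_initial=2 and a linear decay of 0.000025 per step, epsilon stays >= 1 for
# (2 - 1) / 0.000025 = 40,000 steps, so actions are purely random until roughly the point where
# the 40,000-transition replay buffer is full.
eps_initial, decay, buffer_size = 2.0, 0.000025, 40_000
steps_fully_random = (eps_initial - 1.0) / decay
assert round(steps_fully_random) == buffer_size  # 40,000 steps of pure exploration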
def _build_mock_config(base_config: PongConfig) -> MagicMock:
    config = base_config.build()
    config['eps'] = EpsilonGreedy(eps_initial=0.5, decay=0.0001, eps_min=0.01, decay_schedule='linear',
                                  actions_pool=list(range(3)))
    config['replay_buffer'] = ContinuousBuffer(buffer_size=10)
    config['replay_buffer_samples'] = 2

    mock_config = MagicMock()
    mock_config.gpu_memory = 2048
    mock_config.build.return_value = config

    return mock_config
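# Hedged sketch of how a mock config like this might be used in a test: the small buffer and
# batch size suggest it exists to keep agent tests fast. The test name, the PongConfig
# constructor arguments, and the assertions are illustrative assumptions, not copied from the
# repository's test suite.
def test_mock_config_returns_trimmed_training_settings() -> None:
    mock_config = _build_mock_config(PongConfig(agent_type='dqn'))  # constructor kwargs assumed

    config = mock_config.build()

    # build() should hand back the patched, test-sized settings.
    assert config['replay_buffer_samples'] == 2
    assert mock_config.gpu_memory == 2048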
def _build_for_dqn(self, dueling: bool = False) -> Dict[str, Any]:
    # Pick the model builder based on which observation types are in use.
    if self.using_simple_obs and self.using_smm_obs:
        model_config = self._build_with_splitter_conv_and_dense_model(dueling)
    elif self.using_simple_obs:
        model_config = self._build_with_dense_model(dueling)
    elif self.using_smm_obs:
        model_config = self._build_with_splitter_conv_model(dueling)
    else:
        raise NotImplementedError()

    config = {'name': os.path.join(self.folder, 'DeepQAgent'),
              'env_spec': self.env_spec,
              'gamma': 0.992,
              'final_reward': 0,
              'replay_buffer_samples': 32,
              'eps': EpsilonGreedy(eps_initial=0.5, decay=0.00001, eps_min=0.01, actions_pool=list(range(19))),
              'replay_buffer': ContinuousBuffer(buffer_size=10000)}
    config.update(model_config)

    return config
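# The `dueling` flag above toggles a dueling Q-network head. As a generic, hedged illustration
# (not the library's DenseNN/ConvNN code), a dueling head splits the network into a state-value
# stream V(s) and an advantage stream A(s, a), then recombines them as
# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). Layer sizes and shapes here are placeholders.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


def dueling_head(features, n_actions: int):
    value = layers.Dense(1)(features)              # V(s): one scalar per state
    advantage = layers.Dense(n_actions)(features)  # A(s, a): one value per action
    # Centre the advantages so V and A are identifiable, then broadcast V across actions.
    return layers.Lambda(
        lambda va: va[0] + va[1] - tf.reduce_mean(va[1], axis=1, keepdims=True)
    )([value, advantage])


inputs = keras.Input(shape=(115,))
hidden = layers.Dense(128, activation='relu')(inputs)
model = keras.Model(inputs=inputs, outputs=dueling_head(hidden, n_actions=19))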
        partial(FrameBufferWrapper, obs_shape=(115,), buffer_length=2, buffer_function='stack')],
    model_architecture=DenserNN(observation_shape=(115, 2), n_actions=19, dueling=False,
                                output_activation=None, opt='adam', learning_rate=0.00009),
    eps=EpsilonPolicy(eps_initial=0.75, decay=0.000001, eps_min=0.01, policy=bot),
    replay_buffer=ContinuousBuffer(buffer_size=8000),
    training_history=TrainingHistory(plotting_on=True, plot_every=10, agent_name=name))

copy_pretrained_model_weights(from_model=pretrained_mod, to_model=agent._action_model)
copy_pretrained_model_weights(from_model=pretrained_mod, to_model=agent._target_model)

agent.train(n_episodes=N_EPISODES, render=False, checkpoint_every=100, max_episode_steps=3000)
agent.save()
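# Hedged sketch of what a weight-transfer helper like copy_pretrained_model_weights above could
# do; the real helper is not shown here, so both functions below are assumptions about its
# behaviour rather than its implementation.
from tensorflow import keras


def copy_weights_sketch(from_model: keras.Model, to_model: keras.Model) -> None:
    # Simplest case: the architectures match exactly, so copy everything in one call.
    to_model.set_weights(from_model.get_weights())


def copy_matching_layers_sketch(from_model: keras.Model, to_model: keras.Model) -> None:
    # More forgiving case: copy only layers whose names and weight shapes line up.
    source_layers = {layer.name: layer for layer in from_model.layers}
    for layer in to_model.layers:
        source = source_layers.get(layer.name)
        if source is None:
            continue
        if [w.shape for w in source.get_weights()] == [w.shape for w in layer.get_weights()]:
            layer.set_weights(source.get_weights())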