def test_saving_scheduler_on_all_model_free_algo(self):
    """Run the saving-scheduler experiment once per model-free algorithm.

    Each factory builds a fresh algorithm; setUp/tearDown are invoked
    manually around every run so each experiment starts from a clean
    test fixture.
    """
    for algo_factory in (self.create_ppo, self.create_dqn, self.create_ddpg):
        self.setUp()
        single_exp_runner(_saving_scheduler(self, algo_factory),
                          auto_choose_gpu_flag=False,
                          gpu_id=0,
                          del_if_log_path_existed=True)
        self.tearDown()
def test_exp_with_scheduler(self, algo=None, locals=None):
    """Run a model-free experiment with a linear LEARNING_RATE scheduler.

    :param algo: optional pre-built algorithm; when None a fresh DQN is
        created via ``self.create_dqn()``.
    :param locals: dict of objects (``env``, ``env_spec``) returned by the
        ``create_*`` helper alongside the algorithm. NOTE(review): shadows
        the builtin ``locals`` — name kept for caller compatibility.
    """

    def wrap_algo(algo=None, locals=None):
        def func(algo=algo, locals=locals):
            # End the experiment after exactly 500 agent training samples.
            GlobalConfig().set('DEFAULT_EXPERIMENT_END_POINT',
                               dict(TOTAL_AGENT_TRAIN_SAMPLE_COUNT=500,
                                    TOTAL_AGENT_TEST_SAMPLE_COUNT=None,
                                    TOTAL_AGENT_UPDATE_COUNT=None))
            if not algo:
                algo, locals = self.create_dqn()
            env_spec = locals['env_spec']
            env = locals['env']
            agent = self.create_agent(env=locals['env'],
                                      algo=algo,
                                      name='agent',
                                      eps=self.create_eps(env_spec)[0],
                                      env_spec=env_spec)[0]
            exp = self.create_exp(name='model_free', env=env, agent=agent)
            # Anneal LEARNING_RATE linearly from 0.01 down to 0.0001 over
            # the configured number of training samples.
            algo.parameters.set_scheduler(
                param_key='LEARNING_RATE',
                to_tf_ph_flag=True,
                scheduler=LinearSchedule(
                    t_fn=exp.TOTAL_ENV_STEP_TRAIN_SAMPLE_COUNT,
                    schedule_timesteps=GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[
                        'TOTAL_AGENT_TRAIN_SAMPLE_COUNT'],
                    final_p=0.0001,
                    initial_p=0.01))
            exp.run()
            self.assertEqual(exp.TOTAL_AGENT_TEST_SAMPLE_COUNT(),
                             exp.TOTAL_ENV_STEP_TEST_SAMPLE_COUNT())
            # BUG FIX: the original called assertEqual(a, b, 500), which
            # passes 500 as the *msg* argument — the "equals 500" check was
            # never performed. Assert both counters agree, and that they hit
            # the configured end point.
            self.assertEqual(exp.TOTAL_AGENT_TRAIN_SAMPLE_COUNT(),
                             exp.TOTAL_ENV_STEP_TRAIN_SAMPLE_COUNT())
            # NOTE(review): assumes the flow stops exactly at the end point
            # (no overshoot past 500) — confirm against the flow's stopping
            # semantics.
            self.assertEqual(exp.TOTAL_AGENT_TRAIN_SAMPLE_COUNT(), 500)

        return func

    single_exp_runner(wrap_algo(algo, locals),
                      auto_choose_gpu_flag=False,
                      gpu_id=0,
                      del_if_log_path_existed=True)
def test_saving_scheduler_on_all_model_free_algo(self):
    """Run the saving-scheduler experiment for PPO, DQN and DDPG.

    ``sample_traj_flag`` pairs element-wise with the algorithm factories:
    True for the PPO run, False for DQN and DDPG.
    """
    algo_factories = (self.create_ppo, self.create_dqn, self.create_ddpg)
    traj_flags = (True, False, False)
    # Fresh fixture per algorithm: setUp/tearDown bracket every run.
    for factory, traj_flag in zip(algo_factories, traj_flags):
        self.setUp()
        single_exp_runner(_saving_scheduler(self, factory,
                                            sample_traj_flag=traj_flag),
                          auto_choose_gpu_flag=False,
                          gpu_id=0,
                          del_if_log_path_existed=True)
        self.tearDown()
def test_experiment(self):
    """Smoke-test a plain DQN experiment run for 200 training samples."""

    def run_experiment():
        # End the experiment after 200 agent training samples.
        GlobalConfig().set('DEFAULT_EXPERIMENT_END_POINT',
                           dict(TOTAL_AGENT_TRAIN_SAMPLE_COUNT=200,
                                TOTAL_AGENT_TEST_SAMPLE_COUNT=None,
                                TOTAL_AGENT_UPDATE_COUNT=None))
        dqn, created = self.create_dqn()
        spec = created['env_spec']
        environment = created['env']
        agent = self.create_agent(env=environment,
                                  algo=dqn,
                                  name='agent',
                                  eps=self.create_eps(spec)[0],
                                  env_spec=spec)[0]
        experiment = self.create_exp(name='model_free',
                                     env=environment,
                                     agent=agent)
        experiment.run()

    single_exp_runner(run_experiment,
                      auto_choose_gpu_flag=False,
                      gpu_id=0,
                      del_if_log_path_existed=True)
start_test_after_sample_count=5, start_train_after_sample_count=5, train_func_and_args=(agent.train, (), dict()), test_func_and_args=(agent.test, (), dict(sample_count=10)), sample_func_and_args=(agent.sample, (), dict(sample_count=100, env=agent.env, store_flag=True)) ) experiment = Experiment( tuner=None, env=env, agent=agent, flow=flow, name=name + 'experiment_debug' ) dqn.parameters.set_scheduler(param_key='LEARNING_RATE', scheduler=LinearScheduler( t_fn=experiment.TOTAL_AGENT_TRAIN_SAMPLE_COUNT, schedule_timesteps=GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[ 'TOTAL_AGENT_TRAIN_SAMPLE_COUNT'], final_p=0.0001, initial_p=0.01)) experiment.run() from baconian.core.experiment_runner import single_exp_runner GlobalConfig().set('DEFAULT_LOG_PATH', './log_path') single_exp_runner(task_fn, del_if_log_path_existed=True)
noise_adder=noise_adder, name=name + '_agent') flow = create_train_test_flow(env=env, cyber=cyber, agent=agent, num_test=num_test, total_steps=total_steps, max_step_per_episode=max_step_per_episode, train_after_step=train_after_step, test_after_step=test_after_step, train_every_step=train_every_step, test_every_step=test_every_step, train_func_and_args=(agent.train, (), dict()), test_func_and_args=(agent.test, (), dict()), sample_func_and_args=(agent.sample, (), dict()), flow_type='DDPG_TrainTestFlow') experiment = Experiment(tuner=None, env=env, agent=agent, flow=flow, name=name) experiment.run() if __name__ == '__main__': single_exp_runner(task_fn, del_if_log_path_existed=True, keep_session=True)