c = Config()
# c.restart_from_trial = "2020_05_09_15_00_31"
c.max_episodes = 50000
c.max_steps = 1000
c.replay_size = 50000
c.agent_num = 1
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/mcarrier/naive_ppo_parallel/"

# train configs
# lr: learning rate, int: interval
c.workers = 5
c.discount = 0.99
c.learning_rate = 3e-4
c.entropy_weight = None
c.ppo_update_batch_size = 100
c.ppo_update_times = 50
c.ppo_update_int = 5  # = the number of episodes stored in ppo replay buffer
c.model_save_int = c.ppo_update_int * 20  # in episodes
c.profile_int = 50  # in episodes

if __name__ == "__main__":
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
    global_board.init(save_env.get_trial_train_log_dir())
    writer = global_board.writer
    logger.info("Directories prepared.")
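# Note: Config, SaveEnv, prep_args, global_board and logger come from the
# project's own utility modules; their imports are omitted in this excerpt.
# Also, ``c.restart_from_trial`` is commented out above yet still read by
# ``SaveEnv(...)``. Below is a minimal sketch (an assumption about behaviour,
# not the project's actual Config code) of an attribute bag whose unset
# fields read as None, which would make that pattern safe:

class ConfigSketch:
    """Hypothetical stand-in for Config: unset fields fall back to None."""

    def __getattr__(self, name):
        # __getattr__ is invoked only when normal attribute lookup fails,
        # so explicitly assigned fields are returned as usual.
        return None

_sketch = ConfigSketch()
_sketch.device = "cuda:0"
assert _sketch.device == "cuda:0"          # explicitly set field
assert _sketch.restart_from_trial is None  # never set, reads as None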
# configs
c = Config()
# c.restart_from_trial = "2020_05_09_15_00_31"
c.max_episodes = 50000
c.max_steps = 300
c.replay_size = 10000
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/lunar_lander/naive_ppo/"

# train configs
# lr: learning rate, int: interval
c.discount = 0.99
c.learning_rate = 1e-3
c.entropy_weight = 1e-2
c.ppo_update_batch_size = 100
c.ppo_update_times = 4
c.ppo_update_int = 6  # = the number of episodes stored in ppo replay buffer
c.model_save_int = 100  # in episodes
c.profile_int = 50  # in episodes

if __name__ == "__main__":
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
    global_board.init(save_env.get_trial_train_log_dir())
    writer = global_board.writer
    logger.info("Directories prepared.")
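# The three *_int fields above are all intervals measured in episodes. Below
# is a hedged skeleton (every helper here is a hypothetical stub, not the
# project's API) showing how such intervals are typically consumed: buffer
# ``ppo_update_int`` complete episodes, run ``ppo_update_times`` PPO epochs
# over them with ``ppo_update_batch_size`` samples per step, and checkpoint
# every ``model_save_int`` episodes.

def run_episode(max_steps):
    """Stub: collect one episode of at most ``max_steps`` transitions."""

def ppo_update(update_times, batch_size):
    """Stub: run ``update_times`` PPO epochs, then clear the episode buffer."""

def save_model(episode):
    """Stub: checkpoint the current model weights."""

def train(c):
    for episode in range(1, c.max_episodes + 1):
        run_episode(c.max_steps)
        if episode % c.ppo_update_int == 0:
            # the replay buffer holds exactly ppo_update_int episodes here
            ppo_update(c.ppo_update_times, c.ppo_update_batch_size)
        if episode % c.model_save_int == 0:
            save_model(episode)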