def train_eval(log_dir="DDPG",
               prev_log="",
               google_colab=False,
               seed=123,
               gpu_id=0,
               env_name="HalfCheetah-v2",
               num_frames=10000,
               tau=1e-2,
               memory_size=5000,
               hot_start=100,
               batch_size=200,
               interval_MAR=10,
               gamma=0.99,
               L2_reg=0.5,
               random_process="ou",
               mu=0.3,
               sigma=0.2,
               num_eval_episodes=1,
               eval_interval=1000):
    tf.compat.v1.set_random_seed(seed)
    np.random.seed(seed=seed)

    # prep for training
    log_dir = set_up_for_training(env_name=env_name,
                                  seed=seed,
                                  gpu_id=gpu_id,
                                  log_dir=log_dir,
                                  prev_log=prev_log,
                                  google_colab=google_colab)
    env = gym.make(env_name)
    env = Monitor(env=env, directory=log_dir["video_path"], force=True)
    replay_buffer = ReplayBuffer(memory_size, traj_dir=log_dir["traj_path"])
    reward_buffer = deque(maxlen=interval_MAR)
    summary_writer = tf.compat.v2.summary.create_file_writer(log_dir["summary_path"])

    if random_process == "ou":
        random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                                  theta=0.15,
                                                  mu=mu,
                                                  sigma=sigma)
    elif random_process == "gaussian":
        random_process = GaussianNoise(mu=mu, sigma=sigma)
    else:
        random_process = False
        assert False, "choose the random process from either gaussian or ou"

    agent = DDPG(actor=Actor,
                 critic=Critic,
                 num_action=env.action_space.shape[0],
                 random_process=random_process,
                 gamma=gamma,
                 L2_reg=L2_reg,
                 actor_model_dir=log_dir["model_path"] + "/actor",
                 critic_model_dir=log_dir["model_path"] + "/critic")

    train(agent, env, replay_buffer, reward_buffer, summary_writer,
          num_eval_episodes, num_frames, tau, eval_interval, hot_start,
          batch_size, interval_MAR, log_dir, google_colab)
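# A minimal sketch of how train_eval might be invoked directly (the __main__ guard is a
# hypothetical entry point, not part of the source); every keyword argument not passed
# here falls back to the defaults declared in the signature above.
if __name__ == "__main__":
    train_eval(env_name="HalfCheetah-v2",   # any MuJoCo-style continuous-control task
               random_process="ou",         # or "gaussian"
               num_frames=10000,
               seed=123)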
def prep_env(env_name, video_path):
    if env_name.lower() == "cartpole":
        env = gym.make("CartPole-v0")
        env.record_start = lambda: None
        env.record_end = lambda: None
    else:
        # make sure to add NoFrameskip-v4
        env = wrap_deepmind(make_atari(env_name + "NoFrameskip-v4"))
        env = Monitor(env=env, directory=video_path, force=True)
    return env
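# Usage sketch for prep_env (the video paths are hypothetical): the "cartpole" branch
# returns a plain CartPole-v0 with no-op recording hooks, while any other name is treated
# as an Atari id that gets the NoFrameskip-v4 suffix, the DeepMind wrappers, and the
# recording Monitor.
cartpole_env = prep_env("cartpole", "./video/cartpole")
pong_env = prep_env("Pong", "./video/pong")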
mu = str(params.mu).split(".")
mu = str(mu[0] + mu[1])
params.log_dir = "../../logs/logs/DDPG_batchnorm-{}-seed{}/{}-mu{}".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.actor_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/actor-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.critic_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/critic-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.video_dir = "../../logs/video/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.plot_path = "../../logs/plots/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                          theta=0.15,
                                          mu=params.mu,
                                          sigma=params.sigma)
# random_process = GaussianNoise(mu=params.mu, sigma=params.sigma)
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)
train_DDPG_original(agent, env, replay_buffer, reward_buffer, summary_writer)
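# Worked example of the mu formatting above (the value 0.3 is illustrative, not from the
# source): str(0.3).split(".") gives ["0", "3"], so mu becomes "03" and the run is logged
# under a directory ending in "-mu03".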
# Run this from the terminal and make sure the appropriate environment variables
# (e.g. for MuJoCo) are loaded:
# $ echo $LD_LIBRARY_PATH

import gym

from tf_rl.common.monitor import Monitor
import environments.register as register  # side-effect import: registers the custom Centipede envs

video_dir = "./video/"
temp = 5  # record every `temp`-th episode

env = gym.make("CentipedeSix-v1")
env = Monitor(env, video_dir, force=True)

for ep in range(10):
    if ep % temp == 0:
        print("recording")
        env.record_start()
    env.reset()
    done = False
    while not done:
        # env.render()
        action = env.action_space.sample()
        s, r, done, info = env.step(action)  # take a random action
    if ep % temp == 0:
        env.record_end()
                    default=0.05, type=float, help="magnitude of randomness")
parser.add_argument("--n_trial", default=10, type=int, help="num of eval ep")
parser.add_argument("--action_range", default=[-1., 1.], type=list, help="range of valid actions")
params = parser.parse_args()

params.actor_model_dir = "../../logs/models/20190731-181814-DDPG-GGNN_actor/"
params.critic_model_dir = "../../logs/models/20190731-181814-DDPG-GGNN_critic/"
params.video_dir = "./video_{}".format(str(params.env_name))

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir, force=True)

random_process = GaussianNoise(mu=0.0, sigma=0.0)
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)
global_timestep = tf.compat.v1.train.get_or_create_global_step()
all_distances, all_rewards, all_actions = list(), list(), list()
distance_func = get_distance(agent.params.env_name)  # create the distance measure func

print("=== Evaluation Mode ===")
for ep in range(params.n_trial):
    env.record_start()
    obs = env.reset()
    state = obs["flat_obs"]
    done = False
import gym

from tf_rl.common.monitor import Monitor

ENVS = [
    "Ant-v2",
    "HalfCheetah-v2",
    "Hopper-v2",
    "Humanoid-v2",
    # "Reacher-v2",
    # "Swimmer-v2",
    "Walker2d-v2"
]

DEFAULT = 250

for env_name in ENVS:
    env = gym.make(env_name)
    env = Monitor(env, "./video/{}".format(env_name), force=True)
    print(env_name)
    env.record_start()
    env.reset()
    done = False
    while not done:
        # env.render(mode="human", annotation_flg=False)
        s, r, done, i = env.step(env.action_space.sample())
    env.record_end()
    env.close()
now = datetime.datetime.now()

params.log_dir = "../logs/logs/DDPG-GGNN-seed{}/{}".format(
    params.seed, str(params.env_name.split("-")[0]))
params.actor_model_dir = "../logs/models/DDPG-GGNN-seed{}/{}/actor/".format(
    params.seed, str(params.env_name.split("-")[0]))
params.critic_model_dir = "../logs/models/DDPG-GGNN-seed{}/{}/critic/".format(
    params.seed, str(params.env_name.split("-")[0]))
params.video_dir = "../logs/video/DDPG-GGNN-seed{}/{}/".format(
    params.seed, str(params.env_name.split("-")[0]))
params.plot_path = "../logs/plots/DDPG-GGNN-seed{}/{}/".format(
    params.seed, str(params.env_name.split("-")[0]))

# Instantiate Env
env = gym.make(params.env_name)
env = Monitor(env, params.video_dir, force=True)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

# Invoke components
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

# Read the robot's structural information out of the MuJoCo XML
node_info = parse_mujoco_graph(task_name=params.env_name)
node_info = gnn_util.add_node_info(node_info, input_feat_dim=params.input_feat_dim)
import gym

from tf_rl.common.monitor import Monitor

env = gym.make('CartPole-v0')
env = Monitor(env=env, directory="./video/cartpole", force=True)

for ep in range(20):
    if ep == 0:
        env.record_start()
    state = env.reset()
    for t in range(1000):
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        if done:
            if ep == 0:
                env.record_end()
            break

env.close()
import itertools

from tf_rl.common.monitor import Monitor
from tf_rl.common.wrappers import wrap_deepmind, make_atari

env_name = "PongNoFrameskip-v4"
env = wrap_deepmind(make_atari(env_name))
env = Monitor(env=env, directory="./video/{}".format(env_name), force=True)

env.record_start()
state = env.reset()
for t in itertools.count():
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    state = next_state
    if done:
        break

print("End at {}".format(t + 1))
env.record_end()
env.close()
params.test_episodes = 1
env_name = str(params.env_name.split("-")[0])

params.log_dir = "../../logs/logs/DDPG-debug-seed{}/{}".format(
    params.seed, env_name)
params.actor_model_dir = "../../logs/models/DDPG-debug-seed{}/{}/actor/".format(
    params.seed, env_name)
params.critic_model_dir = "../../logs/models/DDPG-debug-seed{}/{}/critic/".format(
    params.seed, env_name)
params.video_dir = "../../logs/video/DDPG-debug-seed{}/{}/".format(
    params.seed, env_name)
params.plot_path = "../../logs/plots/DDPG-debug-seed{}/{}/".format(
    params.seed, env_name)

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir, force=True)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                          theta=0.15,
                                          mu=params.mu,
                                          sigma=params.sigma)
agent = DDPG_debug(Actor, Critic, env.action_space.shape[0], random_process, params)