Example #1
def train_eval(log_dir="DDPG",
               prev_log="",
               google_colab=False,
               seed=123,
               gpu_id=0,
               env_name="HalfCheetah-v2",
               num_frames=10000,
               tau=1e-2,
               memory_size=5000,
               hot_start=100,
               batch_size=200,
               interval_MAR=10,
               gamma=0.99,
               L2_reg=0.5,
               random_process="ou",
               mu=0.3,
               sigma=0.2,
               num_eval_episodes=1,
               eval_interval=1000):
    tf.compat.v1.set_random_seed(seed)
    np.random.seed(seed=seed)

    # prep for training
    log_dir = set_up_for_training(env_name=env_name,
                                  seed=seed,
                                  gpu_id=gpu_id,
                                  log_dir=log_dir,
                                  prev_log=prev_log,
                                  google_colab=google_colab)

    env = gym.make(env_name)
    env = Monitor(env=env, directory=log_dir["video_path"], force=True)

    replay_buffer = ReplayBuffer(memory_size, traj_dir=log_dir["traj_path"])
    reward_buffer = deque(maxlen=interval_MAR)
    summary_writer = tf.compat.v2.summary.create_file_writer(
        log_dir["summary_path"])

    if random_process == "ou":
        random_process = OrnsteinUhlenbeckProcess(
            size=env.action_space.shape[0], theta=0.15, mu=mu, sigma=sigma)
    elif random_process == "gaussian":
        random_process = GaussianNoise(mu=mu, sigma=sigma)
    else:
        raise ValueError("choose the random process from either 'gaussian' or 'ou'")

    agent = DDPG(actor=Actor,
                 critic=Critic,
                 num_action=env.action_space.shape[0],
                 random_process=random_process,
                 gamma=gamma,
                 L2_reg=L2_reg,
                 actor_model_dir=log_dir["model_path"] + "/actor",
                 critic_model_dir=log_dir["model_path"] + "/critic")

    train(agent, env, replay_buffer, reward_buffer, summary_writer,
          num_eval_episodes, num_frames, tau, eval_interval, hot_start,
          batch_size, interval_MAR, log_dir, google_colab)
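
For reference, a minimal invocation of the function above might look like the following sketch (assuming the surrounding module provides the imports and the DDPG, Actor, and Critic classes it references):

if __name__ == "__main__":
    # run a short DDPG training session on HalfCheetah with the defaults defined above
    train_eval(env_name="HalfCheetah-v2", num_frames=10000, seed=123)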
Example #2
def prep_env(env_name, video_path):
    if env_name.lower() == "cartpole":
        env = gym.make("CartPole-v0")
        # CartPole is not wrapped with Monitor, so provide no-op recording hooks
        env.record_start = lambda: None
        env.record_end = lambda: None
    else:
        env = wrap_deepmind(
            make_atari(env_name +
                       "NoFrameskip-v4"))  # make sure to add NoFrameskip-v4
        env = Monitor(env=env, directory=video_path, force=True)
    return env
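
A hypothetical call of prep_env: for Atari games pass the bare game name (the function appends "NoFrameskip-v4" itself), or pass "cartpole" to get the plain CartPole environment without video recording.

env = prep_env("Breakout", video_path="./video/breakout")
state = env.reset()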
Example #3
# strip the decimal point from mu so it can be used in directory names, e.g. 0.3 -> "03"
mu = str(params.mu).split(".")
mu = mu[0] + mu[1]
params.log_dir = "../../logs/logs/DDPG_batchnorm-{}-seed{}/{}-mu{}".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.actor_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/actor-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.critic_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/critic-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.video_dir = "../../logs/video/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.plot_path = "../../logs/plots/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                          theta=0.15,
                                          mu=params.mu,
                                          sigma=params.sigma)
# random_process = GaussianNoise(mu=params.mu, sigma=params.sigma)
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)
train_DDPG_original(agent, env, replay_buffer, reward_buffer, summary_writer)
Example #4
# run this from the terminal and make sure you are loading appropriate environment variables
# $ echo $LD_LIBRARY_PATH
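# For MuJoCo-backed environments the dynamic loader needs to find the MuJoCo
# binaries; a typical setup (the path below is illustrative) looks like:
# $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin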

import gym
from tf_rl.common.monitor import Monitor
import environments.register as register

video_dir = "./video/"
temp = 5  # record video every 5th episode (when ep % temp == 0)

env = gym.make("CentipedeSix-v1")
env = Monitor(env, video_dir, force=True)

for ep in range(10):
    if ep % temp == 0:
        print("recording")
        env.record_start()

    env.reset()
    done = False
    while not done:
        # env.render()
        action = env.action_space.sample()
        s, r, done, info = env.step(action)  # take a random action
    if ep % temp == 0:
        env.record_end()
Example #5
                    default=0.05,
                    type=float,
                    help="magnitude of randomness")
parser.add_argument("--n_trial", default=10, type=int, help="num of eval ep")
parser.add_argument("--action_range",
                    default=[-1., 1.],
                    type=list,
                    help="magnitude of L2 regularisation")
params = parser.parse_args()

params.actor_model_dir = "../../logs/models/20190731-181814-DDPG-GGNN_actor/"
params.critic_model_dir = "../../logs/models/20190731-181814-DDPG-GGNN_critic/"
params.video_dir = "./video_{}".format(str(params.env_name))

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir, force=True)

random_process = GaussianNoise(mu=0.0, sigma=0.0)  # zero noise: deterministic actions for evaluation
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)

global_timestep = tf.compat.v1.train.get_or_create_global_step()

all_distances, all_rewards, all_actions = list(), list(), list()
distance_func = get_distance(
    agent.params.env_name)  # create the distance measure func
print("=== Evaluation Mode ===")
for ep in range(params.n_trial):
    env.record_start()
    obs = env.reset()
    state = obs["flat_obs"]
    done = False
Example #6
import gym
from tf_rl.common.monitor import Monitor

ENVS = [
    "Ant-v2",
    "HalfCheetah-v2",
    "Hopper-v2",
    "Humanoid-v2",
    # "Reacher-v2",
    # "Swimmer-v2",
    "Walker2d-v2"
]

DEFAULT = 250

for env_name in ENVS:
    env = gym.make(env_name)
    env = Monitor(env, "./video/{}".format(env_name), force=True)
    print(env_name)
    env.record_start()
    env.reset()
    done = False
    while not done:
        # env.render(mode="human", annotation_flg=False)
        s, r, done, i = env.step(env.action_space.sample())
    env.record_end()
    env.close()
Example #7
now = datetime.datetime.now()

params.log_dir = "../logs/logs/DDPG-GGNN-seed{}/{}".format(
    params.seed, str(params.env_name.split("-")[0]))
params.actor_model_dir = "../logs/models/DDPG-GGNN-seed{}/{}/actor/".format(
    params.seed, str(params.env_name.split("-")[0]))
params.critic_model_dir = "../logs/models/DDPG-GGNN-seed{}/{}/critic/".format(
    params.seed, str(params.env_name.split("-")[0]))
params.video_dir = "../logs/video/DDPG-GGNN-seed{}/{}/".format(
    params.seed, str(params.env_name.split("-")[0]))
params.plot_path = "../logs/plots/DDPG-GGNN-seed{}/{}/".format(
    params.seed, str(params.env_name.split("-")[0]))

# Instantiate Env
env = gym.make(params.env_name)
env = Monitor(env, params.video_dir, force=True)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

# Invoke components
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

# Reading out the information regarding the robot from the XML
node_info = parse_mujoco_graph(task_name=params.env_name)
node_info = gnn_util.add_node_info(node_info,
                                   input_feat_dim=params.input_feat_dim)
Example #8
import gym
from tf_rl.common.monitor import Monitor

env = gym.make('CartPole-v0')
env = Monitor(env=env, directory="./video/cartpole", force=True)

for ep in range(20):
    if ep == 0: env.record_start()
    state = env.reset()
    for t in range(1000):
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        if done:
            if ep == 0: env.record_end()
            break

env.close()
Example #9
import itertools
from tf_rl.common.monitor import Monitor
from tf_rl.common.wrappers import wrap_deepmind, make_atari

env_name = "PongNoFrameskip-v4"

env = wrap_deepmind(make_atari(env_name))
env = Monitor(env=env, directory="./video/{}".format(env_name), force=True)
env.record_start()
state = env.reset()
for t in itertools.count():
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    state = next_state
    if done:
        break
print("End at {}".format(t + 1))
env.record_end()
env.close()
Example #10
params.test_episodes = 1

env_name = str(params.env_name.split("-")[0])
params.log_dir = "../../logs/logs/DDPG-debug-seed{}/{}".format(
    params.seed, env_name)
params.actor_model_dir = "../../logs/models/DDPG-debug-seed{}/{}/actor/".format(
    params.seed, env_name)
params.critic_model_dir = "../../logs/models/DDPG-debug-seed{}/{}/critic/".format(
    params.seed, env_name)
params.video_dir = "../../logs/video/DDPG-debug-seed{}/{}/".format(
    params.seed, env_name)
params.plot_path = "../../logs/plots/DDPG-debug-seed{}/{}/".format(
    params.seed, env_name)

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir, force=True)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                          theta=0.15,
                                          mu=params.mu,
                                          sigma=params.sigma)

agent = DDPG_debug(Actor, Critic, env.action_space.shape[0], random_process,