def main(dir, interval):
    """Reload the state stored under ``dir`` and call the saved algorithm.

    Everything runs inside ``logger.session(dir)``. The state returned by
    ``SnapshotSaver.get_state()`` is expected to hold an ``'alg'`` callable
    and an ``'alg_state'`` mapping whose ``'env_maker'`` entry can build the
    environment.

    Args:
        dir: Directory handed to both ``logger.session`` and
            ``SnapshotSaver``.
        interval: Forwarded to ``SnapshotSaver`` as its ``interval`` keyword.
    """
    with logger.session(dir):
        snapshot_saver = SnapshotSaver(dir, interval=interval)
        snapshot = snapshot_saver.get_state()
        saved_kwargs = snapshot['alg_state']
        # Build the environment from the saved maker before looking up the
        # algorithm, mirroring the order in which the keys are read.
        environment = saved_kwargs['env_maker'].make()
        algorithm = snapshot['alg']
        # The whole saved mapping is forwarded as keyword arguments, together
        # with the fresh environment and the saver.
        algorithm(
            env=environment,
            snapshot_saver=snapshot_saver,
            **saved_kwargs
        )
Exemplo n.º 2
0
def main(dir, interval):
    """Load the snapshot state found in ``dir`` and run its stored algorithm.

    The work happens inside a ``logger.session`` opened on ``dir``. The
    loaded state must provide ``'alg'`` (a callable) and ``'alg_state'`` (a
    mapping containing at least ``'env_maker'``).

    Args:
        dir: Directory used for the logger session and the snapshot saver.
        interval: Passed through as ``SnapshotSaver``'s ``interval`` keyword.
    """
    with logger.session(dir):
        checkpointer = SnapshotSaver(dir, interval=interval)
        loaded = checkpointer.get_state()
        alg_kwargs = loaded['alg_state']
        # Environment first, then the algorithm lookup: same key-access order
        # as before, so a missing key surfaces at the same point.
        rebuilt_env = alg_kwargs['env_maker'].make()
        run_alg = loaded['alg']
        run_alg(env=rebuilt_env, snapshot_saver=checkpointer, **alg_kwargs)
Exemplo n.º 3
0
def main(dir):
    """Endlessly roll out and render the policy stored in ``dir``.

    On every pass a ``SnapshotSaver`` is created for ``dir`` and asked for
    its state. While no state is available the loop sleeps one second and
    retries. The environment is built once, from the first state obtained,
    and reused afterwards; the policy is re-read from the state on each pass.
    This function never returns.

    Args:
        dir: Directory handed to ``SnapshotSaver``.
    """
    environment = None
    while True:
        snapshot_saver = SnapshotSaver(dir)
        snapshot = snapshot_saver.get_state()
        if snapshot is None:
            # Nothing to load yet; wait a moment and look again.
            time.sleep(1)
            continue

        saved = snapshot['alg_state']
        if environment is None:
            environment = saved['env_maker'].make()
        current_policy = saved['policy']

        # Play one episode: act, step, render, and stop once the
        # environment reports that the episode is done.
        observation = environment.reset()
        while True:
            action, _ = current_policy.get_action(observation)
            observation, _, finished, _ = environment.step(action)
            environment.render()
            if finished:
                break
def main(dir):
    """Render episodes of the saved policy from ``dir`` in an endless loop.

    Each iteration constructs a ``SnapshotSaver`` on ``dir`` and fetches its
    state, sleeping for a second whenever that state is ``None``. The
    environment comes from the state's ``'env_maker'`` and is created only
    the first time a state is available. The function does not return.

    Args:
        dir: Directory handed to ``SnapshotSaver``.
    """
    sim = None
    while True:
        checkpointer = SnapshotSaver(dir)
        loaded = checkpointer.get_state()
        if loaded is None:
            # No state available yet: back off briefly, then poll again.
            time.sleep(1)
            continue

        alg_kwargs = loaded['alg_state']
        if sim is None:
            sim = alg_kwargs['env_maker'].make()
        actor = alg_kwargs['policy']

        obs = sim.reset()
        # One full episode; every step is rendered, including the last.
        while True:
            act, _ = actor.get_action(obs)
            obs, _, episode_over, _ = sim.step(act)
            sim.render()
            if episode_over:
                break
Exemplo n.º 5
0
def main(dir):
    """Endlessly roll out and render the saved policy, recording each episode.

    Works like a polling viewer: a ``SnapshotSaver`` is created for ``dir``
    on every pass and, while it has no state, the loop sleeps one second and
    retries. The environment is created once, with a ``video_callable`` that
    returns ``True`` for every episode id. This function never returns.

    Args:
        dir: Directory handed to ``SnapshotSaver``.
    """
    def record_every_episode(episode_id):
        # save videos of all episodes to monitor_dir
        return True

    environment = None
    while True:
        snapshot_saver = SnapshotSaver(dir)
        snapshot = snapshot_saver.get_state()
        if snapshot is None:
            # Nothing to load yet; wait a moment and look again.
            time.sleep(1)
            continue

        saved = snapshot['alg_state']
        if environment is None:
            environment = saved['env_maker'].make(
                video_callable=record_every_episode)
        current_policy = saved['policy']

        # Play one episode, rendering after every step.
        observation = environment.reset()
        while True:
            action, _ = current_policy.get_action(observation)
            observation, _, finished, _ = environment.step(action)
            environment.render()
            if finished:
                break
def run(v):
    """Run TRPO on ``Pendulum-v0`` with a Gaussian MLP policy.

    Args:
        v: Mapping of experiment settings. ``v['seed']`` seeds NumPy's global
            random state and ``v['baseline']`` selects the baseline; it must
            be ``'mlp'``, ``'time_dependent'`` or ``'linear_feature'``.

    Raises:
        ValueError: If ``v['baseline']`` is not one of the supported names.
    """
    np.random.seed(v['seed'])
    env_maker = EnvMaker('Pendulum-v0')
    env = env_maker.make()

    # Every model below is built from the same spaces and spec.
    space_kwargs = dict(
        observation_space=env.observation_space,
        action_space=env.action_space,
        env_spec=env.spec,
    )
    policy = GaussianMLPPolicy(
        hidden_sizes=(64, 64),
        hidden_nonlinearity=chainer.functions.tanh,
        **space_kwargs
    )

    baseline_name = v['baseline']
    if baseline_name == 'mlp':
        baseline = MLPBaseline(
            hidden_sizes=(64, 64),
            hidden_nonlinearity=chainer.functions.tanh,
            **space_kwargs
        )
    elif baseline_name == 'time_dependent':
        baseline = TimeDependentBaseline(**space_kwargs)
    elif baseline_name == 'linear_feature':
        baseline = LinearFeatureBaseline(**space_kwargs)
    else:
        # Name the offending value so a mistyped variant is easy to spot.
        raise ValueError(
            "unknown baseline {!r}; expected 'mlp', 'time_dependent' or "
            "'linear_feature'".format(baseline_name)
        )

    trpo(
        env=env,
        env_maker=env_maker,
        n_envs=16,
        policy=policy,
        baseline=baseline,
        batch_size=10000,
        n_iters=100,
        snapshot_saver=SnapshotSaver(logger.get_dir()),
    )
def run(v):
    """Run TRPO on ``CartPole-v0`` with a categorical MLP policy.

    Args:
        v: Mapping of experiment settings; only ``v['seed']`` is read, to
            seed NumPy's global random state.
    """
    np.random.seed(v['seed'])

    maker = EnvMaker('CartPole-v0')
    environment = maker.make()

    # The policy and the baseline are constructed from identical arguments.
    model_kwargs = {
        'observation_space': environment.observation_space,
        'action_space': environment.action_space,
        'env_spec': environment.spec,
    }
    categorical_policy = CategoricalMLPPolicy(**model_kwargs)
    mlp_baseline = MLPBaseline(**model_kwargs)

    saver = SnapshotSaver(logger.get_dir())
    trpo(
        env=environment,
        env_maker=maker,
        n_envs=16,
        policy=categorical_policy,
        baseline=mlp_baseline,
        batch_size=2000,
        n_iters=100,
        snapshot_saver=saver,
    )
Exemplo n.º 8
0
#!/usr/bin/env python
import os
import shutil

import numpy as np

import logger
from algs import a2c
from env_makers import EnvMaker
from models import CategoricalCNNPolicy
from utils import SnapshotSaver

# Directory passed to both the logger session and the snapshot saver.
log_dir = "data/local/a2c-pong"

np.random.seed(42)

# Clean up existing logs.
# shutil.rmtree does this without spawning a shell or depending on an `rm`
# binary; ignore_errors=True keeps the "fine if it is not there" behaviour
# that `rm -rf` had.
shutil.rmtree(log_dir, ignore_errors=True)

with logger.session(log_dir):
    env_maker = EnvMaker('PongNoFrameskip-v4')
    env = env_maker.make()
    policy = CategoricalCNNPolicy(env.observation_space, env.action_space,
                                  env.spec)
    # The value function is derived from the policy object itself.
    vf = policy.create_vf()
    a2c(
        env=env,
        env_maker=env_maker,
        n_envs=16,
        policy=policy,
        vf=vf,
        snapshot_saver=SnapshotSaver(log_dir, interval=10),
    )
# Directory passed to both the logger session and the snapshot saver.
log_dir = "data/local/trpo-cartpole"

np.random.seed(42)

# Clean up existing logs.
# shutil.rmtree does this without spawning a shell or depending on an `rm`
# binary; ignore_errors=True keeps the "fine if it is not there" behaviour
# that `rm -rf` had.
shutil.rmtree(log_dir, ignore_errors=True)

with logger.session(log_dir):
    env_maker = EnvMaker('CartPole-v0')
    env = env_maker.make()
    policy = CategoricalMLPPolicy(
        observation_space=env.observation_space,
        action_space=env.action_space,
        env_spec=env.spec
    )
    baseline = MLPBaseline(
        observation_space=env.observation_space,
        action_space=env.action_space,
        env_spec=env.spec
    )
    trpo(
        env=env,
        env_maker=env_maker,
        n_envs=16,
        policy=policy,
        baseline=baseline,
        batch_size=2000,
        n_iters=100,
        snapshot_saver=SnapshotSaver(log_dir)
    )