Example #1
def train(alg, task):
    if task == 'reach':
        env_fn = lambda: SawyerReachEnv(n_substeps=25, reward_type='dense')
    elif task == 'grasp':
        env_fn = lambda: SawyerGraspEnv(n_substeps=5, reward_type='dense')

    ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)
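    # Note: ac_kwargs is only passed to the TRPO branch below; the other algorithms fall back to their default network sizes.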
    save_path = os.path.join(SAVE_PATH, task, alg)
    if alg == 'ppo':
        # mpi_fork(2)

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        ppo(env_fn=env_fn,
            steps_per_epoch=4000,
            epochs=20000,
            logger_kwargs=logger_kwargs,
            max_ep_len=1000)

    elif alg == 'ddpg':

        logger_kwargs = dict(output_dir=SAVE_PATH + '/ddpg_suite',
                             exp_name=EXP_NAME)
        ddpg(env_fn=env_fn,
             steps_per_epoch=5000,
             batch_size=256,
             epochs=2000,
             logger_kwargs=logger_kwargs,
             max_ep_len=200)

    elif alg == 'trpo':

        logger_kwargs = dict(output_dir=SAVE_PATH + '/trpo_suite',
                             exp_name=EXP_NAME)
        trpo(env_fn=env_fn,
             ac_kwargs=ac_kwargs,
             steps_per_epoch=5000,
             epochs=2000,
             logger_kwargs=logger_kwargs,
             max_ep_len=200)

    elif alg == 'td3':

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        td3(env_fn=env_fn,
            start_steps=100000,
            steps_per_epoch=5000,
            epochs=2000,
            logger_kwargs=logger_kwargs,
            max_ep_len=1000)

    elif alg == 'sac':

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        sac(env_fn=env_fn,
            start_steps=100000,
            steps_per_epoch=5000,
            epochs=2000,
            logger_kwargs=logger_kwargs,
            max_ep_len=200)
Example #2
    def test_cartpole(self):
        """
        Test training a small agent in a simple environment
        """
        env_fn = partial(gym.make, 'CartPole-v1')
        ac_kwargs = dict(hidden_sizes=(32, ))
        with tf.Graph().as_default():
            ppo(env_fn, steps_per_epoch=100, epochs=10, ac_kwargs=ac_kwargs)
Example #3
def main():

    import tensorflow as tf
    env_fn = lambda: Doubling()
    ac_kwargs = dict(hidden_sizes=[50, 50], activation=tf.nn.relu)
    logger_kwargs = dict(output_dir='output_dir3',
                         exp_name='training_64x64relu')
    ppo(env_fn=env_fn,
        ac_kwargs=ac_kwargs,
        steps_per_epoch=5000,
        epochs=25000000000,
        logger_kwargs=logger_kwargs,
        save_freq=1)
Example #4
    def __call__(self, *args, **kwargs):

        ac_kwargs = dict(hidden_sizes=[400, 300, 200, 100],
                         activation=torch.nn.ReLU)

        logger_kwargs = dict(output_dir=self.outdir, exp_name=self.expt_name)

        ppo(
            env_fn=self.env,
            ac_kwargs=ac_kwargs,
            # steps_per_epoch=250,
            steps_per_epoch=1000,
            epochs=400,
            logger_kwargs=logger_kwargs)
Example #5
def main():
    args = {
        "forest_data_path":
        "/Users/anmartin/Projects/summer_project/hl_planner/forest_data.tiff",
        "num_measurements": 6,
        "max_forest_heights": [60, 90, 45, 38, 30, 76],
        "orbit_altitude": 757000,
    }
    env_fn = lambda: gym.make('gym_orekit:online-orekit-v0', **args)

    ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)
    logger_kwargs = dict(output_dir='./output', exp_name='test1')
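    # steps_per_epoch and max_ep_len are both 20160 below, so each epoch collects at most one full-length episode.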
    ppo(env_fn=env_fn,
        ac_kwargs=ac_kwargs,
        steps_per_epoch=20160,
        epochs=10,
        max_ep_len=20160,
        save_freq=2,
        logger_kwargs=logger_kwargs)
Example #6
def main():


    env_fn = lambda : gym.make('LunarLander-v2')

    ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)

    logger_kwargs = dict(output_dir='path/to/output_dir', exp_name='experiment_name')

    ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=250, logger_kwargs=logger_kwargs)

    # pass
    ######------play movie -----------------
    # movie = retro.Movie('SuperMarioBros-Nes-Level1-1-000000.bk2')
    # movie.step()

    # env = retro.make(
    #     game=movie.get_game(),
    #     state=None,
    #     # bk2s can contain any button presses, so allow everything
    #     use_restricted_actions=retro.Actions.ALL,
    #     players=movie.players,
    # )
    # env.initial_state = movie.get_state()
    # env.reset()

    # while movie.step():
    #     keys = []
    #     for p in range(movie.players):
    #         for i in range(env.num_buttons):
    #             keys.append(movie.get_key(i, p))
    #     env.step(keys)
    #     env.render()

    #########---------main RL program--------------
    # Record a random-agent rollout to the current directory (record='.').
    env = retro.make(game='SuperMarioBros-Nes', record='.')
    obs = env.reset()
    for i in range(10000):
        obs, rew, done, info = env.step(env.action_space.sample())
        env.render()
    env.close()
Example #7
def run_tests(model_name=None):
    global epochs
    global env
    global ac
    env = gym.make('StrategyEnv-v0')
    test_tf = '20200201-20200207'
    # test_tf = '20200401-'
    env.set_timeframe(test_tf)
    env.full_reset()

    def make_env():
        env = gym.make('StrategyEnv-v0')
        env.set_timeframe('20191110-20200131')
        env.randomize_timeframe(True)
        env.set_ac(True)
        env.full_reset()
        return env
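    # make_env builds a fresh training env on a randomized 2019-2020 timeframe; the outer env keeps the fixed test timeframe for evaluation.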

    #ask joey to run normal BT to compare
    run_test_with_strat(env)
    env.run_normal_bt()

    if model_name:
        torch.manual_seed(10000)
        np.random.seed(10000)
        ac = models.load_model(model_name, env.observation_space,
                               env.action_space)
    else:
        # ac = ppo(make_env, epochs=epochs, target_kl=0.001, steps_per_epoch=7200, max_ep_len=100000)
        ac = ppo(make_env,
                 epochs=epochs,
                 steps_per_epoch=7200,
                 max_ep_len=100000)
        model_name = models.save_model(ac)

    run_model_test(env, ac, model_name)
Example #8
    from spinup import ppo_pytorch as ppo
    from spinup.exercises.common import print_result
    from functools import partial
    import gym
    import os
    import pandas as pd
    import psutil
    import time

    logdir = "/tmp/experiments/%i" % int(time.time())

    ActorCritic = partial(exercise1_2_auxiliary.ExerciseActorCritic,
                          actor=MLPGaussianActor)

    ppo(env_fn=lambda: gym.make('MountainCarContinuous-v0'),
        actor_critic=ActorCritic,
        ac_kwargs=dict(hidden_sizes=(64, )),
        steps_per_epoch=4000,
        epochs=20,
        logger_kwargs=dict(output_dir=logdir))

    # Get scores from last five epochs to evaluate success.
    data = pd.read_table(os.path.join(logdir, 'progress.txt'))
    last_scores = data['AverageEpRet'][-5:]

    # Your implementation is probably correct if the agent has a score >500,
    # or if it reaches the top possible score of 1000, in the last five epochs.
    correct = np.mean(last_scores) > 500 or np.max(last_scores) == 1e3
    print_result(correct)
Example #9
        lam=float(lam))

# train with PPO
if algorithm == 'ppo':
    clip_ratio = sys.argv[2]
    target_kl = sys.argv[3]
    exp_name = 'll_ppo_seed' + str(seed) + '_epochs' + str(epochs)
    exp_name += '_cr' + clip_ratio + '_tk' + target_kl
    logger_kwargs = dict(output_dir='data_spinning_up/' + exp_name + '/',
                         exp_name=exp_name)
    ppo(env_fn=env_fn,
        ac_kwargs=ac_kwargs,
        max_ep_len=1000,
        gamma=0.99,
        seed=seed,
        steps_per_epoch=steps_per_epoch,
        pi_lr=0.005,
        vf_lr=0.005,
        epochs=epochs,
        logger_kwargs=logger_kwargs,
        clip_ratio=float(clip_ratio),
        target_kl=float(target_kl))

# train with TRPO
if algorithm == 'trpo':
    delta = sys.argv[2]
    backtrack_coef = sys.argv[3]
    exp_name = 'll_trpo_seed' + str(seed) + '_epochs' + str(epochs)
    exp_name += '_delta' + delta + '_bc' + backtrack_coef
    logger_kwargs = dict(output_dir='data_spinning_up/' + exp_name + '/',
                         exp_name=exp_name)
    trpo(env_fn=env_fn,
Example #10
from spinup import ppo_tf1 as ppo
import tensorflow as tf
import gym

env_fn = lambda: gym.make('CartPole-v0')

ac_kwargs = dict(hidden_sizes=[8, 16, 8], activation=tf.nn.relu)

ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=300, epochs=250)
Example #11
	"""
	Run this file to verify your solution.
	"""

	from spinup import ppo
	from spinup.exercises.common import print_result
	import gym
	import os
	import pandas as pd
	import psutil
	import time
	import pybullet_envs

	logdir = "/tmp/experiments/%i"%int(time.time())

	tf_hidden_sizes = (64,)
	keras_hidden_sizes = (64,64)

	ppo(env_fn = lambda : gym.make('InvertedPendulumBulletEnv-v0'),
		ac_kwargs=dict(policy=mlp_gaussian_policy, hidden_sizes=keras_hidden_sizes),
		steps_per_epoch=4000, epochs=20, logger_kwargs=dict(output_dir=logdir))

	# Get scores from last five epochs to evaluate success.
	data = pd.read_table(os.path.join(logdir,'progress.txt'))
	last_scores = data['AverageEpRet'][-5:]

	# Your implementation is probably correct if the agent has a score >500,
	# or if it reaches the top possible score of 1000, in the last five epochs.
	correct = np.mean(last_scores) > 500 or np.max(last_scores)==1e3
	print_result(correct)
Example #12
    def test_cartpole(self):
        ''' Test training a small agent in a simple environment '''
        env_fn = partial(gym.make, 'CartPole-v1')
        ac_kwargs = dict(hidden_sizes=(32,))
        ppo(env_fn, steps_per_epoch=100, epochs=10, ac_kwargs=ac_kwargs)
Example #13
                                       noise=noise)
            dirOut = dirO + 'TFIM' + "P" + str(Nt) + '_N' + str(
                Ns) + '_rw' + rtype
        elif model == 'RandomTFIM':
            J_couplings = set_couplings(Ns, seed)
            env_fn = lambda: qenv.RandomTFIM(Ns,
                                             J_couplings,
                                             Nt,
                                             rtype,
                                             dt,
                                             actType,
                                             measured_obs=measured_obs,
                                             g_target=hfield,
                                             noise=noise)
            dirOut = dirO + 'RandomIsing' + "P" + str(Nt) + '_N' + str(
                Ns) + '_rw' + rtype
        else:
            raise ValueError(f'Invalid model:{model}')
        dirOut += '/' + measured_obs + '/network' + str(layers[0]) + 'x' + str(
            layers[1])
        ac_kwargs = dict(hidden_sizes=layers, activation=tf.nn.relu)
        logger_kwargs = dict(output_dir=dirOut, exp_name='RL_first_try')
        ppo(env_fn=env_fn,
            ac_kwargs=ac_kwargs,
            steps_per_epoch=nstep,
            epochs=epochs,
            logger_kwargs=logger_kwargs,
            gamma=1.0,
            target_kl=0.01,
            save_freq=128)
Example #14
    from spinup import ppo_pytorch as ppo
    from spinup.exercises.common import print_result
    from functools import partial
    from spinup.utils.run_utils import set_mujoco
    set_mujoco()
    import gym
    import os
    import pandas as pd
    import psutil
    import time

    logdir = "/tmp/experiments/%i" % int(time.time())

    ActorCritic = partial(exercise1_2_auxiliary.ExerciseActorCritic,
                          actor=MLPGaussianActor)

    ppo(env_fn=lambda: gym.make('InvertedPendulum-v2'),
        actor_critic=ActorCritic,
        ac_kwargs=dict(hidden_sizes=(64, )),
        steps_per_epoch=4000,
        epochs=20,
        logger_kwargs=dict(output_dir=logdir))

    # Get scores from last five epochs to evaluate success.
    data = pd.read_table(os.path.join(logdir, 'progress.txt'))
    last_scores = data['AverageEpRet'][-5:]

    # Your implementation is probably correct if the agent has a score >500,
    # or if it reaches the top possible score of 1000, in the last five epochs.
    correct = np.mean(last_scores) > 500 or np.max(last_scores) == 1e3
    print_result(correct)
Example #15
from spinup import ppo_tf1 as ppo
import tensorflow as tf
import gym
from gym.wrappers import FlattenObservation
import panda_gym

EPOCHS = 100
STEPS_PER_EPOCH = 4000
ENV = 'PandaPush-v1'


def env_fn():
    env = gym.make(ENV)
    print(env.observation_space)
    env = FlattenObservation(env)
    return env


ac_kwargs = dict(
    hidden_sizes=[64, 64],
    activation=tf.nn.sigmoid,
)
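# Note: tf.nn.sigmoid hidden activations are an unusual choice here; the sigmoid_{ENV} log directory name below records it.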

logger_kwargs = dict(output_dir=f'logs/sigmoid_{ENV}',
                     exp_name=f'exp_sigmoid_{ENV}')

ppo(env_fn=env_fn,
    ac_kwargs=ac_kwargs,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    logger_kwargs=logger_kwargs)
Example #16
#vpg
# spinup.vpg(
#     env,
#     ac_kwargs={"hidden_sizes":(64,2)},
#     seed = np.random.randint(100),
#     steps_per_epoch=1250,
#     epochs=2500,
#     pi_lr=3e-4,
#     logger_kwargs = {"output_dir" : "logs/vpgrandomtest"}
# )

#ppo
spinup.ppo(env,
           ac_kwargs={"hidden_sizes": (64, 2)},
           seed=np.random.randint(100),
           steps_per_epoch=1250,
           pi_lr=3e-4,
           epochs=2500,
           logger_kwargs={"output_dir": "logs/ppo-v3-0-rerun2"})

#polynomials
# spinup.vpgpolynomial(
#     env,
#     ac_kwargs={"order":3},
#     seed = np.random.randint(100),
#     steps_per_epoch=1250,
#     epochs=2500,
#     pi_lr=2e-5,
#     l1_scaling=0.001,
#     logger_kwargs = {"output_dir" : "logs/polyrandomtest"}
# )
Example #17
from spinup import ppo_pytorch as ppo
from spinup import ddpg_pytorch as ddpg
from spinup import sac_pytorch as sac
import tensorflow as tf
import gym
import torch
"""
env_fn = lambda : gym.make('Walker2d-v2')
ac_kwargs = dict(hidden_sizes=[64,64])
logger_kwargs = dict(output_dir='baseline_data/walker/ppo', exp_name='walker_ppo')
ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

#env_fn = lambda : gym.make('Walker2d-v2')
#ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='baseline_data/walker/ddpg', exp_name='walker_ddpg')
ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

logger_kwargs = dict(output_dir='baseline_data/walker/sac', exp_name='walker_sac')
sac(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

env_fn = lambda : gym.make('Hopper-v2')
logger_kwargs = dict(output_dir='baseline_data/hopper/ppo', exp_name='hopper_ppo')
ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

# env_fn = lambda : gym.make('Walker2d-v2')
# ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='baseline_data/hopper/ddpg', exp_name='hopper_ddpg')
ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

logger_kwargs = dict(output_dir='baseline_data/hopper/sac', exp_name='hopper_sac')
sac(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)
"""
Example #18
# Check this, it may not work
def envFunc():
    env = gym.make('airsim_gym-v0')
    return env


# Setup the environment function and hyperparameters
env_fn = envFunc
ac_kwargs = dict(hidden_sizes=[64, 64])
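# The same PPO configuration is trained repeatedly below with different seeds (0, 5, 10, ...), each run logging to its own directory.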
logger_kwargs = dict(
    output_dir='/home/isra/Documents/airsim_exp_results_random_seed_0',
    exp_name='random_goals')
ppo(env_fn=envFunc,
    ac_kwargs=ac_kwargs,
    seed=0,
    max_ep_len=500,
    steps_per_epoch=4000,
    epochs=250,
    logger_kwargs=logger_kwargs)
logger_kwargs = dict(
    output_dir='/home/isra/Documents/airsim_exp_results_random_seed_5',
    exp_name='random_goals')
ppo(env_fn=envFunc,
    ac_kwargs=ac_kwargs,
    seed=5,
    max_ep_len=500,
    steps_per_epoch=4000,
    epochs=250,
    logger_kwargs=logger_kwargs)
logger_kwargs = dict(
    output_dir='/home/isra/Documents/airsim_exp_results_random_seed_10',
Example #19
    """

    from spinup import ppo
    from spinup.exercises.common import print_result
    from spinup.user_config import INVERTEDPENDULUM_ENV, IMPORT_USER_MODULES
    import gym
    import os
    import pandas as pd
    import psutil
    import time

    import importlib
    for module in IMPORT_USER_MODULES:
        importlib.import_module(module)

    logdir = "/tmp/experiments/%i" % int(time.time())
    ppo(env_fn=lambda: gym.make(INVERTEDPENDULUM_ENV),
        ac_kwargs=dict(policy=mlp_gaussian_policy, hidden_sizes=(64, )),
        steps_per_epoch=4000,
        epochs=20,
        logger_kwargs=dict(output_dir=logdir))

    # Get scores from last five epochs to evaluate success.
    data = pd.read_table(os.path.join(logdir, 'progress.txt'))
    last_scores = data['AverageEpRet'][-5:]

    # Your implementation is probably correct if the agent has a score >500,
    # or if it reaches the top possible score of 1000, in the last five epochs.
    correct = np.mean(last_scores) > 500 or np.max(last_scores) == 1e3
    print_result(correct)
Example #20
import gym
import gym_geofriend2

from MapGenerators.Basic import Basic
from MapGenerators.Pyramid import Pyramid
from MapGenerators.HighPlatform import HighPlatform
from MapGenerators.TwoHighTowers import TwoHighTowers
from Player.Player import Player
from spinup import ppo
import tensorflow as tf

env_fn = lambda : gym.make("geofriend2-v0")

# ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)

logger_kwargs = dict(output_dir='spinupPpo', exp_name='experiment')

ppo(env_fn=env_fn, steps_per_epoch=5000, epochs=500, logger_kwargs=logger_kwargs)
Example #21
from spinup.utils.run_utils import ExperimentGrid
from spinup import ppo
import tensorflow as tf
import gym

# todo: define env_fn in terms of some gym environment

env_fn = lambda: gym.make('LunarLanderContinuous-v2')
ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='data/test1', exp_name='test1')

ppo(env_fn=env_fn,
    ac_kwargs=ac_kwargs,
    steps_per_epoch=5000,
    max_ep_len=1000,
    epochs=500,
    logger_kwargs=logger_kwargs)
Example #22
import gym
import gym_env
from spinup import ppo_pytorch as ppo
from gym_env.wrapper import PendulumCostWrapper

env = gym.make('Pendulum-v0')
env._max_episode_steps = 100
env = PendulumCostWrapper(env)
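# Note: _max_episode_steps caps episodes at 100 steps, so max_ep_len=1000 in the ppo call below is never the binding limit.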

ppo(env_fn=lambda: env,
    ac_kwargs=dict(hidden_sizes=[16] * 2),
    gamma=0.99,
    max_ep_len=1000,
    lam=0.95,
    epochs=100000,
    seed=1)
Example #23
import torch
import gym
import numpy as np
from spinup import ppo_pytorch as ppo


def env_fn():
    import vortex_cartpole
    # We can pass a dictionary of arguments to the environment using kwargs
    #  headless : True or False, selects whether or not to use graphics rendering
    #  random_reset : if True, a random state is induced when the environment is reset
    #
    kwargs = {"headless": False, "random_reset": True}
    env = gym.make('VortexCartPole-v0', **kwargs)
    return env


# Test training an agent using the PyTorch PPO implementation
ac_kwargs = dict(hidden_sizes=[32, 32], activation=torch.nn.ReLU)
ppo(env_fn,
    steps_per_epoch=1000,
    epochs=50,
    gamma=0.99,
    pi_lr=1e-3,
    vf_lr=1e-3,
    ac_kwargs=ac_kwargs)
Example #24
    def test_atari_env(self):
        ''' Test training a small agent in a simple environment '''
        env_fn = partial(gym.make, 'CarRacing-v0')
        ac_kwargs = dict(hidden_sizes=(32, ))
        with tf.Graph().as_default():
            ppo(env_fn, steps_per_epoch=100, epochs=10, ac_kwargs=ac_kwargs)
Example #25
    return pi, logp, logp_pi


if __name__ == '__main__':
    """
    Run this file to verify your solution.
    """

    from spinup import ppo_tf1 as ppo
    from spinup.exercises.common import print_result
    import gym
    import os
    import pandas as pd
    import psutil
    import time

    logdir = "/tmp/experiments/%i" % int(time.time())
    ppo(env_fn=lambda: gym.make('InvertedPendulum-v2'),
        ac_kwargs=dict(policy=mlp_gaussian_policy, hidden_sizes=(64, )),
        steps_per_epoch=4000,
        epochs=1,
        logger_kwargs=dict(output_dir=logdir))

    # Get scores from last five epochs to evaluate success.
    data = pd.read_table(os.path.join(logdir, 'progress.txt'))
    last_scores = data['AverageEpRet'][-5:]

    # Your implementation is probably correct if the agent has a score >500,
    # or if it reaches the top possible score of 1000, in the last five epochs.
    correct = np.mean(last_scores) > 500 or np.max(last_scores) == 1e3
    print_result(correct)
Example #26
def ppo_test():
    # ac_kwargs = dict(hidden_sizes=[64,64])

    # logger_kwargs = dict(output_dir='path/to/output_dir', exp_name='experiment_name')

    ppo(env_fn=env, steps_per_epoch=50, epochs=10)
Example #27
#vpg
# spinup.vpg(
#     env,
#     ac_kwargs={"hidden_sizes":(64,2)},
#     seed = np.random.randint(100),
#     steps_per_epoch=1250,
#     epochs=2500,
#     pi_lr=3e-4,
#     logger_kwargs = {"output_dir" : "logs/vpgrandomtest"}
# )

#ppo
spinup.ppo(env,
           ac_kwargs={"hidden_sizes": (64, 2)},
           seed=np.random.randint(100),
           steps_per_epoch=1250,
           pi_lr=3e-3,
           epochs=2500,
           logger_kwargs={"output_dir": "logs/ppo-dptest-uscaling1-lr3e3"})

#polynomials
# spinup.vpgpolynomial(
#     env,
#     ac_kwargs={"order":3},
#     seed = np.random.randint(100),
#     steps_per_epoch=1250,
#     epochs=2500,
#     pi_lr=2e-5,
#     l1_scaling=0.001,
#     logger_kwargs = {"output_dir" : "logs/polyrandomtest"}
# )
Example #28
        running = not done

        count += 1 
        if count > 100:
            break

    save_gif(color_images, path=color_output)
    save_gif(object_images, path=object_output)
    print("____________________________")
    print("Target: {}".format(env.target))
    print("Reward: {}".format(reward))
    print("____________________________")

def save_gif(images, path="example.gif"):
    with imageio.get_writer(path, mode='I') as writer:
        for image in images:
            writer.append_data(image)

if __name__ == '__main__':
    """
    Run the code to verify the solution
    """
    if to_train:
        logdir = "data/experiments/%i"%int(time.time())
        ppo(env_fn = GoalGridWorld,
            actor_critic=mlp_actor_critic,
            steps_per_epoch=100000, epochs=100, logger_kwargs=dict(output_dir=logdir))
    else:
        logdir = "data/experiments/%i/simple_save/"%int(exp_id)
        simulate(path=logdir)
Example #29
from spinup import ppo
import tensorflow as tf
import gym
import paint_svg
from paint_svg.algos.ppo.ppo import ppo

env_fn = lambda : gym.make('PaintSvg-v0')

ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)

logger_kwargs = dict(output_dir='test', exp_name='paint_svg')

ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=250, logger_kwargs=logger_kwargs)

# env = gym.make('PaintSvg-v0')
# env.reset()
Example #30
        super(MoveTowardZ, self).__init__(env)

    def action(self, action):
        action[2] = -.3
        return action


env = gym.make('panda-v0')
env = ProcessFrame84(env)
env = ImageToPyTorch(env)
env = MoveTowardZ(env)

image = env.reset()
plt.figure()
plt.imshow(image.squeeze(), cmap='gray')
plt.title('Example extracted screen')
plt.show()

env_fn = lambda: env
ac_kwargs = dict(hidden_sizes=[18, 64, 64], activation=nn.ReLU)

logger_kwargs = dict(output_dir='spinup', exp_name='panda_ppo')

#ppo(env_fn=env_fn,actor_critic=core.CNNActorCritic, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=250, logger_kwargs=logger_kwargs)
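# steps_per_epoch=2 and epochs=1 below amount to a quick smoke test of the CNN actor-critic pipeline, not a real training run.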
ppo(env_fn=env_fn,
    actor_critic=core.CNNActorCritic,
    ac_kwargs=ac_kwargs,
    steps_per_epoch=2,
    epochs=1,
    logger_kwargs=logger_kwargs)