def test_make_deprecated():
    try:
        envs.make('Humanoid-v0')
    except error.Error:
        pass
    else:
        assert False
def verify_environments_match(old_environment_id, new_environment_id,
                              seed=1, num_actions=1000):
    old_environment = envs.make(old_environment_id)
    new_environment = envs.make(new_environment_id)
    old_environment.seed(seed)
    new_environment.seed(seed)

    old_reset_observation = old_environment.reset()
    new_reset_observation = new_environment.reset()
    np.testing.assert_allclose(old_reset_observation, new_reset_observation)

    for i in range(num_actions):
        action = old_environment.action_space.sample()
        old_observation, old_reward, old_done, old_info = old_environment.step(action)
        new_observation, new_reward, new_done, new_info = new_environment.step(action)

        eps = 1e-6
        np.testing.assert_allclose(old_observation, new_observation, atol=eps)
        np.testing.assert_allclose(old_reward, new_reward, atol=eps)
        np.testing.assert_allclose(old_done, new_done, atol=eps)
        for key in old_info:
            np.testing.assert_allclose(old_info[key], new_info[key], atol=eps)
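# A minimal usage sketch for verify_environments_match above. The environment
# ids here are hypothetical placeholders for an old and a re-implemented
# version of the same environment; any pair of registered, observation-
# compatible ids would do.
def test_old_and_new_envs_match():
    verify_environments_match('MyEnv-v0', 'MyEnv-v1', seed=1, num_actions=100)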
def test_random_rollout():
    for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
        agent = lambda ob: env.action_space.sample()
        ob = env.reset()
        for _ in range(10):
            assert env.observation_space.contains(ob)
            a = agent(ob)
            assert env.action_space.contains(a)
            (ob, _reward, done, _info) = env.step(a)
            if done:
                break
def test_env_render_result_is_immutable():
    environs = [
        envs.make("Taxi-v3"),
        envs.make("FrozenLake-v1"),
    ]
    for env in environs:
        env.reset()
        output = env.render(mode="ansi")
        assert isinstance(output, str)
        env.close()
def test_env_render_result_is_immutable():
    environs = [
        envs.make('Taxi-v3'),
        envs.make('FrozenLake-v0'),
        envs.make('Reverse-v0'),
    ]
    for env in environs:
        env.reset()
        output = env.render(mode='ansi')
        assert isinstance(output, str)
        env.close()
def test_env_render_result_is_immutable():
    from six import string_types
    environs = [
        envs.make('Taxi-v2'),
        envs.make('FrozenLake-v0'),
        envs.make('Reverse-v0'),
    ]
    for env in environs:
        env.reset()
        output = env.render(mode='ansi')
        assert isinstance(output, string_types)
        env.close()
def test_make_with_kwargs():
    env = envs.make("test.ArgumentEnv-v0", arg2="override_arg2",
                    arg3="override_arg3")
    assert env.spec.id == "test.ArgumentEnv-v0"
    assert isinstance(env.unwrapped, ArgumentEnv)
    assert env.arg1 == "arg1"
    assert env.arg2 == "override_arg2"
    assert env.arg3 == "override_arg3"
def main():
    # Initialize environment
    env = envs.make('minecraft-v0')
    env.reset()

    # Get action space
    # action_space = env.action_space

    # Plan some random stuff to do
    actions = [
        # (agent_id, action_num)
        (1, 0),
        (2, 4),
        (2, 3),
        (2, 0),
        (1, 3),
        (2, 2),
        (2, 5),  # gets reward of 1
    ]

    # Apply the actions to the env twice
    for i in range(2):
        for a in actions:
            state1, reward, done, info = env.step(a)
            if i < 1:
                # Output results
                print('reward: ', reward)
                print('facing:', state1['facing'])
                print('position:', state1['position'])
                print(state1['view'][::-1, 1, :])
        # Reset env between passes
        env.reset()
def test_grayscale():
    env = envs.make('Pong-v0')
    env.env._obs_type = 'grayscale_image'
    ob = env.reset()
    assert ob.shape == (210, 160, 1)
    ob = env.render('grayscale_array')
    assert ob.shape == (210, 160, 1)
def test_env_version_suggestions(register_some_envs, env_id_input,
                                 suggested_versions, default_version):
    if default_version:
        match_str = "provides the default version"
        with pytest.raises(error.DeprecatedEnv, match=match_str):
            envs.make(env_id_input)
    else:
        match_str = f"versioned environments: \\[ {suggested_versions} \\]"
        with pytest.raises(error.UnregisteredEnv, match=match_str):
            envs.make(env_id_input)
def test_serialize_deserialize():
    env1 = envs.make('HandReach-v0', distance_threshold=1e-6)
    env1.reset()
    env2 = pickle.loads(pickle.dumps(env1))
    assert env1.distance_threshold == env2.distance_threshold, (
        env1.distance_threshold, env2.distance_threshold)
def test_serialize_deserialize(environment_id):
    env1 = envs.make(environment_id, target_position='fixed')
    env1.reset()
    env2 = pickle.loads(pickle.dumps(env1))
    assert env1.target_position == env2.target_position, (
        env1.target_position, env2.target_position)
def test_serialize_deserialize(environment_id):
    env = envs.make(environment_id)
    env.reset()
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step([0.1])
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step(0.1)
def test_environment_reset():
    # Arrange
    env = envs.make('AdServer-v0', num_ads=2, time_series_frequency=10)

    # Act
    (ads, impressions, clicks) = env.reset('Test')

    # Assert
    assert clicks == 0
    assert impressions == 0
    assert ads == [Ad(0), Ad(1)]
def test_environment_step_with_reward():
    # Arrange
    env = envs.make('AdServer-v0', num_ads=2, time_series_frequency=10,
                    reward_policy=lambda x: 1)
    env.reset(scenario_name='Test')

    # Act
    ((ads, impressions, clicks), reward, done, info) = env.step(1)

    # Assert
    assert clicks == 1
    assert impressions == 1
    assert info == {}
    assert reward == 1
    assert not done
    assert ads == [Ad(0), Ad(1, impressions=1, clicks=1)]
def make_envs_by_action_space_type(spec_list: List[EnvSpec], action_space: Space):
    """Make environments of a specific action_space type.

    This function returns a filtered list of environments from spec_list
    whose action spaces match the given action_space type.

    Args:
        spec_list (list): list of registered environments' specifications
        action_space (gym.spaces.Space): action_space type to filter by
    """
    filtered_envs = []
    for spec in spec_list:
        env = envs.make(spec.id)
        if isinstance(env.action_space, action_space):
            filtered_envs.append(env)
    return filtered_envs
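# A possible way to drive make_envs_by_action_space_type, assuming classic
# gym's registry API (envs.registry.all() returns all registered EnvSpecs);
# Discrete is just one example of a filter type.
from gym.spaces import Discrete

all_specs = list(envs.registry.all())
discrete_envs = make_envs_by_action_space_type(all_specs, Discrete)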
def test_box_actions_out_of_bound(env, seed):
    """Test out-of-bound actions in a Box action_space.

    Environments with Box action spaces perform clipping inside `step`.
    The expected behaviour is that an out-of-bound action has the same
    effect as an action with a value exactly at the upper (or lower) bound.

    Args:
        env (gym.Env): the gym environment
        seed (int): seed value for determinism
    """
    OOB_VALUE = 100

    env.reset(seed=seed)
    oob_env = envs.make(env.spec.id)
    oob_env.reset(seed=seed)

    dtype = env.action_space.dtype
    upper_bounds = env.action_space.high
    lower_bounds = env.action_space.low

    for i, (is_upper_bound, is_lower_bound) in enumerate(
            zip(env.action_space.bounded_above, env.action_space.bounded_below)):
        if is_upper_bound:
            obs, _, _, _ = env.step(upper_bounds)
            oob_action = upper_bounds.copy()
            oob_action[i] += np.cast[dtype](OOB_VALUE)
            assert oob_action[i] > upper_bounds[i]
            oob_obs, _, _, _ = oob_env.step(oob_action)
            assert np.alltrue(obs == oob_obs)

        if is_lower_bound:
            obs, _, _, _ = env.step(lower_bounds)
            oob_action = lower_bounds.copy()
            oob_action[i] -= np.cast[dtype](OOB_VALUE)
            assert oob_action[i] < lower_bounds[i]
            oob_obs, _, _, _ = oob_env.step(oob_action)
            assert np.alltrue(obs == oob_obs)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--raw_actions', action='store_true')
    args = parser.parse_args()

    env = envs.make('Go9x9-v0')
    env.reset()
    while True:
        s = env._state
        env._render()

        colorstr = pachi_py.color_to_str(s.color)
        if args.raw_actions:
            a = int(raw_input('{} (raw)> '.format(colorstr)))
        else:
            coordstr = raw_input('{}> '.format(colorstr))
            a = go.str_to_action(s.board, coordstr)

        _, r, done, _ = env.step(a)
        if done:
            break

    print('You win!' if r > 0 else 'Opponent wins!')
    print('Final score:', env._state.board.official_score)
    env = self.env
    ret = []
    for o, r, d in zip(observation_n, reward_n, done_n):
        o = env.observation_convert(o, env._env.observation_space,
                                    env.observation_space)
        obs = np.expand_dims(o, 0)
        action_dist_n = self.session.run(self.action_dist_n, {self.obs: obs})
        action = int(np.argmax(action_dist_n, 1)[0])
        action = env.action_convert(action, env.action_space,
                                    env._env.action_space)
        ret.append(action)
    return ret

experiment_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

print("task = {}".format(args.task))
env = envs.make(args.task)
env.monitor.start(experiment_dir)
agent = ContinTRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(experiment_dir, algorithm_id=algo)
print(experiment_dir)

from sys import argv
print('python {}'.format(' '.join(argv)))
def test_make():
    env = envs.make("CartPole-v0")
    assert env.spec.id == "CartPole-v0"
    assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
        stats["KL between old and new distribution"] = kloldnew
        stats["Surrogate loss"] = surrafter
        for k, v in stats.iteritems():
            print(k + ": " + " " * (40 - len(k)) + str(v))
        if entropy != entropy:  # NaN check: NaN is the only value unequal to itself
            exit(-1)
        if exp > 0.8:
            self.train = False
        i += 1

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

if len(sys.argv) > 1:
    task = sys.argv[1]
else:
    task = "RepeatCopy-v0"

env = envs.make(task)
env.monitor.start(training_dir)
env = SpaceConversionEnv(env, Box, Discrete)

agent = TRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(training_dir, algorithm_id='trpo_ff')
from .modular_rl import *
import argparse, sys, pickle, shutil
import gym, logging
from tabulate import tabulate

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    update_argument_parser(parser, GENERAL_OPTIONS)
    parser.add_argument("--env", required=True)
    parser.add_argument("--agent", required=True)
    parser.add_argument("--plot", action="store_true")
    args, _ = parser.parse_known_args(
        [arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])

    env = make(args.env)
    env_spec = env.spec
    mondir = args.outfile + ".dir"
    if os.path.exists(mondir):
        shutil.rmtree(mondir)
    os.mkdir(mondir)
    env.monitor.start(mondir, video_callable=None if args.video else VIDEO_NEVER)

    agent_ctor = get_agent_cls(args.agent)
    update_argument_parser(parser, agent_ctor.options)
    update_argument_parser(parser, CEM_OPTIONS)
    args = parser.parse_args()
    cfg = args.__dict__

    agent = agent_ctor(env.observation_space, env.action_space, cfg)
    np.random.seed(args.seed)
    hdf, diagnostics = prepare_h5_file(args)
                    type=str,
                    help="name of the environment. Options: Gen3-v0")
parser.add_argument("--mode", choices=["noop", "random", "human"],
                    default="random", help="mode of the agent")
parser.add_argument("--max_steps", type=int, default=0,
                    help="maximum episode length")
parser.add_argument("--fps", type=float)
parser.add_argument("--once", action="store_true")
parser.add_argument("--ignore_done", action="store_true")
args = parser.parse_args()

env = envs.make(args.env)
action_space = env.action_space

mode = args.mode
fps = args.fps or env.metadata.get('video.frames_per_second') or 100
if args.max_steps == 0:
    args.max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
print("max_steps = ", args.max_steps)
print("Press ESC to quit")

reward = 0
done = False

if mode == "random":
    agent = RandomAgent(action_space)
elif mode == "noop":
    agent = NoopAgent(action_space)
import numpy as np
import gym
from gym import spaces, envs

gym.undo_logger_setup()
import logging
logging.getLogger('gym.core').addHandler(logging.NullHandler())

num_trials = 50

print 'Name & Random policy performance'
names = ['CartPole-v0', 'Acrobot-v0', 'MountainCar-v0', 'Reacher-v1',
         'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1', 'Ant-v1', 'Humanoid-v1']
for env_name in names:
    env = envs.make(env_name)
    returns = []
    for _ in xrange(num_trials):
        env.reset()
        ret = 0.
        for _ in xrange(env.spec.timestep_limit):
            _, r, done, _ = env.step(env.action_space.sample())
            ret += r
            if done:
                break
        returns.append(ret)
    print '{} & {} \pm {}'.format(env_name, np.mean(returns), np.std(returns))
parser.add_option("-d", "--discount", dest="discount", default=0.99, type='float',
                  help="Discount rate for future rewards [%default]")
parser.add_option("-t", "--num_frames", dest="nframes", default=2, type='int',
                  help="Number of sequential observations/timesteps to store in a single example [%default]")
parser.add_option("-m", "--max_mem", dest="maxmem", default=100000, type='int',
                  help="Max number of samples to remember [%default]")
parser.add_option("-P", "--plots", dest="plots", action="store_true", default=False,
                  help="Plot learning statistics while running [%default]")
parser.add_option("-F", "--plot_rate", dest="plot_rate", default=10, type='int',
                  help="Plot update rate in episodes [%default]")
parser.add_option("-S", "--submit", dest="submit", action="store_true", default=False,
                  help="Submit results to OpenAI [%default]")
parser.add_option("-a", "--agent", dest="agent", default="ddqn",
                  help="Which learning algorithm to use [%default]")

(options, args) = parser.parse_args()
print options.agent

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

from gym import envs
env = envs.make(options.env)
if options.submit:
    env.monitor.start(training_dir)

import dqn
agent_constructor = {"dqn": dqn.DQN, "ddqn": dqn.D2QN}[options.agent]
agent = agent_constructor(
    env,
    nframes=options.nframes,
    epsilon=options.epsilon,
    discount=options.discount,
    modelfactory=eval("dqn.%s" % (options.net)),
    epsilon_schedule=lambda episode, epsilon: max(0.05, epsilon * (1 - options.epsilon_decay)),
    update_nsamp=options.update_size,
    batch_size=options.bs,
    dropout=options.dropout,
    timesteps_per_batch=options.update_freq,
    stats_rate=options.plot_rate,
    enable_plots=options.plots,
    max_memory=options.maxmem,
)
agent.learn()
def test_make():
    env = envs.make("CartPole-v0")
    assert env.spec.id == "CartPole-v0"
    assert isinstance(env, cartpole.CartPoleEnv)
def __init__(self, experiment="Breakout-v0", env=None, nthreads=16,
             nframes=1, epsilon=0.5, enable_plots=False, render=False,
             learning_rate=1e-4, modelfactory=networks.simple_cnn,
             difference_obs=True, preprocessor=preproc.karpathy_preproc,
             discount=0.99, batch_size=32, epsilon_min=0.05,
             epsilon_schedule=None, stats_rate=10, **kwargs):
    self.kwargs = kwargs
    self.experiment = experiment
    if env is None:
        env = lambda: envs.make(self.experiment)
    self.nthreads = nthreads
    # One environment instance per worker thread (a list comprehension rather
    # than map(), which is lazy on Python 3)
    self.env = [env() for _ in range(self.nthreads)]
    self.model_factory = modelfactory
    self.nframes = nframes
    self.learning_rate = learning_rate
    self.epsilon = epsilon
    self.epsilon_min = epsilon_min
    self.epsilon_schedule = epsilon_schedule
    self.gamma = discount
    self.preprocessor = preprocessor
    self.difference_obs = difference_obs
    self.network_update_frequency = batch_size
    self.target_network_update_frequency = 10000
    self.T = 0
    self.TMAX = 80000000
    self.checkpoint_interval = 600
    self.checkpoint_dir = "/tmp/"
    self.enable_plots = enable_plots
    self.stats_rate = stats_rate
    self.ipy_clear = False
    self.next_plot = 0
    self.e = 0
    self.render = render
    self.render_rate_hz = 5.0
    self.render_ngames = 2
    self.plot_q = Queue.Queue()

    # Set up the input shape, either pre-processed or raw
    if self.preprocessor is not None:
        print(self.env[0].observation_space.shape)
        o = self.preprocessor(np.zeros(self.env[0].observation_space.shape))
        self.input_dim_orig = [self.nframes] + list(o.shape)
    else:
        self.input_dim_orig = [self.nframes] + list(
            self.env[0].observation_space.shape)
    self.input_dim = np.product(self.input_dim_orig)
    print(self.input_dim, self.input_dim_orig)

    # Set up plotting storage
    self.stats = None
    if self.enable_plots:
        self.stats = {
            "tr": statbin(self.stats_rate),     # Total Reward
            "ft": statbin(self.stats_rate),     # Finishing Time
            "minvf": statbin(self.stats_rate),  # Min Value Fn
            "maxvf": statbin(self.stats_rate),  # Max Value Fn
            "cost": statbin(self.stats_rate),   # Loss
        }

    # Set up the TF session
    self.session = tf.Session()
    K.set_session(self.session)
    self.setup_graphs()
    self.saver = tf.train.Saver()
def __init__(self, env_name):
    self.env = envs.make(env_name)
    self.action_space = self.env.action_space
    self.curr_obs = self.env.reset()
    self.is_done = False
import gym
from gym import spaces, envs
import argparse
import numpy as np
import itertools
import time
from builtins import input
import random
from mujoco_py.modder import TextureModder, MaterialModder
import cv2
from functions_mpi import *

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

number_to_experiment = [1, 3, 5, 107, 109, 111]

if rank == 0:
    print("Hello I am the master rank", str(rank), "of", str(size))
    env = envs.make("FetchSlide-v1")
    MasterProgramCrossEntropy(env, size, comm)
else:
    print("Hello I am the slave rank", str(rank), "of", str(size))
    env = envs.make("FetchSlide-v1")
    # SlaveProgram(rank, env)
    SlaveProgramCrossEntropyExperimentReward(rank, env, comm, number_to_experiment)
import argparse, sys, cPickle
from tabulate import tabulate
import shutil, os, logging
import gym
import numpy as np

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    update_argument_parser(parser, GENERAL_OPTIONS)
    parser.add_argument("--env", required=True)
    parser.add_argument("--agent", required=True)
    parser.add_argument("--plot", action="store_true")
    args, _ = parser.parse_known_args(
        [arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])

    env = make(args.env)
    env_spec = env.spec
    mondir = args.outfile + ".dir"
    if os.path.exists(mondir):
        shutil.rmtree(mondir)
    os.mkdir(mondir)
    env.monitor.start(mondir, video_callable=None if args.video else VIDEO_NEVER)

    agent_ctor = get_agent_cls(args.agent)
    update_argument_parser(parser, agent_ctor.options)
    args = parser.parse_args()
    if args.timestep_limit == 0:
        args.timestep_limit = env_spec.timestep_limit
    cfg = args.__dict__
    np.random.seed(args.seed)

    print env.observation_space, env.action_space
def test_env_suggestions(register_some_envs, env_id_input, env_id_suggested):
    with pytest.raises(error.UnregisteredEnv,
                       match=f"Did you mean: `{env_id_suggested}` ?"):
        envs.make(env_id_input)
                    'random number generator. Negative value is ignored.')
args = parser.parse_args()
if args.seed >= 0:
    np.random.seed(args.seed)

# Use xrange for Python 2.7 to speed up.
if sys.version_info.major < 3:
    range = xrange

# Create an OpenAI Gym environment, and obtain its state/action information.
if args.env not in envs.registry.env_specs.keys():
    # Try to find it in the local environment libraries.
    env_factory.register_env(args.env)
env = envs.make(args.env)
o_space = env.observation_space
a_space = env.action_space
image_observation = (isinstance(env.env, AtariEnv)
                     and env.env._obs_type == 'image')
print("Loaded environment '{0}'".format(args.env))
print("Observation space: '{0}'".format(o_space))
print("Action space: '{0}'".format(a_space))
print('Is observation an image: {0}'.format(image_observation))

if args.max_episode_steps <= 0:
    args.max_episode_steps = \
        env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']

# Create an agent.
agent = agent_factory.make_agent(args.agent_config,
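# For the env_factory fallback above: with plain gym (no local factory), a
# custom environment is typically registered before envs.make can resolve
# its id. A sketch; the id, module path, and class name are placeholders.
from gym.envs.registration import register

register(
    id='MyCustomEnv-v0',
    entry_point='my_package.my_module:MyCustomEnv',
    max_episode_steps=1000,
)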
import gym
from gym import spaces, envs

gym.undo_logger_setup()
import logging
logging.getLogger('gym.core').addHandler(logging.NullHandler())

names = ['CartPole-v0', 'Acrobot-v0', 'MountainCar-v0', 'Reacher-v1',
         'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1', 'Ant-v1', 'Humanoid-v1']
for n in names:
    env = envs.make(n)
    aspace = env.action_space
    if isinstance(aspace, spaces.Box):
        acont = True
        asize = aspace.low.shape[0]
    else:
        acont = False
        asize = aspace.n
    ospace = env.observation_space
    if isinstance(ospace, spaces.Box):
        ocont = True
        osize = ospace.low.shape[0]
    else:
        ocont = False
        osize = ospace.n
    print '{} & {} ({}) & {} ({}) \\\\'.format(
        n, osize, 'continuous' if ocont else 'discrete',
        asize, 'continuous' if acont else 'discrete')
        print(k + ": " + " " * (40 - len(k)) + str(v))
    if entropy != entropy:  # NaN check
        exit(-1)
    if exp > 0.8:
        self.train = False
    i += 1

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

# if len(sys.argv) > 1:
#     print(9284)
#     task = sys.argv[1]
# else:
#     print(8966)
#     task = "RepeatCopy-v0"
task = "CartPole-v0"
# task = "MountainCar-v0"

env = envs.make(task)
# env.monitor.start(training_dir)
# env = SpaceConversionEnv(env, Box, Discrete)

agent = TRPOAgent(env)
agent.learn()
# These only make sense if env.monitor.start above is re-enabled,
# so they are commented out to match.
# env.monitor.close()
# gym.upload(training_dir, algorithm_id='trpo_ff')
def test_return_latest_versioned_env(register_some_envs):
    with pytest.warns(UserWarning):
        env = envs.make("MyAwesomeNamespace/MyAwesomeVersionedEnv")
    assert env.spec.id == "MyAwesomeNamespace/MyAwesomeVersionedEnv-v5"
from modular_rl import *
import argparse, sys, cPickle
from tabulate import tabulate
import shutil, os, logging
import gym

mondir = "tmp.dir"
env = 'CartPole-v0'
video = False
agent = 'modular_rl.agentzoo.TrpoAgent'
seed = 7
use_hdf = False

env = make(env)
os.mkdir(mondir)
env.monitor.start(mondir, video_callable=None if video else VIDEO_NEVER)
agent_ctor = get_agent_cls(agent)
# update_argument_parser(parser, agent_ctor.options)
# if args.timestep_limit == 0:
#     args.timestep_limit = env_spec.timestep_limit
# cfg = args.__dict__
np.random.seed(seed)
agent = agent_ctor(env.observation_space, env.action_space, [])
# if use_hdf:
#     hdf, diagnostics = prepare_h5_file(args)
gym.logger.setLevel(logging.WARN)

COUNTER = 0