def test_make_deprecated():
    try:
        envs.make('Humanoid-v0')
    except error.Error:
        pass
    else:
        assert False
def verify_environments_match(old_environment_id, new_environment_id,
                              seed=1, num_actions=1000):
    old_environment = envs.make(old_environment_id)
    new_environment = envs.make(new_environment_id)
    old_environment.seed(seed)
    new_environment.seed(seed)

    old_reset_observation = old_environment.reset()
    new_reset_observation = new_environment.reset()
    np.testing.assert_allclose(old_reset_observation, new_reset_observation)

    for i in range(num_actions):
        action = old_environment.action_space.sample()
        old_observation, old_reward, old_done, old_info = old_environment.step(action)
        new_observation, new_reward, new_done, new_info = new_environment.step(action)

        eps = 1e-6
        np.testing.assert_allclose(old_observation, new_observation, atol=eps)
        np.testing.assert_allclose(old_reward, new_reward, atol=eps)
        np.testing.assert_allclose(old_done, new_done, atol=eps)
        for key in old_info:
            np.testing.assert_allclose(old_info[key], new_info[key], atol=eps)
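# A minimal usage sketch for verify_environments_match above. The environment
# ids here are hypothetical placeholders for an old and a re-implemented
# version of the same environment; any pair of registered, observation-
# compatible ids would do.
def test_old_and_new_envs_match():
    verify_environments_match('MyEnv-v0', 'MyEnv-v1', seed=1, num_actions=100)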
def test_random_rollout():
    for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
        agent = lambda ob: env.action_space.sample()
        ob = env.reset()
        for _ in range(10):
            assert env.observation_space.contains(ob)
            a = agent(ob)
            assert env.action_space.contains(a)
            (ob, _reward, done, _info) = env.step(a)
            if done:
                break
def test_env_render_result_is_immutable():
    environs = [
        envs.make("Taxi-v3"),
        envs.make("FrozenLake-v1"),
    ]
    for env in environs:
        env.reset()
        output = env.render(mode="ansi")
        assert isinstance(output, str)
        env.close()
def test_env_render_result_is_immutable():
    environs = [
        envs.make('Taxi-v3'),
        envs.make('FrozenLake-v0'),
        envs.make('Reverse-v0'),
    ]
    for env in environs:
        env.reset()
        output = env.render(mode='ansi')
        assert isinstance(output, str)
        env.close()
def test_env_render_result_is_immutable():
    from six import string_types
    environs = [
        envs.make('Taxi-v2'),
        envs.make('FrozenLake-v0'),
        envs.make('Reverse-v0'),
    ]
    for env in environs:
        env.reset()
        output = env.render(mode='ansi')
        assert isinstance(output, string_types)
        env.close()
def test_make_with_kwargs():
    env = envs.make("test.ArgumentEnv-v0", arg2="override_arg2",
                    arg3="override_arg3")
    assert env.spec.id == "test.ArgumentEnv-v0"
    assert isinstance(env.unwrapped, ArgumentEnv)
    assert env.arg1 == "arg1"
    assert env.arg2 == "override_arg2"
    assert env.arg3 == "override_arg3"
def main():
    # Initialize environment
    env = envs.make('minecraft-v0')
    env.reset()

    # Get action space
    # action_space = env.action_space

    # Plan some random stuff to do
    actions = [
        # (agent_id, action_num)
        (1, 0),
        (2, 4),
        (2, 3),
        (2, 0),
        (1, 3),
        (2, 2),
        (2, 5),  # gets reward of 1
    ]

    # Apply the actions to the env twice
    for i in range(2):
        for a in actions:
            state1, reward, done, info = env.step(a)
            if i < 1:
                # Output results
                print('reward: ', reward)
                print('facing:', state1['facing'])
                print('position:', state1['position'])
                print(state1['view'][::-1, 1, :])
        # Reset env between passes
        env.reset()
def test_grayscale():
    env = envs.make('Pong-v0')
    env.env._obs_type = 'grayscale_image'
    ob = env.reset()
    assert ob.shape == (210, 160, 1)
    ob = env.render('grayscale_array')
    assert ob.shape == (210, 160, 1)
def test_env_version_suggestions(register_some_envs, env_id_input,
                                 suggested_versions, default_version):
    if default_version:
        match_str = "provides the default version"
        with pytest.raises(error.DeprecatedEnv, match=match_str):
            envs.make(env_id_input)
    else:
        match_str = f"versioned environments: \\[ {suggested_versions} \\]"
        with pytest.raises(error.UnregisteredEnv, match=match_str):
            envs.make(env_id_input)
def test_serialize_deserialize():
    env1 = envs.make('HandReach-v0', distance_threshold=1e-6)
    env1.reset()
    env2 = pickle.loads(pickle.dumps(env1))
    assert env1.distance_threshold == env2.distance_threshold, (
        env1.distance_threshold, env2.distance_threshold)
def test_serialize_deserialize(environment_id):
    env1 = envs.make(environment_id, target_position='fixed')
    env1.reset()
    env2 = pickle.loads(pickle.dumps(env1))
    assert env1.target_position == env2.target_position, (
        env1.target_position, env2.target_position)
def test_serialize_deserialize(environment_id):
    env = envs.make(environment_id)
    env.reset()
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step([0.1])
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step(0.1)
def test_environment_reset():
    # Arrange
    env = envs.make('AdServer-v0', num_ads=2, time_series_frequency=10)

    # Act
    (ads, impressions, clicks) = env.reset('Test')

    # Assert
    assert clicks == 0
    assert impressions == 0
    assert ads == [Ad(0), Ad(1)]
def test_environment_step_with_reward():
    # Arrange
    env = envs.make('AdServer-v0', num_ads=2, time_series_frequency=10,
                    reward_policy=lambda x: 1)
    env.reset(scenario_name='Test')

    # Act
    ((ads, impressions, clicks), reward, done, info) = env.step(1)

    # Assert
    assert clicks == 1
    assert impressions == 1
    assert info == {}
    assert reward == 1
    assert not done
    assert ads == [Ad(0), Ad(1, impressions=1, clicks=1)]
def make_envs_by_action_space_type(spec_list: List[EnvSpec], action_space: Space):
    """Make environments of a specific action_space type.

    This function returns a filtered list of environments from spec_list
    whose action spaces match the given action_space type.

    Args:
        spec_list (list): list of registered environments' specifications
        action_space (gym.spaces.Space): action_space type to filter by
    """
    filtered_envs = []
    for spec in spec_list:
        env = envs.make(spec.id)
        if isinstance(env.action_space, action_space):
            filtered_envs.append(env)
    return filtered_envs
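# A possible way to drive make_envs_by_action_space_type, assuming classic
# gym's registry API (envs.registry.all() returns all registered EnvSpecs);
# Discrete is just one example of a filter type.
from gym.spaces import Discrete

all_specs = list(envs.registry.all())
discrete_envs = make_envs_by_action_space_type(all_specs, Discrete)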
def test_box_actions_out_of_bound(env, seed):
    """Test out-of-bound actions in a Box action_space.

    Environments with Box action spaces perform clipping inside `step`.
    The expected behaviour is that an out-of-bound action has the same
    effect as an action with a value exactly at the upper (or lower) bound.

    Args:
        env (gym.Env): the gym environment
        seed (int): seed value for determinism
    """
    OOB_VALUE = 100

    env.reset(seed=seed)
    oob_env = envs.make(env.spec.id)
    oob_env.reset(seed=seed)

    dtype = env.action_space.dtype
    upper_bounds = env.action_space.high
    lower_bounds = env.action_space.low

    for i, (is_upper_bound, is_lower_bound) in enumerate(
            zip(env.action_space.bounded_above, env.action_space.bounded_below)):
        if is_upper_bound:
            obs, _, _, _ = env.step(upper_bounds)
            oob_action = upper_bounds.copy()
            oob_action[i] += np.cast[dtype](OOB_VALUE)
            assert oob_action[i] > upper_bounds[i]
            oob_obs, _, _, _ = oob_env.step(oob_action)
            assert np.alltrue(obs == oob_obs)

        if is_lower_bound:
            obs, _, _, _ = env.step(lower_bounds)
            oob_action = lower_bounds.copy()
            oob_action[i] -= np.cast[dtype](OOB_VALUE)
            assert oob_action[i] < lower_bounds[i]
            oob_obs, _, _, _ = oob_env.step(oob_action)
            assert np.alltrue(obs == oob_obs)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--raw_actions', action='store_true')
    args = parser.parse_args()

    env = envs.make('Go9x9-v0')
    env.reset()
    while True:
        s = env._state
        env._render()

        colorstr = pachi_py.color_to_str(s.color)
        if args.raw_actions:
            a = int(raw_input('{} (raw)> '.format(colorstr)))
        else:
            coordstr = raw_input('{}> '.format(colorstr))
            a = go.str_to_action(s.board, coordstr)

        _, r, done, _ = env.step(a)
        if done:
            break

    print('You win!' if r > 0 else 'Opponent wins!')
    print('Final score:', env._state.board.official_score)
    env = self.env
    ret = []
    for o, r, d in zip(observation_n, reward_n, done_n):
        o = env.observation_convert(o, env._env.observation_space,
                                    env.observation_space)
        obs = np.expand_dims(o, 0)
        action_dist_n = self.session.run(self.action_dist_n, {self.obs: obs})
        action = int(np.argmax(action_dist_n, 1)[0])
        action = env.action_convert(action, env.action_space,
                                    env._env.action_space)
        ret.append(action)
    return ret

experiment_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

print("task = {}".format(args.task))
env = envs.make(args.task)
env.monitor.start(experiment_dir)
agent = ContinTRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(experiment_dir, algorithm_id=algo)
print(experiment_dir)

from sys import argv
print('python {}'.format(' '.join(argv)))
def test_make():
    env = envs.make("CartPole-v0")
    assert env.spec.id == "CartPole-v0"
    assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
        stats["KL between old and new distribution"] = kloldnew
        stats["Surrogate loss"] = surrafter
        for k, v in stats.iteritems():
            print(k + ": " + " " * (40 - len(k)) + str(v))
        if entropy != entropy:  # NaN check: NaN is the only value unequal to itself
            exit(-1)
        if exp > 0.8:
            self.train = False
        i += 1

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

if len(sys.argv) > 1:
    task = sys.argv[1]
else:
    task = "RepeatCopy-v0"

env = envs.make(task)
env.monitor.start(training_dir)
env = SpaceConversionEnv(env, Box, Discrete)

agent = TRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(training_dir, algorithm_id='trpo_ff')
from .modular_rl import *
import argparse, sys, pickle, shutil
import gym, logging
from tabulate import tabulate

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    update_argument_parser(parser, GENERAL_OPTIONS)
    parser.add_argument("--env", required=True)
    parser.add_argument("--agent", required=True)
    parser.add_argument("--plot", action="store_true")
    args, _ = parser.parse_known_args(
        [arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])

    env = make(args.env)
    env_spec = env.spec
    mondir = args.outfile + ".dir"
    if os.path.exists(mondir):
        shutil.rmtree(mondir)
    os.mkdir(mondir)
    env.monitor.start(mondir, video_callable=None if args.video else VIDEO_NEVER)

    agent_ctor = get_agent_cls(args.agent)
    update_argument_parser(parser, agent_ctor.options)
    update_argument_parser(parser, CEM_OPTIONS)
    args = parser.parse_args()
    cfg = args.__dict__

    agent = agent_ctor(env.observation_space, env.action_space, cfg)
    np.random.seed(args.seed)
    hdf, diagnostics = prepare_h5_file(args)
                    type=str,
                    help="name of the environment. Options: Gen3-v0")
parser.add_argument("--mode", choices=["noop", "random", "human"],
                    default="random", help="mode of the agent")
parser.add_argument("--max_steps", type=int, default=0,
                    help="maximum episode length")
parser.add_argument("--fps", type=float)
parser.add_argument("--once", action="store_true")
parser.add_argument("--ignore_done", action="store_true")
args = parser.parse_args()

env = envs.make(args.env)
action_space = env.action_space

mode = args.mode
fps = args.fps or env.metadata.get('video.frames_per_second') or 100
if args.max_steps == 0:
    args.max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
print("max_steps = ", args.max_steps)
print("Press ESC to quit")

reward = 0
done = False

if mode == "random":
    agent = RandomAgent(action_space)
elif mode == "noop":
    agent = NoopAgent(action_space)
import numpy as np
import gym
from gym import spaces, envs

gym.undo_logger_setup()
import logging
logging.getLogger('gym.core').addHandler(logging.NullHandler())

num_trials = 50

print 'Name & Random policy performance'
names = ['CartPole-v0', 'Acrobot-v0', 'MountainCar-v0', 'Reacher-v1',
         'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1', 'Ant-v1', 'Humanoid-v1']
for env_name in names:
    env = envs.make(env_name)
    returns = []
    for _ in xrange(num_trials):
        env.reset()
        ret = 0.
        for _ in xrange(env.spec.timestep_limit):
            _, r, done, _ = env.step(env.action_space.sample())
            ret += r
            if done:
                break
        returns.append(ret)
    print '{} & {} \pm {}'.format(env_name, np.mean(returns), np.std(returns))
parser.add_option("-d", "--discount", dest="discount", default=0.99, type='float',
                  help="Discount rate for future rewards [%default]")
parser.add_option("-t", "--num_frames", dest="nframes", default=2, type='int',
                  help="Number of sequential observations/timesteps to store in a single example [%default]")
parser.add_option("-m", "--max_mem", dest="maxmem", default=100000, type='int',
                  help="Max number of samples to remember [%default]")
parser.add_option("-P", "--plots", dest="plots", action="store_true", default=False,
                  help="Plot learning statistics while running [%default]")
parser.add_option("-F", "--plot_rate", dest="plot_rate", default=10, type='int',
                  help="Plot update rate in episodes [%default]")
parser.add_option("-S", "--submit", dest="submit", action="store_true", default=False,
                  help="Submit results to OpenAI [%default]")
parser.add_option("-a", "--agent", dest="agent", default="ddqn",
                  help="Which learning algorithm to use [%default]")

(options, args) = parser.parse_args()
print options.agent

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

from gym import envs
env = envs.make(options.env)
if options.submit:
    env.monitor.start(training_dir)

import dqn
agent_constructor = {"dqn": dqn.DQN, "ddqn": dqn.D2QN}[options.agent]
agent = agent_constructor(
    env,
    nframes=options.nframes,
    epsilon=options.epsilon,
    discount=options.discount,
    modelfactory=eval("dqn.%s" % (options.net)),
    epsilon_schedule=lambda episode, epsilon: max(0.05, epsilon * (1 - options.epsilon_decay)),
    update_nsamp=options.update_size,
    batch_size=options.bs,
    dropout=options.dropout,
    timesteps_per_batch=options.update_freq,
    stats_rate=options.plot_rate,
    enable_plots=options.plots,
    max_memory=options.maxmem,
)
agent.learn()
def test_make():
    env = envs.make("CartPole-v0")
    assert env.spec.id == "CartPole-v0"
    assert isinstance(env, cartpole.CartPoleEnv)
def __init__(self, experiment="Breakout-v0", env=None, nthreads=16,
             nframes=1, epsilon=0.5, enable_plots=False, render=False,
             learning_rate=1e-4, modelfactory=networks.simple_cnn,
             difference_obs=True, preprocessor=preproc.karpathy_preproc,
             discount=0.99, batch_size=32, epsilon_min=0.05,
             epsilon_schedule=None, stats_rate=10, **kwargs):
    self.kwargs = kwargs
    self.experiment = experiment
    if env is None:
        env = lambda: envs.make(self.experiment)
    self.nthreads = nthreads
    # One environment instance per worker thread (a list comprehension rather
    # than map(), which is lazy on Python 3)
    self.env = [env() for _ in range(self.nthreads)]
    self.model_factory = modelfactory
    self.nframes = nframes
    self.learning_rate = learning_rate
    self.epsilon = epsilon
    self.epsilon_min = epsilon_min
    self.epsilon_schedule = epsilon_schedule
    self.gamma = discount
    self.preprocessor = preprocessor
    self.difference_obs = difference_obs
    self.network_update_frequency = batch_size
    self.target_network_update_frequency = 10000
    self.T = 0
    self.TMAX = 80000000
    self.checkpoint_interval = 600
    self.checkpoint_dir = "/tmp/"
    self.enable_plots = enable_plots
    self.stats_rate = stats_rate
    self.ipy_clear = False
    self.next_plot = 0
    self.e = 0
    self.render = render
    self.render_rate_hz = 5.0
    self.render_ngames = 2
    self.plot_q = Queue.Queue()

    # Set up the input shape, either pre-processed or raw
    if self.preprocessor is not None:
        print(self.env[0].observation_space.shape)
        o = self.preprocessor(np.zeros(self.env[0].observation_space.shape))
        self.input_dim_orig = [self.nframes] + list(o.shape)
    else:
        self.input_dim_orig = [self.nframes] + list(
            self.env[0].observation_space.shape)
    self.input_dim = np.product(self.input_dim_orig)
    print(self.input_dim, self.input_dim_orig)

    # Set up plotting storage
    self.stats = None
    if self.enable_plots:
        self.stats = {
            "tr": statbin(self.stats_rate),     # Total Reward
            "ft": statbin(self.stats_rate),     # Finishing Time
            "minvf": statbin(self.stats_rate),  # Min Value Fn
            "maxvf": statbin(self.stats_rate),  # Max Value Fn
            "cost": statbin(self.stats_rate),   # Loss
        }

    # Set up the TF session
    self.session = tf.Session()
    K.set_session(self.session)
    self.setup_graphs()
    self.saver = tf.train.Saver()
def __init__(self, env_name):
    self.env = envs.make(env_name)
    self.action_space = self.env.action_space
    self.curr_obs = self.env.reset()
    self.is_done = False
import gym
from gym import spaces, envs
import argparse
import numpy as np
import itertools
import time
from builtins import input
import random
from mujoco_py.modder import TextureModder, MaterialModder
import cv2
from functions_mpi import *

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

number_to_experiment = [1, 3, 5, 107, 109, 111]

if rank == 0:
    print("Hello I am the master rank", str(rank), "of", str(size))
    env = envs.make("FetchSlide-v1")
    MasterProgramCrossEntropy(env, size, comm)
else:
    print("Hello I am the slave rank", str(rank), "of", str(size))
    env = envs.make("FetchSlide-v1")
    # SlaveProgram(rank, env)
    SlaveProgramCrossEntropyExperimentReward(rank, env, comm, number_to_experiment)
import argparse, sys, cPickle
from tabulate import tabulate
import shutil, os, logging
import gym
import numpy as np

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    update_argument_parser(parser, GENERAL_OPTIONS)
    parser.add_argument("--env", required=True)
    parser.add_argument("--agent", required=True)
    parser.add_argument("--plot", action="store_true")
    args, _ = parser.parse_known_args(
        [arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])

    env = make(args.env)
    env_spec = env.spec
    mondir = args.outfile + ".dir"
    if os.path.exists(mondir):
        shutil.rmtree(mondir)
    os.mkdir(mondir)
    env.monitor.start(mondir, video_callable=None if args.video else VIDEO_NEVER)

    agent_ctor = get_agent_cls(args.agent)
    update_argument_parser(parser, agent_ctor.options)
    args = parser.parse_args()
    if args.timestep_limit == 0:
        args.timestep_limit = env_spec.timestep_limit
    cfg = args.__dict__
    np.random.seed(args.seed)

    print env.observation_space, env.action_space
def test_env_suggestions(register_some_envs, env_id_input, env_id_suggested):
    with pytest.raises(error.UnregisteredEnv,
                       match=f"Did you mean: `{env_id_suggested}` ?"):
        envs.make(env_id_input)
                    'random number generator. Negative value is ignored.')
args = parser.parse_args()
if args.seed >= 0:
    np.random.seed(args.seed)

# Use xrange for Python 2.7 to speed up.
if sys.version_info.major < 3:
    range = xrange

# Create an OpenAI Gym environment, and obtain its state/action information.
if args.env not in envs.registry.env_specs.keys():
    # Try to find it in the local environment libraries.
    env_factory.register_env(args.env)
env = envs.make(args.env)
o_space = env.observation_space
a_space = env.action_space
image_observation = (isinstance(env.env, AtariEnv)
                     and env.env._obs_type == 'image')
print("Loaded environment '{0}'".format(args.env))
print("Observation space: '{0}'".format(o_space))
print("Action space: '{0}'".format(a_space))
print('Is observation an image: {0}'.format(image_observation))

if args.max_episode_steps <= 0:
    args.max_episode_steps = \
        env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']

# Create an agent.
agent = agent_factory.make_agent(args.agent_config,
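# For the env_factory fallback above: with plain gym (no local factory), a
# custom environment is typically registered before envs.make can resolve
# its id. A sketch; the id, module path, and class name are placeholders.
from gym.envs.registration import register

register(
    id='MyCustomEnv-v0',
    entry_point='my_package.my_module:MyCustomEnv',
    max_episode_steps=1000,
)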
import gym
from gym import spaces, envs

gym.undo_logger_setup()
import logging
logging.getLogger('gym.core').addHandler(logging.NullHandler())

names = ['CartPole-v0', 'Acrobot-v0', 'MountainCar-v0', 'Reacher-v1',
         'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1', 'Ant-v1', 'Humanoid-v1']
for n in names:
    env = envs.make(n)
    aspace = env.action_space
    if isinstance(aspace, spaces.Box):
        acont = True
        asize = aspace.low.shape[0]
    else:
        acont = False
        asize = aspace.n
    ospace = env.observation_space
    if isinstance(ospace, spaces.Box):
        ocont = True
        osize = ospace.low.shape[0]
    else:
        ocont = False
        osize = ospace.n
    print '{} & {} ({}) & {} ({}) \\\\'.format(
        n, osize, 'continuous' if ocont else 'discrete',
        asize, 'continuous' if acont else 'discrete')
        print(k + ": " + " " * (40 - len(k)) + str(v))
    if entropy != entropy:  # NaN check
        exit(-1)
    if exp > 0.8:
        self.train = False
    i += 1

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

# if len(sys.argv) > 1:
#     print(9284)
#     task = sys.argv[1]
# else:
#     print(8966)
#     task = "RepeatCopy-v0"
task = "CartPole-v0"
# task = "MountainCar-v0"

env = envs.make(task)
# env.monitor.start(training_dir)
# env = SpaceConversionEnv(env, Box, Discrete)

agent = TRPOAgent(env)
agent.learn()
# These only make sense if env.monitor.start above is re-enabled,
# so they are commented out to match.
# env.monitor.close()
# gym.upload(training_dir, algorithm_id='trpo_ff')
def test_return_latest_versioned_env(register_some_envs):
    with pytest.warns(UserWarning):
        env = envs.make("MyAwesomeNamespace/MyAwesomeVersionedEnv")
    assert env.spec.id == "MyAwesomeNamespace/MyAwesomeVersionedEnv-v5"
from modular_rl import *
import argparse, sys, cPickle
from tabulate import tabulate
import shutil, os, logging
import gym

mondir = "tmp.dir"
env = 'CartPole-v0'
video = False
agent = 'modular_rl.agentzoo.TrpoAgent'
seed = 7
use_hdf = False

env = make(env)
os.mkdir(mondir)
env.monitor.start(mondir, video_callable=None if video else VIDEO_NEVER)
agent_ctor = get_agent_cls(agent)
# update_argument_parser(parser, agent_ctor.options)
# if args.timestep_limit == 0:
#     args.timestep_limit = env_spec.timestep_limit
# cfg = args.__dict__
np.random.seed(seed)
agent = agent_ctor(env.observation_space, env.action_space, [])
# if use_hdf:
#     hdf, diagnostics = prepare_h5_file(args)
gym.logger.setLevel(logging.WARN)

COUNTER = 0