def main(env_id, policy_file, record, stochastic, extra_kwargs):
    """Load a saved MujocoPolicy and roll it out in `env_id`, rendering each step.

    Loops forever unless `record` is set, in which case it records one run to a
    temp directory and exits after the first rollout.
    """
    import lab
    from lab import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = lab.make(env_id)
    if record:
        import uuid
        # Unique monitor directory per invocation.
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)
    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        # Sample actions stochastically only when requested; otherwise act deterministically.
        stream = np.random if stochastic else None
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=stream)
            print('return={:.4f} len={}'.format(rews.sum(), t))
            if record:
                env.close()
                return
def main():
    """Run a random agent against flashgames.NeonRace-v0 over VNC."""
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0,
                        help='Set verbosity.')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    env = lab.make('flashgames.NeonRace-v0')
    env.configure(remotes=1)  # automatically creates a local docker container

    # Restrict the valid random actions. (Try removing this and see
    # what happens when the agent is given full control of the
    # keyboard/mouse.)
    env = wrappers.experimental.SafeActionSpace(env)

    observation_n = env.reset()
    while True:
        # your agent here
        #
        # Try sending this instead of a random action: ('KeyEvent', 'ArrowUp', True)
        action_n = [env.action_space.sample() for _ in observation_n]
        observation_n, reward_n, done_n, info = env.step(action_n)
        env.render()
    return 0
def create_flash_env(env_id, client_id, remotes, **_):
    """Build a VNC Flash-game env: cropped/rescaled vision plus a fixed key action set."""
    env = lab.make(env_id)
    for wrap in (Vision, Logger, BlockingReset):
        env = wrap(env)

    # Crop to the game's registered screen geometry before rescaling.
    reg = institute.runtime_spec('flashgames').server_registry
    env = CropScreen(env, reg[env_id]["height"], reg[env_id]["width"], 84, 18)
    env = FlashRescale(env)

    keys = ['left', 'right', 'up', 'down', 'x']
    if env_id == 'flashgames.NeonRace-v0':
        # Better key space for this game.
        keys = ['left', 'right', 'up', 'left up', 'right up', 'down', 'up x']
    logger.info('create_flash_env(%s): keys=%s', env_id, keys)

    env = DiscreteToFixedKeysVNCActions(env, keys)
    for wrap in (EpisodeID, DiagnosticsInfo, Unvectorize):
        env = wrap(env)

    env.configure(fps=5.0, remotes=remotes, start_timeout=15 * 60, client_id=client_id,
                  vnc_driver='go',
                  vnc_kwargs={
                      'encoding': 'tight',
                      'compress_level': 0,
                      'fine_quality_level': 50,
                      'subsample_level': 3,
                  })
    return env
def test_describe_handling():
    """A describe message updates remote state; the reset reply binds the current episode."""
    env = lab.make('flashgames.DuskDrive-v0')
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()
    reward_buffer = get_reward_buffer(env)
    rewarder_client = get_rewarder_client(env)

    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '1'})
    assert reward_buffer._remote_episode_id == '1'
    assert reward_buffer._remote_env_state == 'resetting'
    # Fixed: use `is None` rather than `== None` (PEP 8 / flake8 E711).
    assert reward_buffer._current_episode_id is None
    assert reward_buffer.reward_state(
        reward_buffer._current_episode_id)._env_state is None

    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    assert reward_buffer._remote_episode_id == '1'
    assert reward_buffer._remote_env_state == 'resetting'
    assert reward_buffer._current_episode_id == '1'
    assert reward_buffer.reward_state(
        reward_buffer._current_episode_id)._env_state == 'resetting'
def create_atari_env(env_id):
    """Build a pixel Atari env rescaled to 42x42, with diagnostics, unvectorized."""
    env = lab.make(env_id)
    for wrap in (Vectorize, AtariRescale42x42, DiagnosticsInfo, Unvectorize):
        env = wrap(env)
    return env
def test_connect():
    """Configuring against a vnc:// remote populates both session tables."""
    env = lab.make('flashgames.DuskDrive-v0')
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')

    expected_vnc = {
        '0': {
            'name': '0',
            'subsample_level': 2,
            'encoding': 'tight',
            'fine_quality_level': 50,
            'start_timeout': 7,
            'address': 'example.com:5900',
            'password': '******',
        }
    }
    expected_rewarder = {
        '0': {
            'start_timeout': 7,
            'seed': None,
            'name': '0',
            'fps': 60,
            'address': 'example.com:15900',
            'env_id': 'flashgames.DuskDrive-v0',
            'password': '******',
            'skip_network_calibration': False,
            'observer': False,
            'label': '0:example.com:5900',
        }
    }
    assert get_vnc_session(env)._to_dict() == expected_vnc
    assert get_rewarder_session(env)._to_dict() == expected_rewarder
def test_vnc_env():
    """Rewards accumulate across rewarder messages and surface on the next step."""
    env = lab.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()
    rewarder_client = get_rewarder_client(env)

    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '1'})
    # Before the reset reply arrives, the step reports nothing.
    observation, reward, done, info = env.step([spaces.KeyEvent.by_name('a', down=True)])
    assert (observation, reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (None, 0, False, None, None)

    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    observation, reward, done, info = env.step([spaces.KeyEvent.by_name('a', down=True)])
    assert (observation, reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (None, 0, False, 'resetting', '1')

    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'running',
        'fps': 60
    }, {'episode_id': '1'})
    # Three reward messages within the same episode: 10 + 15 - 3 = 22.
    for amount in (10, 15, -3):
        rewarder_client._manual_recv('v0.env.reward', {
            'reward': amount,
            'done': False,
            'info': {}
        }, {'episode_id': '1'})

    observation, reward, done, info = env.step([spaces.KeyEvent.by_name('a', down=True)])
    assert sorted(observation.keys()) == ['text', 'vision']
    assert observation['text'] == []
    assert observation['vision'].shape == (768, 1024, 3)
    assert (reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (22, False, 'running', '1')
    assert info['stats.reward.count'] == 3
def test_steps_limit_restart_unused_when_not_wrapped():
    """Without a TimeLimit wrapper, the env's step limit never fires."""
    env = lab.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env.reset()
    for _ in range(10):
        _obs, _rew, done, _info = env.step([[]])
        assert done == [False]
def test_joint():
    """wrappers.Joint concatenates two vectorized envs into one 6-wide env."""
    env1 = lab.make('test.DummyVNCEnv-v0')
    env2 = lab.make('test.DummyVNCEnv-v0')
    for child in (env1, env2):
        child.configure(_n=3)

    # Seed index 0 of each child with a running episode worth 10 reward.
    for buf in (env1._reward_buffers[0], env2._reward_buffers[0]):
        buf.set_env_info('running', 'test.DummyVNCEnv-v0', '1', 60)
        buf.reset('1')
        buf.push('1', 10, False, {})

    env = wrappers.Joint([env1, env2])
    assert env.n == 6
    assert env.reset() == [None] * 6

    _obs_n, reward_n, done_n, _info = env.step([[] for _ in range(env.n)])
    assert reward_n == [10.0, 0.0, 0.0, 10.0, 0.0, 0.0]
    assert done_n == [False] * 6
def setup(exp, single_threaded):
    """Build the (config, env, session, policy) tuple for an experiment dict."""
    import lab
    lab.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    env = lab.make(exp['env_id'])
    sess = make_session(single_threaded=single_threaded)
    # Policy class is selected by name from the experiment description.
    policy_cls = getattr(policies, exp['policy']['type'])
    policy = policy_cls(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
def test_smoke(env_id):
    """Check that environments start up without errors and that we can extract rewards and observations"""
    lab.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = lab.make(env_id)
    if env.metadata.get('configure.required', False):
        if os.environ.get('FORCE_LATEST_INSTITUTE_DOCKER_RUNTIMES'):
            # Used to test institute-envs in CI
            configure_with_latest_docker_runtime_tag(env)
        else:
            env.configure(remotes=1)

    env = wrappers.Unvectorize(env)
    env.reset()
    _rollout(env, timestep_limit=60 * 30)  # Check a rollout
def test_steps_limit_restart():
    """TimeLimit signals done at the step limit and resets its own counter."""
    env = lab.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()
    # Fixed: use `is None` rather than `== None` (PEP 8 / flake8 E711).
    assert env._max_episode_seconds is None
    assert env._max_episode_steps == 2

    # Episode has started
    _, _, done, info = env.step([[]])
    assert done == [False]

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, info = env.step([[]])
    assert done == [True]
    assert env._elapsed_steps == 0
def test_default_time_limit():
    """An env registered without a step limit falls back to the default seconds cap."""
    # We need an env without a default limit
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='institute.envs:DummyVNCEnv',
        tags={
            'vnc': True,
        },
    )
    env = lab.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()
    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    # Fixed: use `is None` rather than `== None` (PEP 8 / flake8 E711).
    assert env._max_episode_steps is None
def create_vncatari_env(env_id, client_id, remotes, **_):
    """Build a VNC-backed Atari env and connect it to the given remotes."""
    env = lab.make(env_id)
    for wrap in (Vision, Logger, BlockingReset, LabCoreAction,
                 AtariRescale42x42, EpisodeID, DiagnosticsInfo, Unvectorize):
        env = wrap(env)

    logger.info('Connecting to remotes: %s', remotes)
    # Drive the remote at the env's native frame rate.
    fps = env.metadata['video.frames_per_second']
    env.configure(remotes=remotes, start_timeout=15 * 60, fps=fps, client_id=client_id)
    return env
def test_peek():
    """PeekReward steps report the remote's newer episode via env_status.peek.* keys."""
    env = lab.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()
    rewarder_client = get_rewarder_client(env)

    def describe(env_state, episode_id):
        # Inject a v0.env.describe as if it came from the remote rewarder.
        rewarder_client._manual_recv('v0.env.describe', {
            'env_id': 'flashgames.DuskDrive-v0',
            'env_state': env_state,
            'fps': 60
        }, {'episode_id': episode_id})

    describe('resetting', '1')
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    observation, reward, done, info = env.step([spaces.PeekReward])

    describe('resetting', '2')
    observation, reward, done, info = env.step([spaces.PeekReward])
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    assert info['env_status.episode_id'] == '1'
    assert info['env_status.env_state'] == 'resetting'
    assert info['env_status.peek.episode_id'] == '2'
    assert info['env_status.peek.env_state'] == 'resetting'

    describe('running', '2')
    observation, reward, done, info = env.step([spaces.PeekReward])
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    assert info['env_status.episode_id'] == '1'
    assert info['env_status.env_state'] == 'resetting'
    assert info['env_status.peek.episode_id'] == '2'
    assert info['env_status.peek.env_state'] == 'running'
def __init__(self, env, lab_core_id=None):
    """Expose the lab-core env's action set as a Discrete action space.

    lab_core_id: id of the core env whose actions are mirrored; when omitted
    it is read from self.spec.
    """
    super(LabCoreAction, self).__init__(env)
    if lab_core_id is None:
        # self.spec is None while inside of the make, so we need
        # to pass lab_core_id in explicitly there. This case will
        # be hit when instantiating by hand.
        lab_core_id = self.spec._kwargs['lab_core_id']

    spec = lab.spec(lab_core_id)
    self._actions = lab_core_action_space(lab_core_id).actions
    self.action_space = lab_spaces.Discrete(len(self._actions))

    # Atari envs additionally need key-state translation; others do not.
    if spec._entry_point.startswith('lab.envs.atari:'):
        self.key_state = translator.AtariKeyState(lab.make(lab_core_id))
    else:
        self.key_state = None
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    # Check that when running over VNC or using the raw environment,
    # semantics match exactly.
    lab.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)
    spaces.seed(0)

    vnc_env = spec.make()
    if vnc_env.metadata.get('configure.required', False):
        vnc_env.configure(remotes=1)
    vnc_env = wrappers.Unvectorize(wrapper(vnc_env))

    env = lab.make(spec._kwargs['lab_core_id'])
    env.seed(0)
    vnc_env.seed(0)

    # Reset observations, a full rollout, a fresh episode, and one step into it
    # must all agree between the two environments.
    reset(matcher, env, vnc_env, stage='initial reset')
    rollout(matcher, env, vnc_env, timestep_limit=50, stage='50 steps')
    reset(matcher, env, vnc_env, stage='reset to new episode')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='1 step in new episode')

    # Make sure env can be reseeded
    env.seed(1)
    vnc_env.seed(1)
    reset(matcher, env, vnc_env, 'reseeded reset')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='reseeded step')
def test_boundary_simple():
    """Rewards from a finished episode are delivered alongside the next episode's reset."""
    env = lab.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()
    rewarder_client = get_rewarder_client(env)

    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    # Episode 1 earns 1 then 2 (terminal) reward.
    for amount, finished in ((1, False), (2, True)):
        rewarder_client._manual_recv('v0.env.reward', {
            'reward': amount,
            'done': finished,
            'info': {}
        }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '2'})

    # We have reward of 3 for episode 1, and episode 2 should now be resetting
    observation, reward, done, info = env.step([])
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    assert (reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (3, True, 'resetting', '2')
parser.add_argument('-T', '--start-timeout', type=int, default=None, help='Rewarder session connection timeout (seconds)') args = parser.parse_args() logging.getLogger('lab').setLevel(logging.NOTSET) logging.getLogger('institute').setLevel(logging.NOTSET) if args.verbosity == 0: logger.setLevel(logging.INFO) elif args.verbosity >= 1: logger.setLevel(logging.DEBUG) if args.env_id is not None: env = lab.make(args.env_id) else: env = wrappers.WrappedVNCEnv() # env = wrappers.BlockingReset(env) if not isinstance(env, wrappers.LabCoreAction): # The LabCoreSyncEnv's try to mimic their core counterparts, # and thus came pre-wrapped wth an action space # translator. Everything else probably wants a SafeActionSpace # wrapper to shield them from random-agent clicking around # everywhere. env = wrappers.experimental.SafeActionSpace(env) else: # Only lab-core are seedable env.seed([0]) env = wrappers.Logger(env)
def test_boundary_multiple():
    """Completed-episode rewards are reported one step at a time across boundaries."""
    env = lab.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()
    rewarder_client = get_rewarder_client(env)

    def describe(env_state, episode_id):
        # Inject a v0.env.describe as if it came from the remote rewarder.
        rewarder_client._manual_recv('v0.env.describe', {
            'env_id': 'flashgames.DuskDrive-v0',
            'env_state': env_state,
            'fps': 60
        }, {'episode_id': episode_id})

    def push_reward(amount, is_done, episode_id):
        rewarder_client._manual_recv('v0.env.reward', {
            'reward': amount,
            'done': is_done,
            'info': {}
        }, {'episode_id': episode_id})

    # episode 2
    describe('resetting', '2')
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '2'})
    describe('running', '2')
    push_reward(2, True, '2')
    # episode 3
    describe('resetting', '3')
    describe('running', '3')
    push_reward(3, True, '3')
    # episode 4
    describe('resetting', '4')
    describe('running', '4')
    push_reward(4, False, '4')

    observation, reward, done, info = env.step([])
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    assert (reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (2, True, 'running', '4')
    assert (info['env_status.complete.env_state'],
            info['env_status.complete.episode_id']) == ('running', '2')

    observation, reward, done, info = env.step([])
    assert (reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (4, False, 'running', '4')