示例#1
0
文件: viz.py 项目: SynthAI/SynthAI
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    """Roll out a saved MujocoPolicy in ``env_id`` with rendering enabled.

    Args:
        env_id: identifier handed to ``lab.make``.
        policy_file: path passed to ``MujocoPolicy.Load``.
        record: when truthy, the env is wrapped in ``wrappers.Monitor``
            (output goes to a uuid-named directory under ``/tmp``) and the
            function returns after a single rollout.
        stochastic: when truthy, ``np.random`` is given to the rollout as
            its random stream; otherwise ``None`` is passed.
        extra_kwargs: JSON string decoded and forwarded to
            ``MujocoPolicy.Load``; a falsy value is forwarded unchanged.
    """
    import json
    import uuid

    import lab
    from lab import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = lab.make(env_id)
    if record:
        monitor_dir = '/tmp/' + str(uuid.uuid4())
        env = wrappers.Monitor(env, monitor_dir, force=True)

    if extra_kwargs:
        extra_kwargs = json.loads(extra_kwargs)

    # The choice never changes between rollouts, so make it once.
    random_stream = np.random if stochastic else None

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            episode_rewards, episode_len = pi.rollout(
                env, render=True, random_stream=random_stream)
            print('return={:.4f} len={}'.format(episode_rewards.sum(),
                                                episode_len))

            if not record:
                # Without recording, keep rolling out until interrupted.
                continue

            # When recording, one episode is enough: close the env and stop.
            env.close()
            return
示例#2
0
def main():
    """Run a random agent on 'flashgames.NeonRace-v0', rendering each step.

    The only command-line option is ``-v/--verbose`` (repeatable); any
    occurrence switches the module logger from INFO to DEBUG. The stepping
    loop has no exit condition, so the trailing ``return 0`` is never
    reached in normal operation.
    """
    arg_parser = argparse.ArgumentParser(description=None)
    arg_parser.add_argument(
        '-v', '--verbose',
        action='count',
        dest='verbosity',
        default=0,
        help='Set verbosity.',
    )
    cli = arg_parser.parse_args()

    if cli.verbosity >= 1:
        logger.setLevel(logging.DEBUG)
    elif cli.verbosity == 0:
        logger.setLevel(logging.INFO)

    env = lab.make('flashgames.NeonRace-v0')
    # automatically creates a local docker container
    env.configure(remotes=1)

    # Restrict the valid random actions. (Try removing this and see
    # what happens when the agent is given full control of the
    # keyboard/mouse.)
    env = wrappers.experimental.SafeActionSpace(env)
    observation_n = env.reset()

    while True:
        # your agent here
        #
        # Try sending this instead of a random action: ('KeyEvent', 'ArrowUp', True)
        action_n = []
        for _ in observation_n:
            # One independently sampled action per observation slot.
            action_n.append(env.action_space.sample())
        observation_n, reward_n, done_n, info = env.step(action_n)
        env.render()

    return 0
示例#3
0
文件: envs.py 项目: SynthAI/SynthAI
def create_flash_env(env_id, client_id, remotes, **_):
    """Build, wrap and configure a flash-game environment.

    Args:
        env_id: identifier handed to ``lab.make``; also used to look up the
            game's "height"/"width" in the 'flashgames' server registry.
        client_id: forwarded to ``env.configure``.
        remotes: forwarded to ``env.configure``.
        **_: extra keyword arguments are accepted and ignored.

    Returns:
        The configured environment after the full wrapper chain.
    """
    env = BlockingReset(Logger(Vision(lab.make(env_id))))

    game_entry = institute.runtime_spec('flashgames').server_registry[env_id]
    env = CropScreen(env, game_entry["height"], game_entry["width"], 84, 18)
    env = FlashRescale(env)

    if env_id == 'flashgames.NeonRace-v0':
        # Better key space for this game.
        keys = ['left', 'right', 'up', 'left up', 'right up', 'down', 'up x']
    else:
        keys = ['left', 'right', 'up', 'down', 'x']
    logger.info('create_flash_env(%s): keys=%s', env_id, keys)

    env = DiscreteToFixedKeysVNCActions(env, keys)
    for wrap in (EpisodeID, DiagnosticsInfo, Unvectorize):
        env = wrap(env)

    vnc_settings = {
        'encoding': 'tight',
        'compress_level': 0,
        'fine_quality_level': 50,
        'subsample_level': 3,
    }
    env.configure(
        fps=5.0,
        remotes=remotes,
        start_timeout=15 * 60,
        client_id=client_id,
        vnc_driver='go',
        vnc_kwargs=vnc_settings,
    )
    return env
示例#4
0
def test_describe_handling():
    """Check reward-buffer state around an env describe and a reset reply.

    A 'v0.env.describe' message for episode '1' must update only the
    *remote* episode id/state; the *current* episode stays unset until the
    'v0.reply.env.reset' message for that episode arrives.
    """
    env = lab.make('flashgames.DuskDrive-v0')
    env.configure(vnc_driver=FakeVNCSession,
                  rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    reward_buffer = get_reward_buffer(env)
    rewarder_client = get_rewarder_client(env)

    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '1'})

    # Before the reset reply only the remote view has changed.
    assert reward_buffer._remote_episode_id == '1'
    assert reward_buffer._remote_env_state == 'resetting'
    # Identity checks: "unset" must be the None singleton itself, not just
    # something that happens to compare equal to it.
    assert reward_buffer._current_episode_id is None
    assert reward_buffer.reward_state(
        reward_buffer._current_episode_id)._env_state is None

    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    # After the reset reply the current episode matches the remote one.
    assert reward_buffer._remote_episode_id == '1'
    assert reward_buffer._remote_env_state == 'resetting'
    assert reward_buffer._current_episode_id == '1'
    assert reward_buffer.reward_state(
        reward_buffer._current_episode_id)._env_state == 'resetting'
示例#5
0
文件: envs.py 项目: SynthAI/SynthAI
def create_atari_env(env_id):
    """Create ``env_id`` via ``lab.make`` and apply the Atari wrapper chain.

    Wrappers are applied innermost-first: Vectorize, AtariRescale42x42,
    DiagnosticsInfo, Unvectorize. The outermost wrapper is returned.
    """
    env = lab.make(env_id)
    for wrap in (Vectorize, AtariRescale42x42, DiagnosticsInfo, Unvectorize):
        env = wrap(env)
    return env
示例#6
0
def test_connect():
    """Check the session dicts produced by configuring with fake drivers.

    After ``configure`` with a single 'vnc://example.com:5900+15900' remote,
    both the VNC session and the rewarder session must report exactly one
    connection, keyed '0', with the values asserted below.
    """
    env = lab.make('flashgames.DuskDrive-v0')
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    vnc_session = get_vnc_session(env)
    rewarder_session = get_rewarder_session(env)

    expected_vnc = {
        '0': {
            'name': '0',
            'address': 'example.com:5900',
            'password': '******',
            'encoding': 'tight',
            'fine_quality_level': 50,
            'subsample_level': 2,
            'start_timeout': 7,
        },
    }
    assert vnc_session._to_dict() == expected_vnc

    expected_rewarder = {
        '0': {
            'name': '0',
            'label': '0:example.com:5900',
            'address': 'example.com:15900',
            'password': '******',
            'env_id': 'flashgames.DuskDrive-v0',
            'fps': 60,
            'seed': None,
            'start_timeout': 7,
            'skip_network_calibration': False,
            'observer': False,
        },
    }
    assert rewarder_session._to_dict() == expected_rewarder
示例#7
0
def test_vnc_env():
    """Step an unvectorized DuskDrive env driven by hand-fed rewarder messages.

    Stages checked:
      1. describe received but no reset reply yet: step reports no
         observation, zero reward and no episode status;
      2. reset reply received: step reports the 'resetting' state for
         episode '1';
      3. env described as 'running' and three rewards (10, 15, -3) pushed:
         step returns a text/vision observation, the summed reward 22 and a
         reward count of 3.
    """
    env = wrappers.Unvectorize(lab.make('flashgames.DuskDrive-v0'))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    rewarder_client = get_rewarder_client(env)

    def describe(env_state):
        # Inject an env.describe message for episode '1'.
        rewarder_client._manual_recv(
            'v0.env.describe',
            {
                'env_id': 'flashgames.DuskDrive-v0',
                'env_state': env_state,
                'fps': 60
            },
            {'episode_id': '1'})

    def press_a():
        # Every step in this test sends the same key-down event for 'a'.
        return env.step([spaces.KeyEvent.by_name('a', down=True)])

    def status(step_info):
        return (step_info['env_status.env_state'],
                step_info['env_status.episode_id'])

    # Stage 1: described, but the reset has not been acknowledged.
    describe('resetting')
    observation, reward, done, info = press_a()
    assert (observation, reward, done) + status(info) == (
        None, 0, False, None, None)

    # Stage 2: reset acknowledged for episode '1'.
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    observation, reward, done, info = press_a()
    assert (observation, reward, done) + status(info) == (
        None, 0, False, 'resetting', '1')

    # Stage 3: episode running, three reward messages queued.
    describe('running')
    for amount in (10, 15, -3):
        rewarder_client._manual_recv(
            'v0.env.reward',
            {
                'reward': amount,
                'done': False,
                'info': {}
            },
            {'episode_id': '1'})

    observation, reward, done, info = press_a()
    assert sorted(observation.keys()) == ['text', 'vision']
    assert observation['text'] == []
    assert observation['vision'].shape == (768, 1024, 3)
    assert (reward, done) + status(info) == (22, False, 'running', '1')
    assert info['stats.reward.count'] == 3
示例#8
0
def test_steps_limit_restart_unused_when_not_wrapped():
    """Without a TimeLimit wrapper, ten steps never report ``done``."""
    env = lab.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env.reset()

    steps_taken = 0
    while steps_taken < 10:
        step_result = env.step([[]])
        # Third element of the step tuple is the per-env done list.
        assert step_result[2] == [False]
        steps_taken += 1
示例#9
0
def test_joint():
    """Join two 3-way dummy VNC envs and check the combined step output.

    Only the first reward buffer of each sub-env receives a reward of 10,
    so the joint reward vector must be non-zero at positions 0 and 3 only.
    """
    sub_envs = [lab.make('test.DummyVNCEnv-v0') for _ in range(2)]
    for sub_env in sub_envs:
        sub_env.configure(_n=3)

    first_buffers = [sub_env._reward_buffers[0] for sub_env in sub_envs]
    for buf in first_buffers:
        buf.set_env_info('running', 'test.DummyVNCEnv-v0', '1', 60)
        buf.reset('1')
        buf.push('1', 10, False, {})

    env = wrappers.Joint(sub_envs)
    assert env.n == 6
    observation_n = env.reset()
    assert observation_n == [None] * 6

    empty_actions = [[] for _ in range(env.n)]
    observation_n, reward_n, done_n, info = env.step(empty_actions)
    assert reward_n == [10.0, 0.0, 0.0, 10.0, 0.0, 0.0]
    assert done_n == [False] * 6
示例#10
0
文件: es.py 项目: SynthAI/SynthAI
def setup(exp, single_threaded):
    """Create the config, environment, session and policy for an experiment.

    Args:
        exp: mapping with the keys 'config', 'env_id' and 'policy' (the
            latter holding 'type' and 'args').
        single_threaded: forwarded to ``make_session``.

    Returns:
        Tuple ``(config, env, sess, policy)``.
    """
    import lab
    lab.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    env = lab.make(exp['env_id'])
    sess = make_session(single_threaded=single_threaded)

    # The policy class is looked up by name in the policies module.
    policy_spec = exp['policy']
    policy_cls = getattr(policies, policy_spec['type'])
    policy = policy_cls(env.observation_space, env.action_space,
                        **policy_spec['args'])
    tf_util.initialize()

    return config, env, sess, policy
示例#11
0
def test_smoke(env_id):
    """Smoke-test ``env_id``: it must start, reset and survive one rollout."""
    lab.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = lab.make(env_id)
    needs_configure = env.metadata.get('configure.required', False)
    # Used to test institute-envs in CI
    force_latest = os.environ.get('FORCE_LATEST_INSTITUTE_DOCKER_RUNTIMES')
    if needs_configure and force_latest:
        configure_with_latest_docker_runtime_tag(env)
    elif needs_configure:
        env.configure(remotes=1)

    env = wrappers.Unvectorize(env)

    env.reset()
    # Check a rollout
    _rollout(env, timestep_limit=60 * 30)
示例#12
0
def test_steps_limit_restart():
    """Check that TimeLimit ends and restarts the episode at the step limit.

    For 'test.StepsLimitDummyVNCEnv-v0' the wrapper is expected to carry a
    2-step limit and no seconds limit; the second step must report ``done``
    and leave the elapsed-step counter back at zero.
    """
    env = lab.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    # "No seconds limit" means the None singleton, so test identity.
    assert env._max_episode_seconds is None
    assert env._max_episode_steps == 2

    # Episode has started
    _, _, done, info = env.step([[]])
    assert done == [False]

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, info = env.step([[]])
    assert done == [True]
    assert env._elapsed_steps == 0
示例#13
0
def test_default_time_limit():
    """Check TimeLimit's fallback when the env spec declares no limit.

    An env registered without any limit must get the module default for
    ``_max_episode_seconds`` and no step limit.
    """
    # We need an env without a default limit
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='institute.envs:DummyVNCEnv',
        tags={
            'vnc': True,
        },
    )

    env = lab.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    # "No step limit" means the None singleton, so test identity.
    assert env._max_episode_steps is None
示例#14
0
文件: envs.py 项目: SynthAI/SynthAI
def create_vncatari_env(env_id, client_id, remotes, **_):
    """Build, wrap and configure a VNC-backed Atari environment.

    Args:
        env_id: identifier handed to ``lab.make``.
        client_id: forwarded to ``env.configure``.
        remotes: logged and forwarded to ``env.configure``.
        **_: extra keyword arguments are accepted and ignored.

    Returns:
        The configured environment after the full wrapper chain.
    """
    env = lab.make(env_id)
    wrapper_chain = (
        Vision,
        Logger,
        BlockingReset,
        LabCoreAction,
        AtariRescale42x42,
        EpisodeID,
        DiagnosticsInfo,
        Unvectorize,
    )
    for wrap in wrapper_chain:
        env = wrap(env)

    logger.info('Connecting to remotes: %s', remotes)
    # The frame rate comes from the metadata of the fully wrapped env.
    env.configure(
        remotes=remotes,
        start_timeout=15 * 60,
        fps=env.metadata['video.frames_per_second'],
        client_id=client_id,
    )
    return env
示例#15
0
def test_peek():
    """Check the ``env_status.peek.*`` info keys reported for PeekReward steps.

    While episode '1' is current and the remote announces episode '2', a
    PeekReward step must keep reporting episode '1' as the current status
    and expose episode '2' under the peek keys. The masking flags are set
    while the peeked episode is 'resetting' and absent/falsy once it is
    'running'.
    """
    env = wrappers.Unvectorize(lab.make('flashgames.DuskDrive-v0'))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    rewarder_client = get_rewarder_client(env)

    def describe(env_state, episode_id):
        # Inject an env.describe message for the given episode.
        rewarder_client._manual_recv(
            'v0.env.describe',
            {
                'env_id': 'flashgames.DuskDrive-v0',
                'env_state': env_state,
                'fps': 60
            },
            {'episode_id': episode_id})

    def peek():
        return env.step([spaces.PeekReward])

    def current_and_peeked(step_info):
        return (step_info['env_status.episode_id'],
                step_info['env_status.env_state'],
                step_info['env_status.peek.episode_id'],
                step_info['env_status.peek.env_state'])

    describe('resetting', '1')
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    observation, reward, done, info = peek()

    # Remote moves on to episode '2', still resetting.
    describe('resetting', '2')
    observation, reward, done, info = peek()
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    assert current_and_peeked(info) == ('1', 'resetting', '2', 'resetting')

    # Episode '2' starts running; masking is lifted.
    describe('running', '2')
    observation, reward, done, info = peek()
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    assert current_and_peeked(info) == ('1', 'resetting', '2', 'running')
示例#16
0
    def __init__(self, env, lab_core_id=None):
        """Wrap ``env`` with the discrete action set of a lab-core environment.

        Args:
            env: environment to wrap.
            lab_core_id: id of the core environment whose actions are used.
                When omitted it is read from ``self.spec._kwargs``.
        """
        super(LabCoreAction, self).__init__(env)

        # self.spec is None while inside of the make, so the id must be
        # passed in explicitly there. The lookup below is for the case of
        # instantiating by hand.
        if lab_core_id is None:
            lab_core_id = self.spec._kwargs['lab_core_id']

        core_spec = lab.spec(lab_core_id)
        self._actions = lab_core_action_space(lab_core_id).actions
        self.action_space = lab_spaces.Discrete(len(self._actions))

        # A key-state translator is only built for Atari entry points.
        is_atari = core_spec._entry_point.startswith('lab.envs.atari:')
        self.key_state = (translator.AtariKeyState(lab.make(lab_core_id))
                          if is_atari else None)
示例#17
0
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    """Compare a VNC env with its raw lab-core counterpart step for step.

    Args:
        spec: env spec; ``spec.make()`` builds the VNC env and
            ``spec._kwargs['lab_core_id']`` names the raw env.
        matcher: passed through to the ``reset``/``rollout`` helpers.
        wrapper: callable applied to the VNC env before ``Unvectorize``.
    """
    # Check that when running over VNC or using the raw environment,
    # semantics match exactly.
    lab.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    vnc_env = spec.make()
    if vnc_env.metadata.get('configure.required', False):
        vnc_env.configure(remotes=1)
    vnc_env = wrappers.Unvectorize(wrapper(vnc_env))

    env = lab.make(spec._kwargs['lab_core_id'])

    for seeded in (env, vnc_env):
        seeded.seed(0)

    # Check that reset observations work
    reset(matcher, env, vnc_env, stage='initial reset')

    # Check a full rollout
    rollout(matcher, env, vnc_env, timestep_limit=50, stage='50 steps')

    # Reset to start a new episode
    reset(matcher, env, vnc_env, stage='reset to new episode')

    # Check that a step into the next episode works
    rollout(matcher, env, vnc_env,
            timestep_limit=1, stage='1 step in new episode')

    # Make sure env can be reseeded
    for seeded in (env, vnc_env):
        seeded.seed(1)
    reset(matcher, env, vnc_env, 'reseeded reset')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='reseeded step')
示例#18
0
def test_boundary_simple():
    """Check a step that crosses from a finished episode into a resetting one.

    Episode '1' receives rewards 1 and 2 (the second with done=True), then
    the remote announces episode '2' as 'resetting'. A single step must
    return the summed reward 3 with done=True, report episode '2' as the
    current status, and flag both observation and action as masked.
    """
    env = wrappers.Unvectorize(lab.make('flashgames.DuskDrive-v0'))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    rewarder_client = get_rewarder_client(env)

    def describe_resetting(episode_id):
        rewarder_client._manual_recv(
            'v0.env.describe',
            {
                'env_id': 'flashgames.DuskDrive-v0',
                'env_state': 'resetting',
                'fps': 60
            },
            {'episode_id': episode_id})

    describe_resetting('1')
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    for amount, finished in ((1, False), (2, True)):
        rewarder_client._manual_recv(
            'v0.env.reward',
            {
                'reward': amount,
                'done': finished,
                'info': {}
            },
            {'episode_id': '1'})
    describe_resetting('2')

    # We have reward of 3 for episode 1, and episode 2 should now be resetting
    observation, reward, done, info = env.step([])
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    reported = (reward, done, info['env_status.env_state'],
                info['env_status.episode_id'])
    assert reported == (3, True, 'resetting', '2')
示例#19
0
    parser.add_argument('-T',
                        '--start-timeout',
                        type=int,
                        default=None,
                        help='Rewarder session connection timeout (seconds)')
    args = parser.parse_args()

    logging.getLogger('lab').setLevel(logging.NOTSET)
    logging.getLogger('institute').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        env = lab.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    # env = wrappers.BlockingReset(env)
    if not isinstance(env, wrappers.LabCoreAction):
        # The LabCoreSyncEnv's try to mimic their core counterparts,
        # and thus came pre-wrapped with an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.experimental.SafeActionSpace(env)
    else:
        # Only lab-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)
示例#20
0
def test_boundary_multiple():
    """Check stepping when several episode boundaries arrive before a step.

    Episodes '2' and '3' both finish (rewards 2 and 3, done=True) and
    episode '4' is already running with a reward of 4 before the first
    step. The first step must return episode '2''s reward with done=True
    while reporting '4' as current and '2' as the completed episode; the
    second step must return episode '4''s reward with done=False.
    """
    env = wrappers.Unvectorize(lab.make('flashgames.DuskDrive-v0'))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    rewarder_client = get_rewarder_client(env)

    def describe(env_state, episode_id):
        rewarder_client._manual_recv(
            'v0.env.describe',
            {
                'env_id': 'flashgames.DuskDrive-v0',
                'env_state': env_state,
                'fps': 60
            },
            {'episode_id': episode_id})

    def push_reward(amount, finished, episode_id):
        rewarder_client._manual_recv(
            'v0.env.reward',
            {
                'reward': amount,
                'done': finished,
                'info': {}
            },
            {'episode_id': episode_id})

    # episode 2 (the only one that gets an explicit reset reply)
    describe('resetting', '2')
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '2'})
    describe('running', '2')
    push_reward(2, True, '2')

    # episodes 3 and 4: resetting -> running -> one reward each
    for episode_id, amount, finished in (('3', 3, True), ('4', 4, False)):
        describe('resetting', episode_id)
        describe('running', episode_id)
        push_reward(amount, finished, episode_id)

    observation, reward, done, info = env.step([])
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    current = (reward, done, info['env_status.env_state'],
               info['env_status.episode_id'])
    assert current == (2, True, 'running', '4')
    completed = (info['env_status.complete.env_state'],
                 info['env_status.complete.episode_id'])
    assert completed == ('running', '2')

    observation, reward, done, info = env.step([])
    current = (reward, done, info['env_status.env_state'],
               info['env_status.episode_id'])
    assert current == (4, False, 'running', '4')