예제 #1
0
def test_vnc_env():
    """Step a fake-backed DuskDrive env through reset and a rewarded step.

    Rewarder messages are injected by hand through ``_manual_recv``. The
    assertions pin what ``env.step`` reports before the reset reply has been
    delivered, right after it, and once the episode is announced as running
    with three rewards queued.
    """
    game = 'flashgames.DuskDrive-v0'
    env = wrappers.Unvectorize(gym.make(game))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    client = get_rewarder_client(env)

    def episode_headers():
        # Build a new dict for every message rather than sharing one object.
        return {'episode_id': '1'}

    def announce(state):
        client._manual_recv(
            'v0.env.describe',
            {'env_id': game, 'env_state': state, 'fps': 60},
            episode_headers())

    def press_a():
        return env.step([spaces.KeyEvent.by_name('a', down=True)])

    def status_of(step_info):
        return (step_info['env_status.env_state'],
                step_info['env_status.episode_id'])

    announce('resetting')

    # The reset reply has not been delivered yet.
    observation, reward, done, info = press_a()
    assert (observation, reward, done) + status_of(info) == (
        None, 0, False, None, None)

    client._manual_recv('v0.reply.env.reset', {}, episode_headers())

    observation, reward, done, info = press_a()
    assert (observation, reward, done) + status_of(info) == (
        None, 0, False, 'resetting', '1')

    announce('running')
    for amount in (10, 15, -3):
        client._manual_recv(
            'v0.env.reward',
            {'reward': amount, 'done': False, 'info': {}},
            episode_headers())

    # The three rewards above are expected to be reported as one sum.
    observation, reward, done, info = press_a()
    assert sorted(observation.keys()) == ['text', 'vision']
    assert observation['text'] == []
    assert observation['vision'].shape == (768, 1024, 3)
    assert (reward, done) + status_of(info) == (22, False, 'running', '1')
    assert info['stats.reward.count'] == 3
예제 #2
0
def create_dd_env(remotes=None, client_id=None):
    """Build and configure a wrapped, unvectorized DuskDrive environment.

    Args:
        remotes: Forwarded unchanged to ``env.configure``.
        client_id: Forwarded unchanged to ``env.configure``.

    Returns:
        The configured environment (``fps=10``, ``observer=True``).
    """
    env = gym.make('flashgames.DuskDrive-v0')
    assert env.metadata['runtime.vectorized']

    # Applied innermost first; Unvectorize ends up as the outermost wrapper.
    layers = (
        wrappers.Logger,
        wrappers.BlockingReset,
        wrappers.Vision,
        wrappers.EpisodeID,
        wrappers.Unvectorize,
    )
    for layer in layers:
        env = layer(env)

    env.configure(remotes=remotes, fps=10, observer=True, client_id=client_id)
    return env
예제 #3
0
def test_smoke(env_id):
    """Smoke-test ``env_id``: it must start, reset and survive a rollout."""
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = wrappers.Unvectorize(gym.make(env_id))

    force_latest = os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES')
    if not force_latest:
        env.configure(remotes=1)
    else:
        # Used to test universe-envs in CI
        configure_with_latest_docker_runtime_tag(env)

    env.reset()
    # Check a rollout
    _rollout(env, timestep_limit=60 * 30)
예제 #4
0
def test_peek():
    """Check the ``env_status.peek.*`` info reported for ``PeekReward`` steps.

    Episode 1 is reset first; episode 2 is then announced as resetting and
    later as running. The assertions pin that the regular status keys keep
    reporting episode 1 while the peek keys follow episode 2.
    """
    game = 'flashgames.DuskDrive-v0'
    env = wrappers.Unvectorize(gym.make(game))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    client = get_rewarder_client(env)

    def announce(state, episode):
        # Fresh body and header dicts are built for every message.
        client._manual_recv(
            'v0.env.describe',
            {'env_id': game, 'env_state': state, 'fps': 60},
            {'episode_id': episode})

    def peek():
        return env.step([spaces.PeekReward])

    def check_status(step_info, expected_pairs):
        for key, wanted in expected_pairs:
            assert step_info[key] == wanted

    announce('resetting', '1')
    client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    # The result of this first peek is not inspected.
    observation, reward, done, info = peek()

    announce('resetting', '2')
    observation, reward, done, info = peek()
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    check_status(info, (
        ('env_status.episode_id', '1'),
        ('env_status.env_state', 'resetting'),
        ('env_status.peek.episode_id', '2'),
        ('env_status.peek.env_state', 'resetting'),
    ))

    announce('running', '2')
    observation, reward, done, info = peek()
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    check_status(info, (
        ('env_status.episode_id', '1'),
        ('env_status.env_state', 'resetting'),
        ('env_status.peek.episode_id', '2'),
        ('env_status.peek.env_state', 'running'),
    ))
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    """Check that the VNC-backed env and the raw gym env behave the same.

    Both environments are seeded identically and then driven through the
    same sequence of resets and rollouts; ``matcher`` is handed to the
    ``reset``/``rollout`` helpers together with both environments.
    """
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    remote_env = spec.make()
    needs_configure = remote_env.metadata.get('configure.required', False)
    if needs_configure:
        remote_env.configure(remotes=1)
    remote_env = wrappers.Unvectorize(wrapper(remote_env))

    raw_env = gym.make(spec._kwargs['gym_core_id'])

    raw_env.seed(0)
    remote_env.seed(0)

    # Reset observations must agree.
    reset(matcher, raw_env, remote_env, stage='initial reset')

    # A full rollout must agree.
    rollout(matcher, raw_env, remote_env, timestep_limit=50, stage='50 steps')

    # Begin a new episode ...
    reset(matcher, raw_env, remote_env, stage='reset to new episode')

    # ... and take a single step into it.
    rollout(
        matcher,
        raw_env,
        remote_env,
        timestep_limit=1,
        stage='1 step in new episode',
    )

    # Both environments must accept a new seed.
    raw_env.seed(1)
    remote_env.seed(1)
    reset(matcher, raw_env, remote_env, 'reseeded reset')
    rollout(
        matcher, raw_env, remote_env, timestep_limit=1, stage='reseeded step')
예제 #6
0
def test_boundary_simple():
    """Check a single episode boundary crossed within one ``env.step``.

    Episode 1 receives rewards 1 and 2 (the second one with ``done=True``)
    and episode 2 is then announced as resetting, all before stepping.
    """
    game = 'flashgames.DuskDrive-v0'
    env = wrappers.Unvectorize(gym.make(game))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    client = get_rewarder_client(env)

    def announce_resetting(episode):
        # Fresh body and header dicts are built for every message.
        client._manual_recv(
            'v0.env.describe',
            {'env_id': game, 'env_state': 'resetting', 'fps': 60},
            {'episode_id': episode})

    def send_reward(amount, finished):
        client._manual_recv(
            'v0.env.reward',
            {'reward': amount, 'done': finished, 'info': {}},
            {'episode_id': '1'})

    announce_resetting('1')
    client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    send_reward(1, False)
    send_reward(2, True)
    announce_resetting('2')

    # We have reward of 3 for episode 1, and episode 2 should now be resetting
    observation, reward, done, info = env.step([])
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    outcome = (reward, done, info['env_status.env_state'],
               info['env_status.episode_id'])
    assert outcome == (3, True, 'resetting', '2')
예제 #7
0
def test_boundary_multiple():
    """Check stepping after several episode boundaries have queued up.

    Episodes 2 and 3 each finish with a ``done=True`` reward and episode 4
    is running with one pending reward before the first ``env.step``. The
    assertions pin what the first and the second step report.
    """
    game = 'flashgames.DuskDrive-v0'
    env = wrappers.Unvectorize(gym.make(game))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    client = get_rewarder_client(env)

    def announce(state, episode):
        # Fresh body and header dicts are built for every message.
        client._manual_recv(
            'v0.env.describe',
            {'env_id': game, 'env_state': state, 'fps': 60},
            {'episode_id': episode})

    def send_reward(amount, finished, episode):
        client._manual_recv(
            'v0.env.reward',
            {'reward': amount, 'done': finished, 'info': {}},
            {'episode_id': episode})

    def summary(step_reward, step_done, step_info):
        return (step_reward, step_done, step_info['env_status.env_state'],
                step_info['env_status.episode_id'])

    # episode 2 (the only one that also gets a reset reply)
    announce('resetting', '2')
    client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '2'})
    announce('running', '2')
    send_reward(2, True, '2')

    # episode 3
    announce('resetting', '3')
    announce('running', '3')
    send_reward(3, True, '3')

    # episode 4
    announce('resetting', '4')
    announce('running', '4')
    send_reward(4, False, '4')

    observation, reward, done, info = env.step([])
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    assert summary(reward, done, info) == (2, True, 'running', '4')
    completed = (info['env_status.complete.env_state'],
                 info['env_status.complete.episode_id'])
    assert completed == ('running', '2')

    observation, reward, done, info = env.step([])
    assert summary(reward, done, info) == (4, False, 'running', '4')
예제 #8
0
def main():
    """Parse the command line and run the control plane until it exits.

    Optionally launches a demonstration agent subprocess, starts the agent
    connection listener, builds the VNC environment (unless ``--no-env``),
    starts the ``EnvController`` and (unless ``--no-rewarder``) the
    ``Rewarder``, then polls the shared error buffer forever.

    Returns:
        0 once ``error_buffer.blocking_check`` raises ``remote.Exit``. Any
        other exception raised by the check propagates to the caller.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    parser.add_argument('-r',
                        '--remotes',
                        default='vnc://127.0.0.1:5900',
                        help='Which VNC address to connect to.')
    parser.add_argument(
        '-e',
        '--env-id',
        default=None,
        help=
        'An env ID to optionally run upon startup (e.g. flashgames.DuskDrive-v0).'
    )
    parser.add_argument('-V',
                        '--no-vexpect',
                        action='store_true',
                        help='Whether to use vexpect.')
    parser.add_argument('-S',
                        '--no-scorer',
                        action='store_true',
                        help='Whether to use the scorer.')
    parser.add_argument('-E',
                        '--no-env',
                        action='store_true',
                        help='Whether to maintain an environment.')
    # The original help text was copy-pasted from --no-vexpect.
    parser.add_argument('-I',
                        '--integrator-mode',
                        action='store_true',
                        help='Run in integrator mode (also disables vexpect).')
    parser.add_argument('-R',
                        '--no-rewarder',
                        action='store_true',
                        help='Whether to enable the rewarder thread at all.')
    parser.add_argument('--rewarder-port',
                        type=int,
                        default=15900,
                        help='Which port to start the agent_conn thread')
    parser.add_argument('--rewarder-fps',
                        default=60,
                        type=float,
                        help='The frame rate for the rewarder.')
    parser.add_argument(
        '-i',
        '--idle-timeout',
        type=float,
        help=
        'How long to keep the environment around when it has no active connections'
    )
    parser.add_argument('--demonstration',
                        action='store_true',
                        help='Run a demonstration agent.')
    parser.add_argument(
        '--bot-demonstration',
        action='store_true',
        help=
        'Run a demonstration agent that connects to the vnc_recorder port, to record complete demos with no human playing'
    )

    args = parser.parse_args()

    # TODO: only activate in dev
    # Hard-exit on Ctrl-C; os._exit terminates the process immediately.
    signal.signal(signal.SIGINT, lambda signum, frame: os._exit(10))

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    # Launch demonstration agent if requested
    # NOTE(review): pipes.quote is kept for compatibility with whatever
    # interpreter this file targets; the pipes module is gone in Python 3.13,
    # where shlex.quote is the replacement - confirm the supported versions.

    if args.bot_demonstration and args.env_id is not None:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} -r vnc://localhost:5899+15899 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent in bot mode: %s', cmd)
        subprocess.Popen(cmd, shell=True)

    elif args.demonstration and args.env_id is not None:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent: %s', cmd)
        subprocess.Popen(cmd, shell=True)

    logger.info(
        "Starting play_controlplane.py with the following: command=%s args=%s env=%s",
        sys.argv, args, os.environ)

    error_buffer = universe.utils.ErrorBuffer()

    env_status = universe.rewarder.EnvStatus()
    env_status.set_env_info(env_id=args.env_id, fps=args.rewarder_fps)

    # The condition variable is shared by the control buffer and agent_conn.
    cv = threading.Condition()
    control_buffer = remote.ControlBuffer(cv)
    agent_conn = remote.AgentConn(env_status,
                                  cv,
                                  control_buffer,
                                  error_buffer=error_buffer,
                                  idle_timeout=args.idle_timeout)
    agent_conn.listen(port=args.rewarder_port)

    # Logger gives us the diagnostics printing
    if not args.no_env:
        env = wrappers.Unvectorize(
            wrappers.Vision(wrappers.Logger(vnc_env.VNCEnv())))
        # Assert when given self-referential rewarder connection
        # This shows up as a '+15900' or similar port number in the remotes string
        assert '+' not in args.remotes, "Remotes may not have rewarder ports"
        env.configure(
            remotes=args.remotes,
            ignore_clock_skew=True,
            disable_action_probes=True,
            vnc_driver='go',
            vnc_kwargs={
                'encoding': 'zrle',
                'compress_level': 9
            },
            observer=True,
        )
    else:
        logger.info(
            'Running without environment, meaning reward and gameover parsing will be disabled'
        )
        env = None

    # Integrator mode implies running without vexpect.
    no_vexpect = args.no_vexpect or args.integrator_mode

    env_controller = EnvController(
        env,
        args.remotes,
        env_status,
        agent_conn,
        error_buffer=error_buffer,
        control_buffer=control_buffer,
        no_vexpect=no_vexpect,
        integrator_mode=args.integrator_mode,
    )
    env_controller.start()

    if not args.no_rewarder:
        rewarder = Rewarder(
            env,
            args.remotes,
            agent_conn,
            env_status=env_controller.env_status,
            trigger_reset=env_controller.trigger_reset,
            error_buffer=error_buffer,
            no_vexpect=no_vexpect,
            no_scorer=args.no_scorer,
        )
        rewarder.start()
    else:
        rewarder = None

    manhole.install(locals={
        'rewarder': rewarder,
        'env_controller': env_controller
    })

    # TODO: clean up this API, but good enough for now
    # The loop can only be left through the return below (or an exception),
    # so no statement after it would ever be reached.
    while True:
        try:
            error_buffer.blocking_check(timeout=60)
        except remote.Exit as e:
            logger.info('%s', e)
            return 0
예제 #9
0
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    # Record episodes as we go
    episode_scores = []
    current_episode_score = 0.
    rewarded_during_current_episode = False
    while True:
        env = wrappers.Unvectorize(gym.make(args.env_id))

        # Jot down the env_id so the uploader can find it later
        env_id_file_dir = os.path.join(os.sep, 'tmp', 'demo')
        env_id_file_path = os.path.join(env_id_file_dir, 'env_id.txt')
        if not os.path.exists(env_id_file_dir):
            logger.info("[DemonstrationAgent] Creating directory %s",
                        env_id_file_dir)
            os.makedirs(env_id_file_dir)
        with open(env_id_file_path, 'w') as env_id_file:
            logger.info("[DemonstrationAgent] Writing env id to file %s",
                        env_id_file_path)
            env_id_file.write(args.env_id)

        # Connect through our recording proxies
        env.configure(remotes=args.remote, fps=args.fps, observer=True)