示例#1
0
def test_connect():
    """Configuring a VNC env with fake drivers should populate both the
    VNC session and the rewarder session from the parsed remotes string."""
    env = gym.make('flashgames.DuskDrive-v0')
    env.configure(vnc_driver=FakeVNCSession,
                  rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    vnc_cfg = get_vnc_session(env)._to_dict()
    rewarder_cfg = get_rewarder_session(env)._to_dict()

    expected_vnc = {
        '0': {
            'name': '0',
            'subsample_level': 2,
            'encoding': 'tight',
            'fine_quality_level': 50,
            'start_timeout': 7,
            'address': 'example.com:5900',
            'password': '******'
        }
    }
    expected_rewarder = {
        '0': {
            'start_timeout': 7,
            'seed': None,
            'name': '0',
            'fps': 60,
            'address': 'example.com:15900',
            'env_id': 'flashgames.DuskDrive-v0',
            'password': '******',
            'skip_network_calibration': False,
            'observer': False,
            'label': '0:example.com:5900'
        }
    }
    assert vnc_cfg == expected_vnc
    assert rewarder_cfg == expected_rewarder
示例#2
0
def main():
    """Run random actions forever on a MiniWoB ClickDialog env over VNC.

    Parses a verbosity flag, builds the wrapped vectorized env, then
    loops printing the reward of each step. The loop never exits.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    args = parser.parse_args()

    # Any -v flag raises logging from INFO to DEBUG.
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    env = gym.make('wob.mini.ClickDialog-v0')
    env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env)
    wob_vnc.configure(env,
                      wob_vnc.remotes_url(port_ofs=0,
                                          hostname='0.0.0.0',
                                          count=REMOTES_COUNT)
                      )  # automatically creates a local docker container

    observation_n = env.reset()
    idx = 0
    while True:
        # your agent here
        #
        # Try sending this instead of a random action: ('KeyEvent', 'ArrowUp', True)
        action_n = [env.action_space.sample() for ob in observation_n]
        observation_n, reward_n, done_n, info = env.step(action_n)
        # NOTE(review): prints idx * REMOTES_COUNT under the label "idx" —
        # presumably a cumulative step count across remotes; confirm intent.
        print("idx: {}, reward: {}".format(idx * REMOTES_COUNT, reward_n))
        idx += 1
    return 0  # unreachable: the loop above never breaks
示例#3
0
def test_describe_handling():
    """A describe message updates the remote status, but the local episode
    only advances once the env.reset reply arrives."""
    env = gym.make('flashgames.DuskDrive-v0')
    env.configure(vnc_driver=FakeVNCSession,
                  rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    buf = get_reward_buffer(env)
    client = get_rewarder_client(env)

    describe_body = {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }
    client._manual_recv('v0.env.describe', describe_body, {'episode_id': '1'})

    # Remote state is recorded, but the current episode is still unset.
    assert buf._remote_episode_id == '1'
    assert buf._remote_env_state == 'resetting'
    assert buf._current_episode_id == None
    assert buf.reward_state(buf._current_episode_id)._env_state == None

    client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    # The reset reply promotes the remote episode to the current one.
    assert buf._remote_episode_id == '1'
    assert buf._remote_env_state == 'resetting'
    assert buf._current_episode_id == '1'
    assert buf.reward_state(buf._current_episode_id)._env_state == 'resetting'
示例#4
0
def test_vnc_env():
    """Exercise a faked VNC env end to end: env status propagation via
    v0.env.describe / v0.reply.env.reset, then reward aggregation over
    several v0.env.reward messages."""
    env = gym.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession,
                  rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    rewarder_client = get_rewarder_client(env)

    # Remote announces episode '1' in the 'resetting' state.
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '1'})

    # Before the reset reply arrives, status fields are still unset.
    observation, reward, done, info = env.step(
        [spaces.KeyEvent.by_name('a', down=True)])
    assert (observation, reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (None, 0, False, None, None)

    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    # The reset reply makes episode '1' / 'resetting' visible in info.
    observation, reward, done, info = env.step(
        [spaces.KeyEvent.by_name('a', down=True)])
    assert (observation, reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (None, 0, False, 'resetting',
                                               '1')

    # Episode transitions to 'running'.
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'running',
        'fps': 60
    }, {'episode_id': '1'})

    # Three reward messages: 10 + 15 - 3 should sum to 22 on the next step.
    rewarder_client._manual_recv('v0.env.reward', {
        'reward': 10,
        'done': False,
        'info': {}
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.reward', {
        'reward': 15,
        'done': False,
        'info': {}
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.reward', {
        'reward': -3,
        'done': False,
        'info': {}
    }, {'episode_id': '1'})

    observation, reward, done, info = env.step(
        [spaces.KeyEvent.by_name('a', down=True)])
    assert sorted(observation.keys()) == ['text', 'vision']
    assert observation['text'] == []
    assert observation['vision'].shape == (768, 1024, 3)
    assert (reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (22, False, 'running', '1')
    assert info['stats.reward.count'] == 3
示例#5
0
def test_monitor_filename():
    """Closing a monitored env writes exactly one manifest file."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), directory=temp)
        env.close()

        pattern = os.path.join(temp, '*.manifest.*')
        assert len(glob.glob(pattern)) == 1
示例#6
0
def test_video_callable_false_does_not_record():
    """video_callable=False disables video recording entirely."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), temp, video_callable=False)
        env.reset()
        env.close()

        videos = monitoring.load_results(temp)['videos']
        assert len(videos) == 0
示例#7
0
def test_steps_limit_restart_unused_when_not_wrapped():
    """Without a TimeLimit wrapper, the env never signals done on its own."""
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env.reset()

    for _ in range(10):
        _, _, done, _ = env.step([[]])
        assert done == [False]
示例#8
0
def test_video_callable_records_videos():
    """With the default video callable, the first episode is recorded."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), temp)
        env.reset()
        env.close()

        videos = monitoring.load_results(temp)['videos']
        assert len(videos) == 1, "Videos: {}".format(videos)
示例#9
0
def test_semisuper_succeeds():
    """Regression test: a monitored semisupervised env can step and close."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('SemisuperPendulumDecay-v0'), temp)
        env.reset()
        env.step(env.action_space.sample())
        env.close()
示例#10
0
def test_text_envs():
    """A text-only env (FrozenLake) can still be captured by VideoRecorder."""
    env = gym.make('FrozenLake-v0')
    recorder = VideoRecorder(env)
    try:
        env.reset()
        recorder.capture_frame()
        recorder.close()
    finally:
        # Always remove the recording file, even if capture failed.
        os.remove(recorder.path)
示例#11
0
def main():
    """Record observations from 100 no-op steps of an env and save them.

    Supports both regular gym envs (no-op action 0, verified against the
    action meanings) and VNC envs (no-op is an empty event list). The
    collected observation list is written to --output via np.save.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    parser.add_argument('-o',
                        '--output',
                        required=True,
                        help='Where to save trace.')
    parser.add_argument('-e',
                        '--env-id',
                        default='Pong-v3',
                        help='Which env to run.')
    parser.add_argument('-s',
                        '--vnc-address',
                        default='127.0.0.1:5900',
                        help='Address of the VNC server to run on.')
    parser.add_argument('-r',
                        '--rewarder-address',
                        default='127.0.0.1:15900',
                        help='Address of the rewarder server to run on.')
    parser.add_argument('-S', '--seed', type=int, default=0, help='Set seed.')

    args = parser.parse_args()

    # Any -v flag raises logging from INFO to DEBUG.
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    observations = []

    # VNC envs are detected purely by the env-id prefix.
    vnc = args.env_id.startswith('VNC')
    env = gym.make(args.env_id)
    if args.seed is not None:
        env.seed(args.seed)
    if vnc:
        env.configure(vnc_address=args.vnc_address,
                      rewarder_address=args.rewarder_address)
        noop = []  # vectorized VNC envs take a list of events per step
    else:
        # For ALE-style envs, action 0 must be the NOOP action.
        assert env.get_action_meanings()[0] == 'NOOP'
        noop = 0

    ob = env.reset()
    observations.append(ob)

    # 100 no-op steps plus the initial reset observation are saved.
    for i in range(100):
        ob, reward, done, info = env.step(noop)
        observations.append(ob)

    np.save(args.output, observations)

    return 0
示例#12
0
def test_semisuper_true_rewards():
    """The noisy perceived reward should differ from info['true_reward']."""
    env = gym.make('SemisuperPendulumNoise-v0')
    env.reset()

    _, perceived, _, info = env.step(env.action_space.sample())

    # The noise in the reward should ensure these are different. If we get
    # spurious errors, we can remove this check.
    assert perceived != info['true_reward']
示例#13
0
def test_video_callable_true_not_allowed():
    """Monitor rejects video_callable=True; only False or a callable is valid."""
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        raised = False
        try:
            env = Monitor(env, temp, video_callable=True)
        except error.Error:
            raised = True
        assert raised
示例#14
0
def test_joint():
    """Joint concatenates two vectorized envs and interleaves their results."""
    env1 = gym.make('test.DummyVNCEnv-v0')
    env2 = gym.make('test.DummyVNCEnv-v0')
    env1.configure(_n=3)
    env2.configure(_n=3)
    # Seed index 0 of each env with one pending reward of 10.
    for buf in [env1._reward_buffers[0], env2._reward_buffers[0]]:
        buf.set_env_info('running', 'test.DummyVNCEnv-v0', '1', 60)
        buf.reset('1')
        buf.push('1', 10, False, {})

    joint = wrappers.Joint([env1, env2])
    assert joint.n == 6
    assert joint.reset() == [None] * 6

    _, reward_n, done_n, _ = joint.step([[] for _ in range(joint.n)])
    # Only the first index of each underlying env carries the pushed reward.
    assert reward_n == [10.0, 0.0, 0.0, 10.0, 0.0, 0.0]
    assert done_n == [False] * 6
示例#15
0
def test_record_simple():
    """Recording a single frame yields a non-empty, non-broken video file."""
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    # Use a context manager so the file handle is always released; the
    # original opened the file without ever closing it (resource leak).
    with open(rec.path) as f:
        assert os.fstat(f.fileno()).st_size > 100
示例#16
0
def test_write_upon_reset_false():
    """With write_upon_reset=False, nothing hits disk until close()."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), directory=temp,
                      video_callable=False, write_upon_reset=False)
        env.reset()

        # No output yet: writing is deferred past reset.
        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        # Closing flushes the results to disk.
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
示例#17
0
def test_no_double_wrapping():
    """Wrapping an already-monitored env must raise DoubleWrapperError."""
    temp = tempfile.mkdtemp()
    try:
        env = wrappers.Monitor(gym.make("FrozenLake-v0"), temp)
        raised = False
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            raised = True
        assert raised, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)
示例#18
0
def test_steps_limit_restart():
    """A monitored steps-limited env signals done at the limit and bumps
    its episode id when it auto-resets."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('test.StepsLimitCartpole-v0'), temp,
                      video_callable=False)
        env.reset()

        # First step: the episode is in progress.
        _, _, done, _ = env.step(env.action_space.sample())
        assert done == False

        # Second step hits the limit; done fires and the env resets itself.
        _, _, done, _ = env.step(env.action_space.sample())
        assert done == True
        assert env.episode_id == 1

        env.close()
示例#19
0
def test():
    """Score a two-task benchmark from monitor output.

    Runs alternating evaluation/training episodes under the Monitor and
    checks that ClipTo01ThenAverage produces the expected per-evaluation
    scores and overall benchmark score.
    """
    # Same env twice with different step budgets, one trial each.
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       tasks=[{
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 5
                                       }, {
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        # One evaluation rollout, two training rollouts, then a final
        # (good) evaluation rollout.
        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0':
            evaluation_score['scores'],
        })

        # Expected values are fixed by env.seed(0) above.
        assert np.all(
            np.isclose(evaluation_score['scores'],
                       [0.00089999999999999998, 0.0054000000000000003
                        ])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
示例#20
0
def test_default_time_limit():
    """TimeLimit falls back to its default seconds cap when the env
    declares no limit of its own."""
    # Register an env that carries no time-limit metadata.
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='jiminy.envs:DummyVNCEnv',
        tags={'vnc': True},
    )

    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    # Seconds cap comes from the wrapper default; no step cap is set.
    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps == None
示例#21
0
def test_only_complete_episodes_written():
    """The monitor persists finished episodes only; a dangling one is dropped."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), temp, video_callable=False)
        env.reset()
        done = False
        while not done:
            _, _, done, _ = env.step(env.action_space.sample())

        # Start (but never finish) a second episode.
        env.reset()
        env.step(env.action_space.sample())

        env.close()

        # Only 1 episode should be written
        lengths = monitoring.load_results(temp)['episode_lengths']
        assert len(lengths) == 1, "Found {} episodes written; expecting 1".format(len(lengths))
示例#22
0
def test_steps_limit_restart():
    """TimeLimit adopts the env's step cap and clears its counter on done."""
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    # The cap comes from the env: 2 steps, no wall-clock limit.
    assert env._max_episode_seconds == None
    assert env._max_episode_steps == 2

    # One step in: not done yet.
    _, _, done, _ = env.step([[]])
    assert done == [False]

    # Second step hits the cap; done fires and the step counter resets.
    _, _, done, _ = env.step([[]])
    assert done == [True]
    assert env._elapsed_steps == 0
示例#23
0
def test_env_reuse():
    """An autoresetting env runs back-to-back episodes under the monitor."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('Autoreset-v0'), temp)

        env.reset()

        # Two consecutive two-step episodes: done fires on every second step.
        for _ in range(2):
            _, _, done, _ = env.step(None)
            assert not done
            _, _, done, _ = env.step(None)
            assert done

        env.close()
示例#24
0
    def __init__(self, env, gym_core_id=None):
        """Wrap *env* with a Discrete action space mirroring a gym-core env.

        :param env: the env to wrap.
        :param gym_core_id: id of the gym-core env whose action set is
            mirrored; when None it is read from the spec's 'gym_core_id'
            kwarg (only available once registration has completed).
        """
        super(GymCoreAction, self).__init__(env)

        if gym_core_id is None:
            # self.spec is None while inside of the make, so we need
            # to pass gym_core_id in explicitly there. This case will
            # be hit when instantiating by hand.
            gym_core_id = self.spec._kwargs['gym_core_id']

        spec = gym.spec(gym_core_id)
        raw_action_space = gym_core_action_space(gym_core_id)

        # Expose the raw action list as a Discrete space over its indices.
        self._actions = raw_action_space.actions
        self.action_space = gym_spaces.Discrete(len(self._actions))

        if spec._entry_point.startswith('gym.envs.atari:'):
            # Atari envs get a key-state translator built from a fresh
            # gym-core instance; everything else needs none.
            self.key_state = translator.AtariKeyState(gym.make(gym_core_id))
        else:
            self.key_state = None
示例#25
0
def test_peek():
    """PeekReward steps expose the upcoming episode's status via the
    env_status.peek.* info keys while the current episode lags behind."""
    env = gym.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession,
                  rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    rewarder_client = get_rewarder_client(env)
    # Episode '1' resets and its reset reply arrives.
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    observation, reward, done, info = env.step([spaces.PeekReward])

    # Remote moves on to episode '2' (resetting) while '1' is still current.
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '2'})
    observation, reward, done, info = env.step([spaces.PeekReward])
    # While the peeked episode is resetting, observation/action are masked.
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    assert info['env_status.episode_id'] == '1'
    assert info['env_status.env_state'] == 'resetting'
    assert info['env_status.peek.episode_id'] == '2'
    assert info['env_status.peek.env_state'] == 'resetting'

    # Episode '2' starts running: masking is lifted, current is still '1'.
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'running',
        'fps': 60
    }, {'episode_id': '2'})
    observation, reward, done, info = env.step([spaces.PeekReward])
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    assert info['env_status.episode_id'] == '1'
    assert info['env_status.env_state'] == 'resetting'
    assert info['env_status.peek.episode_id'] == '2'
    assert info['env_status.peek.env_state'] == 'running'
示例#26
0
def test_no_monitor_reset_unless_done():
    """The monitor permits reset() only right after a reset or once an
    episode has finished; mid-episode resets must raise."""
    def assert_reset_raises(env):
        # Helper: reset() must raise error.Error in the env's current state.
        errored = False
        try:
            env.reset()
        except error.Error:
            errored = True
        assert errored, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as temp:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.reset()

        # can reset once as soon as we start
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # can reset multiple times in a row
        env.reset()
        env.reset()

        # Mid-episode (after steps), resetting is forbidden.
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        # should allow resets after the episode is done
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.reset()

        # Starting a new episode re-arms the restriction.
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.close()
示例#27
0
def load_demo(dir_name, env_name, read_text=False):
    """
    Loads demonstration from the specified directory, filtering by env name.

    :param dir_name: root directory whose demo subdirectories are iterated
    :param env_name: env id used both to build the env and to filter dirs
    :param read_text: if True, also parse text entries from rewards.demo
        and pass them to the sample extractor
    :return: list of (obs, action) tuples
    """
    result = []

    # The env is only needed to translate recorded pointer events into
    # the SoftmaxClickMouse discrete action ids.
    env = gym.make(env_name)
    env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)

    def mouse_to_action(pointer_event):
        # Map a raw pointer event to the wrapper's discrete action index.
        return env._action_to_discrete(pointer_event)

    for demo_dir in iterate_demo_dirs(dir_name, env_name):
        # Client-side (input) and server-side (framebuffer) recordings.
        client_header, client_messages = \
            read_fbp_file(os.path.join(demo_dir, "client.fbs"),
                          rfp_client.RfpClient, rfp_client.RfpClient.Header,
                          rfp_client.RfpClient.Message)

        srv_header, srv_messages = \
            read_fbp_file(os.path.join(demo_dir, "server.fbs"),
                          rfp_server.RfpServer, rfp_server.RfpServer.Header,
                          rfp_server.RfpServer.Message)

        if read_text:
            text_entries = read_text_entries(
                os.path.join(demo_dir, "rewards.demo"))
        else:
            text_entries = None

        samples = extract_samples(client_header,
                                  client_messages,
                                  srv_header,
                                  srv_messages,
                                  text_entries=text_entries,
                                  mouse_to_action=mouse_to_action)
        result.extend(samples)

    return result
示例#28
0
def test_boundary_simple():
    """Rewards received before an episode boundary are summed and delivered
    together with the done flag and the next episode's status."""
    env = gym.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession,
                  rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    rewarder_client = get_rewarder_client(env)
    # Episode '1' resets and its reset reply arrives.
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})

    # Two rewards for episode '1'; the second one ends the episode.
    rewarder_client._manual_recv('v0.env.reward', {
        'reward': 1,
        'done': False,
        'info': {}
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.reward', {
        'reward': 2,
        'done': True,
        'info': {}
    }, {'episode_id': '1'})
    # Remote immediately starts resetting episode '2'.
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60
    }, {'episode_id': '2'})

    # We have reward of 3 for episode 1, and episode 2 should now be resetting
    observation, reward, done, info = env.step([])
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    assert (reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (3, True, 'resetting', '2')
示例#29
0
    # NOTE(review): fragment — the enclosing function and the creation of
    # `parser` / `args.env_id` / `args.verbosity` lie above this view.
    parser.add_argument('-N',
                        '--max-steps',
                        type=int,
                        default=10**7,
                        help='Maximum number of steps to take')
    args = parser.parse_args()

    # Unset the library loggers' levels so they defer to the root config.
    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('jiminy').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    # env = wrappers.BlockingReset(env)
    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnv's try to mimic their core counterparts,
        # and thus came pre-wrapped wth an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)
示例#30
0
    # NOTE(review): fragment — the enclosing function, `env_name`, `args`,
    # and the constants (SAVES_DIR, REMOTES_COUNT, LEARNING_RATE, GAMMA,
    # REWARD_STEPS, device) are defined above this view; the final `with`
    # block's body continues below it.
    name = env_name.split('.')[-1] + "_" + args.name
    writer = SummaryWriter(comment="-wob_click_" + name)
    saves_path = os.path.join(SAVES_DIR, name)
    os.makedirs(saves_path, exist_ok=True)

    # Optionally preload human demonstrations for use during training.
    demo_samples = None
    if args.demo:
        demo_samples = vnc_demo.load_demo(args.demo, env_name)
        if not demo_samples:
            demo_samples = None
            print("Demo not found")
        else:
            print("Loaded %d demo samples, will use them during training" % len(demo_samples))

    env = gym.make(env_name)
    env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env)
    wob_vnc.configure(env, wob_vnc.remotes_url(port_ofs=args.port_ofs, hostname=args.host, count=REMOTES_COUNT))

    net = model_vnc.Model(input_shape=wob_vnc.WOB_SHAPE, n_actions=env.action_space.n).to(device)
    print(net)
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

    # Policy agent samples from the softmaxed policy head of the model.
    agent = ptan.agent.PolicyAgent(lambda x: net(x)[0], device=device, apply_softmax=True)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        [env], agent, gamma=GAMMA, steps_count=REWARD_STEPS, vectorized=True)

    best_reward = None
    with common.RewardTracker(writer) as tracker:
        with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker: