Example #1
def main(env_config, model_config, agent_config, buffer_config, train=train):
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config['precision'])

    create_model, Agent = pkg.import_agent(config=agent_config)
    Buffer = pkg.import_module('buffer', config=agent_config).Buffer

    use_ray = env_config.get('n_workers', 1) > 1
    if use_ray:
        import ray
        from utility.ray_setup import sigint_shutdown_ray
        ray.init()
        sigint_shutdown_ray()

    env = create_env(env_config, force_envvec=True)
    eval_env_config = env_config.copy()
    if 'num_levels' in eval_env_config:
        eval_env_config['num_levels'] = 0
    if 'seed' in eval_env_config:
        eval_env_config['seed'] += 1000
    eval_env_config['n_workers'] = 1
    for k in list(eval_env_config.keys()):
        # pop reward hacks
        if 'reward' in k:
            eval_env_config.pop(k)
    eval_env = create_env(eval_env_config, force_envvec=True)

    def sigint_handler(sig, frame):
        signal.signal(sig, signal.SIG_IGN)
        env.close()
        eval_env.close()
        sys.exit(0)

    signal.signal(signal.SIGINT, sigint_handler)

    models = create_model(model_config, env)

    buffer_config['n_envs'] = env.n_envs
    buffer_config['state_keys'] = models.state_keys
    buffer = Buffer(buffer_config)

    agent = Agent(config=agent_config, models=models, dataset=buffer, env=env)

    agent.save_config(
        dict(env=env_config,
             model=model_config,
             agent=agent_config,
             buffer=buffer_config))

    train(agent, env, eval_env, buffer)

    if use_ray:
        env.close()
        eval_env.close()
        ray.shutdown()
Example #2
def main(env_config, model_config, agent_config, replay_config):
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config.get('precision', 32))

    use_ray = env_config.get('n_workers', 1) > 1
    if use_ray:
        import ray
        from utility.ray_setup import sigint_shutdown_ray
        ray.init()
        sigint_shutdown_ray()

    env = create_env(env_config)
    eval_env_config = env_config.copy()
    eval_env_config['n_workers'] = 1
    eval_env_config['n_envs'] = 1
    reward_key = [k for k in eval_env_config.keys() if 'reward' in k]
    for k in reward_key:
        eval_env_config.pop(k)
    eval_env = create_env(eval_env_config, force_envvec=True)

    agent_config['N_UPDATES'] *= env_config['n_workers'] * env_config['n_envs']
    create_model, Agent = pkg.import_agent(config=agent_config)
    models = create_model(model_config, env)

    n_workers = env_config.get('n_workers', 1)
    n_envs = env_config.get('n_envs', 1)
    replay_config['n_envs'] = n_workers * n_envs
    replay_config['seqlen'] = env.max_episode_steps
    if getattr(models, 'state_keys', ()):
        replay_config['state_keys'] = list(models.state_keys)
    replay = create_replay(replay_config)
    replay.load_data()

    am = pkg.import_module('agent', config=agent_config)
    data_format = am.get_data_format(env=env,
                                     replay_config=replay_config,
                                     agent_config=agent_config,
                                     model=models)
    dataset = create_dataset(replay, env, data_format=data_format)

    agent = Agent(config=agent_config, models=models, dataset=dataset, env=env)

    agent.save_config(
        dict(env=env_config,
             model=model_config,
             agent=agent_config,
             replay=replay_config))

    train(agent, env, eval_env, replay)

    if use_ray:
        ray.shutdown()
Example #3
    def test_Env(self):
        for name in [
                'atari_pong', 'atari_breakout', 'BipedalWalkerHardcore-v3'
        ]:
            for life_done in [False, True]:
                for _ in range(2):
                    config = default_config.copy()
                    config['name'] = name
                    config['n_envs'] = 1
                    config['life_done'] = life_done
                    env = create_env(config)
                    cr = 0
                    n = 0
                    re = 0
                    for i in range(2000):
                        a = env.random_action()
                        s, r, d, re = env.step(a)
                        cr += r
                        if r != 0:
                            print(name, i, r, cr, env.score())
                        n += env.info().get('frame_skip', 1)
                        np.testing.assert_equal(cr, env.score())
                        np.testing.assert_equal(n, env.epslen())
                        if env.info().get('game_over'):
                            cr = 0
                            n = 0
Example #4
        def __init__(self,
                     *,
                     config,
                     name='Evaluator',
                     model_config,
                     env_config,
                     model_fn):
            config_actor(name, config)

            env_config.pop('reward_clip', False)
            self.env = env = create_env(env_config)

            model = model_fn(config=model_config, env=env)

            super().__init__(
                name=name,
                config=config,
                models=model,
                dataset=None,
                env=env,
            )

            # the names of network modules that should be in sync with the learner
            if not hasattr(self, '_pull_names'):
                self._pull_names = [
                    k for k in self.model.keys() if 'target' not in k
                ]

            # used for recording evaluator side info
            self._info = collections.defaultdict(list)
Example #5
        def __init__(self, model_fn, replay, config, model_config, env_config,
                     replay_config):
            config_actor('Learner', config)

            env = create_env(env_config)

            model = model_fn(config=model_config, env=env)

            am = pkg.import_module('agent', config=config, place=-1)
            data_format = am.get_data_format(env=env,
                                             replay_config=replay_config,
                                             agent_config=config,
                                             model=model)
            dataset = create_dataset(replay,
                                     env,
                                     data_format=data_format,
                                     use_ray=True)

            super().__init__(
                name='Learner',
                config=config,
                models=model,
                dataset=dataset,
                env=env,
            )
Example #6
    def __init__(self, name, worker_id, env_config):
        cpu_affinity(f'Worker_{worker_id}')
        self.name = name
        self._id = worker_id
        self._n_envs = env_config['n_envs']
        env_config['n_workers'] = env_config['n_envs'] = 1
        self._envs = [create_env(env_config) for _ in range(self._n_envs)]
Example #7
        def __init__(self, worker_id, config, env_config, buffer_config):
            config_attr(self, config)
            cpu_affinity(f'Worker_{worker_id}')
            self._id = worker_id

            self._n_envvecs = env_config.pop('n_envvecs')
            env_config.pop('n_workers', None)
            self._envvecs = [
                create_env(env_config, force_envvec=True)
                for _ in range(self._n_envvecs)
            ]

            collect_fn = pkg.import_module('agent', config=config,
                                           place=-1).collect
            self._collect = functools.partial(collect_fn,
                                              env=None,
                                              step=None,
                                              reset=None)

            buffer_config['force_envvec'] = True
            self._buffs = {
                eid: create_local_buffer(buffer_config)
                for eid in range(self._n_envvecs)
            }

            self._obs = {
                eid: e.output().obs
                for eid, e in enumerate(self._envvecs)
            }
            self._info = collections.defaultdict(list)
Example #8
    def test_RayEnvVec(self):
        for name in [
                'atari_pong', 'atari_breakout', 'BipedalWalkerHardcore-v3'
        ]:
            for _ in range(3):
                config = default_config.copy()
                config['name'] = name
                ray.init()
                config['n_envs'] = 2
                config['n_workers'] = 2
                env = create_env(config)
                cr = np.zeros(env.n_envs)
                n = np.zeros(env.n_envs)
                for _ in range(2000):
                    a = env.random_action()
                    s, r, d, re = env.step(a)
                    cr += r
                    n += np.array([i.get('frame_skip', 1) for i in env.info()])
                    np.testing.assert_allclose(cr, env.score())
                    np.testing.assert_equal(n, env.epslen())
                    if np.any(re):
                        info = env.info()
                        for k, i in enumerate(info):
                            if i.get('game_over'):
                                cr[k] = 0
                                n[k] = 0

                ray.shutdown()
Example #9
    def test_sper(self):
        config = dict(
            replay_type='seqper',  # per or uniform
            precision=32,
            # arguments for PER
            beta0=0.4,
            to_update_top_priority=False,

            # arguments for general replay
            batch_size=2,
            sample_size=7,
            burn_in_size=2,
            min_size=2,
            capacity=10000,
            state_keys=['h', 'c', 'prev_reward'],
            extra_keys=['obs', 'action', 'mu', 'mask'])
        env_config = dict(n_envs=1, name='dummy')
        from env.dummy import DummyEnv
        from env import wrappers
        from env.func import create_env

        def mkenv(config):
            env = DummyEnv(**config)
            env = wrappers.post_wrap(env, config)
            return env

        for n_envs in np.arange(2, 3):
            config['n_envs'] = n_envs
            env_config['n_envs'] = n_envs
            for burn_in_size in np.arange(0, config['sample_size']):
                config['burn_in_size'] = burn_in_size
                replay = create_replay(config)
                env = create_env(env_config, mkenv)
                out = env.output()
                o, prev_reward, d, reset = out
                for i in range(1, 10000):
                    a = np.random.randint(0, 10, n_envs)
                    no, r, d, reset = env.step(a)
                    if n_envs == 1:
                        h = np.ones(2) * r
                        c = np.ones(2) * r
                    else:
                        h = np.ones((n_envs, 2)) * r[:, None]
                        c = np.ones((n_envs, 2)) * r[:, None]
                    replay.add(obs=o,
                               reward=r,
                               discount=d,
                               h=h,
                               c=c,
                               mask=1 - reset,
                               prev_reward=prev_reward)
                    if replay.good_to_learn():
                        data = replay.sample()
                        np.testing.assert_equal(data['reward'][:, 0],
                                                data['h'][:, 0])
                        np.testing.assert_equal(data['obs'][:, 0, 0],
                                                data['c'][:, 0])
                    o = no
                    prev_reward = r
Example #10
def main(env_config, model_config, agent_config, replay_config):
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config['precision'])

    use_ray = env_config.get('n_workers', 0) > 1
    if use_ray:
        import ray
        ray.init()
        sigint_shutdown_ray()

    env = create_env(env_config, make_env, force_envvec=True)
    eval_env_config = env_config.copy()
    eval_env_config['n_envs'] = 1
    eval_env_config['n_workers'] = 1
    eval_env = create_env(eval_env_config, make_env)

    replay_config['dir'] = agent_config['root_dir'].replace('logs', 'data')
    replay = create_replay(replay_config)
    replay.load_data()
    dtype = global_policy().compute_dtype
    data_format = pkg.import_module(
        'agent', config=agent_config).get_data_format(
            env=env,
            batch_size=agent_config['batch_size'],
            sample_size=agent_config['sample_size'],
            dtype=dtype)
    process = functools.partial(process_with_env,
                                env=env,
                                obs_range=[-.5, .5],
                                one_hot_action=True,
                                dtype=dtype)
    dataset = Dataset(replay, data_format, process)

    create_model, Agent = pkg.import_agent(config=agent_config)
    models = create_model(model_config, env)

    agent = Agent(config=agent_config, models=models, dataset=dataset, env=env)

    agent.save_config(
        dict(env=env_config,
             model=model_config,
             agent=agent_config,
             replay=replay_config))

    train(agent, env, eval_env, replay)
Example #11
    def test_sequential_buffer_random(self):
        config = dict(
            replay_type='seqper',  # per or uniform
            # arguments for general replay
            n_envs=32,
            seqlen=16,
            reset_shift=2,
            state_keys=['h', 'c', 'prev_reward'],
            extra_keys=['obs', 'action', 'mu', 'mask'])
        env_config = dict(n_envs=1, name='dummy')
        from env.dummy import DummyEnv
        from env import wrappers
        from env.func import create_env

        def mkenv(config):
            env = DummyEnv(**config)
            env = wrappers.post_wrap(env, config)
            return env

        for n_envs in np.arange(2, 3):
            config['n_envs'] = n_envs
            env_config['n_envs'] = n_envs
            for burn_in_size in np.arange(0, config['seqlen']):
                config['burn_in_size'] = burn_in_size
                buff = create_local_buffer(config)
                env = create_env(env_config, mkenv)
                out = env.output()
                o, prev_reward, d, reset = out
                for i in range(1, 1000):
                    a = np.random.randint(0, 10, n_envs)
                    no, r, d, reset = env.step(a)
                    print(r)
                    if n_envs == 1:
                        h = np.ones(2) * r
                        c = np.ones(2) * r
                    else:
                        h = np.ones((n_envs, 2)) * r[:, None]
                        c = np.ones((n_envs, 2)) * r[:, None]
                    buff.add(obs=o,
                             reward=r,
                             discount=d,
                             h=h,
                             c=c,
                             mask=1 - reset,
                             prev_reward=prev_reward)
                    if buff.is_full():
                        data_list = buff.sample()
                        if n_envs == 1:
                            data_list = [data_list]
                        for data in data_list:
                            np.testing.assert_equal(data['reward'][0],
                                                    data['h'][0])
                            np.testing.assert_equal(data['obs'][0, 0],
                                                    data['c'][0])
                        buff.reset()
                    prev_reward = r
                    o = no
Example #12
def main(env_config, model_config, agent_config, buffer_config):
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config['precision'])

    create_model, Agent = pkg.import_agent(config=agent_config)
    Buffer = pkg.import_module('buffer', config=agent_config).Buffer

    use_ray = env_config.get('n_workers', 1) > 1
    if use_ray:
        import ray
        from utility.ray_setup import sigint_shutdown_ray
        ray.init()
        sigint_shutdown_ray()

    env = create_env(env_config, force_envvec=True)
    eval_env_config = env_config.copy()
    eval_env_config['seed'] += 1000
    eval_env_config['n_workers'] = 1
    eval_env_config['n_envs'] = 1
    for k in list(eval_env_config.keys()):
        # pop reward hacks
        if 'reward' in k:
            eval_env_config.pop(k)
    eval_env = create_env(eval_env_config, force_envvec=True)

    models = create_model(model_config, env)

    buffer_config['n_envs'] = env.n_envs
    buffer = Buffer(buffer_config)

    agent = Agent(config=agent_config, models=models, dataset=buffer, env=env)

    agent.save_config(
        dict(env=env_config,
             model=model_config,
             agent=agent_config,
             buffer=buffer_config))

    train(agent, env, eval_env, buffer)

    if use_ray:
        ray.shutdown()
Example #13
        def __init__(self, *, worker_id, config, model_config, env_config,
                     buffer_config, model_fn, buffer_fn):
            config_actor(f'Worker_{worker_id}', config)
            self._id = worker_id

            self.env = create_env(env_config)

            buffer_config['n_envs'] = self.env.n_envs
            if 'seqlen' not in buffer_config:
                buffer_config['seqlen'] = self.env.max_episode_steps
            self.buffer = buffer_fn(buffer_config)

            models = model_fn(config=model_config, env=self.env)

            super().__init__(name=f'Worker_{worker_id}',
                             config=config,
                             models=models,
                             dataset=self.buffer,
                             env=self.env)

            # setup runner
            import importlib
            em = importlib.import_module(
                f'env.{env_config["name"].split("_")[0]}')
            info_func = em.info_func if hasattr(em, 'info_func') else None
            self._run_mode = getattr(self, '_run_mode', RunMode.NSTEPS)
            assert self._run_mode in [RunMode.NSTEPS, RunMode.TRAJ]
            self.runner = Runner(self.env,
                                 self,
                                 nsteps=self.SYNC_PERIOD
                                 if self._run_mode == RunMode.NSTEPS else None,
                                 run_mode=self._run_mode,
                                 record_envs=getattr(self, '_record_envs',
                                                     None),
                                 info_func=info_func)

            # worker side prioritization
            self._worker_side_prioritization = getattr(
                self, '_worker_side_prioritization', False)
            self._return_stats = self._worker_side_prioritization \
                or buffer_config.get('max_steps', 0) > buffer_config.get('n_steps', 1)

            # setups self._collect using <collect> function from the algorithm module
            collect_fn = pkg.import_module('agent',
                                           algo=self._algorithm,
                                           place=-1).collect
            self._collect = functools.partial(collect_fn, self.buffer)

            # the names of network modules that should be in sync with the learner
            if not hasattr(self, '_pull_names'):
                self._pull_names = [
                    k for k in self.model.keys() if 'target' not in k
                ]

            # used for recording worker side info
            self._info = collections.defaultdict(list)
Example #14
        def __init__(self, actor_id, model_fn, config, model_config,
                     env_config):
            config_actor('Actor', config)

            self._id = actor_id

            self._n_envvecs = env_config['n_envvecs']
            self._n_envs = env_config['n_envs']
            env = create_env(env_config)

            models = model_fn(model_config, env)

            super().__init__(name=f'Actor_{actor_id}',
                             config=config,
                             models=models,
                             dataset=None,
                             env=env)

            # number of workers per actor
            self._wpa = self._n_workers // self._n_actors

            self._action_batch = int(self._n_workers * self._n_envvecs *
                                     self._action_frac)
            if 'act_eps' in config:
                act_eps = compute_act_eps(config['act_eps_type'],
                                          config['act_eps'], None,
                                          config['n_workers'],
                                          self._n_envvecs * self._n_envs)
                self._act_eps_mapping = act_eps.reshape(
                    config['n_workers'], self._n_envvecs, self._n_envs)
                print(self.name, self._act_eps_mapping)
            else:
                self._act_eps_mapping = None

            # agent's state
            if 'rnn' in self.model:
                self._state_mapping = collections.defaultdict(
                    lambda: self.model.get_initial_state(batch_size=env.n_envs,
                                                         dtype=self._dtype))
                self._prev_action_mapping = collections.defaultdict(
                    lambda: tf.zeros(
                        (env.n_envs, *self._action_shape), self._dtype))

            if not hasattr(self, '_pull_names'):
                self._pull_names = [
                    k for k in self.model.keys() if 'target' not in k
                ]

            self._to_sync = Every(self.SYNC_PERIOD) if getattr(
                self, 'SYNC_PERIOD', None) else None
Example #15
def main(env_config,
         model_config,
         agent_config,
         replay_config,
         n,
         record=False,
         size=(128, 128),
         video_len=1000,
         fps=30,
         save=False):
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config.get('precision', 32))

    use_ray = env_config.get('n_workers', 0) > 1
    if use_ray:
        import ray
        ray.init()
        sigint_shutdown_ray()

    algo_name = agent_config['algorithm']
    env_name = env_config['name']

    try:
        make_env = pkg.import_module('env', algo_name, place=-1).make_env
    except Exception:
        make_env = None
    env_config.pop('reward_clip', False)
    env = create_env(env_config, env_fn=make_env)
    create_model, Agent = pkg.import_agent(config=agent_config)
    models = create_model(model_config, env)

    agent = Agent(config=agent_config, models=models, dataset=None, env=env)

    if n < env.n_envs:
        n = env.n_envs
    scores, epslens, video = evaluate(env,
                                      agent,
                                      n,
                                      record=record,
                                      size=size,
                                      video_len=video_len)
    pwc(f'After running {n} episodes',
        f'Score: {np.mean(scores):.3g}\tEpslen: {np.mean(epslens):.3g}',
        color='cyan')

    if record:
        save_video(f'{algo_name}-{env_name}', video, fps=fps)
    if use_ray:
        ray.shutdown()
Example #16
        def __init__(self,
                     name,
                     model_fn,
                     config,
                     model_config,
                     env_config,
                     replay_config):
            cpu_affinity('Learner')
            silence_tf_logs()
            configure_threads(config['n_cpus'], config['n_cpus'])
            configure_gpu()
            configure_precision(config['precision'])
            self._dtype = global_policy().compute_dtype

            self._envs_per_worker = env_config['n_envs']
            env_config['n_envs'] = 1
            env = create_env(env_config)
            assert env.obs_dtype == np.uint8, \
                f'Expect image observation of type uint8, but get {env.obs_dtype}'
            self._action_shape = env.action_shape
            self._action_dim = env.action_dim
            self._frame_skip = getattr(env, 'frame_skip', 1)

            self.models = Ensemble(
                model_fn=model_fn,
                config=model_config, 
                obs_shape=env.obs_shape,
                action_dim=env.action_dim, 
                is_action_discrete=env.is_action_discrete
            )

            super().__init__(
                name=name, 
                config=config, 
                models=self.models,
                dataset=None,
                env=env)

            replay_config['dir'] = config['root_dir'].replace('logs', 'data')
            self.replay = create_replay(replay_config)
            data_format = get_data_format(env, replay_config)
            process = functools.partial(process_with_env, env=env)
            self.dataset = Dataset(self.replay, data_format, process, prefetch=10)

            self._env_step = self.env_step()
Example #17
        def __init__(self,
                     name,
                     model_fn,
                     config,
                     model_config,
                     env_config):
            cpu_affinity('Actor')
            silence_tf_logs()
            configure_threads(1, 1)
            configure_gpu()
            configure_precision(config['precision'])
            self._dtype = global_policy().compute_dtype

            self._envs_per_worker = env_config['n_envs']
            env_config['n_envs'] = config['action_batch']
            self.env = env = create_env(env_config)
            assert self.env.obs_dtype == np.uint8, \
                f'Expect image observation of type uint8, but get {self.env.obs_dtype}'
            self._action_shape = self.env.action_shape
            self._action_dim = self.env.action_dim

            self.models = Ensemble(
                model_fn=model_fn,
                config=model_config, 
                obs_shape=self.env.obs_shape,
                action_dim=self.env.action_dim, 
                is_action_discrete=self.env.is_action_discrete
            )

            super().__init__(
                name=name, 
                config=config, 
                models=self.models,
                dataset=None,
                env=self.env)
            
            # cache for episodes
            self._cache = collections.defaultdict(list)

            # agent's state
            self._state = collections.defaultdict(lambda:
                self.rssm.get_initial_state(batch_size=1, dtype=self._dtype))
            self._prev_action = collections.defaultdict(lambda:
                tf.zeros((1, self._action_dim), self._dtype))
Example #18
def main(env_config,
         model_config,
         agent_config,
         replay_config,
         n,
         record=False,
         size=(128, 128),
         video_len=1000,
         fps=30,
         save=False):
    logging.basicConfig(level=logging.DEBUG)
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config.get('precision', 32))

    use_ray = env_config.get('n_workers', 0) > 1
    if use_ray:
        import ray
        ray.init()
        sigint_shutdown_ray()

    algo_name = agent_config['algorithm']
    env_name = env_config['name']

    if record:
        env_config['log_episode'] = True
        env_config['n_workers'] = env_config['n_envs'] = 1

    env = create_env(env_config)

    create_model, Agent = pkg.import_agent(config=agent_config)

    models = create_model(model_config, env)

    agent = Agent(config=agent_config, models=models, dataset=None, env=env)

    if save:
        n_workers = env_config.get('n_workers', 1)
        n_envs = env_config.get('n_envs', 1)
        replay_config['n_envs'] = n_workers * n_envs
        replay_config['replay_type'] = 'uniform'
        replay_config['dir'] = f'data/{agent.name.lower()}-{env.name.lower()}'
        replay_config['n_steps'] = 1
        replay_config['save'] = True
        replay_config['save_temp'] = True
        replay_config['capacity'] = int(1e6)
        replay_config['has_next_obs'] = True
        replay = create_replay(replay_config)

        def collect(obs, action, reward, discount, next_obs, logpi, **kwargs):
            replay.add(obs=obs,
                       action=action,
                       reward=reward,
                       discount=discount,
                       next_obs=next_obs,
                       logpi=logpi)
    else:

        def collect(**kwargs):
            pass

    if n < env.n_envs:
        n = env.n_envs
    scores, epslens, video = evaluate(env,
                                      agent,
                                      n,
                                      record=record,
                                      size=size,
                                      video_len=video_len,
                                      step_fn=collect)
    pwc(f'After running {n} episodes',
        f'Score: {np.mean(scores):.3g}\tEpslen: {np.mean(epslens):.3g}',
        color='cyan')

    if save:
        replay.save()

    if record:
        save_video(f'{algo_name}-{env_name}', video, fps=fps)
    if use_ray:
        ray.shutdown()
Example #19
        self._output = EnvOutput(obs, reward, discount, reset)
        # assert np.all(done) == info.get('game_over', False), (reset, info['game_over'])
        # assert np.all(reset) == info.get('game_over', False), (reset, info['game_over'])
        return self._output


def get_wrapper_by_name(env, classname):
    currentenv = env
    while True:
        if classname == currentenv.__class__.__name__:
            return currentenv
        elif hasattr(currentenv, 'env'):
            currentenv = currentenv.env
        else:
            # don't raise error here, only return None
            return None


if __name__ == '__main__':
    from env.func import create_env
    env = create_env(dict(name='smac_3s5z', seed=0))

    for i in range(10000):
        a = env.random_action()
        out = env.step(a)
        print(out[2:])
        if np.all(out.reset):
            info = env.info()
            print(info['score'], info['epslen'])
Example #20
import time

import numpy as np
import ray

from env import procgen
from env.func import create_env

if __name__ == '__main__':
    config = dict(
        name='procgen_coinrun',
        n_envs=10,
    )

    def make_env(config):
        env = procgen.make_procgen_env(config)
        return env

    ray.init()
    env = create_env(config, make_env)

    print('Env', env)

    def run(env):
        st = time.time()
        for _ in range(10000):
            a = env.random_action()
            env.step(a)
        return time.time() - st

    print("Ray env:", run(env))