Example #1
        def check(agent_name, env_name):
            # Create environment
            list_make_env = make_envs(make_env=make_gym_env, 
                                      env_id=env_name, 
                                      num_env=1, 
                                      init_seed=0)
            env = SerialVecEnv(list_make_env=list_make_env)
            env_spec = EnvSpec(env)
            
            # Create agent
            if agent_name == 'random':
                agent = RandomAgent(env_spec=env_spec, config=None)
            elif agent_name == 'agent1':
                agent = Agent1(config=None)
            elif agent_name == 'agent2':
                agent = Agent2(config=None)
            else:
                raise ValueError('Wrong agent name')
            
            # Test: TrajectoryRunner does not allow more than one environment
            list_make_env2 = make_envs(make_env=make_gym_env,
                                       env_id=env_name,
                                       num_env=2,
                                       init_seed=0)
            env2 = SerialVecEnv(list_make_env=list_make_env2)
            # Build env2 outside the raises-block so only the runner can trigger the error
            with pytest.raises(AssertionError):
                TrajectoryRunner(agent=agent, env=env2, gamma=1.0)
            
            # Create runner
            runner = TrajectoryRunner(agent=agent, env=env, gamma=1.0)

            # Small batch
            D = runner(N=3, T=4)

            assert len(D) == 3
            assert all([isinstance(d, Trajectory) for d in D])
            assert all([d.T == 4 for d in D])
            assert all([d.gamma == 1.0 for d in D])

            # Check additional information
            if agent_name != 'random':
                for d in D:
                    for t in d.transitions:
                        assert 'action_logprob' in t.info

            # Check that s of each transition equals s_next of the previous one
            for d in D:
                for t1, t2 in zip(d.transitions[:-1], d.transitions[1:]):
                    assert np.allclose(t1.s_next, t2.s)
        
            # Long horizon: trajectories that terminate early must end with done=True
            D = runner(N=3, T=1000)
            for d in D:
                if d.T < 1000:
                    assert d.all_done[-1]
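
The `check` helper above is meant to be driven by pytest; here is a minimal sketch of a parametrized driver (the test name and parameter values are assumptions, not from the source):

import pytest

@pytest.mark.parametrize('env_name', ['CartPole-v1', 'Pendulum-v0'])  # assumed values
@pytest.mark.parametrize('agent_name', ['random', 'agent1', 'agent2'])
def test_trajectory_runner(agent_name, env_name):
    # Hypothetical driver: exercises the check() helper defined above
    check(agent_name, env_name)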
Example #2
        def check(agent_name, env_name):
            # Create environment
            list_make_env = make_envs(make_env=make_gym_env, 
                                      env_id=env_name, 
                                      num_env=2, 
                                      init_seed=0)
            env = SerialVecEnv(list_make_env=list_make_env)
            env_spec = EnvSpec(env)
            assert env.num_env == 2

            # Create agent
            if agent_name == 'random':
                agent = RandomAgent(env_spec=env_spec, config=None)
            elif agent_name == 'agent1':
                agent = Agent1(config=None)
            elif agent_name == 'agent2':
                agent = Agent2(config=None)
            else:
                raise ValueError('Wrong agent name')

            # Create runner
            runner = SegmentRunner(agent=agent, env=env, gamma=1.0)

            # Small batch
            D = runner(T=3, reset=False)

            assert len(D) == 2
            assert all([isinstance(d, Segment) for d in D])
            assert all([d.T == 3 for d in D])
            assert all([d.gamma == 1.0 for d in D])

            # Check additional information
            if agent_name != 'random':
                for d in D:
                    for t in d.transitions:
                        assert 'action_logprob' in t.info

            # Check that s of each transition equals s_next of the previous one
            for d in D:
                for t1, t2 in zip(d.transitions[:-1], d.transitions[1:]):
                    assert np.allclose(t1.s_next, t2.s)

            # Take one more step to test the rolling effect:
            # the first state should equal the last state from the previous D
            D2 = runner(T=1, reset=False)
            assert len(D2) == 2
            assert all([d.T == 1 for d in D2])
            for d, d2 in zip(D, D2):
                assert np.allclose(d2.all_s[0][0], d.transitions[-1].s_next)

            # Long horizon
            D = runner(T=200, reset=True)
            # All segments have identical length
            assert all([d.T == 200 for d in D])
            # For CartPole, done=True should occur somewhere within 200 time steps
            if env_name == 'CartPole-v1':
                assert any([True in d.all_done for d in D])
                assert all([len(d.trajectories) > 1 for d in D])
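
A segment has a fixed length T and may span episode boundaries, which is why `d.trajectories` can hold more than one trajectory. A hedged sketch of the invariant this suggests (inferred from the test above, not stated in the source):

D = runner(T=200, reset=True)
for d in D:
    # Assumed invariant: the sub-trajectories partition the segment's time steps
    assert sum(traj.T for traj in d.trajectories) == d.T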
Example #3
    def make_env_spec(self):
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id='CartPole-v1',
                                  num_env=3,
                                  init_seed=0)
        venv = SerialVecEnv(list_make_env=list_make_env, rolling=True)
        env_spec = EnvSpec(venv)

        return env_spec
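
A minimal usage sketch for the spec built above; the space attributes are assumed from typical gym-style specs rather than confirmed by the source:

env_spec = self.make_env_spec()
# Assumed attributes: an EnvSpec typically exposes the wrapped env's spaces
print(env_spec.observation_space, env_spec.action_space)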
Example #4
    def make_env_spec(self):
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id='Pendulum-v0',
                                  num_env=1,
                                  init_seed=0)
        venv = SerialVecEnv(list_make_env=list_make_env)
        env_spec = EnvSpec(venv)

        return env_spec
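
Note num_env=1 here: per the AssertionError test in Example #1, TrajectoryRunner only accepts a single-environment SerialVecEnv. A hedged pairing sketch (`agent` and the gamma value are assumptions):

venv = SerialVecEnv(list_make_env=make_envs(make_env=make_gym_env,
                                            env_id='Pendulum-v0',
                                            num_env=1,
                                            init_seed=0))
runner = TrajectoryRunner(agent=agent, env=venv, gamma=0.99)  # 'agent' assumed in scope
D = runner(N=2, T=10)  # two trajectories of up to 10 steps each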
Example #5
def test_make_envs():
    list_make_env = make_envs(make_env=make_gym_env, env_id='Pendulum-v0', num_env=3, init_seed=1)
    assert len(list_make_env) == 3
    assert list_make_env[0] != list_make_env[1] and list_make_env[0] != list_make_env[2]

    # Test that the seeds are assigned correctly
    seeder = Seeder(init_seed=1)
    seeds = seeder(3)
    for make_env, seed in zip(list_make_env, seeds):
        assert make_env.keywords['seed'] == seed
    env = list_make_env[0]()
    raw_env = gym.make('Pendulum-v0')
    raw_env.seed(seeds[0])
    assert np.allclose(env.reset(), raw_env.reset())
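
The loop above implies that Seeder is deterministic for a fixed init_seed; a minimal sketch of that property (inferred from the test, not stated explicitly):

s1 = Seeder(init_seed=1)(3)
s2 = Seeder(init_seed=1)(3)
assert s1 == s2  # same init_seed yields the same seed sequence (inferred)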
Example #6
    def __call__(self, config):
        # Set random seeds: PyTorch, numpy.random, random
        set_global_seeds(seed=config['seed'])

        # Make a list of make_env functions
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id=config['env:id'],
                                  num_env=config['train:N'],
                                  init_seed=config['seed'] * 2)
        # Create vectorized environment
        env = SerialVecEnv(list_make_env=list_make_env)
        # Create environment specification
        env_spec = EnvSpec(env)

        # Create device
        device = torch.device(
            f'cuda:{config["cuda_id"]}' if config['cuda'] else 'cpu')

        # Create policy
        network = MLP(config=config).to(device)
        policy = CategoricalPolicy(network=network, env_spec=env_spec)

        # Create optimizer
        optimizer = optim.Adam(policy.network.parameters(),
                               lr=config['algo:lr'])
        # Create learning rate scheduler
        if config['algo:use_lr_scheduler']:
            # Maximum number of LR-decay epochs; note where lr_scheduler is stepped
            max_epoch = config['train:iter']
            # Linearly decay the learning rate each training epoch
            lambda_f = lambda epoch: 1 - epoch / max_epoch
            lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_f)

        # Create agent
        kwargs = {'device': device}
        if config['algo:use_lr_scheduler']:
            kwargs['lr_scheduler'] = lr_scheduler
        agent = A2CAgent(policy=policy,
                         optimizer=optimizer,
                         config=config,
                         **kwargs)

        # Create runner
        runner = SegmentRunner(agent=agent,
                               env=env,
                               gamma=config['algo:gamma'])

        # Create engine
        engine = Engine(agent=agent, runner=runner, config=config, logger=None)

        # Training and evaluation
        train_logs = []
        eval_logs = []
        for i in range(config['train:iter']):
            train_output = engine.train(i)

            # Logging and evaluation
            if i == 0 or (i + 1) % config['log:interval'] == 0:
                # Log training and record the loggings
                train_logger = engine.log_train(train_output)
                train_logs.append(train_logger.logs)
                # Log evaluation and record the loggings
                eval_output = engine.eval(i)
                eval_logger = engine.log_eval(eval_output)
                eval_logs.append(eval_logger.logs)

        # Save the loggings
        np.save(
            Path(config['log:dir']) / str(config['ID']) / 'train', train_logs)
        np.save(
            Path(config['log:dir']) / str(config['ID']) / 'eval', eval_logs)

        return None
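
For reference, every config key this `__call__` reads, collected into one illustrative dict; the keys come from the code above, the values are placeholders, and `MLP`, `A2CAgent`, and `Engine` may read additional keys not shown:

config = {
    'seed': 0,
    'env:id': 'CartPole-v1',
    'train:N': 2,             # number of parallel environments
    'train:iter': 100,        # training iterations; also the LR-decay horizon
    'cuda': False,
    'cuda_id': 0,
    'algo:lr': 1e-3,
    'algo:use_lr_scheduler': True,
    'algo:gamma': 0.99,
    'log:interval': 10,
    'log:dir': 'logs',
    'ID': 0,
}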