示例#1
0
    def work(self, master_cmd):
        """Run a single task sent by the master and hand back its result.

        Args:
            master_cmd: a 3-item sequence ``(task_id, task, seed)``. ``task``
                is used directly as the configuration for the algorithm.

        Returns:
            tuple: ``(task_id, result)`` where ``result`` is the value returned
            by calling the algorithm with the configuration.
        """
        # The seed supplied by the master is intentionally dropped: seeding is
        # expected to be driven from inside the configuration instead.
        task_id, config, _ = master_cmd

        # Build the algorithm first, then show the configuration it will run
        algo = self.make_algo()
        Config.print_config(config)

        # Execute the algorithm and pair its output with the task identifier
        return task_id, algo(config)
示例#2
0
def test_run_experiment(num_sample, max_workers, chunksize):
    """Check the directory layout that ``run_experiment`` leaves on disk.

    The three arguments are forwarded to ``Config`` / ``run_experiment``; they
    are presumably supplied by test parametrization outside this excerpt.
    """
    def run(config, seed, device, logdir):
        return config['ID'], seed, device, logdir

    seeds = [1, 2, 3]
    log_dir = './some_path'
    config = Config(
        {
            'network.lr': Grid([1e-3, 5e-3]),
            'network.size': [32, 16],
            'env.id': Grid(['CartPole-v1', 'Ant-v2'])
        },
        num_sample=num_sample,
        keep_dict_order=True)

    run_experiment(run, config, seeds, log_dir, max_workers, chunksize,
                   use_gpu=False, gpu_ids=None)

    # Top-level artifacts of the logging directory
    root = Path('./some_path')
    assert root.exists()
    assert (root / 'configs.pkl').exists()
    source_dir = root / 'source_files'
    assert source_dir.exists() and source_dir.is_dir()

    # One folder per configuration ID (four are expected from the two
    # 2-value grids), each holding its config file and one folder per seed
    for config_id in range(4):
        config_dir = root / str(config_id)
        assert config_dir.exists()
        assert (config_dir / 'config.yml').exists()
        for seed in seeds:
            seed_dir = config_dir / str(seed)
            assert seed_dir.exists()

    # Remove the logging directory and confirm it is gone
    rmtree(root)
    assert not root.exists()
示例#3
0
    def make_configs(self):
        """Assemble the experiment configurations.

        Returns:
            Whatever ``Config.make_configs()`` produces for one grid entry
            (``use_ConvVAE``) combined with a set of fixed entries.
        """
        config = Config()

        # Grid entry: both settings of the flag are listed
        config.add_grid(name='use_ConvVAE', val=[True, False])

        # Fixed entries, registered in this exact order
        fixed_items = (
            ('num_epochs', 100),
            ('cuda', True),
            ('seed', 1),
            ('batch_size', 128),
            ('log_interval', 100),
        )
        for name, val in fixed_items:
            config.add_item(name=name, val=val)

        return config.make_configs()
示例#4
0
from lagom.utils import pickle_dump
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import run_experiment

from engine import Engine
from model import VAE
from model import ConvVAE

# Experiment configuration. Keys are dot-separated "<group>.<option>" names.
config = Config({
    'cuda': True,  # read by make_dataset() to pick its extra kwargs
    'log.dir': 'logs/ConvVAE',
    'log.interval': 100,
    'nn.type': 'ConvVAE',
    'nn.z_dim': 8,
    'train.num_epoch': 100,
    'train.batch_size': 128,
    'eval.batch_size': 128
})


def make_dataset(config):
    # Training split of MNIST stored under 'data/', downloaded on demand,
    # with samples converted to tensors.
    train_dataset = datasets.MNIST('data/',
                                   train=True,
                                   download=True,
                                   transform=transforms.ToTensor())
    # Test split from the same directory (no `download` argument is passed).
    test_dataset = datasets.MNIST('data/',
                                  train=False,
                                  transform=transforms.ToTensor())
    # Extra options are only set when config['cuda'] is truthy; presumably they
    # are forwarded to a data loader -- the consumer is not visible here.
    kwargs = {'num_workers': 1, 'pin_memory': True} if config['cuda'] else {}
示例#5
0
# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config(
    {'cuda': True, 
     'log.dir': 'logs/default_new', 
     'log.freq': 10, 
     'checkpoint.freq': 50,
     
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']), 
     'env.standardize_obs': True,
     'env.standardize_reward': True,
     
     'nn.sizes': [64, 64],
     
     'agent.lr': 3e-4,
     'agent.use_lr_scheduler': True,
     'agent.gamma': 0.99,
     'agent.gae_lambda': 0.95,
     'agent.standardize_adv': True,  # standardize advantage estimates
     'agent.max_grad_norm': 0.5,  # grad clipping by norm
     'agent.entropy_coef': 0.0,  # PPO: no entropy bonus
     'agent.value_coef': 0.5,
     'agent.clip_range': 0.2,  # ratio clipping
     
     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.5,  # initial std
     'agent.std_style': 'exp',  # std parameterization
     'agent.std_range': None,  # bounded std: (min, max)
     'agent.beta': None,  # beta-sigmoidal
     
     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'train.timestep_per_iter': 2048,  # number of timesteps per iteration
     'train.batch_size': 64,
     'train.num_epochs': 10
    })
示例#6
0
# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config({
    'cuda':
    False,
    'log.dir':
    'logs/default',
    'log.freq':
    10,
    'checkpoint.freq':
    50,
    'env.id':
    Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
    'env.standardize_obs':
    False,
    'nn.sizes': [64, 64],

    # only for continuous control
    'env.clip_action':
    True,  # clip action within valid bound before step()
    'agent.std0':
    0.5,  # initial std
    'train.generations':
    int(1e3),  # total number of ES generations
    'train.popsize':
    64,
    'train.mu0':
    0.0,
    'train.std0':
    1.0,
})
def test_config():
    """Exercise the ``Config`` builder API end to end.

    Registers fixed items, one grid and three random samplers, generates the
    configurations and checks their count, their sequential ``ID`` values and
    that every entry stays within the values/ranges it was registered with.
    Finally verifies that a non-list grid value is rejected.

    Built-in ``all`` is used for the per-configuration checks instead of
    ``np.alltrue``, which was deprecated in NumPy 1.25 and removed in 2.0.
    """
    config = Config()

    config.add_item(name='algo', val='RL')
    config.add_item(name='iter', val=30)
    config.add_item(name='hidden_sizes', val=[64, 32, 16])

    config.add_grid(name='cuda_num', val=[1, 2, 3])

    config.add_random_eps(name='lr', base=10, low=-6, high=0, num_sample=10)
    config.add_random_continuous(name='values', low=-5, high=5, num_sample=5)
    config.add_random_discrete(name='select',
                               list_val=[43223, 5434, 21314],
                               num_sample=10,
                               replace=True)

    configs = config.make_configs()

    # Presumably 3 grid values x 10 x 5 x 10 samples
    assert len(configs) == 1500

    # `cfg` is used as the loop name so it is never confused with the builder
    # object `config`, which is used again at the end of the test.
    assert all(cfg['ID'] == i for i, cfg in enumerate(configs))

    assert all(cfg['algo'] == 'RL' for cfg in configs)

    assert all('iter' in cfg for cfg in configs)

    assert all(cfg['hidden_sizes'] == [64, 32, 16] for cfg in configs)

    assert all(cfg['cuda_num'] in [1, 2, 3] for cfg in configs)

    assert all(0 < cfg['lr'] < 1 for cfg in configs)

    assert all(-5 <= cfg['values'] < 5 for cfg in configs)

    assert all(cfg['select'] in [43223, 5434, 21314] for cfg in configs)

    # A grid must be given a list of values; a plain string is rejected
    with pytest.raises(AssertionError):
        config.add_grid(name='wrong', val='yes')
示例#8
0
from baselines.cem.agent import Agent

# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config({
    'log.freq':
    10,
    'checkpoint.num':
    3,
    'env.id':
    Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
    'env.standardize_obs':
    False,
    'nn.sizes': [64, 64],

    # only for continuous control
    'env.clip_action':
    True,  # clip action within valid bound before step()
    'agent.std0':
    0.6,  # initial std
    'train.generations':
    int(1e3),  # total number of ES generations
    'train.popsize':
    64,
    'train.mu0':
    0.0,
    'train.std0':
    1.0,
    'train.elite_ratio':
    0.2,
    # NOTE(review): presumably [initial, final, N, start] -- confirm against the scheduler
    'train.noise_scheduler_args': [0.01, 0.001, 200, 0]
})

示例#9
0
    def make_configs(self):
        """Assemble the experiment configurations.

        Entries are registered from a table, in the order listed, as either a
        fixed item or a grid of values.

        Returns:
            Whatever ``Config.make_configs()`` produces for these entries.
        """
        # Random seeds
        # Generated by: np.random.randint(0, np.iinfo(np.int32).max, 10)
        seeds = [
            1284204222, 1079618558, 310837894, 1130644153, 2099771862,
            1234806135, 92464293, 146053987, 1140885110, 988661500
        ]

        # (kind, name, value) triples; `kind` selects add_item or add_grid
        entries = [
            # General configurations
            ('item', 'cuda', True),
            ('item', 'cuda_id', 1),
            ('grid', 'seed', seeds),
            # Algorithm configuration
            ('item', 'algo:lr', 1e-3),
            ('item', 'algo:gamma', 0.99),
            ('item', 'algo:use_lr_scheduler', True),
            ('item', 'env:id', 'CartPole-v1'),
            # Engine configuration
            ('item', 'train:iter', 4000),
            ('item', 'train:N', 1),
            ('item', 'train:T', 500),
            ('item', 'eval:N', 10),
            ('item', 'eval:T', 500),
            # Agent configuration
            ('grid', 'agent:standardize', [True, False]),
            ('item', 'agent:max_grad_norm', 0.5),  # gradient clipping with max gradient norm
            ('item', 'agent:value_coef', 0.5),  # coefficient for learning value function
            ('item', 'agent:entropy_coef', 0.01),  # coefficient for maximizing policy entropy
            # Logging configurations
            ('item', 'log:interval', 100),
            ('item', 'log:dir', 'logs'),
        ]

        config = Config()
        for kind, name, val in entries:
            if kind == 'grid':
                config.add_grid(name=name, val=val)
            else:
                config.add_item(name=name, val=val)

        return config.make_configs()
示例#10
0
# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config({
    'log.freq':
    10,
    'checkpoint.num':
    3,
    'env.id':
    Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
    'agent.gamma':
    0.99,
    'agent.polyak':
    0.995,  # polyak averaging coefficient for targets update
    'agent.actor.lr':
    3e-4,
    'agent.actor.use_lr_scheduler':
    False,
    'agent.critic.lr':
    3e-4,
    'agent.critic.use_lr_scheduler':
    False,
    'agent.initial_temperature':
    1.0,
    'agent.max_grad_norm':
    999999,  # grad clipping by norm
    'replay.capacity':
    1000000,
    'replay.init_trial':
    10,  # number of random rollouts initially
    'replay.batch_size':
    256,
    'train.timestep':
    int(1e6),  # total number of training (environmental) timesteps
    'eval.num':
    200
})
示例#11
0
from lagom.envs import TimeStepEnv

from lagom import CEM
from baselines.cem.agent import Agent


# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config(
    {'log.freq': 10, 
     'checkpoint.num': 3,
     
     'env.id': Grid(['Acrobot-v1', 'BipedalWalker-v2', 'Pendulum-v0', 'LunarLanderContinuous-v2']), 
     
     'nn.sizes': [64, 64],
     
     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std
     
     'train.generations': 500,  # total number of ES generations
     'train.popsize': 32,
     'train.worker_chunksize': 4,  # presumably must evenly divide popsize (32 / 4 here) -- TODO confirm
     'train.mu0': 0.0,
     'train.std0': 1.0,
     'train.elite_ratio': 0.2,
     'train.noise_scheduler_args': [0.01, 0.001, 400, 0]  # [initial, final, N, start]
    })


def make_env(config, seed, mode):
    # Only the two known modes are accepted; anything else fails the assert.
    assert mode in ['train', 'eval']
    # Create the environment named by the configuration, then seed it.
    env = gym.make(config['env.id'])
    env.seed(seed)
    # NOTE(review): the remainder of this function is not visible in this excerpt.
示例#12
0
# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config({
    'log.freq':
    10,
    'checkpoint.num':
    3,
    'env.id':
    Grid([
        'Acrobot-v1', 'BipedalWalker-v2', 'Pendulum-v0',
        'LunarLanderContinuous-v2'
    ]),
    'nn.sizes': [64, 64],

    # only for continuous control
    'env.clip_action':
    True,  # clip action within valid bound before step()
    'agent.std0':
    0.6,  # initial std
    'train.generations':
    500,  # total number of ES generations
    'train.popsize':
    32,
    'train.worker_chunksize':
    4,  # presumably must evenly divide popsize (32 / 4 here) -- TODO confirm
    'train.mu0':
    0.0,
    'train.std0':
    1.0,
    'train.lr':
    1e-2,
    'train.lr_decay':
    1.0,
    'train.min_lr':
    1e-6,
    # NOTE(review): presumably [initial, final, N, start] -- confirm against the scheduler
    'train.sigma_scheduler_args': [1.0, 0.01, 400, 0],
    'train.antithetic':
    False,
    'train.rank_transform':
    True
})
示例#13
0
    def make_configs(self):
        """Assemble the experiment configurations.

        Returns:
            Whatever ``Config.make_configs()`` produces for a single-value
            ``cuda`` grid combined with the fixed entries below.
        """
        config = Config()

        # Registered as a grid even though only one value is listed
        config.add_grid(name='cuda', val=[True])

        # Fixed entries, registered in this exact order
        fixed_items = [
            ('seed', 1),
            ('lr', 1e-2),
            ('gamma', 0.995),
            ('standardize_r', True),
            ('train_iter', 1000),
            ('N', 1),
            ('T', 300),
            ('use_value', True),  # True for actor-critic
            ('entropy_coef', 0.0),
            ('value_coef', 0.0),
            ('max_grad_norm', None),
            ('log_interval', 100),
        ]
        for name, val in fixed_items:
            config.add_item(name=name, val=val)

        return config.make_configs()
示例#14
0
def test_config(num_sample, keep_dict_order):
    """Check ``Config.make_configs`` for sampled, grid-only and fixed inputs.

    Covers three scenarios: a configuration mixing ``Grid``, ``Sample`` and
    ``Condition`` entries, one with grids but no random sampling, and one
    with only fixed values. Both arguments are forwarded to ``Config``.
    """
    # A list is rejected as the configuration source
    with pytest.raises(AssertionError):
        Config([1, 2, 3])

    # --- grids + random sample + conditional entry ---
    config = Config(
        {
            'log.dir': 'some path',
            'beta':
            Condition(lambda x: 'small' if x['alpha'] < 5 else 'large'),
            'network.type': 'MLP',
            'network.hidden_size': [64, 64],
            'network.lr': Grid([1e-3, 1e-4]),
            'env.id': 'HalfCheetah-v2',
            'iter': Grid([10, 20, 30]),
            'alpha': Sample(lambda: np.random.uniform(3, 10))
        },
        num_sample=num_sample,
        keep_dict_order=keep_dict_order)
    list_config = config.make_configs()

    num_expected = 2 * 3 * num_sample
    assert len(list_config) == num_expected
    for ID in range(num_expected):
        assert list_config[ID]['ID'] == ID

    item_keys = list(config.items.keys())
    ordered_keys = ['ID'] + item_keys
    for x in list_config:
        # Every declared key is present, plus the generated 'ID'
        assert len(x.keys()) == len(item_keys) + 1
        assert all(key in x for key in item_keys)

        # Fixed entries pass through unchanged
        assert x['log.dir'] == 'some path'
        assert x['network.type'] == 'MLP'
        assert x['network.hidden_size'] == [64, 64]
        assert x['env.id'] == 'HalfCheetah-v2'

        # Grid and sampled entries stay within what was declared
        assert x['network.lr'] in [1e-3, 1e-4]
        assert x['iter'] in [10, 20, 30]
        assert 3 <= x['alpha'] < 10

        # The conditional entry follows the sampled 'alpha'
        expected_beta = 'small' if x['alpha'] < 5 else 'large'
        assert x['beta'] == expected_beta

        actual_keys = list(x.keys())
        if keep_dict_order:
            assert actual_keys == ordered_keys
        else:
            assert actual_keys != ordered_keys
            assert actual_keys == [
                'ID', 'log.dir', 'beta', 'network.type', 'network.hidden_size',
                'env.id', 'network.lr', 'iter', 'alpha'
            ]

    # --- grids only, no random sampling ---
    config = Config(
        {
            'log.dir': 'some path',
            'network.type': 'MLP',
            'network.hidden_size': [64, 64],
            'network.lr': Grid([1e-3, 1e-4]),
            'env.id': 'HalfCheetah-v2',
            'iter': Grid([10, 20, 30]),
            'alpha': 0.1
        },
        num_sample=num_sample,
        keep_dict_order=keep_dict_order)
    list_config = config.make_configs()
    # num_sample has no effect here: no repetition without random entries
    assert len(list_config) == 2 * 3 * 1
    for ID, generated in enumerate(list_config):
        assert generated['ID'] == ID

    # --- everything fixed ---
    config = Config(
        {
            'log.dir': 'some path',
            'network.type': 'MLP',
            'network.hidden_size': [64, 64],
            'network.lr': 1e-3,
            'env.id': 'HalfCheetah-v2',
            'iter': 20,
            'alpha': 0.1
        },
        num_sample=num_sample,
        keep_dict_order=keep_dict_order)
    list_config = config.make_configs()
    # num_sample has no effect here either: a single configuration results
    assert len(list_config) == 1
    assert list_config[0]['ID'] == 0
示例#15
0
from baselines.sac.replay_buffer import ReplayBuffer


# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value) and
# `Condition` wraps a function of the configuration that yields the value.
config = Config(
    {'log.freq': 1000,  # every n timesteps
     'checkpoint.num': 3,
     
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
     
     'agent.gamma': 0.99,
     'agent.polyak': 0.995,  # polyak averaging coefficient for targets update
     'agent.actor.lr': 3e-4, 
     'agent.actor.use_lr_scheduler': False,
     'agent.critic.lr': 3e-4,
     'agent.critic.use_lr_scheduler': False,
     'agent.initial_temperature': 1.0,
     'agent.max_grad_norm': 999999,  # grad clipping by norm
     
     'replay.capacity': 1000000, 
     # number of time steps to take uniform actions initially:
     # 1000 for Hopper-v3 / Walker2d-v3, 10000 for every other environment
     'replay.init_size': Condition(lambda x: 1000 if x['env.id'] in ['Hopper-v3', 'Walker2d-v3'] else 10000),
     'replay.batch_size': 256,
     
     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'eval.freq': 5000,
     'eval.num_episode': 10
     
    })


def make_env(config, seed):
示例#16
0
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import run_experiment

from engine import Engine
from model import VAE
from model import ConvVAE


# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config(
    {'log.freq': 100, 
     
     'nn.type': Grid(['VAE', 'ConvVAE']),  # both model types are listed
     'nn.z_dim': 8,
     
     'lr': 1e-3,
     
     'train.num_epoch': 100,
     'train.batch_size': 128, 
     'eval.batch_size': 128
    })


def make_dataset(config):
    # Training split of MNIST stored under 'data/', downloaded on demand,
    # with samples converted to tensors.
    train_dataset = datasets.MNIST('data/', 
                                   train=True, 
                                   download=True, 
                                   transform=transforms.ToTensor())
    # Test split from the same directory (no `download` argument is passed).
    # NOTE(review): the remainder of this function is not visible in this excerpt.
    test_dataset = datasets.MNIST('data/', 
                                  train=False, 
                                  transform=transforms.ToTensor())
示例#17
0
from baselines.vpg.agent_lstm import Agent as LSTMAgent
from baselines.vpg.engine import Engine

# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config({
    'log.freq': 10,
    'checkpoint.num': 3,
    'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3']),
    'env.normalize_obs': True,
    'env.normalize_reward': True,
    'use_lstm': Grid([True, False]),  # both settings are listed
    'rnn.size': 128,
    'nn.sizes': [64, 64],
    'agent.lr': 1e-3,
    'agent.use_lr_scheduler': False,
    'agent.gamma': 0.99,
    'agent.gae_lambda': 0.97,
    'agent.standardize_adv': True,  # standardize advantage estimates
    'agent.max_grad_norm': 0.5,  # grad clipping by norm
    'agent.entropy_coef': 0.01,
    'agent.value_coef': 0.5,

    # only for continuous control
    'env.clip_action': True,  # clip action within valid bound before step()
    'agent.std0': 0.6,  # initial std
    'train.timestep':
    int(1e6),  # total number of training (environmental) timesteps
    'train.timestep_per_iter': 1000,  # number of timesteps per iteration
})


def make_env(config, seed, mode):
示例#18
0
# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config({
    'log.freq':
    10,
    'checkpoint.num':
    3,
    'env.id':
    Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
    'env.normalize_obs':
    True,
    'env.normalize_reward':
    True,
    'nn.sizes': [64, 64],
    'agent.policy_lr':
    3e-4,
    'agent.use_lr_scheduler':
    True,
    'agent.value_lr':
    1e-3,
    'agent.gamma':
    0.99,
    'agent.gae_lambda':
    0.95,
    'agent.standardize_adv':
    True,  # standardize advantage estimates
    'agent.max_grad_norm':
    0.5,  # grad clipping by norm
    'agent.clip_range':
    0.2,  # ratio clipping

    # only for continuous control
    'env.clip_action':
    True,  # clip action within valid bound before step()
    'agent.std0':
    0.6,  # initial std
    'train.timestep':
    int(1e6),  # total number of training (environmental) timesteps
    'train.timestep_per_iter':
    2048,  # number of timesteps per iteration
    'train.batch_size':
    64,
    'train.num_epochs':
    10
})
    def make_configs(self):
        """Assemble the experiment configurations.

        Returns:
            Whatever ``Config.make_configs()`` produces for two fixed entries
            combined with three randomly sampled ones.
        """
        config = Config()

        # Fixed entries
        for name, val in (('iter', 30), ('hidden_sizes', [64, 32, 16])):
            config.add_item(name=name, val=val)

        # Randomly sampled entries, registered in the same order as before
        config.add_random_eps(name='lr',
                              base=10,
                              low=-6,
                              high=0,
                              num_sample=10)
        config.add_random_continuous(name='values',
                                     low=-5,
                                     high=5,
                                     num_sample=5)
        choices = [43223, 5434, 21314]
        config.add_random_discrete(name='select',
                                   list_val=choices,
                                   num_sample=10,
                                   replace=True)

        return config.make_configs()
示例#20
0
from baselines.openaies.agent import Agent


# Experiment configuration. Keys are dot-separated "<group>.<option>" names;
# `Grid` wraps a list of candidate values (presumably one run per value).
config = Config(
    {'log.freq': 10, 
     'checkpoint.num': 3,
     
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']), 
     'env.standardize_obs': False,
     
     'nn.sizes': [64, 64],
     
     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std
     
     'train.generations': int(1e3),  # total number of ES generations
     'train.popsize': 64,
     'train.mu0': 0.0,
     'train.std0': 1.0,
     'train.lr': 1e-2,
     'train.lr_decay': 1.0,
     'train.min_lr': 1e-6,
     # NOTE(review): presumably [initial, final, N, start] -- confirm against the scheduler
     'train.sigma_scheduler_args': [1.0, 0.01, 450, 0],
     'train.antithetic': False,
     'train.rank_transform': True
     
    })


def make_env(config, seed):
示例#21
0
# Experiment configuration. Keys are dot-separated "<group>.<option>" names.
config = Config({
    'log.freq': 1,
    'checkpoint.num': 3,
    'env.id':
    'Hopper-v3',  # single environment; disabled sweep: Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3'])
    'env.standardize_obs': False,
    'env.standardize_reward': False,
    'nn.sizes': [64, 64],
    'agent.lr': 7e-4,
    'agent.use_lr_scheduler': False,
    'agent.gamma': 0.99,
    'agent.standardize_adv': True,  # standardize advantage estimates
    'agent.max_grad_norm': 0.5,  # grad clipping by norm
    'agent.entropy_coef': 0.01,
    'agent.value_coef': 0.5,
    'agent.clip_rho': 1.0,
    'agent.clip_pg_rho': 1.0,
    'agent.num_actors': 2,

    # only for continuous control
    'env.clip_action': True,  # clip action within valid bound before step()
    'agent.std0': 0.6,  # initial std
    'train.timestep':
    int(1e6),  # total number of training (environmental) timesteps
    'train.timestep_per_iter': 200,  # number of timesteps per iteration
    'train.batch_size': 64,
    'eval.freq': 5000,
    'eval.num_episode': 10
})