def work(self, master_cmd):
    task_id, task, seed = master_cmd  # Don't use this seed

    # Seed is set inside the config
    config = task

    # Instantiate an algorithm
    algo = self.make_algo()

    # Print configuration
    Config.print_config(config)

    # Run the algorithm with given configuration
    result = algo(config)

    return task_id, result
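For context, `work` consumes a `(task_id, task, seed)` command and returns `(task_id, result)`, so a master can match results back to tasks. A toy round trip under that protocol (the `worker` object and the configs in `tasks` are hypothetical placeholders, not lagom's actual master/worker setup):

# hypothetical (task_id, config, seed) commands; `worker` exposes the `work` method above
tasks = [(0, config_a, 11), (1, config_b, 22)]
results = dict(worker.work(cmd) for cmd in tasks)  # {task_id: result}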
def test_run_experiment(num_sample, max_workers, chunksize):
    def run(config, seed, device, logdir):
        return config['ID'], seed, device, logdir

    config = Config({'network.lr': Grid([1e-3, 5e-3]),
                     'network.size': [32, 16],
                     'env.id': Grid(['CartPole-v1', 'Ant-v2'])},
                    num_sample=num_sample,
                    keep_dict_order=True)
    seeds = [1, 2, 3]
    log_dir = './some_path'
    run_experiment(run, config, seeds, log_dir, max_workers, chunksize, use_gpu=False, gpu_ids=None)

    p = Path('./some_path')
    assert p.exists()
    assert (p / 'configs.pkl').exists()
    assert (p / 'source_files').exists() and (p / 'source_files').is_dir()
    # Check all configuration folders with their IDs and subfolders for all random seeds
    for i in range(4):
        config_p = p / str(i)
        assert config_p.exists()
        assert (config_p / 'config.yml').exists()
        for seed in seeds:
            assert (config_p / str(seed)).exists()
    # Clean the logging directory
    rmtree(p)
    # Test removal
    assert not p.exists()
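The `range(4)` check above comes from the Cartesian product of the two `Grid` axes (2 learning rates x 2 env ids); the fixed `network.size` list is a single value and adds no combinations. A minimal sanity check of that count, assuming `num_sample` and `keep_dict_order` keep their defaults:

configs = Config({'network.lr': Grid([1e-3, 5e-3]),
                  'network.size': [32, 16],
                  'env.id': Grid(['CartPole-v1', 'Ant-v2'])}).make_configs()
assert len(configs) == 2 * 2  # one logging folder per configuration ID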
def make_configs(self):
    config = Config()

    config.add_grid(name='use_ConvVAE', val=[True, False])

    config.add_item(name='num_epochs', val=100)
    config.add_item(name='cuda', val=True)
    config.add_item(name='seed', val=1)
    config.add_item(name='batch_size', val=128)
    config.add_item(name='log_interval', val=100)

    configs = config.make_configs()

    return configs
from torchvision import datasets
from torchvision import transforms

from lagom.utils import pickle_dump
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import run_experiment

from engine import Engine
from model import VAE
from model import ConvVAE

config = Config(
    {'cuda': True,
     'log.dir': 'logs/ConvVAE',
     'log.interval': 100,
     'nn.type': 'ConvVAE',
     'nn.z_dim': 8,
     'train.num_epoch': 100,
     'train.batch_size': 128,
     'eval.batch_size': 128
     })


def make_dataset(config):
    train_dataset = datasets.MNIST('data/', train=True, download=True, transform=transforms.ToTensor())
    test_dataset = datasets.MNIST('data/', train=False, transform=transforms.ToTensor())
    kwargs = {'num_workers': 1, 'pin_memory': True} if config['cuda'] else {}
config = Config(
    {'cuda': True,
     'log.dir': 'logs/default_new',
     'log.freq': 10,
     'checkpoint.freq': 50,
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
     'env.standardize_obs': True,
     'env.standardize_reward': True,
     'nn.sizes': [64, 64],
     'agent.lr': 3e-4,
     'agent.use_lr_scheduler': True,
     'agent.gamma': 0.99,
     'agent.gae_lambda': 0.95,
     'agent.standardize_adv': True,  # standardize advantage estimates
     'agent.max_grad_norm': 0.5,  # grad clipping by norm
     'agent.entropy_coef': 0.0,  # PPO: no entropy bonus
     'agent.value_coef': 0.5,
     'agent.clip_range': 0.2,  # ratio clipping

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.5,  # initial std
     'agent.std_style': 'exp',  # std parameterization
     'agent.std_range': None,  # bounded std: (min, max)
     'agent.beta': None,  # beta-sigmoidal

     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'train.timestep_per_iter': 2048,  # number of timesteps per iteration
     'train.batch_size': 64,
     'train.num_epochs': 10
     })
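This PPO config expands to four experiments, one per `env.id` in the `Grid`. A sketch of launching it, mirroring the positional `run_experiment` call from the test above (the `run` body, seed list, and worker settings here are placeholders):

def run(config, seed, device, logdir):
    ...  # build the env and agent from `config`, train, and save results under `logdir`

run_experiment(run, config, [1, 2, 3], 'logs/default_new', 4, 1, use_gpu=True, gpu_ids=None)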
config = Config(
    {'cuda': False,
     'log.dir': 'logs/default',
     'log.freq': 10,
     'checkpoint.freq': 50,
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
     'env.standardize_obs': False,
     'nn.sizes': [64, 64],

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.5,  # initial std

     'train.generations': int(1e3),  # total number of ES generations
     'train.popsize': 64,
     'train.mu0': 0.0,
     'train.std0': 1.0,
     })
def test_config():
    config = Config()

    config.add_item(name='algo', val='RL')
    config.add_item(name='iter', val=30)
    config.add_item(name='hidden_sizes', val=[64, 32, 16])
    config.add_grid(name='cuda_num', val=[1, 2, 3])
    config.add_random_eps(name='lr', base=10, low=-6, high=0, num_sample=10)
    config.add_random_continuous(name='values', low=-5, high=5, num_sample=5)
    config.add_random_discrete(name='select', list_val=[43223, 5434, 21314], num_sample=10, replace=True)

    configs = config.make_configs()

    # 3 grid values x 10 lr samples x 5 continuous samples x 10 discrete samples
    assert len(configs) == 1500
    assert np.alltrue([config['ID'] == i for i, config in enumerate(configs)])
    assert np.alltrue([config['algo'] == 'RL' for config in configs])
    assert np.alltrue(['iter' in config for config in configs])
    assert np.alltrue([config['hidden_sizes'] == [64, 32, 16] for config in configs])
    assert np.alltrue([config['cuda_num'] in [1, 2, 3] for config in configs])
    assert np.alltrue([config['lr'] > 0 and config['lr'] < 1 for config in configs])
    assert np.alltrue([config['values'] >= -5 and config['values'] < 5 for config in configs])
    assert np.alltrue([config['select'] in [43223, 5434, 21314] for config in configs])

    with pytest.raises(AssertionError):
        config.add_grid(name='wrong', val='yes')
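The `0 < lr < 1` assertion matches `add_random_eps` drawing exponents uniformly in `[low, high)` and raising `base` to them; a sketch of that sampling rule, inferred from the arguments rather than from the library's implementation:

import numpy as np

u = np.random.uniform(low=-6, high=0, size=10)  # exponents in [-6, 0)
lr = 10.0 ** u  # values in (1e-6, 1), hence 0 < lr < 1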
from baselines.cem.agent import Agent

config = Config(
    {'log.freq': 10,
     'checkpoint.num': 3,
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
     'env.standardize_obs': False,
     'nn.sizes': [64, 64],

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std

     'train.generations': int(1e3),  # total number of ES generations
     'train.popsize': 64,
     'train.mu0': 0.0,
     'train.std0': 1.0,
     'train.elite_ratio': 0.2,
     'train.noise_scheduler_args': [0.01, 0.001, 200, 0]  # [initial, final, N, start]
     })
def make_configs(self):
    config = Config()

    # General configurations
    config.add_item(name='cuda', val=True)
    config.add_item(name='cuda_id', val=1)

    # Random seeds, generated by: np.random.randint(0, np.iinfo(np.int32).max, 10)
    config.add_grid(name='seed', val=[1284204222, 1079618558, 310837894, 1130644153, 2099771862,
                                      1234806135, 92464293, 146053987, 1140885110, 988661500])

    # Algorithm configuration
    config.add_item(name='algo:lr', val=1e-3)
    config.add_item(name='algo:gamma', val=0.99)
    config.add_item(name='algo:use_lr_scheduler', val=True)

    config.add_item(name='env:id', val='CartPole-v1')

    # Engine configuration
    config.add_item(name='train:iter', val=4000)
    config.add_item(name='train:N', val=1)
    config.add_item(name='train:T', val=500)
    config.add_item(name='eval:N', val=10)
    config.add_item(name='eval:T', val=500)

    # Agent configuration
    config.add_grid(name='agent:standardize', val=[True, False])
    config.add_item(name='agent:max_grad_norm', val=0.5)  # gradient clipping with max gradient norm
    config.add_item(name='agent:value_coef', val=0.5)  # coefficient for learning the value function
    config.add_item(name='agent:entropy_coef', val=0.01)  # coefficient for maximizing policy entropy

    # Logging configurations
    config.add_item(name='log:interval', val=100)
    config.add_item(name='log:dir', val='logs')

    configs = config.make_configs()

    return configs
config = Config(
    {'log.freq': 10,
     'checkpoint.num': 3,
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
     'agent.gamma': 0.99,
     'agent.polyak': 0.995,  # polyak averaging coefficient for targets update
     'agent.actor.lr': 3e-4,
     'agent.actor.use_lr_scheduler': False,
     'agent.critic.lr': 3e-4,
     'agent.critic.use_lr_scheduler': False,
     'agent.initial_temperature': 1.0,
     'agent.max_grad_norm': 999999,  # grad clipping by norm
     'replay.capacity': 1000000,
     'replay.init_trial': 10,  # number of random rollouts initially
     'replay.batch_size': 256,
     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'eval.num': 200
     })
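`agent.polyak` above is the coefficient for the soft target-network update. A minimal PyTorch sketch of polyak averaging as that comment describes it (the function name and arguments are illustrative, not the baseline's actual code):

import torch

@torch.no_grad()
def polyak_update(target_net, net, polyak=0.995):
    # target <- polyak * target + (1 - polyak) * online
    for t, p in zip(target_net.parameters(), net.parameters()):
        t.mul_(polyak).add_(p, alpha=1.0 - polyak)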
import gym

from lagom.envs import TimeStepEnv
from lagom import CEM
from baselines.cem.agent import Agent

config = Config(
    {'log.freq': 10,
     'checkpoint.num': 3,
     'env.id': Grid(['Acrobot-v1', 'BipedalWalker-v2', 'Pendulum-v0', 'LunarLanderContinuous-v2']),
     'nn.sizes': [64, 64],

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std

     'train.generations': 500,  # total number of ES generations
     'train.popsize': 32,
     'train.worker_chunksize': 4,  # popsize must be divisible by this
     'train.mu0': 0.0,
     'train.std0': 1.0,
     'train.elite_ratio': 0.2,
     'train.noise_scheduler_args': [0.01, 0.001, 400, 0]  # [initial, final, N, start]
     })


def make_env(config, seed, mode):
    assert mode in ['train', 'eval']
    env = gym.make(config['env.id'])
    env.seed(seed)
config = Config(
    {'log.freq': 10,
     'checkpoint.num': 3,
     'env.id': Grid(['Acrobot-v1', 'BipedalWalker-v2', 'Pendulum-v0', 'LunarLanderContinuous-v2']),
     'nn.sizes': [64, 64],

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std

     'train.generations': 500,  # total number of ES generations
     'train.popsize': 32,
     'train.worker_chunksize': 4,  # popsize must be divisible by this
     'train.mu0': 0.0,
     'train.std0': 1.0,
     'train.lr': 1e-2,
     'train.lr_decay': 1.0,
     'train.min_lr': 1e-6,
     'train.sigma_scheduler_args': [1.0, 0.01, 400, 0],  # [initial, final, N, start]
     'train.antithetic': False,
     'train.rank_transform': True
     })
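The scheduler quadruples are annotated above as `[initial, final, N, start]`. A linear schedule consistent with that annotation could look as follows (a sketch only; lagom's actual scheduler may interpolate differently):

def make_scheduler(initial, final, N, start):
    def schedule(t):
        # hold `initial` until `start`, then move linearly to `final` over N steps
        if t < start:
            return initial
        frac = min((t - start) / N, 1.0)
        return initial + frac * (final - initial)
    return schedule

sigma = make_scheduler(1.0, 0.01, 400, 0)  # matches 'train.sigma_scheduler_args' above
assert sigma(0) == 1.0 and abs(sigma(400) - 0.01) < 1e-12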
def make_configs(self):
    config = Config()

    config.add_grid(name='cuda', val=[True])
    config.add_item(name='seed', val=1)
    config.add_item(name='lr', val=1e-2)
    config.add_item(name='gamma', val=0.995)
    config.add_item(name='standardize_r', val=True)
    config.add_item(name='train_iter', val=1000)
    config.add_item(name='N', val=1)
    config.add_item(name='T', val=300)
    config.add_item(name='use_value', val=True)  # True for actor-critic
    config.add_item(name='entropy_coef', val=0.0)
    config.add_item(name='value_coef', val=0.0)
    config.add_item(name='max_grad_norm', val=None)
    config.add_item(name='log_interval', val=100)

    configs = config.make_configs()

    return configs
def test_config(num_sample, keep_dict_order):
    with pytest.raises(AssertionError):
        Config([1, 2, 3])

    config = Config({'log.dir': 'some path',
                     'beta': Condition(lambda x: 'small' if x['alpha'] < 5 else 'large'),
                     'network.type': 'MLP',
                     'network.hidden_size': [64, 64],
                     'network.lr': Grid([1e-3, 1e-4]),
                     'env.id': 'HalfCheetah-v2',
                     'iter': Grid([10, 20, 30]),
                     'alpha': Sample(lambda: np.random.uniform(3, 10))},
                    num_sample=num_sample,
                    keep_dict_order=keep_dict_order)
    list_config = config.make_configs()
    assert len(list_config) == 2 * 3 * num_sample
    for ID in range(2 * 3 * num_sample):
        assert list_config[ID]['ID'] == ID
    for x in list_config:
        assert len(x.keys()) == len(config.items.keys()) + 1  # one extra key: 'ID'
        for key in config.items.keys():
            assert key in x
        assert x['log.dir'] == 'some path'
        if x['alpha'] < 5:
            assert x['beta'] == 'small'
        else:
            assert x['beta'] == 'large'
        assert x['network.type'] == 'MLP'
        assert x['network.hidden_size'] == [64, 64]
        assert x['network.lr'] in [1e-3, 1e-4]
        assert x['env.id'] == 'HalfCheetah-v2'
        assert x['iter'] in [10, 20, 30]
        assert x['alpha'] >= 3 and x['alpha'] < 10
        if keep_dict_order:
            assert list(x.keys()) == ['ID'] + list(config.items.keys())
        else:
            assert list(x.keys()) != ['ID'] + list(config.items.keys())
            assert list(x.keys()) == ['ID', 'log.dir', 'beta', 'network.type', 'network.hidden_size',
                                      'env.id', 'network.lr', 'iter', 'alpha']

    # test non-random sampling
    config = Config({'log.dir': 'some path',
                     'network.type': 'MLP',
                     'network.hidden_size': [64, 64],
                     'network.lr': Grid([1e-3, 1e-4]),
                     'env.id': 'HalfCheetah-v2',
                     'iter': Grid([10, 20, 30]),
                     'alpha': 0.1},
                    num_sample=num_sample,
                    keep_dict_order=keep_dict_order)
    list_config = config.make_configs()
    assert len(list_config) == 2 * 3 * 1  # independent of num_sample: no repetition
    for ID in range(2 * 3 * 1):
        assert list_config[ID]['ID'] == ID

    # test all-fixed values
    config = Config({'log.dir': 'some path',
                     'network.type': 'MLP',
                     'network.hidden_size': [64, 64],
                     'network.lr': 1e-3,
                     'env.id': 'HalfCheetah-v2',
                     'iter': 20,
                     'alpha': 0.1},
                    num_sample=num_sample,
                    keep_dict_order=keep_dict_order)
    list_config = config.make_configs()
    assert len(list_config) == 1  # independent of num_sample: no repetition
    for ID in range(1):
        assert list_config[ID]['ID'] == ID
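The `2 * 3 * num_sample` count reflects the rule the assertions above exercise: each `Sample` entry is drawn `num_sample` times and crossed with the product of all `Grid` axes, while a config with no `Sample` entry ignores `num_sample` entirely. A compact illustration of just that rule:

config = Config({'a': Grid([1, 2]), 'b': Sample(lambda: np.random.rand())}, num_sample=3)
assert len(config.make_configs()) == 2 * 3

config = Config({'a': Grid([1, 2]), 'b': 0.5}, num_sample=3)
assert len(config.make_configs()) == 2  # num_sample has no effect without Sample entries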
from baselines.sac.replay_buffer import ReplayBuffer

config = Config(
    {'log.freq': 1000,  # every n timesteps
     'checkpoint.num': 3,
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
     'agent.gamma': 0.99,
     'agent.polyak': 0.995,  # polyak averaging coefficient for targets update
     'agent.actor.lr': 3e-4,
     'agent.actor.use_lr_scheduler': False,
     'agent.critic.lr': 3e-4,
     'agent.critic.use_lr_scheduler': False,
     'agent.initial_temperature': 1.0,
     'agent.max_grad_norm': 999999,  # grad clipping by norm
     'replay.capacity': 1000000,
     # number of time steps to take uniform actions initially
     'replay.init_size': Condition(lambda x: 1000 if x['env.id'] in ['Hopper-v3', 'Walker2d-v3'] else 10000),
     'replay.batch_size': 256,
     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'eval.freq': 5000,
     'eval.num_episode': 10
     })


def make_env(config, seed):
from torchvision import datasets
from torchvision import transforms

from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import run_experiment

from engine import Engine
from model import VAE
from model import ConvVAE

config = Config(
    {'log.freq': 100,
     'nn.type': Grid(['VAE', 'ConvVAE']),
     'nn.z_dim': 8,
     'lr': 1e-3,
     'train.num_epoch': 100,
     'train.batch_size': 128,
     'eval.batch_size': 128
     })


def make_dataset(config):
    train_dataset = datasets.MNIST('data/', train=True, download=True, transform=transforms.ToTensor())
    test_dataset = datasets.MNIST('data/', train=False, transform=transforms.ToTensor())
from baselines.vpg.agent_lstm import Agent as LSTMAgent
from baselines.vpg.engine import Engine

config = Config(
    {'log.freq': 10,
     'checkpoint.num': 3,
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3']),
     'env.normalize_obs': True,
     'env.normalize_reward': True,
     'use_lstm': Grid([True, False]),
     'rnn.size': 128,
     'nn.sizes': [64, 64],
     'agent.lr': 1e-3,
     'agent.use_lr_scheduler': False,
     'agent.gamma': 0.99,
     'agent.gae_lambda': 0.97,
     'agent.standardize_adv': True,  # standardize advantage estimates
     'agent.max_grad_norm': 0.5,  # grad clipping by norm
     'agent.entropy_coef': 0.01,
     'agent.value_coef': 0.5,

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std

     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'train.timestep_per_iter': 1000,  # number of timesteps per iteration
     })


def make_env(config, seed, mode):
config = Config(
    {'log.freq': 10,
     'checkpoint.num': 3,
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
     'env.normalize_obs': True,
     'env.normalize_reward': True,
     'nn.sizes': [64, 64],
     'agent.policy_lr': 3e-4,
     'agent.use_lr_scheduler': True,
     'agent.value_lr': 1e-3,
     'agent.gamma': 0.99,
     'agent.gae_lambda': 0.95,
     'agent.standardize_adv': True,  # standardize advantage estimates
     'agent.max_grad_norm': 0.5,  # grad clipping by norm
     'agent.clip_range': 0.2,  # ratio clipping

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std

     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'train.timestep_per_iter': 2048,  # number of timesteps per iteration
     'train.batch_size': 64,
     'train.num_epochs': 10
     })
def make_configs(self):
    config = Config()

    config.add_item(name='iter', val=30)
    config.add_item(name='hidden_sizes', val=[64, 32, 16])
    config.add_random_eps(name='lr', base=10, low=-6, high=0, num_sample=10)
    config.add_random_continuous(name='values', low=-5, high=5, num_sample=5)
    config.add_random_discrete(name='select', list_val=[43223, 5434, 21314], num_sample=10, replace=True)

    configs = config.make_configs()

    return configs
from baselines.openaies.agent import Agent

config = Config(
    {'log.freq': 10,
     'checkpoint.num': 3,
     'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
     'env.standardize_obs': False,
     'nn.sizes': [64, 64],

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std

     'train.generations': int(1e3),  # total number of ES generations
     'train.popsize': 64,
     'train.mu0': 0.0,
     'train.std0': 1.0,
     'train.lr': 1e-2,
     'train.lr_decay': 1.0,
     'train.min_lr': 1e-6,
     'train.sigma_scheduler_args': [1.0, 0.01, 450, 0],  # [initial, final, N, start]
     'train.antithetic': False,
     'train.rank_transform': True
     })


def make_env(config, seed):
config = Config(
    {'log.freq': 1,
     'checkpoint.num': 3,
     'env.id': 'Hopper-v3',  # Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3'])
     'env.standardize_obs': False,
     'env.standardize_reward': False,
     'nn.sizes': [64, 64],
     'agent.lr': 7e-4,
     'agent.use_lr_scheduler': False,
     'agent.gamma': 0.99,
     'agent.standardize_adv': True,  # standardize advantage estimates
     'agent.max_grad_norm': 0.5,  # grad clipping by norm
     'agent.entropy_coef': 0.01,
     'agent.value_coef': 0.5,
     'agent.clip_rho': 1.0,
     'agent.clip_pg_rho': 1.0,
     'agent.num_actors': 2,

     # only for continuous control
     'env.clip_action': True,  # clip action within valid bound before step()
     'agent.std0': 0.6,  # initial std

     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'train.timestep_per_iter': 200,  # number of timesteps per iteration
     'train.batch_size': 64,
     'eval.freq': 5000,
     'eval.num_episode': 10
     })