Example No. 1
    def make_configs(self):
        configurator = Configurator('grid')

        configurator.fixed('cuda', False)  # ES for small net, do not use GPU

        configurator.fixed('env.id', 'Pendulum-v0')

        configurator.fixed('network.recurrent', True)
        configurator.fixed('network.hidden_size', [32])

        configurator.fixed('es.algo', 'CMAES')
        configurator.fixed('es.popsize', 16)
        configurator.fixed('es.mu0', 0.0)
        configurator.fixed('es.std0', 0.5)
        """Hyperparameter search, later time
        configurator.grid('env.id', ['Pendulum-v0', 'Reacher-v2', 'InvertedPendulum-v2', 'HumanoidStandup-v2'])
        
        configurator.grid('network.hidden_size', [[32], [32, 32], [64, 64]])
        
        configurator.grid('es.popsize', [8, 16, 32, 64])
        configurator.grid('es.mu0', [0.0, 0.3, 0.5])
        configurator.grid('es.std0', [0.1, 0.5, 1.0])
        """
        configurator.fixed('train.num_iteration', 3000)
        configurator.fixed('train.N', 5)
        configurator.fixed('train.T', 300)

        configurator.fixed('log.interval', 100)
        configurator.fixed('log.dir', 'logs')

        list_config = configurator.make_configs()

        return list_config
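
In grid mode, every fixed() entry pins a single value and every grid() entry adds a whole axis, so the number of generated configs is the product of the grid sizes; with only fixed() calls, as above, exactly one config comes out, and enabling the commented-out grid() block would multiply that accordingly. The snippet below is only a minimal illustration of that cartesian-product expansion over plain dicts, not the Configurator's actual implementation.

from itertools import product

# Illustrative only: a stand-in for what grid-mode expansion could look like.
# fixed() corresponds to a single-element list, grid() to a full list of values.
items = {
    'cuda': [False],                        # fixed
    'env.id': ['Pendulum-v0'],              # fixed
    'es.popsize': [8, 16, 32, 64],          # grid (from the commented-out block)
    'es.std0': [0.1, 0.5, 1.0],             # grid (from the commented-out block)
}

configs = [
    {'ID': i, **dict(zip(items.keys(), values))}
    for i, values in enumerate(product(*items.values()))
]

print(len(configs))  # 1 * 1 * 4 * 3 = 12 combinations
print(configs[0])    # {'ID': 0, 'cuda': False, 'env.id': 'Pendulum-v0', 'es.popsize': 8, 'es.std0': 0.1}
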
Example No. 2
    def make_configs(self):
        configurator = Configurator('grid')

        configurator.fixed('cuda', True)

        configurator.grid('network.type', ['VAE', 'ConvVAE'])
        configurator.fixed('network.z_dim', 8)

        configurator.fixed('train.num_epoch', 100)
        configurator.fixed('train.batch_size', 128)
        configurator.fixed('eval.batch_size', 128)

        configurator.fixed('log.interval', 100)
        configurator.fixed('log.dir', 'logs')

        list_config = configurator.make_configs()

        return list_config
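
With network.type as the only grid axis, make_configs() here should return two configs that differ only in that key. The loop below shows one common way to consume such a list, with hand-written stand-in dicts in place of the real output and a per-config log subdirectory keyed by 'ID' as an assumed convention, not necessarily what the surrounding framework does.

import os

# Hand-written stand-ins for the two configs this grid would generate;
# the real dicts come from configurator.make_configs().
configs = [
    {'ID': 0, 'cuda': True, 'network.type': 'VAE', 'network.z_dim': 8, 'log.dir': 'logs'},
    {'ID': 1, 'cuda': True, 'network.type': 'ConvVAE', 'network.z_dim': 8, 'log.dir': 'logs'},
]

for config in configs:
    run_dir = os.path.join(config['log.dir'], str(config['ID']))  # one subdirectory per config
    print(f"train a {config['network.type']} with z_dim={config['network.z_dim']}, log to {run_dir}")
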
Example No. 3
    def make_configs(self):
        configurator = Configurator('grid')

        configurator.fixed('cuda', True)  # whether to use GPU

        configurator.fixed('env.id', 'HalfCheetah-v2')
        configurator.fixed('env.standardize', True)  # whether to use VecStandardize
        configurator.fixed('env.time_aware_obs', False)  # whether to append time step to observation

        configurator.fixed('network.recurrent', False)
        configurator.fixed('network.hidden_sizes', [64, 64])
        # whether the value network gets its own parameters (otherwise shared with the policy)
        configurator.fixed('network.independent_V', False)

        configurator.fixed('algo.lr', 3e-4)
        configurator.fixed('algo.lr_V', 1e-3)
        configurator.fixed('algo.use_lr_scheduler', True)
        configurator.fixed('algo.gamma', 0.99)
        configurator.fixed('algo.gae_lambda', 0.97)

        configurator.fixed('agent.standardize_Q', False)  # whether to standardize discounted returns
        configurator.fixed('agent.standardize_adv', True)  # whether to standardize advantage estimates
        configurator.fixed('agent.max_grad_norm', 0.5)  # grad clipping, set None to turn off
        configurator.fixed('agent.entropy_coef', 0.0)
        configurator.fixed('agent.value_coef', 0.5)
        configurator.fixed('agent.fit_terminal_value', True)
        configurator.fixed('agent.terminal_value_coef', 0.1)
        configurator.fixed('agent.clip_range', 0.2)  # PPO epsilon for ratio clipping
        # target KL between new and old policies after an update, used for early stopping (usually small, e.g. 0.01 or 0.05)
        configurator.fixed('agent.target_kl', 0.015)
        # only for continuous control
        configurator.fixed('env.clip_action', True)  # clip sampled actions to valid bounds before step()
        configurator.fixed('agent.min_std', 1e-6)  # minimum std threshold, avoids numerical instability
        configurator.fixed('agent.std_style', 'exp')  # std parameterization, 'exp' or 'softplus'
        configurator.fixed('agent.constant_std', None)  # constant std, set None to learn it
        configurator.fixed('agent.std_state_dependent', False)  # whether to learn a state-dependent std
        configurator.fixed('agent.init_std', 0.5)  # initial std for state-independent std

        configurator.fixed('train.timestep', 1e6)  # either 'train.iter' or 'train.timestep'
        configurator.fixed('train.N', 2)  # number of trajectories per training iteration
        configurator.fixed('train.ratio_T', 1.0)  # percentage of the max allowed horizon
        configurator.fixed('eval.independent', False)
        configurator.fixed('eval.N', 10)  # number of evaluation episodes; do not specify T so full episodes run
        configurator.fixed('train.batch_size', 256)
        configurator.fixed('train.num_epochs', 80)

        configurator.fixed('log.interval', 10)  # logging interval
        configurator.fixed('log.dir', 'logs/default')  # logging directory

        list_config = configurator.make_configs()

        return list_config
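
Each generated config is a flat dict: dotted strings such as 'algo.lr' are literal keys, and an 'ID' key is prepended automatically (the test in Example No. 6 asserts exactly this key layout). A downstream script therefore reads values by plain dict lookup; the sketch below uses a hand-written excerpt of one config and a hypothetical setup_experiment() helper rather than any real framework code.

# Hand-written excerpt of one generated PPO config (flat dotted keys plus 'ID').
config = {
    'ID': 0,
    'env.id': 'HalfCheetah-v2',
    'algo.lr': 3e-4,
    'algo.gamma': 0.99,
    'agent.clip_range': 0.2,
    'train.batch_size': 256,
    'log.dir': 'logs/default',
}

def setup_experiment(config):
    # Hypothetical consumer: values come straight from dict lookups by dotted key.
    return (f"PPO on {config['env.id']} with lr={config['algo.lr']}, "
            f"gamma={config['algo.gamma']}, clip_range={config['agent.clip_range']}")

print(setup_experiment(config))
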
Example No. 4
    def make_configs(self):
        configurator = Configurator('grid')

        configurator.fixed('cuda', False)  # whether to use GPU

        configurator.fixed('env.id', 'HalfCheetah-v2')
        configurator.fixed('env.standardize', True)  # whether to use VecStandardize

        configurator.fixed('network.recurrent', True)
        configurator.fixed('network.hidden_sizes', [32])  # TODO: [64, 64]

        configurator.fixed('algo.lr', 1e-3)
        configurator.fixed('algo.use_lr_scheduler', False)
        configurator.fixed('algo.gamma', 0.99)

        configurator.fixed('agent.standardize_Q', False)  # whether to standardize discounted returns
        configurator.fixed('agent.standardize_adv', False)  # whether to standardize advantage estimates
        configurator.fixed('agent.max_grad_norm', 0.5)  # grad clipping, set None to turn off
        configurator.fixed('agent.entropy_coef', 0.01)
        configurator.fixed('agent.value_coef', 0.5)
        # only for continuous control
        configurator.fixed('agent.min_std', 1e-6)  # minimum std threshold, avoids numerical instability
        configurator.fixed('agent.std_style', 'exp')  # std parameterization, 'exp' or 'softplus'
        configurator.fixed('agent.constant_std', None)  # constant std, set None to learn it
        configurator.fixed('agent.std_state_dependent', False)  # whether to learn a state-dependent std
        configurator.fixed('agent.init_std', 0.5)  # initial std for state-independent std

        configurator.fixed('train.timestep', 1e7)  # either 'train.iter' or 'train.timestep'
        configurator.fixed('train.N', 10)  # number of segments per training iteration
        configurator.fixed('train.T', 5)  # fixed-length segment rollout
        configurator.fixed('eval.N', 100)  # number of evaluation episodes; do not specify T so full episodes run

        configurator.fixed('log.record_interval', 100)  # interval to record the logging
        configurator.fixed('log.print_interval', 500)  # interval to print the logging to screen
        configurator.fixed('log.dir', 'logs')  # logging directory

        list_config = configurator.make_configs()

        return list_config
Example No. 5
    def make_configs(self):
        configurator = Configurator('grid')

        configurator.fixed('cuda', False)

        configurator.fixed('algo.lr', 7e-4)
        configurator.fixed('algo.lr_V', 1e-3)
        configurator.fixed('algo.gamma', 0.99)
        configurator.fixed('algo.gae_lambda', 0.97)
        configurator.fixed('env.count', 20)
        configurator.fixed('agent.count', 20)

        configurator.fixed('agent.standardize_Q', False)
        configurator.fixed('agent.standardize_adv', False)
        configurator.fixed('agent.max_grad_norm', 0.5)
        configurator.fixed('agent.entropy_coef', 0.01)
        configurator.fixed('agent.value_coef', 0.5)
        configurator.fixed('agent.fit_terminal_value', False)
        configurator.fixed('agent.terminal_value_coef', 0.1)

        configurator.fixed('train.iter', 10000)
        configurator.fixed('log.interval', 10)
        configurator.fixed('log.dir', 'logs-3')

        return configurator.make_configs()
Example No. 6
def test_configurator():
    # Construction invalidity check
    with pytest.raises(AssertionError):
        Configurator(search_mode='n')
    with pytest.raises(AssertionError):
        Configurator(search_mode='random', num_sample=None)

    # Create a configurator
    # Grid search
    configurator = Configurator(search_mode='grid')
    assert len(configurator.items) == 0

    with pytest.raises(AssertionError):
        configurator.fixed('seeds', [1, 2, 3])

    configurator.fixed('log.dir', 'some path')

    assert len(configurator.items) == 1
    assert isinstance(configurator.items['log.dir'], list)
    assert configurator.items['log.dir'][0] == 'some path'
    with pytest.raises(AssertionError):
        configurator.fixed('log.dir', 'second')
    with pytest.raises(AssertionError):
        configurator.grid('log.T', 'must be list, not string')
    configurator.grid('network.lr', [1e-2, 5e-3, 1e-4, 5e-4])
    configurator.grid('network.layers', [1, 2, 3])
    configurator.grid('env.id', ['CartPole-v1', 'Ant-v2'])

    configs = configurator.make_configs()

    assert len(configs) == 24  # 1 (fixed log.dir) * 4 (lr) * 3 (layers) * 2 (env.id)
    # order-preserving check
    assert all([list(c.keys()) == ['ID', 'log.dir', 'network.lr', 'network.layers', 'env.id'] for c in configs])
    assert all([c['log.dir'] == 'some path' for c in configs])
    assert all([c['network.lr'] in [1e-2, 5e-3, 1e-4, 5e-4] for c in configs])
    assert all([c['network.layers'] in [1, 2, 3] for c in configs])
    assert all([c['env.id'] in ['CartPole-v1', 'Ant-v2'] for c in configs])

    # Grid search does not allow methods for random search
    with pytest.raises(AssertionError):
        configurator.categorical('one', [1, 2])
    with pytest.raises(AssertionError):
        configurator.uniform('two', 1, 3)
    with pytest.raises(AssertionError):
        configurator.discrete_uniform('three', 5, 10)
    with pytest.raises(AssertionError):
        configurator.log_uniform('four', 0.0001, 0.1)

    Configurator.print_config(configs[20])
    config_dataframe = Configurator.to_dataframe(configs)
    config_dataframe = Configurator.dataframe_subset(config_dataframe, 'network.lr', [0.01, 0.005])
    config_dataframe = Configurator.dataframe_groupview(config_dataframe, ['env.id', 'network.lr'])

    del configurator
    del configs
    del config_dataframe

    # Random search
    configurator = Configurator('random', num_sample=20)
    assert len(configurator.items) == 0

    with pytest.raises(AssertionError):
        configurator.fixed('seeds', [1, 2, 3])
    with pytest.raises(AssertionError):
        configurator.categorical('seeds', [1, 2])
    with pytest.raises(AssertionError):
        configurator.uniform('seeds', 1, 3)
    with pytest.raises(AssertionError):
        configurator.discrete_uniform('seeds', 5, 10)
    with pytest.raises(AssertionError):
        configurator.log_uniform('seeds', 0.0001, 0.1)

    with pytest.raises(AssertionError):
        configurator.grid('network.layers', [1, 2, 3])

    configurator.fixed('log.dir', 'some path')
    assert len(configurator.items) == 1
    assert isinstance(configurator.items['log.dir'], typing.Generator)
    assert next(configurator.items['log.dir']) == 'some path'
    with pytest.raises(AssertionError):
        configurator.fixed('log.dir', 'second')

    configurator.categorical('network.layers', [1, 2, 3])
    with pytest.raises(AssertionError):
        configurator.categorical('network.layers2', 12)  # must be list
    assert isinstance(configurator.items['network.layers'], typing.Generator)

    configurator.uniform('entropy_coef', 0.1, 2.0)
    configurator.discrete_uniform('train.N', 1, 100)
    configurator.log_uniform('network.lr', 1e-7, 1e-1)
    assert len(configurator.items) == 5

    configs = configurator.make_configs()

    assert len(configs[0]) == 1 + 5  # the auto-added 'ID' key plus the five registered items

    assert len(configs) == 20
    # order-preserving check
    expected_keys = ['ID', 'log.dir', 'network.layers', 'entropy_coef', 'train.N', 'network.lr']
    assert all([list(c.keys()) == expected_keys for c in configs])
    assert all([c['log.dir'] == 'some path' for c in configs])
    assert all([c['network.layers'] in [1, 2, 3] for c in configs])
    assert all([c['entropy_coef'] >= 0.1 and c['entropy_coef'] <= 2.0 for c in configs])
    assert all([isinstance(c['train.N'], int) for c in configs])
    assert all([c['train.N'] >= 1 and c['train.N'] <= 100 for c in configs])
    assert all([isinstance(c['network.lr'], float) for c in configs])
    assert all([c['network.lr'] >= 1e-7 and c['network.lr'] <= 1e-1 for c in configs])

    Configurator.print_config(configs[11])
    config_dataframe = Configurator.to_dataframe(configs)
    config_dataframe = Configurator.dataframe_subset(config_dataframe, 'network.layers', [1, 2])
    config_dataframe = Configurator.dataframe_groupview(config_dataframe, ['network.layers', 'log.dir'])
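
The test above pins down both search modes: grid mode expands the registered items by cartesian product (1 fixed log.dir x 4 learning rates x 3 layer counts x 2 env ids = 24 configs), while random mode draws num_sample complete configurations using categorical, uniform, discrete_uniform and log_uniform as sampling primitives. The snippet below re-creates one random-mode draw with plain random calls so the asserted ranges are easy to see; it is not the Configurator's implementation, and sampling the exponent uniformly for log_uniform is an assumption.

import random

def sample_random_config(ID):
    # Illustrative re-creation of a single random-search draw with the ranges
    # used in the test above; not the library's actual sampling code.
    return {
        'ID': ID,
        'log.dir': 'some path',                      # fixed
        'network.layers': random.choice([1, 2, 3]),  # categorical
        'entropy_coef': random.uniform(0.1, 2.0),    # uniform
        'train.N': random.randint(1, 100),           # discrete_uniform
        'network.lr': 10 ** random.uniform(-7, -1),  # log_uniform (assumed: uniform in log10 space)
    }

configs = [sample_random_config(i) for i in range(20)]  # num_sample=20
assert all(1e-7 <= c['network.lr'] <= 1e-1 for c in configs)
assert all(isinstance(c['train.N'], int) for c in configs)
print(configs[0])
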
Example No. 7
    def make_configs(self):
        configurator = Configurator('grid')
        
        configurator.fixed('log.dir', 'some path')
        configurator.grid('network.lr', [0.1, 0.01, 0.05])
        configurator.grid('network.layers', [16, 32])
        configurator.grid('env.id', ['CartPole-v1', 'Ant-v2', 'HalfCheetah-v2'])
        
        configs = configurator.make_configs()

        return configs
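
This grid spans 3 learning rates x 2 layer sizes x 3 environments, so 18 configs should come back, each a flat dict. Because of that flat layout, the whole grid is easy to inspect as a table; the test in Example No. 6 uses Configurator.to_dataframe for this, and plain pandas over hand-built stand-in dicts gives the same kind of view, as sketched below (stand-ins only, not real Configurator output).

import pandas as pd
from itertools import product

# Stand-in configs mirroring this example's grid (3 * 2 * 3 = 18 rows);
# the real list comes from configurator.make_configs().
grid = {
    'network.lr': [0.1, 0.01, 0.05],
    'network.layers': [16, 32],
    'env.id': ['CartPole-v1', 'Ant-v2', 'HalfCheetah-v2'],
}
configs = [
    {'ID': i, 'log.dir': 'some path', **dict(zip(grid.keys(), values))}
    for i, values in enumerate(product(*grid.values()))
]

df = pd.DataFrame(configs)
print(len(df))                                # 18
print(df[df['network.layers'] == 32].head())  # subset view, similar in spirit to dataframe_subset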