Example No. 1
import numpy as np

from stable_baselines.ddpg import NormalActionNoise, OrnsteinUhlenbeckActionNoise


def sample_td3_params(trial):
    """
    Sampler for TD3 hyperparams.

    :param trial: (optuna.trial)
    :return: (dict)
    """
    gamma = trial.suggest_categorical('gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 100, 128, 256, 512])
    buffer_size = trial.suggest_categorical('buffer_size', [int(1e4), int(1e5), int(1e6)])
    train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 1000, 2000])
    gradient_steps = train_freq
    noise_type = trial.suggest_categorical('noise_type', ['ornstein-uhlenbeck', 'normal'])
    noise_std = trial.suggest_uniform('noise_std', 0, 1)

    hyperparams = {
        'gamma': gamma,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'buffer_size': buffer_size,
        'train_freq': train_freq,
        'gradient_steps': gradient_steps,
    }

    if noise_type == 'normal':
        hyperparams['action_noise'] = NormalActionNoise(mean=np.zeros(trial.n_actions),
                                                        sigma=noise_std * np.ones(trial.n_actions))
    elif noise_type == 'ornstein-uhlenbeck':
        hyperparams['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(trial.n_actions),
                                                                   sigma=noise_std * np.ones(trial.n_actions))

    return hyperparams
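A minimal usage sketch (an addition, not from the original snippet): the sampler reads trial.n_actions, so an Optuna objective has to attach that attribute from the environment before sampling; the actual training and evaluation of the TD3 agent are elided here.

import gym
import optuna

def objective(trial):
    env = gym.make('Pendulum-v0')
    # sample_td3_params() expects trial.n_actions, so attach it first
    trial.n_actions = env.action_space.shape[0]
    hyperparams = sample_td3_params(trial)
    # ... train a TD3 agent with `hyperparams` and evaluate it ...
    return 0.0  # placeholder: return the mean episodic reward here

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)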
Example No. 2
import numpy as np

from stable_baselines.ddpg import (AdaptiveParamNoiseSpec, NormalActionNoise,
                                   OrnsteinUhlenbeckActionNoise)


def sample_ddpg_params(trial):
    """
    Sampler for DDPG hyperparams.

    :param trial: (optuna.trial)
    :return: (dict)
    """
    gamma = trial.suggest_categorical(
        'gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    # actor_lr = trial.suggest_loguniform('actor_lr', 1e-5, 1)
    # critic_lr = trial.suggest_loguniform('critic_lr', 1e-5, 1)
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
    batch_size = trial.suggest_categorical('batch_size',
                                           [16, 32, 64, 128, 256])
    buffer_size = trial.suggest_categorical(
        'memory_limit', [int(1e4), int(1e5), int(1e6)])
    noise_type = trial.suggest_categorical(
        'noise_type', ['ornstein-uhlenbeck', 'normal', 'adaptive-param'])
    noise_std = trial.suggest_uniform('noise_std', 0, 1)
    normalize_observations = trial.suggest_categorical(
        'normalize_observations', [True, False])
    normalize_returns = trial.suggest_categorical('normalize_returns',
                                                  [True, False])

    hyperparams = {
        'gamma': gamma,
        'actor_lr': learning_rate,
        'critic_lr': learning_rate,
        'batch_size': batch_size,
        'memory_limit': buffer_size,
        'normalize_observations': normalize_observations,
        'normalize_returns': normalize_returns
    }

    if noise_type == 'adaptive-param':
        hyperparams['param_noise'] = AdaptiveParamNoiseSpec(
            initial_stddev=noise_std, desired_action_stddev=noise_std)
        # Apply layer normalization when using parameter perturbation
        hyperparams['policy_kwargs'] = dict(layer_norm=True)
    elif noise_type == 'normal':
        hyperparams['action_noise'] = NormalActionNoise(
            mean=np.zeros(trial.n_actions),
            sigma=noise_std * np.ones(trial.n_actions))
    elif noise_type == 'ornstein-uhlenbeck':
        hyperparams['action_noise'] = OrnsteinUhlenbeckActionNoise(
            mean=np.zeros(trial.n_actions),
            sigma=noise_std * np.ones(trial.n_actions))
    return hyperparams
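Note that when 'adaptive-param' is drawn, the dict also carries policy_kwargs=dict(layer_norm=True): parameter-space noise in stable-baselines is meant to be paired with a layer-normalized policy. Since every key matches a DDPG constructor keyword, the sampled dict unpacks straight into the model. A sketch (`trial` and a continuous-action `env` are assumed to exist):

from stable_baselines import DDPG

hyperparams = sample_ddpg_params(trial)
model = DDPG('MlpPolicy', env, verbose=0, **hyperparams)
model.learn(total_timesteps=int(1e5))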
Example No. 3
            # NOTE: this snippet starts mid-function; the opening of this
            # branch is inferred from the parallel branches below.
            if 'adaptive-param' in noise_type:
                hyperparams['param_noise'] = AdaptiveParamNoiseSpec(
                    initial_stddev=noise_std, desired_action_stddev=noise_std)
            elif 'normal' in noise_type:
                if 'lin' in noise_type:
                    hyperparams['action_noise'] = LinearNormalActionNoise(
                        mean=np.zeros(n_actions),
                        sigma=noise_std * np.ones(n_actions),
                        final_sigma=hyperparams.get('noise_std_final', 0.0) *
                        np.ones(n_actions),
                        max_steps=n_timesteps)
                else:
                    hyperparams['action_noise'] = NormalActionNoise(
                        mean=np.zeros(n_actions),
                        sigma=noise_std * np.ones(n_actions))
            elif 'ornstein-uhlenbeck' in noise_type:
                hyperparams['action_noise'] = OrnsteinUhlenbeckActionNoise(
                    mean=np.zeros(n_actions),
                    sigma=noise_std * np.ones(n_actions))
            else:
                raise RuntimeError(
                    'Unknown noise type "{}"'.format(noise_type))
            print("Applying {} noise with std {}".format(
                noise_type, noise_std))
            del hyperparams['noise_type']
            del hyperparams['noise_std']
            if 'noise_std_final' in hyperparams:
                del hyperparams['noise_std_final']

        if args.trained_agent_folder != '':
            # Continue training
            print("Loading pretrained agent")
            # Policy should not be changed
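The fragment above converts the noise_type / noise_std entries of a hyperparameter dict into action-noise objects and then deletes them, so only constructor arguments reach the algorithm. An illustrative input (an assumption, not from the source) could look like:

# Illustrative only: a hyperparameter dict as it might arrive from a config
# file, before the noise keys are consumed by the parsing code above.
hyperparams = {
    'noise_type': 'lin-normal',  # selects the LinearNormalActionNoise branch
    'noise_std': 0.5,
    'noise_std_final': 0.05,     # optional; also deleted after parsing
    'batch_size': 256,           # remaining keys are passed to the algorithm
}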
Example No. 4
import gym
import numpy as np

from stable_baselines import DDPG
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import VecVideoRecorder, DummyVecEnv
from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise, AdaptiveParamNoiseSpec
from stable_baselines.ddpg.policies import LnMlpPolicy

set_global_seeds(75)
env = gym.make('Hopper-v2')
env.seed(75)
# Vectorized environments make it easy to multiprocess training;
# we demonstrate their usefulness in the next examples
env = DummyVecEnv([lambda: env])  # the algorithms require a vectorized environment to run
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=float(0.2) *
                                            np.ones(n_actions))
model = DDPG(LnMlpPolicy,
             env,
             param_noise=None,
             batch_size=64,
             buffer_size=1000000,
             enable_popart=False,
             action_noise=action_noise,
             verbose=4,
             seed=75,
             n_cpu_tf_sess=1)
# load() is a classmethod: it returns a new model built from the checkpoint,
# replacing the freshly constructed one above
model = model.load(
    r"/home/mohit/Downloads/stable-baselines/results_mohit/ddpg/Hopper-v2/None/75/best_model.pkl"
)
env_id = 'Hopper-v2'
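A short evaluation rollout (an addition, not part of the original snippet); deterministic=True disables the exploration noise at test time:

obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, infos = env.step(action)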
Example No. 5
def train(params, model=None, path=None):
    if model: # indicate in filename that this is a finetune
        if params['name']:
            params['name'] += '_Finetune'
        else:
            params['name'] = 'Finetune'
    
    data_dir, tb_path = get_paths(params, path=path)
    print("Training Parameters: ", params)
    os.makedirs(data_dir, exist_ok=True)
    # Save parameters immediately
    params.save(data_dir)

    rank = mpi_rank_or_zero()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    
    def make_env(i):
        env = get_env(params)
        env = Monitor(env, data_dir + '/' + str(i), allow_early_resets=params['early_reset'])
        return env

    use_her = params['env_args'].get('use_her', False)

    if use_her:
        env = make_env(0)
        goal_selection_strategy = 'future'
    else:
        # the double lambda freezes each worker index (see the standalone sketch after this example)
        env = DummyVecEnv([(lambda n: lambda: make_env(n))(i) for i in range(params['num_proc'])])

    if model: # sanity-check that the pretrained model and new env action spaces match
        print("Model action space", model.action_space, model.action_space.low)
        print("Env action space", env.action_space, env.action_space.low)
    if params['normalize']:
        env = VecNormalize(env)
    if params['seed']:
        seed = params['seed'] + 100000 * rank
        set_global_seeds(seed)
        params['alg_args']['seed'] = seed
    if 'noise' in params and params['noise']:
        from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
        n_actions = env.action_space.shape[-1]
        params['alg_args']['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(params['noise'])*np.ones(n_actions))
    
    if model is None:
        alg = get_alg(params)
        policy = get_policy(params)
        if use_her:
            from stable_baselines import HER
            model = HER(policy, env, alg, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy, verbose=1, 
                            tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
        else:
            model = alg(policy,  env, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
    else:
        model.set_env(env)

    model.learn(total_timesteps=params['timesteps'], log_interval=params['log_interval'], callback=create_training_callback(data_dir, 
                                                    freq=params['eval_freq'], checkpoint_freq=params['checkpoint_freq']))
    print("######## SAVING MODEL TO", data_dir)
    model.save(data_dir + '/final_model')
    if params['normalize']:
        env.save(data_dir + '/normalized_environment.env')
    env.close()
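The DummyVecEnv line above uses a double lambda on purpose. A standalone illustration (not from the source) of why the extra closure is needed: a bare lambda would capture the loop variable late, so every worker would build the same environment.

make_fns_buggy = [lambda: i for i in range(3)]
print([f() for f in make_fns_buggy])            # [2, 2, 2] -- late binding
make_fns_fixed = [(lambda n: lambda: n)(i) for i in range(3)]
print([f() for f in make_fns_fixed])            # [0, 1, 2] -- index frozen per factory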
Example No. 6
def train(params, model=None, env=None): 
    print("Training Parameters: ", params)

    data_dir, tb_path = get_paths(params)
    os.makedirs(data_dir, exist_ok=True)
    # Save parameters immediately
    params.save(data_dir)

    rank = mpi_rank_or_zero()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create the environment if not given
    if env is None:  
        def make_env(i):
            env = get_env(params)
            print("ENV IN UTIL" ,env)
            # TODO: make monitor work for multiple agent.
            env = Monitor(env, data_dir + '/' + str(i), allow_early_resets=params['early_reset'])
            return env

        # if 'PPO' in params['alg']:
        #     env = DummyVecEnv([(lambda n: lambda: make_env(n))(i) for i in range(params['num_proc'])])
        # else:
        #     env = make_env(0)
        env = make_env(0)

        if params['normalize']:
            env = VecNormalize(env)
    # Set the seeds
    if params['seed']:
        seed = params['seed'] + 100000 * rank
        set_global_seeds(seed)
        params['alg_args']['seed'] = seed

    if 'noise' in params and params['noise']:
        from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
        n_actions = env.action_space.shape[-1]
        params['alg_args']['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(params['noise'])*np.ones(n_actions))
  

    print("ENV", env, env.action_space)
    if model is None:
        alg = get_alg(params)
        policy = get_policy(params)
        model = alg(policy,  env, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
    else:
        model.set_env(env)

    print("\n===============================\n")
    print("TENSORBOARD PATH:", tb_path)
    print("\n===============================\n")
    model.learn(total_timesteps=params['timesteps'], log_interval=params['log_interval'], 
                callback=create_training_callback(data_dir, params, env, freq=params['eval_freq'], checkpoint_freq=params['checkpoint_freq']))
    
    print("Saving model to", data_dir)
    model.save(data_dir + '/final_model')

    if params['normalize']:
        env.save(data_dir + '/environment.pkl')
        
    env.close()
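Since both train() variants save the VecNormalize wrapper next to the model, evaluation code has to restore the same normalization statistics. A hedged sketch, assuming stable-baselines >= 2.9 (where VecNormalize.save/load exist) and reusing the get_env/params helpers from the examples above:

from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

eval_env = DummyVecEnv([lambda: get_env(params)])
eval_env = VecNormalize.load(data_dir + '/environment.pkl', eval_env)
eval_env.training = False      # freeze the running statistics
eval_env.norm_reward = False   # report the true episodic reward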