def sample_td3_params(trial):
    """Sample TD3 hyperparameters for an Optuna trial.

    :param trial: (optuna.trial) current trial; must expose ``n_actions``
    :return: (dict) keyword arguments for the TD3 constructor
    """
    sampled = {
        'gamma': trial.suggest_categorical(
            'gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]),
        'learning_rate': trial.suggest_loguniform('lr', 1e-5, 1),
        'batch_size': trial.suggest_categorical(
            'batch_size', [16, 32, 64, 100, 128, 256, 512]),
        'buffer_size': trial.suggest_categorical(
            'buffer_size', [int(1e4), int(1e5), int(1e6)]),
        'train_freq': trial.suggest_categorical(
            'train_freq', [1, 10, 100, 1000, 2000]),
    }
    # One gradient step per environment step collected between updates.
    sampled['gradient_steps'] = sampled['train_freq']

    noise_kind = trial.suggest_categorical(
        'noise_type', ['ornstein-uhlenbeck', 'normal'])
    noise_sigma = trial.suggest_uniform('noise_std', 0, 1)

    mean_vec = np.zeros(trial.n_actions)
    sigma_vec = noise_sigma * np.ones(trial.n_actions)
    if noise_kind == 'normal':
        sampled['action_noise'] = NormalActionNoise(mean=mean_vec,
                                                    sigma=sigma_vec)
    elif noise_kind == 'ornstein-uhlenbeck':
        sampled['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=mean_vec,
                                                               sigma=sigma_vec)
    return sampled
def sample_ddpg_params(trial):
    """Sample DDPG hyperparameters for an Optuna trial.

    :param trial: (optuna.trial) current trial; must expose ``n_actions``
    :return: (dict) keyword arguments for the DDPG constructor
    """
    gamma = trial.suggest_categorical(
        'gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    # A single sampled learning rate drives both actor and critic.
    lr = trial.suggest_loguniform('lr', 1e-5, 1)
    batch = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256])
    memory = trial.suggest_categorical(
        'memory_limit', [int(1e4), int(1e5), int(1e6)])
    noise_kind = trial.suggest_categorical(
        'noise_type', ['ornstein-uhlenbeck', 'normal', 'adaptive-param'])
    sigma = trial.suggest_uniform('noise_std', 0, 1)
    norm_obs = trial.suggest_categorical('normalize_observations', [True, False])
    norm_ret = trial.suggest_categorical('normalize_returns', [True, False])

    sampled = {
        'gamma': gamma,
        'actor_lr': lr,
        'critic_lr': lr,
        'batch_size': batch,
        'memory_limit': memory,
        'normalize_observations': norm_obs,
        'normalize_returns': norm_ret,
    }

    if noise_kind == 'adaptive-param':
        sampled['param_noise'] = AdaptiveParamNoiseSpec(
            initial_stddev=sigma, desired_action_stddev=sigma)
        # Apply layer normalization when using parameter perturbation
        sampled['policy_kwargs'] = dict(layer_norm=True)
    elif noise_kind == 'normal':
        sampled['action_noise'] = NormalActionNoise(
            mean=np.zeros(trial.n_actions),
            sigma=sigma * np.ones(trial.n_actions))
    elif noise_kind == 'ornstein-uhlenbeck':
        sampled['action_noise'] = OrnsteinUhlenbeckActionNoise(
            mean=np.zeros(trial.n_actions),
            sigma=sigma * np.ones(trial.n_actions))
    return sampled
# NOTE(review): this chunk starts mid-expression — the assignment and the
# AdaptiveParamNoiseSpec( opener that the first line completes lie outside the
# visible range, so the surrounding indentation here is reconstructed, not
# original. Verify nesting against the full file before relying on it.
            initial_stddev=noise_std, desired_action_stddev=noise_std)
        # Gaussian exploration noise; a 'lin' marker in the type selects a
        # variant whose sigma is annealed toward `noise_std_final` over
        # `n_timesteps` — presumably the total training budget (confirm).
        elif 'normal' in noise_type:
            if 'lin' in noise_type:
                hyperparams['action_noise'] = LinearNormalActionNoise(
                    mean=np.zeros(n_actions),
                    sigma=noise_std * np.ones(n_actions),
                    final_sigma=hyperparams.get('noise_std_final', 0.0) * np.ones(n_actions),
                    max_steps=n_timesteps)
            else:
                hyperparams['action_noise'] = NormalActionNoise(
                    mean=np.zeros(n_actions),
                    sigma=noise_std * np.ones(n_actions))
        # Temporally correlated (Ornstein-Uhlenbeck) exploration noise.
        elif 'ornstein-uhlenbeck' in noise_type:
            hyperparams['action_noise'] = OrnsteinUhlenbeckActionNoise(
                mean=np.zeros(n_actions),
                sigma=noise_std * np.ones(n_actions))
        else:
            raise RuntimeError(
                'Unknown noise type "{}"'.format(noise_type))
        print("Applying {} noise with std {}".format(
            noise_type, noise_std))
        # The noise entries are sampler/config metadata, not constructor
        # kwargs: strip them before hyperparams reaches the algorithm.
        del hyperparams['noise_type']
        del hyperparams['noise_std']
        if 'noise_std_final' in hyperparams:
            del hyperparams['noise_std_final']
    if args.trained_agent_folder != '':
        # Continue training
        print("Loading pretrained agent")
        # Policy should not be changed
# Load a trained DDPG agent for Hopper-v2 and prepare it for replay/recording.
import gym
import numpy as np

from stable_baselines import DDPG
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import VecVideoRecorder, DummyVecEnv
from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise, AdaptiveParamNoiseSpec
from stable_baselines.ddpg.policies import LnMlpPolicy

# FIX: `gym` and `numpy` were used below but never imported, and
# DummyVecEnv was imported twice; both are corrected above.

set_global_seeds(75)
env = gym.make('Hopper-v2')
env.seed(75)
# vectorized environments allow to easily multiprocess training
# we demonstrate its usefulness in the next examples
env = DummyVecEnv([lambda: env])  # The algorithms require a vectorized environment to run

n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=float(0.2) * np.ones(n_actions))
# NOTE(review): this model is immediately replaced by the load below; it is
# kept only so graph construction / seeding side effects match the original.
model = DDPG(LnMlpPolicy, env, param_noise=None, batch_size=64,
             buffer_size=1000000, enable_popart=False,
             action_noise=action_noise, verbose=4, seed=75, n_cpu_tf_sess=1)
# FIX: `load` is a classmethod — `model = model.load(path)` silently discarded
# the configured model and returned one with NO environment attached. Load via
# the class and re-attach the env so the agent can keep interacting with it.
model = DDPG.load(
    r"/home/mohit/Downloads/stable-baselines/results_mohit/ddpg/Hopper-v2/None/75/best_model.pkl",
    env=env)
env_id = 'Hopper-v2'
def train(params, model=None, path=None):
    """Train (or fine-tune) an RL agent described by ``params``.

    :param params: run configuration; behaves like a dict and has a ``save``
        method. Keys read here: 'name', 'env_args', 'num_proc', 'normalize',
        'seed', 'noise', 'alg_args', 'policy_args', 'timesteps',
        'log_interval', 'eval_freq', 'checkpoint_freq', 'early_reset'.
    :param model: optional pre-trained model to fine-tune; when given, the
        run name gets a '_Finetune' suffix and training continues on a
        freshly built environment.
    :param path: optional base path forwarded to ``get_paths``.
    """
    if model:
        # indicate in filename that this is a finetune
        if params['name']:
            params['name'] += '_Finetune'
        else:
            params['name'] = 'Finetune'
    data_dir, tb_path = get_paths(params, path=path)
    print("Training Parameters: ", params)
    os.makedirs(data_dir, exist_ok=True)
    # Save parameters immediately so the run is reproducible even if it crashes.
    params.save(data_dir)
    # Under MPI, only rank 0 keeps logging; other ranks are silenced.
    rank = mpi_rank_or_zero()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    def make_env(i):
        # Build one monitored environment instance; `i` distinguishes the
        # per-worker monitor files written under data_dir.
        env = get_env(params)
        env = Monitor(env, data_dir + '/' + str(i), allow_early_resets=params['early_reset'])
        return env

    use_her = params['env_args']['use_her'] if 'use_her' in params['env_args'] else False
    if use_her:
        # HER wraps a single (non-vectorized) env.
        env = make_env(0)
        goal_selection_strategy = 'future'
    else:
        # The (lambda n: lambda: ...) double lambda binds `i` by value,
        # avoiding the late-binding closure pitfall across workers.
        env = DummyVecEnv([(lambda n: lambda: make_env(n))(i) for i in range(params['num_proc'])])
    if model:
        # Sanity-print both action spaces so a finetune/env mismatch is visible.
        print("Model action space", model.action_space, model.action_space.low)
        print("Env action space", env.action_space, env.action_space.low)
    if params['normalize']:
        env = VecNormalize(env)
    if params['seed']:
        # Offset the seed per MPI rank so workers do not share RNG streams.
        seed = params['seed'] + 100000 * rank
        set_global_seeds(seed)
        params['alg_args']['seed'] = seed
    if 'noise' in params and params['noise']:
        # Optional OU action noise with std taken from params['noise'].
        from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
        n_actions = env.action_space.shape[-1]
        params['alg_args']['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(params['noise'])*np.ones(n_actions))
    if model is None:
        # Fresh run: build the algorithm and policy from the config.
        alg = get_alg(params)
        policy = get_policy(params)
        if use_her:
            from stable_baselines import HER
            model = HER(policy, env, alg, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
        else:
            model = alg(policy, env, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
    else:
        # Finetune: keep the model, just point it at the new environment.
        model.set_env(env)
    model.learn(total_timesteps=params['timesteps'], log_interval=params['log_interval'],
                callback=create_training_callback(data_dir, freq=params['eval_freq'], checkpoint_freq=params['checkpoint_freq']))
    print("######## SAVING MODEL TO", data_dir)
    model.save(data_dir + '/final_model')
    if params['normalize']:
        # Persist normalization statistics alongside the model.
        env.save(data_dir + '/normalized_environment.env')
    env.close()
def train(params, model=None, env=None):
    """Train an RL agent described by ``params``, building an env if needed.

    :param params: run configuration; behaves like a dict and has a ``save``
        method. Keys read here: 'normalize', 'seed', 'noise', 'alg_args',
        'policy_args', 'timesteps', 'log_interval', 'eval_freq',
        'checkpoint_freq', 'early_reset'.
    :param model: optional existing model to continue training; when given,
        only its environment is replaced via ``set_env``.
    :param env: optional pre-built environment; when None, one is created
        (and optionally normalized) from ``params``.
    """
    print("Training Parameters: ", params)
    data_dir, tb_path = get_paths(params)
    os.makedirs(data_dir, exist_ok=True)
    # Save parameters immediately
    params.save(data_dir)
    # Under MPI, only rank 0 keeps logging; other ranks are silenced.
    rank = mpi_rank_or_zero()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    # Create the environment if not given
    if env is None:
        def make_env(i):
            # Build one monitored environment; `i` names the monitor file.
            env = get_env(params)
            print("ENV IN UTIL" ,env)
            # TODO: make monitor work for multiple agent.
            env = Monitor(env, data_dir + '/' + str(i), allow_early_resets=params['early_reset'])
            return env
        # if 'PPO' in params['alg']:
        #     env = DummyVecEnv([(lambda n: lambda: make_env(n))(i) for i in range(params['num_proc'])])
        # else:
        #     env = make_env(0)
        env = make_env(0)
    # NOTE(review): original formatting was lost — this normalization step may
    # have been nested under the `env is None` branch above; confirm intent.
    if params['normalize']:
        env = VecNormalize(env)
    # Set the seeds
    if params['seed']:
        # Offset the seed per MPI rank so workers do not share RNG streams.
        seed = params['seed'] + 100000 * rank
        set_global_seeds(seed)
        params['alg_args']['seed'] = seed
    if 'noise' in params and params['noise']:
        # Optional OU action noise with std taken from params['noise'].
        from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
        n_actions = env.action_space.shape[-1]
        params['alg_args']['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(params['noise'])*np.ones(n_actions))
    print("ENV", env, env.action_space)
    if model is None:
        # Fresh run: build the algorithm and policy from the config.
        alg = get_alg(params)
        policy = get_policy(params)
        model = alg(policy, env, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
    else:
        # Continue training the given model on the (possibly new) environment.
        model.set_env(env)
    print("\n===============================\n")
    print("TENSORBOARD PATH:", tb_path)
    print("\n===============================\n")
    model.learn(total_timesteps=params['timesteps'], log_interval=params['log_interval'],
                callback=create_training_callback(data_dir, params, env, freq=params['eval_freq'], checkpoint_freq=params['checkpoint_freq']))
    print("Saving model to", data_dir)
    model.save(data_dir + '/final_model')
    if params['normalize']:
        # Persist normalization statistics alongside the model.
        env.save(data_dir + '/environment.pkl')
    env.close()