def objective(trial):
    kwargs = hyperparams.copy()

    trial.model_class = None
    if algo == 'her':
        trial.model_class = hyperparams['model_class']

    # Hack to use DDPG/TD3 noise sampler
    # if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
    # Bug to report: `trial.model_class` holds a class, not a string (changed by Pierre)
    if algo in ['ddpg', 'td3'] or trial.model_class in [DDPG, TD3]:
        trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
    kwargs.update(algo_sampler(trial))

    model = model_fn(**kwargs)

    eval_env = env_fn(n_envs=1, eval_env=True)
    # Account for parallel envs
    eval_freq_ = eval_freq
    if isinstance(model.get_env(), VecEnv):
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
    # TODO: use non-deterministic eval for Atari?
    eval_callback = TrialEvalCallback(eval_env, trial, n_eval_episodes=n_eval_episodes,
                                      eval_freq=eval_freq_, deterministic=True)

    if algo == 'her':
        # Wrap the env if needed to flatten the dict obs
        if isinstance(eval_env, VecEnv):
            print("UNVECTORIZE ENV")
            eval_env = _UnvecWrapper(eval_env)
        # eval_env = HERGoalEnvWrapper(eval_env)  # commented by Pierre

    try:
        model.learn(n_timesteps, callback=eval_callback)
        # Free memory
        model.env.close()
        eval_env.close()
    except AssertionError:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        model.env.close()
        eval_env.close()
        raise optuna.exceptions.TrialPruned()

    is_pruned = eval_callback.is_pruned
    cost = -1 * eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()

    return cost
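# The objective above relies on a `TrialEvalCallback` helper. Below is a minimal
# sketch of what such a callback might look like, assuming the `EvalCallback` API
# from stable-baselines (`last_mean_reward`, `n_calls`, `_on_step` are EvalCallback
# attributes); the Optuna reporting/pruning logic is inferred from how the callback
# is used above, and the exact implementation in the repo may differ.
from stable_baselines.common.callbacks import EvalCallback


class TrialEvalCallback(EvalCallback):
    """Evaluate periodically and report the result to an Optuna trial (sketch)."""

    def __init__(self, eval_env, trial, n_eval_episodes=5,
                 eval_freq=10000, deterministic=True, verbose=0):
        super(TrialEvalCallback, self).__init__(eval_env=eval_env,
                                                n_eval_episodes=n_eval_episodes,
                                                eval_freq=eval_freq,
                                                deterministic=deterministic,
                                                verbose=verbose)
        self.trial = trial
        self.eval_idx = 0
        self.is_pruned = False

    def _on_step(self):
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # The parent class runs the evaluation and updates self.last_mean_reward
            super(TrialEvalCallback, self)._on_step()
            self.eval_idx += 1
            # Report the negative reward so that lower is better for the study
            self.trial.report(-1 * self.last_mean_reward, self.eval_idx)
            if self.trial.should_prune():
                self.is_pruned = True
                return False
        return True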
def create_env(n_envs, eval_env=False):
    """
    Create the environment and wrap it if necessary

    :param n_envs: (int)
    :param eval_env: (bool) Whether it is an environment used for evaluation or not
    :return: (Union[gym.Env, VecEnv])
    """
    global hyperparams
    global env_kwargs

    # Do not log eval env (issue with writing the same file)
    log_dir = None if eval_env else save_path

    if is_atari:
        if args.verbose > 0:
            print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif algo_ in ['dqn', 'ddpg']:
        if hyperparams.get('normalize', False):
            print("WARNING: normalization not supported yet for DDPG/DQN")
        env = gym.make(env_id, **env_kwargs)
        env.seed(args.seed)
        if env_wrapper is not None:
            env = env_wrapper(env)
    else:
        if n_envs == 1:
            env = DummyVecEnv([make_env(env_id, 0, args.seed,
                                        wrapper_class=env_wrapper,
                                        log_dir=log_dir,
                                        env_kwargs=env_kwargs)])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most envs, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([make_env(env_id, i, args.seed,
                                        log_dir=log_dir,
                                        wrapper_class=env_wrapper,
                                        env_kwargs=env_kwargs) for i in range(n_envs)])
        if normalize:
            if args.verbose > 0:
                if len(normalize_kwargs) > 0:
                    print("Normalization activated: {}".format(normalize_kwargs))
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **normalize_kwargs)

    # Optional Frame-stacking
    if hyperparams.get('frame_stack', False):
        n_stack = hyperparams['frame_stack']
        env = VecFrameStack(env, n_stack)
        print("Stacking {} frames".format(n_stack))
        del hyperparams['frame_stack']

    if args.algo == 'her':
        # Wrap the env if needed to flatten the dict obs
        if isinstance(env, VecEnv):
            env = _UnvecWrapper(env)
        env = HERGoalEnvWrapper(env)

    return env
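# The create_env variants in this file all depend on a `make_env` helper that
# returns a thunk suitable for DummyVecEnv/SubprocVecEnv. Below is a minimal
# sketch, assuming the Monitor-based logging from stable-baselines; the
# `info_keywords` pass-through matches the variant further down. The actual
# helper in the repo may differ in details.
import os

import gym
from stable_baselines.bench import Monitor
from stable_baselines.common import set_global_seeds


def make_env(env_id, rank=0, seed=0, log_dir=None, wrapper_class=None,
             env_kwargs=None, info_keywords=()):
    """Return a function that creates and wraps a single, monitored env."""
    if env_kwargs is None:
        env_kwargs = {}

    def _init():
        set_global_seeds(seed + rank)
        env = gym.make(env_id, **env_kwargs)
        env.seed(seed + rank)
        if wrapper_class is not None:
            env = wrapper_class(env)
        # One Monitor file per worker so that parallel envs do not clash
        log_file = os.path.join(log_dir, str(rank)) if log_dir is not None else None
        env = Monitor(env, log_file, allow_early_resets=True,
                      info_keywords=info_keywords)
        return env

    return _init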
def __init__(self, policy_name: str, env_dict):
    self.policy_name = policy_name
    self.policy = get_policy_from_string(self.policy_name)
    self.env_dict = env_dict
    self.tasks = list(self.env_dict.keys()) if self.env_dict is not None else None
    self.verbose = config.verbose
    self.observation_space_dict = {}
    self.action_space_dict = {}
    self.n_envs_per_task = None
    self.num_timesteps = 0

    if env_dict is not None:
        if not isinstance(env_dict, dict):
            raise ValueError(
                "env_dict must be a dictionary whose keys are the names of the games "
                "and whose values are SubprocVecEnv objects")
        for key in env_dict.keys():
            self.observation_space_dict[key] = env_dict[key].observation_space
            self.action_space_dict[key] = env_dict[key].action_space
        for key in self.env_dict.keys():
            if isinstance(self.env_dict[key], VecEnv):
                if env_dict[key].num_envs == 1:
                    self.env_dict[key] = _UnvecWrapper(env_dict[key])
                    self._vectorize_action = True
                if self.n_envs_per_task is None:
                    self.n_envs_per_task = self.env_dict[key].num_envs
                else:
                    if self.n_envs_per_task != self.env_dict[key].num_envs:
                        raise ValueError("All tasks must have the same number of environments")
                    break
            else:
                raise ValueError(
                    "Error: the model requires a vectorized environment, please use a VecEnv wrapper.")
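# Hypothetical usage sketch for the multi-task constructor above: `env_dict` maps
# task names to vectorized envs with the same number of workers per task. The
# class name `MultiTaskModel`, the task names, and the env ids are illustrative
# placeholders, not names from the repo.
import gym
from stable_baselines.common.vec_env import SubprocVecEnv

n_envs_per_task = 4
env_dict = {
    'cartpole': SubprocVecEnv([lambda: gym.make('CartPole-v1')
                               for _ in range(n_envs_per_task)]),
    'pendulum': SubprocVecEnv([lambda: gym.make('Pendulum-v0')
                               for _ in range(n_envs_per_task)]),
}
model = MultiTaskModel('MlpPolicy', env_dict)  # hypothetical class using __init__ above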
def objective(trial):
    kwargs = hyperparams.copy()

    trial.model_class = None
    if algo == 'her':
        trial.model_class = hyperparams['model_class']

    # Hack to use DDPG/TD3 noise sampler
    if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
        trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
    kwargs.update(algo_sampler(trial))

    def callback(_locals, _globals):
        """
        Callback for monitoring learning progress.

        :param _locals: (dict)
        :param _globals: (dict)
        :return: (bool) If False: stop training
        """
        self_ = _locals['self']
        trial = self_.trial

        # Initialize variables
        if not hasattr(self_, 'is_pruned'):
            self_.is_pruned = False
            self_.last_mean_test_reward = -np.inf
            self_.last_time_evaluated = 0
            self_.eval_idx = 0

        if (self_.num_timesteps - self_.last_time_evaluated) < evaluate_interval:
            return True

        self_.last_time_evaluated = self_.num_timesteps

        # Evaluate the trained agent on the test env
        rewards = []
        n_episodes, reward_sum = 0, 0.0

        # Sync the obs rms if using vecnormalize
        # NOTE: this does not cover all the possible cases
        if isinstance(self_.test_env, VecNormalize):
            self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
            # Do not normalize reward
            self_.test_env.norm_reward = False

        obs = self_.test_env.reset()
        while n_episodes < n_test_episodes:
            # Use default value for deterministic
            action, _ = self_.predict(obs)
            obs, reward, done, _ = self_.test_env.step(action)
            reward_sum += reward

            if done:
                rewards.append(reward_sum)
                reward_sum = 0.0
                n_episodes += 1
                obs = self_.test_env.reset()

        mean_reward = np.mean(rewards)
        self_.last_mean_test_reward = mean_reward
        self_.eval_idx += 1

        # Report best or report current?
        # Report num_timesteps or elapsed time?
        trial.report(-1 * mean_reward, self_.eval_idx)
        # Prune trial if needed
        if trial.should_prune(self_.eval_idx):
            self_.is_pruned = True
            return False

        return True

    model = model_fn(**kwargs)
    model.test_env = env_fn(n_envs=1)
    model.trial = trial
    if algo == 'her':
        model.model.trial = trial
        # Wrap the env if needed to flatten the dict obs
        if isinstance(model.test_env, VecEnv):
            model.test_env = _UnvecWrapper(model.test_env)
        model.model.test_env = HERGoalEnvWrapper(model.test_env)

    try:
        model.learn(n_timesteps, callback=callback)
        # Free memory
        model.env.close()
        model.test_env.close()
    except AssertionError:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        model.env.close()
        model.test_env.close()
        raise

    is_pruned = False
    cost = np.inf
    if hasattr(model, 'is_pruned'):
        is_pruned = model.is_pruned
        cost = -1 * model.last_mean_test_reward

    del model.env, model.test_env
    del model

    if is_pruned:
        raise optuna.structs.TrialPruned()

    return cost
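# A minimal sketch of wiring the objective above into an Optuna study. The
# sampler/pruner choices and the trial count are illustrative, not from the repo;
# note that `trial.should_prune(step)` and `optuna.structs.TrialPruned` above
# follow the older Optuna API.
import optuna

sampler = optuna.samplers.TPESampler(n_startup_trials=5, seed=args.seed)
pruner = optuna.pruners.MedianPruner(n_startup_trials=5)
study = optuna.create_study(sampler=sampler, pruner=pruner)  # minimizes cost by default
study.optimize(objective, n_trials=100, n_jobs=1)
print("Best hyperparameters:", study.best_trial.params)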
def create_env(n_envs, eval_env=False, no_log=False):
    """
    Create the environment and wrap it if necessary

    :param n_envs: (int)
    :param eval_env: (bool) Whether it is an environment used for evaluation or not
    :param no_log: (bool) Do not log training when doing hyperparameter optim
        (issue with writing the same file)
    :return: (Union[gym.Env, VecEnv])
    """
    global hyperparams
    global env_kwargs

    # Do not log eval env (issue with writing the same file)
    log_dir = None if eval_env or no_log else save_path

    if is_atari:
        if args.verbose > 0:
            print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif algo_ in ['dqn', 'ddpg']:
        if hyperparams.get('normalize', False):
            print("WARNING: normalization not supported yet for DDPG/DQN")
        env = gym.make(env_id, **env_kwargs)
        env.seed(args.seed)
        # Added by Pierre (for some reason, monitoring the training wasn't enabled for DDPG)
        # `rank` is a module-level global (0 unless using MPI)
        log_file = os.path.join(log_dir, str(rank)) if log_dir is not None else None
        env = Monitor(env, log_file)
        if env_wrapper is not None:
            env = env_wrapper(env)
    else:
        if n_envs == 1:
            env = DummyVecEnv([make_env(env_id, 0, args.seed,
                                        wrapper_class=env_wrapper,
                                        log_dir=log_dir,
                                        env_kwargs=env_kwargs)])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most envs, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([make_env(env_id, i, args.seed,
                                        log_dir=log_dir,
                                        wrapper_class=env_wrapper,
                                        env_kwargs=env_kwargs) for i in range(n_envs)])
        if normalize:
            # Copy to avoid changing default values by reference
            local_normalize_kwargs = normalize_kwargs.copy()
            # Do not normalize reward for env used for evaluation
            if eval_env:
                if len(local_normalize_kwargs) > 0:
                    local_normalize_kwargs['norm_reward'] = False
                else:
                    local_normalize_kwargs = {'norm_reward': False}

            if args.verbose > 0:
                if len(local_normalize_kwargs) > 0:
                    print("Normalization activated: {}".format(local_normalize_kwargs))
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **local_normalize_kwargs)

    # Optional Frame-stacking
    if hyperparams.get('frame_stack', False):
        n_stack = hyperparams['frame_stack']
        env = VecFrameStack(env, n_stack)
        print("Stacking {} frames".format(n_stack))

    if args.algo == 'her':
        # Wrap the env if needed to flatten the dict obs
        if isinstance(env, VecEnv):
            env = _UnvecWrapper(env)
        env = HERGoalEnvWrapper(env)

    return env
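# Typical usage sketch for the function above: a logged training env plus a
# separate, unlogged eval env whose rewards are left unnormalized. The env counts
# are illustrative. When `normalize` is on, remember to sync `obs_rms` from the
# training env to the eval env before evaluating, as the objective callbacks
# above do.
train_env = create_env(n_envs=8)
eval_env = create_env(n_envs=1, eval_env=True)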
def create_env(n_envs, eval_env=False, no_log=False):
    """
    Create the environment and wrap it if necessary

    :param n_envs: (int)
    :param eval_env: (bool) Whether it is an environment used for evaluation or not
    :param no_log: (bool) Do not log training when doing hyperparameter optim
        (issue with writing the same file)
    :return: (Union[gym.Env, VecEnv])
    """
    global hyperparams
    global env_kwargs

    # Do not log eval env (issue with writing the same file)
    log_dir = None if eval_env or no_log else save_path

    if eval_env:
        # Deep copy so that tightening the tolerance below does not also
        # modify the global env_kwargs used for training
        eval_env_kwargs = deepcopy(env_kwargs)

    if is_atari:
        if args.verbose > 0:
            print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    else:
        if n_envs == 1:
            if eval_env:
                eval_env_kwargs["goal_tolerance_parameters"]["set_tol"] = 0.001
                env = DummyVecEnv([make_env(env_id, 0, args.seed,
                                            wrapper_class=env_wrapper,
                                            log_dir=log_dir,
                                            info_keywords=("is_success", "error"),
                                            env_kwargs=eval_env_kwargs)])
            else:
                env = DummyVecEnv([make_env(env_id, 0, args.seed,
                                            wrapper_class=env_wrapper,
                                            log_dir=log_dir,
                                            info_keywords=(),
                                            env_kwargs=env_kwargs)])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most envs, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([make_env(env_id, i, args.seed,
                                        log_dir=log_dir,
                                        wrapper_class=env_wrapper,
                                        env_kwargs=env_kwargs) for i in range(n_envs)])
        if normalize:
            # Copy to avoid changing default values by reference
            local_normalize_kwargs = normalize_kwargs.copy()
            # Do not normalize reward for env used for evaluation
            if eval_env:
                if len(local_normalize_kwargs) > 0:
                    local_normalize_kwargs['norm_reward'] = False
                else:
                    local_normalize_kwargs = {'norm_reward': False}

            if args.verbose > 0:
                if len(local_normalize_kwargs) > 0:
                    print("Normalization activated: {}".format(local_normalize_kwargs))
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **local_normalize_kwargs)

    # Optional Frame-stacking
    if hyperparams.get('frame_stack', False):
        n_stack = hyperparams['frame_stack']
        env = VecFrameStack(env, n_stack)
        print("Stacking {} frames".format(n_stack))

    if args.algo == 'her':
        # Wrap the env if needed to flatten the dict obs
        if isinstance(env, VecEnv):
            env = _UnvecWrapper(env)
        env = HERGoalEnvWrapper(env)

    return env
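# Hypothetical env_kwargs for the variant above: the "goal_tolerance_parameters"
# key mirrors the one accessed in create_env, but the inner structure and numbers
# are illustrative assumptions, not values from the repo.
env_kwargs = {
    "goal_tolerance_parameters": {
        "set_tol": 0.01,  # training tolerance; the eval env tightens this to 0.001
    },
}
train_env = create_env(n_envs=8)
eval_env = create_env(n_envs=1, eval_env=True)  # logs is_success/error via info_keywords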
def create_env(n_envs, eval_env=False, no_log=False):
    """
    Create the environment and wrap it if necessary

    :param n_envs: (int)
    :param eval_env: (bool) Whether it is an environment used for evaluation or not
    :param no_log: (bool) Do not log training when doing hyperparameter optim
        (issue with writing the same file)
    :return: (Union[gym.Env, VecEnv])
    """
    global hyperparams
    global env_kwargs

    # Do not log eval env (issue with writing the same file)
    log_dir = None if eval_env or no_log else save_path

    # Set initializer and action type for the environment; the standard implementation
    # currently does not support custom types, so convert them here. env_kwargs is
    # global, so the codes are replaced in place and not converted again on repeated calls.
    if "initializer" in env_kwargs.keys() and isinstance(env_kwargs["initializer"], int):
        if env_kwargs["initializer"] == 0:
            env_kwargs["initializer"] = RandomInitializer(env_kwargs.pop("difficulty"))
        elif env_kwargs["initializer"] == 1:
            env_kwargs["initializer"] = CompletelyRandomInitializer()
        else:
            raise RuntimeError('Unsupported initializer "{}"'.format(env_kwargs["initializer"]))

    # The action type arrives as a string code; convert it once (on repeated calls
    # env_kwargs already holds the ActionType enum and is left untouched)
    if "action_type" in env_kwargs.keys() and isinstance(env_kwargs["action_type"], str):
        if env_kwargs["action_type"] == "POSITION":
            env_kwargs["action_type"] = ActionType.POSITION
        elif env_kwargs["action_type"] == "TORQUE":
            env_kwargs["action_type"] = ActionType.TORQUE
        elif env_kwargs["action_type"] == "TORQUE_AND_POSITION":
            env_kwargs["action_type"] = ActionType.TORQUE_AND_POSITION
        else:
            raise RuntimeError('Unsupported action_type "{}"'.format(env_kwargs["action_type"]))
    elif "action_type" not in env_kwargs.keys():
        # Default when no action type is given
        env_kwargs["action_type"] = ActionType.POSITION

    if is_atari:
        if args.verbose > 0:
            print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif algo_ in ['dqn', 'ddpg']:
        if hyperparams.get('normalize', False):
            print("WARNING: normalization not supported yet for DDPG/DQN")
        env = gym.make(env_id, **env_kwargs)
        env.seed(args.seed)
        if env_wrapper is not None:
            env = env_wrapper(env)
    else:
        if n_envs == 1:
            env = DummyVecEnv([make_env(env_id, 0, args.seed,
                                        wrapper_class=env_wrapper,
                                        log_dir=log_dir,
                                        env_kwargs=env_kwargs)])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most envs, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([make_env(env_id, i, args.seed,
                                        log_dir=log_dir,
                                        wrapper_class=env_wrapper,
                                        env_kwargs=env_kwargs) for i in range(n_envs)])
        if normalize:
            # Copy to avoid changing default values by reference
            local_normalize_kwargs = normalize_kwargs.copy()
            # Do not normalize reward for env used for evaluation
            if eval_env:
                if len(local_normalize_kwargs) > 0:
                    local_normalize_kwargs['norm_reward'] = False
                else:
                    local_normalize_kwargs = {'norm_reward': False}

            if args.verbose > 0:
                if len(local_normalize_kwargs) > 0:
                    print("Normalization activated: {}".format(local_normalize_kwargs))
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **local_normalize_kwargs)

    # Optional Frame-stacking
    if hyperparams.get('frame_stack', False):
        n_stack = hyperparams['frame_stack']
        env = VecFrameStack(env, n_stack)
        print("Stacking {} frames".format(n_stack))

    if args.algo == 'her':
        # Wrap the env if needed to flatten the dict obs
        if isinstance(env, VecEnv):
            env = _UnvecWrapper(env)
        env = HERGoalEnvWrapper(env)

    return env
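# Hypothetical kwargs as they might arrive from a YAML/CLI config for the variant
# above, using the integer/string codes that create_env converts to real objects.
# The key names mirror those accessed in create_env; the values are illustrative.
env_kwargs = {
    "initializer": 0,           # 0 -> RandomInitializer(difficulty), 1 -> CompletelyRandomInitializer()
    "difficulty": 1,            # popped and passed to RandomInitializer
    "action_type": "POSITION",  # mapped to ActionType.POSITION
}
env = create_env(n_envs=1)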