Example #1
def configure_env(arg_dict, model_logdir=None, for_train=True):
    env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"],
                     "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"],
                     "robot_action": arg_dict["robot_action"], "max_velocity": arg_dict["max_velocity"],
                     "max_force": arg_dict["max_force"], "task_type": arg_dict["task_type"],
                     "action_repeat": arg_dict["action_repeat"],
                     "task_objects": arg_dict["task_objects"], "observation": arg_dict["observation"],
                     "distractors": arg_dict["distractors"],
                     "num_networks": arg_dict.get("num_networks", 1), "network_switcher": arg_dict.get("network_switcher", "gt"),
                     "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"],
                     "active_cameras": arg_dict["camera"], "color_dict": arg_dict.get("color_dict", {}),
                     "max_steps": arg_dict["max_episode_steps"], "visgym": arg_dict["visgym"],
                     "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"],
                     "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"]}
    if for_train:
        env_arguments["gui_on"] = False
    else:
        env_arguments["gui_on"] = arg_dict["gui"]

    if arg_dict["algo"] == "her":
        env = gym.make(arg_dict["env_name"], **env_arguments, obs_space="dict")  # her needs obs as a dict
    else:
        env = gym.make(arg_dict["env_name"], **env_arguments)
    if for_train:
        if arg_dict["engine"] == "mujoco":
            env = VecMonitor(env, model_logdir) if arg_dict["multiprocessing"] else Monitor(env, model_logdir)
        elif arg_dict["engine"] == "pybullet":
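            # tuple('d') expands to ('d',), so Monitor logs the 'd' entry (presumably a distance metric) from the env's info dict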
            env = Monitor(env, model_logdir, info_keywords=tuple('d'))

    if arg_dict["algo"] == "her":
        env = HERGoalEnvWrapper(env)
    return env
Example #2
def test_model_manipulation(model_class, goal_selection_strategy):
    env = BitFlippingEnv(N_BITS,
                         continuous=model_class in [DDPG, SAC],
                         max_steps=N_BITS)
    env = DummyVecEnv([lambda: env])

    model = HER('MlpPolicy',
                env,
                model_class,
                n_sampled_goal=3,
                goal_selection_strategy=goal_selection_strategy,
                verbose=0)
    model.learn(1000)

    model_predict(model, env, n_steps=100, additional_check=None)

    model.save('./test_her')
    del model

    # NOTE: HER does not support VecEnvWrapper yet
    with pytest.raises(AssertionError):
        model = HER.load('./test_her', env=VecNormalize(env))

    model = HER.load('./test_her')

    # Check that the model raises an error when the env
    # is not wrapped (or no env passed to the model)
    with pytest.raises(ValueError):
        model.predict(env.reset())

    env_ = BitFlippingEnv(N_BITS,
                          continuous=model_class in [DDPG, SAC],
                          max_steps=N_BITS)
    env_ = HERGoalEnvWrapper(env_)

    model_predict(model, env_, n_steps=100, additional_check=None)

    model.set_env(env)
    model.learn(1000)

    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    del model

    env = BitFlippingEnv(N_BITS,
                         continuous=model_class in [DDPG, SAC],
                         max_steps=N_BITS)
    model = HER.load('./test_her', env=env)
    model.learn(1000)

    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    if os.path.isfile('./test_her.pkl'):
        os.remove('./test_her.pkl')
Example #3
    def create_env(n_envs, eval_env=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether is it an environment used for evaluation or not
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env else save_path

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id, **env_kwargs)
            env.seed(args.seed)
            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([make_env(env_id, 0, args.seed, wrapper_class=env_wrapper, log_dir=log_dir, env_kwargs=env_kwargs)])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir,
                                            wrapper_class=env_wrapper, env_kwargs=env_kwargs) for i in range(n_envs)])
            if normalize:
                if args.verbose > 0:
                    if len(normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **normalize_kwargs)
        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
            del hyperparams['frame_stack']
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env
Example #4
    def __init__(self,
                 env: ISettableGoalEnv,
                 verbose=1,
                 experiment_name="ppo",
                 rank=0):
        self._env = env
        self._dirs = Dirs(
            experiment_name=f"{type(env).__name__}-{experiment_name}",
            rank=rank)
        self._flat_env = HERGoalEnvWrapper(env)
        options = {
            "env": DummyVecEnv([lambda: self._flat_env]),
            "tensorboard_log": self._dirs.tensorboard,
            "gamma": 1,
            "seed": rank,
            "nminibatches": 1
        }
        if os.path.isdir(self._dirs.models) and os.path.isfile(
                self._dirs.best_model):
            self._model = PPO2.load(load_path=self._dirs.best_model, **options)
            print(f"Loaded model {self._dirs.best_model}")
        else:
            self._model = PPO2("MlpPolicy", verbose=verbose, **options)
Example #5
    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = eval_freq
        if isinstance(model.get_env(), VecEnv):
            eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        if algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(eval_env, VecEnv):
                eval_env = _UnvecWrapper(eval_env)
            eval_env = HERGoalEnvWrapper(eval_env)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
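        # Negate the mean reward so that a higher reward gives a lower cost (the Optuna study is assumed to minimize this value)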
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost
Example #6
class PPOAgent(Agent):
    name = "ppo"

    def __init__(self,
                 env: ISettableGoalEnv,
                 verbose=1,
                 experiment_name="ppo",
                 rank=0):
        self._env = env
        self._dirs = Dirs(
            experiment_name=f"{type(env).__name__}-{experiment_name}",
            rank=rank)
        self._flat_env = HERGoalEnvWrapper(env)
        options = {
            "env": DummyVecEnv([lambda: self._flat_env]),
            "tensorboard_log": self._dirs.tensorboard,
            "gamma": 1,
            "seed": rank,
            "nminibatches": 1
        }
        if os.path.isdir(self._dirs.models) and os.path.isfile(
                self._dirs.best_model):
            self._model = PPO2.load(load_path=self._dirs.best_model, **options)
            print(f"Loaded model {self._dirs.best_model}")
        else:
            self._model = PPO2("MlpPolicy", verbose=verbose, **options)

    def __call__(self, obs: Observation) -> np.ndarray:
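        # Flatten the goal-dict observation into the flat vector the PPO2 policy was trained on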
        flat_obs = self._flat_env.convert_dict_to_obs(obs)
        action, _ = self._model.predict(flat_obs, deterministic=True)
        return action

    def train(self,
              timesteps: int,
              num_checkpoints=4,
              callbacks: Sequence[BaseCallback] = None):
        ppo_offset = 128
        callbacks = [] if callbacks is None else callbacks
        cb = CheckpointCallback(save_freq=timesteps // num_checkpoints,
                                save_path=self._dirs.models,
                                name_prefix=self._dirs.prefix)
        self._model.learn(total_timesteps=timesteps + ppo_offset,
                          callback=CallbackList([cb, *callbacks]),
                          log_interval=100)
Example #7
def main():
    panda_env = PandaGraspGymEnv(urdf_root=object_data.getDataPath(),
                                 is_rendering=True,
                                 use_ik=True,
                                 is_discrete=True,
                                 num_controlled_joints=7,
                                 reward_type="sparse")

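    # HERGoalEnvWrapper flattens the goal env's dict observations so the loaded model and evaluate_policy can consume them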
    env = HERGoalEnvWrapper(panda_env)

    model = HER.load("logs/rl_model_1000000_steps.zip")

    episode_rewards, episode_lengths, episode_success = evaluate_policy(
        model,
        env,
        n_eval_episodes=50,
        render=False,
        deterministic=True,
        return_episode_rewards=True)
    print("Final Reward {}, Episode Length {}, Success Rate {}".format(
        np.mean(episode_rewards), np.mean(episode_lengths),
        np.mean(episode_success)))
Example #8
def main(_algo_name, _trained_models_dir, _trained_model_name, _lock_rotation,
         _should_eval, _eval_num_episodes, _should_render):

    is_discrete = (_algo_name == 'DQN')

    eval_env = HERGoalEnvWrapper(
        get_environment(_lock_rotation=_lock_rotation,
                        _is_discrete=is_discrete,
                        _should_eval=_should_eval,
                        _should_render=_should_render))

    _trained_models_dir = _trained_models_dir if _trained_models_dir.endswith(
        '/') else _trained_models_dir + '/'

    model = ALGOS[_algo_name].load(_trained_models_dir + _trained_model_name)

    if _should_eval:
        episode_rewards, episode_lengths, episode_success = evaluate_policy(
            model=model,
            env=eval_env,
            n_eval_episodes=_eval_num_episodes,
            render=(not _should_eval) or _should_render,
            deterministic=True,
            return_episode_rewards=True)

        print(
            "Final evaluation of the {} algorithm on {} episodes: "
            "\nReward: \n \tMEAN: {}, \tSTD: {}, \nEpisode Length: \n \tMEAN: {}, \tSTD: {}, \nSuccess Rate: {}"
            .format(_algo_name, _eval_num_episodes, np.mean(episode_rewards),
                    np.std(episode_rewards), np.mean(episode_lengths),
                    np.std(episode_lengths), np.mean(episode_success)))

    else:
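        # No evaluation requested: roll out the loaded policy interactively and render it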
        obs = eval_env.reset()
        for i in range(2000):
            action, _states = model.predict(obs)
            obs, rewards, done, info = eval_env.step(action)
            eval_env.render(mode='human')
            if done:
                obs = eval_env.reset()
Example #9
    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.
            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps -
                    self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_episodes, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_episodes < n_test_episodes:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = self_.test_env.reset()

            mean_reward = np.mean(rewards)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # report best or report current?
            # report num_timesteps or elapsed time?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if need
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        model = model_fn(**kwargs)
        model.test_env = env_fn(n_envs=1)
        model.trial = trial
        if algo == 'her':
            model.model.trial = trial
            # Wrap the env if need to flatten the dict obs
            if isinstance(model.test_env, VecEnv):
                model.test_env = _UnvecWrapper(model.test_env)
            model.model.test_env = HERGoalEnvWrapper(model.test_env)

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
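            # Cost is the negated mean test reward, so a lower cost corresponds to a better policy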
            cost = -1 * model.last_mean_test_reward
        del model.env, model.test_env
        del model

        if is_pruned:
            raise optuna.structs.TrialPruned()

        return cost
Example #10
def main(_algo_name, _algo_tag, _tag_suffix, _save_freq, _lock_rotation, _eval_num, _eval_freq, hyperparams):
    rotation_tag = "_LOCKED_ROT_" if _lock_rotation else "_ROTATION_"
    full_tag = _algo_name + rotation_tag + _algo_tag + _tag_suffix
    current_dir = _algo_name + "/" + full_tag
    log_dir = current_dir + "/log/"
    eval_log_dir = current_dir + "/log/eval/"
    trained_models_dir = current_dir + "/models/"
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(eval_log_dir, exist_ok=True)
    os.makedirs(trained_models_dir, exist_ok=True)

    is_discrete = (_algo_name == 'DQN')

    panda_env = HERGoalEnvWrapper(CustomMonitor(get_environment(_lock_rotation=_lock_rotation,
                                                                _is_discrete=is_discrete), log_dir))
    eval_env = HERGoalEnvWrapper(CustomMonitor(get_environment(_lock_rotation=_lock_rotation,
                                                               _is_discrete=is_discrete), eval_log_dir))

    callbacks = []
    if _save_freq > 0:
        callbacks.append(CheckpointCallback(_save_freq, trained_models_dir))
    callbacks.append(MeanHundredEpsTensorboardCallback(log_dir))
    callbacks.append(StdHundredEpsTensorboardCallback(log_dir))
    callbacks.append(SuccessRateTensorboardCallback(log_dir))
    if _algo_name == 'DDPG':
        callbacks.append(SaveOnBestTrainingRewardCallback(10000, log_dir))
    elif _eval_freq > 0:
        callbacks.append(EvalCallback(eval_env,
                                      best_model_save_path=trained_models_dir,
                                      log_path=log_dir,
                                      eval_freq=_eval_freq,
                                      deterministic=True,
                                      render=False,
                                      n_eval_episodes=_eval_num))

    time_steps = hyperparams.pop('n_timesteps') if hyperparams.get('n_timesteps') is not None else None

    param_noise = None
    action_noise = None
    if hyperparams.get('noise_type') is not None:
        noise_type = hyperparams.pop('noise_type').strip()
        if 'ornstein-uhlenbeck' in noise_type:
            n_actions = panda_env.action_space.shape[-1]
            action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                        sigma=float(0.005) * np.ones(n_actions))
        elif 'param_noise' in noise_type:
            param_noise = AdaptiveParamNoiseSpec(initial_stddev=0.1, desired_action_stddev=0.1)

    # add action noise for DDPG or TD3; for DQN, noise is already passed as a flag in hyperparams
    if _algo_name == 'DDPG' or _algo_name == 'TD3':
        hyperparams['action_noise'] = action_noise

    # add hyperparams specific only for DDPG
    if _algo_name == 'DDPG':
        hyperparams['param_noise'] = param_noise
        hyperparams['eval_env'] = eval_env

    model = ALGOS[_algo_name](env=panda_env,
                              tensorboard_log="tensorboard/",
                              n_cpu_tf_sess=None,
                              **hyperparams)

    model.learn(total_timesteps=time_steps,
                callback=callbacks,
                tb_log_name=full_tag,
                log_interval=10)

    model.save(current_dir + "/" + full_tag + "_final")
Example #11
    def create_env(n_envs, eval_env=False, no_log=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether is it an environment used for evaluation or not
        :param no_log: (bool) Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else save_path

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id, **env_kwargs)
            env.seed(args.seed)

            # added by Pierre (for some reason, monitoring the training wasn't enabled for DDPG)
            log_file = os.path.join(log_dir,
                                    str(rank)) if log_dir is not None else None
            env = Monitor(env, log_file)

            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([
                    make_env(env_id,
                             0,
                             args.seed,
                             wrapper_class=env_wrapper,
                             log_dir=log_dir,
                             env_kwargs=env_kwargs)
                ])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([
                    make_env(env_id,
                             i,
                             args.seed,
                             log_dir=log_dir,
                             wrapper_class=env_wrapper,
                             env_kwargs=env_kwargs) for i in range(n_envs)
                ])
            if normalize:
                # Copy to avoid changing default values by reference
                local_normalize_kwargs = normalize_kwargs.copy()
                # Do not normalize reward for env used for evaluation
                if eval_env:
                    if len(local_normalize_kwargs) > 0:
                        local_normalize_kwargs['norm_reward'] = False
                    else:
                        local_normalize_kwargs = {'norm_reward': False}

                if args.verbose > 0:
                    if len(local_normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(
                            local_normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **local_normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env
Example #12
    def create_env(n_envs, eval_env=False, no_log=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether is it an environment used for evaluation or not
        :param no_log: (bool) Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else save_path
        if eval_env:
            eval_env_kwargs = env_kwargs

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        else:
            if n_envs == 1:
                if eval_env:
                    eval_env_kwargs["goal_tolerance_parameters"]["set_tol"] = 0.001
                    env = DummyVecEnv(
                        [make_env(env_id, 0, args.seed, wrapper_class=env_wrapper, log_dir=log_dir,
                                  info_keywords=("is_success", "error"), env_kwargs=eval_env_kwargs)])
                else:
                    env = DummyVecEnv(
                        [make_env(env_id, 0, args.seed, wrapper_class=env_wrapper, log_dir=log_dir,
                                  info_keywords=(), env_kwargs=env_kwargs)])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir,
                                            wrapper_class=env_wrapper, env_kwargs=env_kwargs) for i in range(n_envs)])
            if normalize:
                # Copy to avoid changing default values by reference
                local_normalize_kwargs = normalize_kwargs.copy()
                # Do not normalize reward for env used for evaluation
                if eval_env:
                    if len(local_normalize_kwargs) > 0:
                        local_normalize_kwargs['norm_reward'] = False
                    else:
                        local_normalize_kwargs = {'norm_reward': False}

                if args.verbose > 0:
                    if len(local_normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(local_normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **local_normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env
Example #13
def configure_env(arg_dict, model_logdir=None, for_train=True):
    if arg_dict["engine"] == "pybullet":
        env_arguments = {
            "render_on": True,
            "visualize": arg_dict["visualize"],
            "workspace": arg_dict["workspace"],
            "robot": arg_dict["robot"],
            "robot_init_joint_poses": arg_dict["robot_init"],
            "robot_action": arg_dict["robot_action"],
            "task_type": arg_dict["task_type"],
            "num_subgoals": arg_dict["num_subgoals"],
            "task_objects": arg_dict["task_objects"],
            "distractors": arg_dict["distractors"],
            "distractor_moveable": arg_dict["distractor_moveable"],
            "distractor_constant_speed": arg_dict["distractor_constant_speed"],
            "distractor_movement_dimensions": arg_dict["distractor_movement_dimensions"],
            "distractor_movement_endpoints": arg_dict["distractor_movement_endpoints"],
            "coefficient_kd": arg_dict["coefficient_kd"],
            "coefficient_kw": arg_dict["coefficient_kw"],
            "coefficient_ka": arg_dict["coefficient_ka"],
            "observed_links_num": arg_dict["observed_links_num"],
            "reward_type": arg_dict["reward_type"],
            "distance_type": arg_dict["distance_type"],
            "used_objects": arg_dict["used_objects"],
            "object_sampling_area": arg_dict["object_sampling_area"],
            "active_cameras": arg_dict["camera"],
            "max_steps": arg_dict["max_episode_steps"],
            "visgym": arg_dict["visgym"],
            "reward": arg_dict["reward"],
            "logdir": arg_dict["logdir"],
            "vae_path": arg_dict["vae_path"],
            "yolact_path": arg_dict["yolact_path"],
            "yolact_config": arg_dict["yolact_config"]
        }
        if for_train:
            env_arguments["gui_on"] = False
        else:
            env_arguments["gui_on"] = arg_dict["gui"]

        if arg_dict["algo"] == "her":
            env = gym.make(arg_dict["env_name"],
                           **env_arguments,
                           obs_space="dict")  # her needs obs as a dict
        else:
            env = gym.make(arg_dict["env_name"], **env_arguments)
    elif arg_dict["engine"] == "mujoco":
        if arg_dict["multiprocessing"]:
            # ACKTR, PPO2, A2C, DDPG can use vectorized environments, but the only way to display the results (for me) is using CV2 imshow. -(TensorFlow comment)
            env = make_vec_env(arg_dict["env_name"],
                               n_envs=arg_dict["vectorized_envs"])
        else:
            env = gym.make(arg_dict["env_name"])
    if for_train:
        if arg_dict["engine"] == "mujoco":
            env = VecMonitor(
                env, model_logdir) if arg_dict["multiprocessing"] else Monitor(
                    env, model_logdir)
        elif arg_dict["engine"] == "pybullet":
            env = Monitor(env, model_logdir, info_keywords=tuple('d'))

    if arg_dict["algo"] == "her":
        env = HERGoalEnvWrapper(env)
    return env
Example #14
    def create_env(n_envs, eval_env=False, no_log=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether is it an environment used for evaluation or not
        :param no_log: (bool) Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else save_path

        # Set initializer and action type for the environment; the standard implementation currently does not
        # support custom types, so convert them here (env_kwargs is global, and the isinstance guards avoid converting again on repeated calls)
        if "initializer" in env_kwargs.keys() and isinstance(
                env_kwargs["initializer"], int):
            if env_kwargs["initializer"] == 0:
                env_kwargs["initializer"] = RandomInitializer(
                    env_kwargs.pop("difficulty"))
            elif env_kwargs["initializer"] == 1:
                env_kwargs["initializer"] = CompletelyRandomInitializer()
            else:
                raise RuntimeError('Unsupported initializer "{}"'.format(
                    env_kwargs["initializer"]))

        if "action_type" in env_kwargs.keys() and isinstance(
                env_kwargs["action_type"], str):
            if env_kwargs["action_type"] == "POSITION":
                env_kwargs["action_type"] = ActionType.POSITION
            elif env_kwargs["action_type"] == "TORQUE":
                env_kwargs["action_type"] = ActionType.TORQUE
            elif env_kwargs["action_type"] == "TORQUE_AND_POSITION":
                env_kwargs["action_type"] = ActionType.TORQUE_AND_POSITION
            else:
                raise RuntimeError('Unsupported Action Type "{}"'.format(
                    env_kwargs["action_type"]))
        else:
            env_kwargs["action_type"] = ActionType.POSITION

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id, **env_kwargs)
            env.seed(args.seed)
            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([
                    make_env(env_id,
                             0,
                             args.seed,
                             wrapper_class=env_wrapper,
                             log_dir=log_dir,
                             env_kwargs=env_kwargs)
                ])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([
                    make_env(env_id,
                             i,
                             args.seed,
                             log_dir=log_dir,
                             wrapper_class=env_wrapper,
                             env_kwargs=env_kwargs) for i in range(n_envs)
                ])
            if normalize:
                # Copy to avoid changing default values by reference
                local_normalize_kwargs = normalize_kwargs.copy()
                # Do not normalize reward for env used for evaluation
                if eval_env:
                    if len(local_normalize_kwargs) > 0:
                        local_normalize_kwargs['norm_reward'] = False
                    else:
                        local_normalize_kwargs = {'norm_reward': False}

                if args.verbose > 0:
                    if len(local_normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(
                            local_normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **local_normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env