except FileNotFoundError: # We did not use VecNormalize during training, skip pass yield policy finally: if model is not None and model.sess is not None: model.sess.close() return f
# NOTE(review): the line above is the tail of a definition whose start lies
# outside the visible source. It is kept verbatim rather than re-indented,
# because its nesting depth cannot be determined from here.

# Register the "random" policy loader. The loader is built by wrapping
# RandomPolicy with registry.dummy_context and passing the result to
# registry.build_loader_fn_require_space.
policy_registry.register(
    "random",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(RandomPolicy),
    ),
)

# Register the "zero" policy loader, built the same way from ZeroPolicy.
policy_registry.register(
    "zero",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(ZeroPolicy),
    ),
)


# For each (key -> (class path, attribute name)) entry, resolve the class via
# registry.load_attr, build a loader with _load_stable_baselines and register
# it under the key.
# NOTE(review): this definition continues past the end of the visible source;
# the body of the final `except` clause is not shown here.
def _add_stable_baselines_policies(classes):
    for k, (cls_name, attr) in classes.items():
        try:
            cls = registry.load_attr(cls_name)
            fn = _load_stable_baselines(cls, attr)
            policy_registry.register(k, value=fn)
        except (AttributeError, ImportError):
value=_load_reward_net_as_fn(k, False))
# NOTE(review): the line above is the tail of a call whose start lies outside
# the visible source; it is kept verbatim rather than re-indented.

# Module-level call; must run before the registry keys are read below.
_add_reward_net_as_fn_loaders(REWARD_NETS)


def load_zero(path: str, venv: VecEnv) -> RewardFn:
    """Build a reward function that always returns zeros.

    Args:
        path: Not read by this loader.
        venv: Not read by this loader.

    Returns:
        A function of `(old_obs, act, new_obs, steps)` returning
        `np.zeros(old_obs.shape[0])`, i.e. one zero per row of `old_obs`.
    """
    def f(old_obs: np.ndarray,
          act: np.ndarray,
          new_obs: np.ndarray,
          steps: np.ndarray) -> np.ndarray:
        # Only the leading dimension of `old_obs` is used; the other
        # arguments are ignored.
        return np.zeros(old_obs.shape[0])
    return f


# load_zero returns the function directly, so it is wrapped with
# registry.dummy_context before being registered under the "zero" key.
reward_fn_registry.register(key='zero', value=registry.dummy_context(load_zero))


# The decorator argument is evaluated when this `def` statement executes, so
# only keys registered above this point appear in `reward_types`.
@util.docstring_parameter(reward_types=", ".join(reward_fn_registry.keys()))
def load_reward(reward_type: str, reward_path: str,
                venv: VecEnv) -> ContextManager[RewardFn]:
    """Load serialized reward.

    Args:
        reward_type: A key in `reward_fn_registry`, e.g. `RewardNet`. Valid
            types include {reward_types}.
        reward_path: A path specifying the reward.
        venv: An environment that the reward is to be used with.

    Returns:
        The result of calling the loader registered under `reward_type` with
        `reward_path` and `venv`.
    """
    reward_loader = reward_fn_registry.get(reward_type)
    return reward_loader(reward_path, venv)
delta_vel = target_vel - vel delta_vel_norm = np.linalg.norm(delta_vel, ord=np.inf, axis=1).reshape(-1, 1) act = delta_vel / np.maximum(delta_vel_norm, 1e-4) act = act.clip(-1, 1) return act, None, None, None
# NOTE(review): the line above is the tail of a method whose start lies
# outside the visible source; it is kept verbatim rather than re-indented.

    # NOTE(review): indented as a method because it takes `self`; the enclosing
    # class header is not visible here -- confirm the nesting depth.
    def proba_step(self, obs, state=None, mask=None):
        """Not supported: always raises NotImplementedError."""
        raise NotImplementedError()


# Register custom policies with imitation
policy_serialize.policy_registry.register(
    key="evaluating_rewards/PointMassHardcoded-v0",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(PointMassPolicy)),
)

# Register custom rewards with evaluating_rewards
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassGroundTruth-v0",
    value=registry.build_loader_fn_require_space(PointMassGroundTruth),
)
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassSparseWithCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMassSparseReward),
)
# Same reward class as the previous entry, registered with ctrl_coef=0.0.
# NOTE(review): the closing parenthesis of this call lies beyond the end of
# the visible source.
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassSparseNoCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMassSparseReward, ctrl_coef=0.0),
del path, venv def f(obs: np.ndarray, act: np.ndarray, next_obs: np.ndarray, dones: np.ndarray) -> np.ndarray: del act, next_obs, dones # Unused. return np.zeros(obs.shape[0]) return f
# NOTE(review): the line above is the body of a definition whose `def` line
# lies outside the visible source; it is kept verbatim rather than re-indented.

# Populate the reward registry. These calls must run before `load_reward` is
# defined, because its decorator reads the registry keys at definition time.
reward_registry.register(key="DiscrimNet", value=_load_discrim_net)
reward_registry.register(key="RewardNet_shaped",
                         value=_load_reward_net_as_fn(shaped=True))
reward_registry.register(key="RewardNet_unshaped",
                         value=_load_reward_net_as_fn(shaped=False))
# load_zero is wrapped with registry.dummy_context before registration.
reward_registry.register(key="zero", value=registry.dummy_context(load_zero))


# The decorator argument is evaluated when this `def` statement executes, so
# only keys registered above this point appear in `reward_types`.
@util.docstring_parameter(reward_types=", ".join(reward_registry.keys()))
def load_reward(reward_type: str, reward_path: str,
                venv: VecEnv) -> ContextManager[common.RewardFn]:
    """Load serialized reward.

    Args:
        reward_type: A key in `reward_registry`. Valid types include
            {reward_types}.
        reward_path: A path specifying the reward.
        venv: An environment that the reward is to be used with.

    Returns:
        The result of calling the loader registered under `reward_type` with
        `reward_path` and `venv`.
    """
    reward_loader = reward_registry.get(reward_type)
    return reward_loader(reward_path, venv)