def __init__(
        self,
        action_space,
        mu=0,
        theta=0.15,
        max_sigma=0.3,
        min_sigma=0.3,
        decay_period=100000,
):
    assert len(action_space.shape) == 1
    # Record constructor args so the object can be serialized/cloned later
    Serializable.quick_init(self, locals())
    if min_sigma is None:
        min_sigma = max_sigma
    self.mu = mu
    self.theta = theta
    self.sigma = max_sigma
    self._max_sigma = max_sigma
    self._min_sigma = min_sigma
    self._decay_period = decay_period
    self.dim = np.prod(action_space.low.shape)
    self.low = action_space.low
    self.high = action_space.high
    # Noise process state starts at the mean mu
    self.state = np.ones(self.dim) * self.mu
    self.reset()
def __init__(self, init_sequential=False, eigreg=False, warmstart=True):
    self._init_sequential = init_sequential
    self._eigreg = eigreg
    self._warmstart = warmstart
    self._sigma = None
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    super(GMM, self).__init__()
def __init__(self, obs_dim, action_dim, hidden_sizes=(100, 100), **kwargs):
    QFunction.__init__(self, obs_dim=obs_dim, action_dim=action_dim)
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    self.save_init_params(locals())
    FlattenMlp.__init__(
        self,
        hidden_sizes=hidden_sizes,
        input_size=obs_dim + action_dim,
        output_size=1,
        **kwargs
    )
def __init__(
        self,
        env,
        reward_scale=1.,
        normalize_obs=False,
        online_normalization=False,
        obs_mean=None,
        obs_var=None,
        obs_alpha=0.001,
):
    # self._wrapped_env needs to be set first because
    # Serializable.quick_init calls getattr on this class, and the
    # implementation of __getattr__ (see below) delegates to
    # self._wrapped_env. Without setting it first, the call to
    # self._wrapped_env would trigger __getattr__ again (since the
    # attribute is not set yet) and therefore loop forever.
    self._wrapped_env = env
    # Or else serialization gets delegated to the wrapped_env. Serialize
    # this env separately from the wrapped_env.
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    # Observation Space
    if normalize_obs and online_normalization:
        raise AttributeError(
            "normalize_obs and online_normalization are mutually exclusive"
        )
    self._normalize_obs = normalize_obs
    self._online_normalize_obs = online_normalization
    if self._normalize_obs or self._online_normalize_obs:
        if obs_mean is None:
            obs_mean = np.zeros_like(env.observation_space.low)
        else:
            obs_mean = np.array(obs_mean)
        if obs_var is None:
            obs_var = np.ones_like(env.observation_space.low)
        else:
            obs_var = np.array(obs_var)
    self._obs_mean = obs_mean
    self._obs_var = obs_var
    self._obs_alpha = obs_alpha
    self._obs_mean_diff = np.zeros_like(env.observation_space.low)
    self._obs_n = 0

    # Action Space
    if isinstance(self._wrapped_env.action_space, Box):
        ub = np.ones(self._wrapped_env.action_space.shape)
        self.action_space = Box(-1 * ub, ub, dtype=np.float32)

    # Reward Scale
    self._reward_scale = reward_scale
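# NOTE: the comment at the top of the __init__ above refers to a __getattr__
# that delegates unknown attribute lookups to self._wrapped_env. That method
# is not reproduced in this snippet; the following is only a minimal sketch of
# what such a delegation typically looks like, not the exact implementation.
def __getattr__(self, attr):
    if attr == '_wrapped_env':
        # Guard against infinite recursion while _wrapped_env is not set yet.
        raise AttributeError('_wrapped_env is not set yet')
    return getattr(self._wrapped_env, attr)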
def __init__(self, action_space, max_sigma=1.0, min_sigma=None,
             decay_period=1000000):
    assert len(action_space.shape) == 1
    Serializable.quick_init(self, locals())
    self._max_sigma = max_sigma
    if min_sigma is None:
        min_sigma = max_sigma
    self._min_sigma = min_sigma
    self._decay_period = decay_period
    self._action_space = action_space
def __init__(self, horizon, obs_dim, action_dim):
    self._T = horizon
    Transition.__init__(self, obs_dim=obs_dim, action_dim=action_dim)
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    super(TVLGDynamics, self).__init__()

    # Time-varying linear-Gaussian dynamics parameters: linear term Fm,
    # offset fv, and noise covariance dyn_covar (one set per time step).
    self.Fm = nn.Parameter(
        ptu.zeros(horizon, obs_dim, obs_dim + action_dim))
    self.fv = nn.Parameter(ptu.ones(horizon, obs_dim))
    self.dyn_covar = nn.Parameter(ptu.zeros(horizon, obs_dim, obs_dim))

    # Prior
    self._prior = None
def save_init_params(self, locals):
    """
    Should call this FIRST THING in the __init__ method if you ever want
    to serialize or clone this network.

    Usage:
    ```
    def __init__(self, ...):
        self.save_init_params(locals())
        ...
    ```
    :param locals:
    :return:
    """
    Serializable.quick_init(self, locals)
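# Usage sketch for the pattern above. `MyNetwork` is a hypothetical
# Serializable subclass used only for illustration: quick_init records the
# constructor arguments, so Serializable.clone can later rebuild an
# equivalent object from them by re-running __init__ with the saved args.
class MyNetwork(Serializable):
    def __init__(self, obs_dim, hidden_size=32):
        Serializable.quick_init(self, locals())
        self.obs_dim = obs_dim
        self.hidden_size = hidden_size

net = MyNetwork(obs_dim=4, hidden_size=64)
net_clone = Serializable.clone(net)  # re-instantiates with the saved args
assert net_clone.hidden_size == 64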
def __getstate__(self):
    d = Serializable.__getstate__(self)
    # Add these explicitly in case they were modified
    d["_obs_mean"] = self._obs_mean
    d["_obs_var"] = self._obs_var
    d["_reward_scale"] = self._reward_scale
    return d
def __init__(
        self,
        action_space,
        horizon,
        smooth=True,
        renormalize=True,
        sigma=10.0,
        sigma_scale=None,
):
    Serializable.quick_init(self, locals())
    self._action_space = action_space
    self.low = action_space.low
    self.high = action_space.high
    self._horizon = horizon
    self._smooth = smooth
    self._renormalize = renormalize
    self._sigma = sigma

    if sigma_scale is None:
        self._sigma_scale = np.ones(self.action_dim)
    else:
        # Check if iterable
        try:
            iter(sigma_scale)
            if len(sigma_scale) != self.action_dim:
                raise ValueError(
                    "Sigma scale different than action dim "
                    "(%02d != %02d)"
                    % (len(sigma_scale), self.action_dim)
                )
            self._sigma_scale = sigma_scale
        except TypeError:
            # Scalar sigma_scale: broadcast to one value per action dimension
            self._sigma_scale = np.repeat(sigma_scale, self.action_dim)

    self.noise = None
    self.reset()
def __init__(self, wrapped_env):
    Serializable.quick_init(self, locals())
    self._wrapped_env = wrapped_env
    self.action_space = self._wrapped_env.action_space
    self.observation_space = self._wrapped_env.observation_space
def __init__(
        self,
        goal_reward=10,
        actuation_cost_coeff=30,
        distance_cost_coeff=1,
        log_distance_cost_coeff=1,
        alpha=1e-6,
        init_position=None,
        init_sigma=0.1,
        goal_position=None,
        goal_threshold=0.1,
        dynamics_sigma=0,
        horizon=None,
        subtask=None,
        seed=None,
):
    Serializable.__init__(self)
    Serializable.quick_init(self, locals())

    # Set the seed
    self.seed(seed)

    self._subtask = subtask

    # Point Dynamics
    self._dynamics = PointDynamics(dim=2, sigma=dynamics_sigma)

    # Initial Position
    if init_position is None:
        init_position = (0, 0)
    self.init_mu = np.array(init_position, dtype=np.float32)
    self.init_sigma = init_sigma

    # Goal Position
    if goal_position is None:
        self._goal_position = np.array([5, 5], dtype=np.float32)
    else:
        self._goal_position = np.array(goal_position, dtype=np.float32)

    # Masks
    self.goal_masks = [
        [True, True],
        [True, False],
        [False, True],
    ]

    # Reward-related Variables
    self._goal_threshold = goal_threshold
    self._goal_reward = goal_reward
    self._action_cost_coeff = actuation_cost_coeff
    self._distance_cost_coeff = distance_cost_coeff
    self._alpha = alpha
    self._log_distance_cost_coeff = log_distance_cost_coeff

    # Maximum Reward
    self._max_rewards = [0, 0, 0]
    reward_output = self.compute_reward(self.goal_position, np.zeros(2))
    self._max_rewards[0] = reward_output[0]
    self._max_rewards[1] = reward_output[2][0]
    self._max_rewards[2] = reward_output[2][1]

    # Bounds
    self._xlim = (-7, 7)
    self._ylim = (-7, 7)
    self._vel_bound = 1.

    self._observation = None

    # Main Rendering
    self._main_fig = None
    self._main_ax = None
    self._dynamic_line = None
    self._main_marker = None
    self._env_lines = list()

    # Subgoals Rendering
    self._subgoals_fig = None
    self._subgoals_ax = None
    self._dynamic_goals_lines = list()
    self._subgoal_markers = [None for _ in range(self.n_subgoals)]

    # Time-related Variables
    self._t_counter = 0
    self._horizon = horizon

    # Random
    self.np_random = np.random
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    self.set_param_values(d["params"])
def __getstate__(self):
    d = Serializable.__getstate__(self)
    d["params"] = self.get_param_values()
    return d
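# With the __getstate__/__setstate__ pair above, standard pickling preserves
# both the constructor arguments (via Serializable) and the learned parameter
# values. A usage sketch, where `policy` is a hypothetical instance of such a
# class:
import pickle

data = pickle.dumps(policy)    # stores init args plus get_param_values()
restored = pickle.loads(data)  # re-runs __init__, then set_param_values()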
def copy(self):
    # Rebuild the object from its recorded constructor args, then copy the
    # trained parameter values into the new instance.
    copy = Serializable.clone(self)
    ptu.copy_model_params_from_to(self, copy)
    return copy
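# Usage sketch for copy(): Serializable.clone rebuilds the network from its
# recorded constructor arguments (with fresh weights), and
# ptu.copy_model_params_from_to then copies the learned weights over, so the
# result is an independent module with identical parameters. `qf` below is a
# hypothetical instance of such a network, e.g. used to create a target
# network:
import torch

target_qf = qf.copy()
assert all(
    torch.equal(p1, p2)
    for p1, p2 in zip(qf.parameters(), target_qf.parameters())
)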
def __init__(self, action_space, prob_random_action=0.1):
    Serializable.quick_init(self, locals())
    assert isinstance(action_space, Discrete)
    self.prob_random_action = prob_random_action
    self.action_space = action_space
def __init__(
        self,
        obs_dim,
        n_vs,
        shared_hidden_sizes=None,
        unshared_hidden_sizes=None,
        hidden_activation='relu',
        hidden_w_init='xavier_normal',
        hidden_b_init_val=0,
        output_activation='linear',
        output_w_init='xavier_normal',
        output_b_init_val=0,
        shared_layer_norm=False,
        unshared_layer_norm=False,
        layer_norm_kwargs=None,
):
    VFunction.__init__(self, obs_dim=obs_dim)
    self._n_vs = n_vs

    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    super(NNMultiVFunction, self).__init__()

    if layer_norm_kwargs is None:
        layer_norm_kwargs = dict()

    self._hidden_activation = ptu.get_activation(hidden_activation)
    self._output_activation = ptu.get_activation(output_activation)

    self._shared_layer_norm = shared_layer_norm
    self._unshared_layer_norm = unshared_layer_norm

    self._sfcs = []  # Shared fully-connected layers
    self._sfc_norms = []  # Layer norms for the shared layers
    self._ufcs = [list() for _ in range(self._n_vs)]  # Per-head unshared layers
    self._ufc_norms = [list() for _ in range(self._n_vs)]  # Per-head layer norms
    self._ufcs_lasts = []  # Per-head output layers

    in_size = obs_dim

    # Shared Layers
    if shared_hidden_sizes is not None:
        for ii, next_size in enumerate(shared_hidden_sizes):
            sfc = nn.Linear(in_size, next_size)
            ptu.layer_init(
                layer=sfc,
                activation=hidden_activation,
                b=hidden_b_init_val,
            )
            self.__setattr__("sfc{}".format(ii), sfc)
            self._sfcs.append(sfc)

            if self._shared_layer_norm:
                ln = LayerNorm(next_size)
                self.__setattr__("sfc{}_norm".format(ii), ln)
                self._sfc_norms.append(ln)
            in_size = next_size

    # Unshared Layers
    if unshared_hidden_sizes is not None:
        for ii, next_size in enumerate(unshared_hidden_sizes):
            for q_idx in range(self._n_vs):
                ufc = nn.Linear(in_size, next_size)
                ptu.layer_init(
                    layer=ufc,
                    activation=hidden_activation,
                    b=hidden_b_init_val,
                )
                self.__setattr__("ufc{}_{}".format(q_idx, ii), ufc)
                self._ufcs[q_idx].append(ufc)

                if self._unshared_layer_norm:
                    ln = LayerNorm(next_size)
                    tmp_txt = "ufc{}_{}_norm".format(q_idx, ii)
                    self.__setattr__(tmp_txt, ln)
                    self._ufc_norms[q_idx].append(ln)
            in_size = next_size

    # Output layer for each value head
    for q_idx in range(self._n_vs):
        last_ufc = nn.Linear(in_size, 1)
        ptu.layer_init(
            layer=last_ufc,
            activation=output_activation,
            b=output_b_init_val,
        )
        self.__setattr__("ufc_last{}".format(q_idx), last_ufc)
        self._ufcs_lasts.append(last_ufc)
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    self._obs_mean = d["_obs_mean"]
    self._obs_var = d["_obs_var"]
    self._reward_scale = d["_reward_scale"]