def create_mask(self, *idcs):
    """
    Create a mask selecting the given indices from this space.
    Every index should be a number or a name in labels.

    :param idcs: index list, which can either be varargs or a single iterable
    :return: boolean mask array of the space's shape, which is `True` at each selected index
    :raises pyrado.TypeErr: if a label is passed, but the space is not labeled
    :raises pyrado.ValueErr: if a label can not be found, or if an index is selected more than once
    """
    mask = np.zeros(self.shape, dtype=np.bool_)

    if len(idcs) == 1 and isinstance(idcs[0], Iterable) and not isinstance(idcs[0], str):
        # Unwrap single iterable argument
        idcs = idcs[0]

    labels = self.labels

    # Set selected values to 1
    for idx in idcs:
        if isinstance(idx, str):
            # Handle labels. Raise explicitly instead of asserting, since assertions are stripped when Python
            # runs with -O and the failure would then surface later as an obscure error.
            if labels is None:
                raise pyrado.TypeErr(msg='The space must be labeled to use label-based indexing')
            for idx_label, label in np.ndenumerate(labels):
                if label == idx:
                    # Continue with the (tuple) index of the first matching label
                    idx = idx_label
                    break
            else:
                raise pyrado.ValueErr(msg=f'Label {idx} not found in {self}')

        # Every entry may only be selected once
        if np.all(mask[idx] == 1):
            label_desc = f' ({labels[idx]})' if labels is not None else ""
            raise pyrado.ValueErr(msg=f'Duplicate index {idx}{label_desc}')
        mask[idx] = 1

    return mask
def create_mask(self, *idcs) -> np.ndarray:
    """
    Build a boolean selection mask over this space.

    Each index is either a numeric index into the space or the name of one of the space's labels.

    :param idcs: indices to select, passed either as varargs or as one single (non-string) iterable
    :return: boolean array of the space's shape that is `True` exactly at the selected entries
    :raises pyrado.TypeErr: if a label is passed, but the space has no labels
    :raises pyrado.ValueErr: if a label is unknown, or if an entry is selected twice
    """
    selection = np.zeros(self.shape, dtype=np.bool_)

    # A lone non-string iterable is interpreted as the collection of indices itself
    if len(idcs) == 1:
        only_arg = idcs[0]
        if isinstance(only_arg, Iterable) and not isinstance(only_arg, str):
            idcs = only_arg

    for idx in idcs:
        if isinstance(idx, str):
            if self.labels is None:
                raise pyrado.TypeErr(msg="The space must be labeled to use label-based indexing!")

            # Translate the label into the (tuple) index of its first occurrence
            position = next((pos for pos, name in np.ndenumerate(self.labels) if name == idx), None)
            if position is None:
                raise pyrado.ValueErr(msg=f"Label {idx} not found in {self}")
            idx = position

        already_selected = np.all(selection[idx] == 1)
        if already_selected:
            label_desc = "" if self.labels is None else f" ({self.labels[idx]})"
            raise pyrado.ValueErr(msg=f"Duplicate index {idx}{label_desc}")

        selection[idx] = 1

    return selection
def __init__(self, wrapped_env: Env, explicit_lb: Mapping[str, float] = None, explicit_ub: Mapping[str, float] = None):
    """
    Constructor

    :param wrapped_env: environment to wrap
    :param explicit_lb: mapping from space labels to the lower bound that should replace the environment's one for
                        that entry; `None` (default) keeps the environment's lower bounds
    :param explicit_ub: mapping from space labels to the upper bound that should replace the environment's one for
                        that entry; `None` (default) keeps the environment's upper bounds
    :raises pyrado.ValueErr: if any resulting lower bound is `-inf` or any resulting upper bound is `+inf`
    """
    # Keep this the very first statement, it hands over locals() before any further local variable exists
    Serializable._init(self, locals())
    super().__init__(wrapped_env)

    # Remember the requested overrides
    self.explicit_lb, self.explicit_ub = explicit_lb, explicit_ub

    # Bounds and labels of the wrapped environment's observation space
    inner_space = self.wrapped_env.obs_space
    inner_lb, inner_ub = inner_space.bounds

    # Apply the overrides and keep the result for usage in _process_obs
    self.ov_lb = ObsNormWrapper.override_bounds(inner_lb, self.explicit_lb, inner_space.labels)
    self.ov_ub = ObsNormWrapper.override_bounds(inner_ub, self.explicit_ub, inner_space.labels)

    # Infinite bounds are rejected
    lb_is_inf = self.ov_lb == -pyrado.inf
    if any(lb_is_inf):
        raise pyrado.ValueErr(
            msg=f'At least one element of the lower bounds is (negative) infinite:\n'
                f'(overwritten) bound: {self.ov_lb}\nnames: {inner_space.labels}'
        )
    ub_is_inf = self.ov_ub == pyrado.inf
    if any(ub_is_inf):
        raise pyrado.ValueErr(
            msg=f'At least one element of the upper bound is (positive) infinite:\n'
                f'(overwritten) bound: {self.ov_ub}\nnames: {inner_space.labels}'
        )
def eval_init_policies(self):
    """
    Execute the trained initial policies on the target device and store the estimated return per candidate.
    The number of initial policies to evaluate is the number of found policies.

    The results are written to `self.cands` and `self.cands_values`, and the values are additionally saved to
    the experiment's directory.

    :raises pyrado.ValueErr: if no initial policies or candidates were found, or if their numbers differ
    """
    # Only consider the files directly inside the experiment's directory (no subdirectories). If the directory
    # can not be walked, e.g. because it does not exist, fall back to an empty file list such that the checks
    # below raise a meaningful error instead of failing on unbound variables.
    _, _, files = next(os.walk(self.save_dir), (None, [], []))
    found_policies = [p for p in files if p.startswith('init_') and p.endswith('_policy.pt')]
    found_cands = [c for c in files if c.startswith('init_') and c.endswith('_candidate.pt')]

    if len(found_policies) != len(found_cands):
        raise pyrado.ValueErr(msg='Found a different number of initial policies than candidates!')
    if not found_policies:
        raise pyrado.ValueErr(msg='No policies or candidates found!')

    num_init_cand = len(found_cands)
    cands_values = to.empty(num_init_cand)

    # Load all found candidates to save them into a single tensor
    found_cands = natural_sort(found_cands)  # the order is important since it determines the rows of the tensor
    cands = to.stack([to.load(osp.join(self.save_dir, c)) for c in found_cands])

    # Evaluate learned policies from random candidates on the target environment (real-world) system
    for i in range(num_init_cand):
        policy = pyrado.load(self.policy, 'policy', 'pt', self.save_dir, meta_info=dict(prefix=f'init_{i}'))
        cands_values[i] = self.eval_policy(
            self.save_dir,
            self._env_real,
            policy,
            self.mc_estimator,
            prefix=f'init_{i}',
            num_rollouts=self.num_eval_rollouts_real,
        )

    # Save the candidates' returns (the candidates and policies are saved during training or exist already)
    pyrado.save(cands_values, 'candidates_values', 'pt', self.save_dir, meta_info=None)
    self.cands, self.cands_values = cands, cands_values
def from_stacked(dim: int, stacked: np.ndarray) -> "MultivariateNormalWrapper":
    r"""
    Creates an instance of this class from the given stacked numpy array as generated e.g. by
    `MultivariateNormalWrapper.get_stacked(self)`.

    :param dim: dimensionality `k` of the random variable
    :param stacked: array containing the mean and standard deviations of shape `(2 * k,)`, where the first `k`
                    entries are the mean and the last `k` entries are the standard deviations
    :return: a `MultivariateNormalWrapper` with the given mean/cov.
    :raises pyrado.ValueErr: if `stacked` is not 1-dimensional or does not have `2 * dim` entries
    """
    if not (len(stacked.shape) == 1):
        raise pyrado.ValueErr(msg="Stacked has invalid shape! Must be 1-dimensional.")
    if not (stacked.shape[0] == 2 * dim):
        # Note the space after the exclamation mark, the adjacent literals are concatenated without a separator
        raise pyrado.ValueErr(
            msg="Stacked has invalid size! "
                "Must be 2*dim (one times for mean, a second time for covariance cholesky diagonal)."
        )

    # First half holds the mean, second half the flattened entries for the covariance
    mean = stacked[:dim]
    cov_chol_flat = stacked[dim:]

    return MultivariateNormalWrapper(to.tensor(mean).double(), to.tensor(cov_chol_flat).double())
def make_snapshot(self, snapshot_mode: str, curr_avg_ret: float = None, meta_info: dict = None):
    """
    Decide, based on the snapshot mode, if the current training progress is stored, and if so delegate to the
    custom method `save_snapshot()`.

    :param snapshot_mode: `'latest'` saves on every call, `'best'` saves only if `curr_avg_ret` exceeds the
                          highest average return seen so far, `'no'` (or `'None'`) never saves
    :param curr_avg_ret: current average return, required for the snapshot mode `'best'`
    :param meta_info: forwarded to `save_snapshot()`; is not `None` if this algorithm is run as a subroutine of a
                      meta-algorithm, then it contains a dict of information about the meta-algorithm's iteration
    :raises pyrado.ValueErr: if the mode is unknown, or if the mode is `'best'` and `curr_avg_ret` is `None`
    """
    if snapshot_mode == "latest":
        self.save_snapshot(meta_info)
        return

    if snapshot_mode == "best":
        if curr_avg_ret is None:
            raise pyrado.ValueErr(msg="curr_avg_ret must not be None when snapshot_mode = 'best'!")
        is_new_highscore = curr_avg_ret > self._highest_avg_ret
        if is_new_highscore:
            self._highest_avg_ret = curr_avg_ret
            self.save_snapshot(meta_info)
        return

    # Everything except the explicit "don't save" modes is an error
    if snapshot_mode not in {"no", "None"}:
        raise pyrado.ValueErr(given=snapshot_mode, eq_constraint="'latest', 'best', or 'no'")
def _process_obs_space(self, space: BoxSpace) -> BoxSpace:
    """
    Compute the observation space as it looks after normalization.

    :param space: observation space to normalize, must be a `BoxSpace`
    :return: `BoxSpace` with the normalized bounds and the labels of the given space
    :raises NotImplementedError: if `space` is not a `BoxSpace`
    :raises pyrado.ValueErr: if a lower bound is `-inf` or an upper bound is `+inf` after overriding
    """
    if not isinstance(space, BoxSpace):
        raise NotImplementedError('Only implemented ObsNormWrapper._process_obs_space() for BoxSpace!')

    # Bounds of the given space, and the (possibly overridden) bounds used for the normalization
    orig_lb, orig_ub = space.bounds
    lb_ov = ObsNormWrapper.override_bounds(orig_lb, self.explicit_lb, 'lower', space.labels)
    ub_ov = ObsNormWrapper.override_bounds(orig_ub, self.explicit_ub, 'upper', space.labels)

    lower_is_inf = lb_ov == -pyrado.inf
    if any(lower_is_inf):
        raise pyrado.ValueErr(
            msg=f'At least one element of the lower bounds is (negative) infinite:\n'
                f'(overwritten) bound: {lb_ov}\nnames: {space.labels}'
        )
    upper_is_inf = ub_ov == pyrado.inf
    if any(upper_is_inf):
        raise pyrado.ValueErr(
            msg=f'At least one element of the upper bound is (positive) infinite:\n'
                f'(overwritten) bound: {ub_ov}\nnames: {space.labels}'
        )

    # Report actual bounds, which are not +-1 for overridden fields
    span = ub_ov - lb_ov
    lb_norm = (orig_lb - lb_ov) / span * 2 - 1
    ub_norm = (orig_ub - lb_ov) / span * 2 - 1
    return BoxSpace(lb_norm, ub_norm, labels=space.labels)
def init_param(self, init_values: to.Tensor = None, **kwargs):
    """
    Initialize the policy parameters, either from explicit values or from a prior.

    :param init_values: parameter values to set; if given, they take precedence over the prior
    :param kwargs: may contain the key `prior`, which must be a `DomainRandomizer`; its domain parameters are used
                   to initialize all entries of the mapping that they provide a value for
    :raises pyrado.TypeErr: if a prior is given that is not a `DomainRandomizer`
    :raises pyrado.ValueErr: if neither values nor a prior are given, or if a parameter became NaN
    """
    prior = kwargs.get('prior', None)

    if init_values is not None:
        # First check if there are some specific values to set
        self.param_values = init_values

    elif prior is not None:
        # Prior information is expected to be in form of a DomainRandomizer since it holds the distributions
        if not isinstance(prior, DomainRandomizer):
            raise pyrado.TypeErr(given=prior, expected_type=DomainRandomizer)

        # For every domain distribution parameter in the mapping, check if there is prior information
        for idx, ddp in self.mapping.items():
            for dp in prior.domain_params:
                if ddp[0] == dp.name and ddp[1] in dp.get_field_names():
                    # The domain parameter exists in the prior and in the mapping
                    val = getattr(dp, ddp[1])
                    if self.mask[idx]:
                        # Log-transform since it will later be exp-transformed
                        self.params[idx].data.fill_(to.log(to.tensor(val)))
                    else:
                        self.params[idx].data.fill_(to.tensor(val))
                    if to.any(to.isnan(self.params[idx].data)):
                        # E.g. caused by the logarithm of a negative value
                        raise pyrado.ValueErr(
                            msg='DomainDistrParamPolicy parameter became NaN during '
                                'initialization! Check the mask and negative mean values.'
                        )

    else:
        raise pyrado.ValueErr(
            msg='DomainDistrParamPolicy needs to be initialized! Either with a set of policy '
                'parameters, or with a prior in form of a DomainRandomizer!'
        )

    if self._scale_params:
        # After initializing, we have an estimate on the magnitude of the policy parameters. Usually, the
        # non-transformed means are a magnitude smaller than e.g. the transformed stds. Thus, we will approximately
        # project them to [-0.5, 0.5]
        self.param_scaler = MinMaxScaler(bound_lo=-0.5, bound_up=0.5)
        self.params.data = self.param_scaler.scale_to(self.params.data)  # params now in [-0.5, 0.5]
def __init__(self,
             vfcn: [nn.Module, Policy],
             gamma: float = 0.99,
             lamda: float = 0.95,
             num_epoch: int = 10,
             batch_size: int = 64,
             standardize_adv: bool = True,
             standardizer: [None, RunningStandardizer] = None,
             max_grad_norm: float = None,
             lr: float = 5e-4,
             lr_scheduler=None,
             lr_scheduler_hparam: [dict, None] = None):
    r"""
    Constructor

    :param vfcn: value function, which can be a `FNN` or a `Policy`
    :param gamma: temporal discount factor
    :param lamda: regulates the trade-off between bias (max for 0) and variance (max for 1), see [1]
    :param num_epoch: number of iterations over all gathered samples during one estimator update
    :param batch_size: number of samples per estimator update batch
    :param standardize_adv: if `True`, the advantages are standardized to be $~ N(0,1)$
    :param standardizer: pass `None` to use stateless standardisation, alternatively pass `RunningStandardizer()`
                         to use a standardizer which keeps track of past values
    :param max_grad_norm: maximum L2 norm of the gradients for clipping, set to `None` to disable gradient clipping
    :param lr: (initial) learning rate for the optimizer which can be modified by the scheduler.
               By default, the learning rate is constant.
    :param lr_scheduler: learning rate scheduler that does one step per epoch (pass through the whole data set)
    :param lr_scheduler_hparam: hyper-parameters for the learning rate scheduler, `None` is treated like an
                                empty dict
    :raises pyrado.TypeErr: if `vfcn` is neither a `nn.Module` nor a `Policy`
    :raises pyrado.ShapeErr: if `vfcn` is a `Policy` and its action space is not the `ValueFunctionSpace`
    :raises pyrado.ValueErr: if `gamma` or `lamda` are not in [0, 1]
    """
    if not isinstance(vfcn, (nn.Module, Policy)):
        raise pyrado.TypeErr(given=vfcn, expected_type=[nn.Module, Policy])
    if isinstance(vfcn, Policy):
        if not vfcn.env_spec.act_space == ValueFunctionSpace:
            raise pyrado.ShapeErr(msg='The given act_space held by the vfcn should be a ValueFunctionSpace.')
    if not 0 <= gamma <= 1:
        raise pyrado.ValueErr(given=gamma, ge_constraint='0', le_constraint='1')
    if not 0 <= lamda <= 1:
        raise pyrado.ValueErr(given=lamda, ge_constraint='0', le_constraint='1')

    # Call Module's constructor
    super().__init__()

    # Store the inputs
    self._vfcn = vfcn
    self.gamma = gamma
    self.lamda = lamda
    self.num_epoch = num_epoch
    self.batch_size = batch_size
    self.max_grad_norm = max_grad_norm
    self.standardize_adv = standardize_adv
    self.standardizer = standardizer

    # Initialize
    self.loss_fcn = nn.MSELoss()
    self.optim = to.optim.Adam(self._vfcn.parameters(), lr=lr, eps=1e-5)
    self._lr_scheduler = lr_scheduler
    self._lr_scheduler_hparam = lr_scheduler_hparam
    if lr_scheduler is not None:
        # The hyper-parameters default to None, which can not be unpacked with **
        self._lr_scheduler = lr_scheduler(self.optim, **(lr_scheduler_hparam or {}))
def __init__(self,
             noise_dim: [int, tuple],
             std_init: [float, to.Tensor],
             std_min: [float, to.Tensor] = 0.01,
             train_mean: bool = False,
             learnable: bool = True):
    """
    Constructor

    :param noise_dim: number of dimension
    :param std_init: initial standard deviation for the exploration noise, either one float for all dimensions or
                     a tensor holding one value per dimension
    :param std_min: minimal standard deviation for the exploration noise
    :param train_mean: `True` if the noise should have an adaptive nonzero mean, `False` otherwise
    :param learnable: `True` if the parameters should be tuneable (default), `False` for shallow use (just sampling)
    :raises pyrado.TypeErr: if `std_init` or `std_min` is neither a float nor a tensor
    :raises pyrado.ShapeErr: if `std_init` is a tensor which does not match `noise_dim`
    :raises pyrado.ValueErr: if `std_init` or `std_min` is not strictly positive
    """
    if not isinstance(std_init, (float, to.Tensor)):
        raise pyrado.TypeErr(given=std_init, expected_type=(float, to.Tensor))
    if isinstance(std_init, to.Tensor):
        # A torch.Size never compares equal to a plain int, thus bring the expected shape into tuple form first
        expected_shape = (noise_dim,) if isinstance(noise_dim, int) else tuple(noise_dim)
        if tuple(std_init.size()) != expected_shape:
            raise pyrado.ShapeErr(given=std_init, expected_match=to.empty(noise_dim))
    if not (isinstance(std_init, float) and std_init > 0 or
            isinstance(std_init, to.Tensor) and all(std_init > 0)):
        raise pyrado.ValueErr(given=std_init, g_constraint='0')
    if not isinstance(std_min, (float, to.Tensor)):
        raise pyrado.TypeErr(given=std_min, expected_type=(float, to.Tensor))
    if not (isinstance(std_min, float) and std_min > 0 or
            isinstance(std_min, to.Tensor) and all(std_min > 0)):
        raise pyrado.ValueErr(given=std_min, g_constraint='0')

    super().__init__()

    # Register parameters (their values are not initialized at this point)
    if learnable:
        self.cov = nn.Parameter(to.Tensor(noise_dim, noise_dim), requires_grad=True)
        self.mean = nn.Parameter(to.Tensor(noise_dim), requires_grad=True) if train_mean else None
    else:
        self.cov = to.empty(noise_dim, noise_dim)
        self.mean = None

    # Initialize parameters, the initial covariance is diagonal
    if isinstance(std_init, float):
        self.cov_init = std_init**2 * to.eye(noise_dim)
    else:
        self.cov_init = to.diag(to.pow(std_init, 2))
    self.std_min = to.tensor(std_min) if isinstance(std_min, float) else std_min
    if not isinstance(self.cov_init, to.Tensor):
        raise pyrado.TypeErr(given=self.cov_init, expected_type=to.Tensor)
    if not isinstance(self.std_min, to.Tensor):
        raise pyrado.TypeErr(given=self.std_min, expected_type=to.Tensor)

    self.reset_expl_params()
def __init__( self, num_dof: int, max_steps: int, dt: float = 1 / 500.0, ip: Optional[str] = "192.168.2.2", ): """ Constructor :param num_dof: number of degrees of freedom (4 or 7), depending on which Barrett WAM setup being used :param max_steps: maximum number of time steps :param dt: sampling time interval, changing this value is highly discouraged :param ip: IP address of the PC controlling the Barrett WAM, pass `None` to skip connecting """ # Make sure max_steps is reachable if not max_steps < pyrado.inf: raise pyrado.ValueErr(given=max_steps, given_name="max_steps", l_constraint=pyrado.inf) # Call the base class constructor to initialize fundamental members super().__init__(dt, max_steps) # Create the robcom client and connect to it. Use a Process to timeout if connection cannot be established. self._connected = False self._client = robcom.Client() self._robot_group_name = "RIGHT_ARM" try: self._client.start(ip, 2013, 1000) # ip address, port, timeout in ms self._connected = True print_cbt("Connected to the Barret WAM client.", "c", bright=True) except RuntimeError: print_cbt("Connection to the Barret WAM client failed!", "r", bright=True) self._jg = self._client.robot.get_group([self._robot_group_name]) self._dc = None # direct-control process self._t = None # only needed for WAMBallInCupRealStepBased # Desired joint position for the initial state and indices of the joints the policy operates on self._num_dof = num_dof if self._num_dof == 4: self._qpos_des_init = INIT_QPOS_DES_4DOF self._idcs_act = [0, 1, 2, 3] # use all joints by default elif self._num_dof == 7: self._qpos_des_init = INIT_QPOS_DES_7DOF self._idcs_act = [0, 1, 2, 3, 4, 5, 6] # use all joints by default else: raise pyrado.ValueErr(given=self._num_dof, eq_constraint="4 or 7") # Initialize task self._task = self._create_task(task_args=dict()) # Trajectory containers (are set in reset()) self.qpos_real = None self.qvel_real = None self.qpos_des = None self.qvel_des = None
def from_stacked(self, stacked: np.ndarray) -> "ParameterAgnosticMultivariateNormalWrapper":
    """
    Builds a new `ParameterAgnosticMultivariateNormalWrapper` from the given stacked values. In contrast to
    `MultivariateNormalWrapper.from_stacked(dim, stacked)`, this does not require a dimensionality as it is an
    instance rather than a static method. Also, the stacked representation has to contain either the mean or the
    standard deviations or both, according to the values originally passed to the constructor. If one of them is
    not treated as a parameter, the current value is copied instead.

    :param stacked: the stacked representation of the parameters according to the documentation above; can have
                    either shape `(0,)`, `(k,)`, or `(2 * k)`
    :return: a `ParameterAgnosticMultivariateNormalWrapper` with the new values for the parameters
    :raises pyrado.ValueErr: if `stacked` is not 1-dimensional, or if its size does not match the number of
                             quantities that are treated as parameters
    """
    if not (len(stacked.shape) == 1):
        raise pyrado.ValueErr(msg="Stacked has invalid shape! Must be 1-dimensional.")

    # One block of size dim is expected for every quantity that is treated as a parameter
    expected_dim_multiplier = 0
    if self._mean_is_parameter:
        expected_dim_multiplier += 1
    if self._cov_is_parameter:
        expected_dim_multiplier += 1
    if not (stacked.shape[0] == expected_dim_multiplier * self.dim):
        raise pyrado.ValueErr(msg=f"Stacked has invalid size! Must be {expected_dim_multiplier}*dim.")

    # Take the parameters from the stacked array and everything else from this instance
    if self._mean_is_parameter and self._cov_is_parameter:
        mean = stacked[:self.dim]
        cov_chol_flat = stacked[self.dim:]
    elif self._mean_is_parameter and not self._cov_is_parameter:
        mean = stacked[:self.dim]
        cov_chol_flat = self.cov_chol_flat
    elif not self._mean_is_parameter and self._cov_is_parameter:
        mean = self.mean
        cov_chol_flat = stacked
    else:
        mean = self.mean
        cov_chol_flat = self.cov_chol_flat

    # Convert numpy arrays (including subclasses) to double precision tensors
    if isinstance(mean, np.ndarray):
        mean = to.tensor(mean).double()
    if isinstance(cov_chol_flat, np.ndarray):
        cov_chol_flat = to.tensor(cov_chol_flat).double()

    return ParameterAgnosticMultivariateNormalWrapper(
        mean=mean,
        cov_chol_flat=cov_chol_flat,
        mean_is_parameter=self._mean_is_parameter,
        cov_is_parameter=self._cov_is_parameter,
    )
def skyline(
    dt: Union[int, float, np.ndarray],
    t_end: Union[int, float, np.ndarray],
    t_intvl_space: BoxSpace,
    val_space: BoxSpace,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Generate a piecewise constant ("skyline") signal. A value is sampled uniformly from `val_space` and held for a
    duration that is sampled uniformly from `t_intvl_space`. This is repeated until the final time is reached,
    i.e. the signal has about `t_end / dt` samples.

    :param dt: time step size
    :param t_end: final time
    :param t_intvl_space: 1-dim `BoxSpace` determining the range of time intervals that can be sampled
    :param val_space: 1-dim `BoxSpace` determining the range of values that can be sampled
    :return: array of time steps together with the associated array of values
    :raises pyrado.ValueErr: if `dt` is not positive or `t_end` is smaller than `dt`
    :raises pyrado.TypeErr: if one of the spaces is not a `BoxSpace`
    :raises pyrado.ShapeErr: if one of the spaces is not 1-dimensional
    """
    if dt <= 0:
        raise pyrado.ValueErr(given=dt, g_constraint="0")
    if t_end < dt:
        raise pyrado.ValueErr(given=t_end, ge_constraint=f"{dt}")
    # Check the types of both spaces first, then their dimensions
    for space in (t_intvl_space, val_space):
        if not isinstance(space, BoxSpace):
            raise pyrado.TypeErr(given=space, expected_type=BoxSpace)
    for space in (t_intvl_space, val_space):
        if not space.flat_dim == 1:
            raise pyrado.ShapeErr(given=space, expected_match=(1, ))

    dt = np.asarray(dt, dtype=np.float32)
    t_end = np.asarray(t_end, dtype=np.float32)

    # The first segment starts at time zero
    duration = np.clip(t_intvl_space.sample_uniform(), dt, t_end + dt)
    t = np.arange(start=0.0, stop=duration, step=dt)
    vals = val_space.sample_uniform() * np.ones_like(t)

    # Append further segments until the final time is covered
    while t[-1] < t_end:
        t_last = t[-1]
        duration = np.clip(t_intvl_space.sample_uniform(), dt, t_end - t_last + dt)
        t_segment = np.arange(start=t_last + dt, stop=t_last + duration, step=dt)
        t = np.concatenate((t, t_segment))
        val_segment = val_space.sample_uniform() * np.ones_like(t_segment)
        vals = np.concatenate((vals, val_segment))

    return t, vals
def unstandardize(self, data: Union[np.ndarray, to.Tensor]) -> Union[np.ndarray, to.Tensor]:
    r"""
    Undo a previous standardization, i.e. scale the input with the stored standard deviation and shift it by the
    stored mean to make it $~ N(\mu, \sigma)$.

    .. note::
        If the type of the input (ndarray or Tensor) differs from the type of the stored statistics, the stored
        mean and standard deviation are converted to the input's type and kept that way.

    :param data: input ndarray or Tensor
    :return: un-standardized ndarray or Tensor
    :raises pyrado.ValueErr: if no mean or standard deviation has been stored yet
    """
    if self.mean is None or self.std is None:
        raise pyrado.ValueErr(msg="Use standardize before unstandardize!")

    data_is_array = isinstance(data, np.ndarray)
    data_is_tensor = isinstance(data, to.Tensor)

    # Only mixed combinations require a conversion of the stored statistics
    if data_is_array and isinstance(self.mean, to.Tensor):
        self.mean = self.mean.numpy()
        self.std = self.std.numpy()
    elif data_is_tensor and isinstance(self.mean, np.ndarray):
        default_dtype = to.get_default_dtype()
        self.mean = to.from_numpy(self.mean).to(default_dtype)
        self.std = to.from_numpy(self.std).to(default_dtype)

    return data * self.std + self.mean
def __init__(self, *args, **kwargs):
    """
    Constructor

    :param expl_r_init: (mandatory keyword argument) initial radius of the hyper sphere for the exploration
                        strategy, must be positive
    :param expl_r_min: (optional keyword argument) stored as `expl_r_min`, 0.01 by default
    :param expl_r_max: (optional keyword argument) `expl_r_max` is set to the maximum of this value (10 by default)
                       and `expl_r_init`
    :param args: forwarded the superclass constructor
    :param kwargs: forwarded the superclass constructor, after removing the keyword arguments listed above
    :raises pyrado.ValueErr: if `expl_r_init` is not positive
    """
    # Take everything out of kwargs that the superclass constructor must not receive
    expl_r_init = kwargs.pop('expl_r_init')
    if expl_r_init <= 0:
        raise pyrado.ValueErr(given=expl_r_init, g_constraint='0')

    # Silently discarded, this is just for the ability to create one common hyper-param list for HCNormal and
    # HCHyper
    kwargs.pop('expl_std_init', None)

    r_min = kwargs.pop('expl_r_min', 0.01)
    r_max = kwargs.pop('expl_r_max', 10.)
    self.expl_r_min = r_min
    self.expl_r_max = max(expl_r_init, r_max)

    # Call HC's constructor
    super().__init__(*args, **kwargs)

    self._expl_strat = HyperSphereParamNoise(param_dim=self._policy.num_param, expl_r_init=expl_r_init)
def __init__(self,
             save_dir: str,
             max_iter: int,
             policy: Optional[Policy],
             logger: Optional[StepLogger] = None,
             save_name: str = 'algo'):
    """
    Constructor

    :param save_dir: directory to save the snapshots i.e. the results in
    :param max_iter: maximum number of iterations, must be a positive integer
    :param policy: Pyrado policy (subclass of PyTorch's Module) to train
    :param logger: logger for every step of the algorithm, if `None` the default logger will be created
    :param save_name: name of the algorithm's pickle file without the ending, this becomes important if the
                      algorithm is run as a subroutine
    :raises pyrado.ValueErr: if `max_iter` is not a positive integer
    :raises pyrado.TypeErr: if `policy`, `logger`, or `save_name` have the wrong type
    """
    # Both conditions must hold, i.e. the negation has to span the complete conjunction
    if not (isinstance(max_iter, int) and max_iter > 0):
        raise pyrado.ValueErr(given=max_iter, g_constraint='0')
    if not isinstance(policy, Policy) and policy is not None:
        raise pyrado.TypeErr(msg='If a policy is given, it needs to be of type Policy!')
    if not isinstance(logger, StepLogger) and logger is not None:
        raise pyrado.TypeErr(msg='If a logger is given, it needs to be of type StepLogger!')
    if not isinstance(save_name, str):
        raise pyrado.TypeErr(given=save_name, expected_type=str)

    self._save_dir = save_dir
    self._save_name = save_name
    self._max_iter = max_iter
    self._curr_iter = 0
    self._policy = policy
    self._logger = logger
    self._cnt_samples = 0
    self._highest_avg_ret = -pyrado.inf  # for snapshot_mode = 'best'
def __init__(self, wrapped_env: Union[SimEnv, EnvWrapper], mask: Union[List[str], Tuple[str]]):
    """
    Constructor

    :param wrapped_env: environment to wrap
    :param mask: every domain parameters which names are in this mask will be transformed. Capitalisation matters.
    :raises pyrado.TypeErr: if `wrapped_env` is neither a `SimEnv` nor an `EnvWrapper`, or if `mask` is neither a
                            list nor a tuple
    :raises pyrado.ValueErr: if `mask` contains a name which is not among the wrapped environment's supported
                             domain parameters
    """
    if not isinstance(wrapped_env, (SimEnv, EnvWrapper)):
        raise pyrado.TypeErr(given=wrapped_env, expected_type=(SimEnv, EnvWrapper))
    if not isinstance(mask, (list, tuple)):
        # Report the mask, since it is the input that has the wrong type
        raise pyrado.TypeErr(given=mask, expected_type=(list, tuple))

    # Hand over locals() before any further local variable is created
    Serializable._init(self, locals())

    # Call EnvWrapper's constructor
    super().__init__(wrapped_env)

    if any(item not in wrapped_env.supported_domain_param for item in mask):
        raise pyrado.ValueErr(
            msg=f"The specified mask {mask} contains domain parameters that are not supported by the wrapped "
                f"environment! Here are the supported domain parameters {wrapped_env.supported_domain_param}."
        )
    self._mask = mask
def generate_oscillation_data(dt, t_end, excitation):
    """
    Roll out a `OneMassOscillatorSim` to obtain a 1-dim oscillation signal. The oscillator is excited either by an
    initial displacement or by a force policy.

    :param dt: time step size [s]
    :param t_end: time duration [s]
    :param excitation: type of excitation, either (initial) 'position' or 'force' (function of time)
    :return: first column of the rollout's observations
    :raises pyrado.ValueErr: if `excitation` is neither 'force' nor 'position'
    """
    num_steps = np.ceil(t_end / dt)
    env = OneMassOscillatorSim(dt, num_steps)
    env.domain_param = dict(m=1., k=10., d=2.0)

    if excitation == 'force':
        # Start at rest and let the time-based policy apply the impulse function
        init_state = np.array([0, 0])
        impulse_fcn = functools.partial(_dirac_impulse, env_spec=env.spec, amp=0.5)
        policy = TimePolicy(env.spec, impulse_fcn, dt)
    elif excitation == 'position':
        # Start with a displacement and do not act
        init_state = np.array([0.5, 0])
        policy = IdlePolicy(env.spec)
    else:
        raise pyrado.ValueErr(given=excitation, eq_constraint="'force' or 'position'")

    # Generate the data
    ro = rollout(env, policy, reset_kwargs=dict(init_state=init_state), record_dts=False)
    return ro.observations[:, 0]
def get_rollout(self, index):
    """
    Select one or several sub-rollouts.

    :param index: generic index of the sub-rollout(s); negative values, slices and iterables are allowed
    :return: selected subset; for iterables and slices with a step other than 1, the individually selected
             sub-rollouts are concatenated
    :raises pyrado.ValueErr: if the data is not continuous
    """
    if not self.continuous:
        raise pyrado.ValueErr(msg='Sub-rollouts are only supported on continuous data.')

    if isinstance(index, slice):
        first, stop, stride = index.indices(self.rollout_count)
        if stride == 1:
            # Neighboring rollouts can be cut out in one piece
            rollout_bounds = self._rollout_bounds
            return self[rollout_bounds[first]:rollout_bounds[stop]]
        # Any other slice is processed like an iterable of rollout indices
        index = range(first, stop, stride)

    if isinstance(index, Iterable):
        # Select every rollout on its own and glue the results together
        selected = [self.get_rollout(i) for i in index]
        return StepSequence.concat(selected, self.data_format)

    # A single index remains
    rollout_idx = _index_to_int(index, self.rollout_count)
    rollout_bounds = self._rollout_bounds
    return self[rollout_bounds[rollout_idx]:rollout_bounds[rollout_idx + 1]]
def __init__(self, num_checkpoints: int, init_checkpoint: int = 0, *args, **kwargs):
    """
    Constructor

    :param num_checkpoints: total number of checkpoints, at least 1
    :param init_checkpoint: initial value of the cyclic counter, defaults to 0; negative values can be used to
                            mark sections that should only be executed once
    :param args: positional arguments forwarded to Algorithm's constructor
    :param kwargs: keyword arguments forwarded to Algorithm's constructor
    :raises pyrado.TypeErr: if `num_checkpoints` or `init_checkpoint` is not an integer
    :raises pyrado.ValueErr: if `num_checkpoints` is smaller than 1
    """
    # Validate the number of checkpoints before looking at the initial checkpoint
    if not isinstance(num_checkpoints, int):
        raise pyrado.TypeErr(given=num_checkpoints, expected_type=int)
    elif num_checkpoints < 1:
        raise pyrado.ValueErr(given=num_checkpoints, ge_constraint="1")

    if not isinstance(init_checkpoint, int):
        raise pyrado.TypeErr(given=init_checkpoint, expected_type=int)

    self._num_checkpoints, self._curr_checkpoint = num_checkpoints, init_checkpoint

    # Call Algorithm's constructor
    super().__init__(*args, **kwargs)
def print_cbt(msg: str, color: str = "", bright: bool = False, tag: str = "", end="\n"):
    """
    Print a message, optionally colored, bright, and preceded by a tag in brackets.

    :param msg: string to print
    :param color: color to print in (case-insensitive), given by name or first letter: white, yellow, blue, green,
                  red, or cyan; the default `''` (like white) adds no color code
    :param bright: flag if the message should be printed bright
    :param tag: tag to be printed in brackets in front of the message, `''` for no tag
    :param end: endline symbol forwarded to `print()`
    :raises pyrado.TypeErr: if `tag` is not a string
    :raises pyrado.ValueErr: if `color` is not supported
    """
    brgt = Style.BRIGHT if bright else ""

    if not isinstance(tag, str):
        raise pyrado.TypeErr(given=tag, expected_type=str)
    prefix = f"[{tag}] " if tag != "" else tag

    # Map every accepted spelling to the code that is printed first
    color_codes = {
        "": "", "w": "", "white": "",
        "y": Fore.YELLOW, "yellow": Fore.YELLOW,
        "b": Fore.BLUE, "blue": Fore.BLUE,
        "g": Fore.GREEN, "green": Fore.GREEN,
        "r": Fore.RED, "red": Fore.RED,
        "c": Fore.CYAN, "cyan": Fore.CYAN,
    }
    color = color.lower()
    if color not in color_codes:
        raise pyrado.ValueErr(given=color, eq_constraint="'y', 'b', 'g', 'r', or 'c'")

    print(color_codes[color] + brgt + prefix + msg + Style.RESET_ALL, end=end)
def __init__(self,
             num_feat_per_dim: int,
             bounds: [Sequence[np.ndarray], Sequence[to.Tensor], Sequence[float]],
             scale: float = None,
             state_wise_norm: bool = True):
    """
    Constructor

    :param num_feat_per_dim: number of radial basis functions, identical for every dimension of the input
    :param bounds: lower and upper bound for the Gaussians' centers, the input dimension is inferred from them
    :param scale: scaling factor for the squared distance, if `None` the factor is determined such that two
                  neighboring RBFs have a value of 0.2 at the other center
    :param state_wise_norm: `True` to apply the normalization across input state dimensions separately (every
                            dimension sums to one), or `False` to jointly normalize them
    :raises pyrado.ValueErr: if `num_feat_per_dim` is not greater than 1
    :raises pyrado.ShapeErr: if `bounds` does not consist of exactly two elements
    :raises pyrado.TypeErr: if a bound is neither an ndarray, a Tensor, an int, nor a float
    """
    if not num_feat_per_dim > 1:
        raise pyrado.ValueErr(given=num_feat_per_dim, g_constraint='1')
    if not len(bounds) == 2:
        raise pyrado.ShapeErr(given=bounds, expected_match=np.empty(2))

    # Convert both bounds, e.g. coming from the observation space, to tensors
    bounds_to = []
    for bound in bounds:
        if isinstance(bound, np.ndarray):
            converted = to.from_numpy(bound)
        elif isinstance(bound, to.Tensor):
            converted = bound.clone()
        elif isinstance(bound, (int, float)):
            converted = to.tensor(bound, dtype=to.get_default_dtype()).view(1, )
        else:
            raise pyrado.TypeErr(given=bound, expected_type=[np.ndarray, to.Tensor, int, float])
        bounds_to.append(converted)

    # Clip the bounds in case at least one of them is infinite
    contains_inf = any([any(np.isinf(b)) for b in bounds_to])
    if contains_inf:
        bound_lo, bound_up = [to.clamp(b, min=-1e6, max=1e6) for b in bounds_to]
        print_cbt('Clipped the bounds of the RBF centers to [-1e6, 1e6].', 'y')
    else:
        bound_lo, bound_up = bounds_to

    # Create a matrix with center locations for the Gaussians, one column for every input dimension
    num_dim = len(bound_lo)
    self.num_feat = num_feat_per_dim * num_dim
    self.centers = to.empty(num_feat_per_dim, num_dim)
    for dim_idx in range(num_dim):
        self.centers[:, dim_idx] = to.linspace(bound_lo[dim_idx], bound_up[dim_idx], num_feat_per_dim)

    if scale is not None:
        self.scale = scale
    else:
        # Derive the scale from the distance between the first two centers of every dimension
        delta_center = self.centers[1, :] - self.centers[0, :]
        self.scale = -to.log(to.tensor(0.2)) / to.pow(delta_center, 2)

    self._state_wise_norm = state_wise_norm
def pd_capacity_32_abs(p: to.Tensor, s: to.Tensor, h: to.Tensor, tau: to.Tensor, **kwargs) -> to.Tensor:
    r"""
    Capacity-based dynamics with 3 stable ($p=-C$, $p=0$, $p=C$) and 2 unstable fix points ($p=-C/2$, $p=C/2$)
    for $s=0$

    $\tau \dot{p} = \left( s + (h - p) (1 - \frac{\left| (h - p) \right|}{C})
    (1 - \frac{2 \left| (h - p) \right|}{C}) \right)$

    The "absolute version" of `pd_capacity_32` is less skewed due to a lower order of the resulting polynomial.

    .. note::
        Intended to be used with tanh activation function, e.g. for the velocity tasks in RcsPySim.

    :param p: potential, higher values lead to higher activations
    :param s: stimulus, higher values lead to larger changes of the potentials (depends on the dynamics function)
    :param h: resting level, a.k.a. constant offset
    :param tau: time scaling factor, higher values lead to slower changes of the potentials (linear dependency);
                all elements must be positive, any shape (including 0-dim) is accepted
    :param kwargs: additional parameters to the potential dynamics, must contain the key `capacity`
    :return: time derivative of the potential
    :raises pyrado.ValueErr: if any element of `tau` is not positive
    """
    # Use the tensor reduction, since the builtin all() can neither iterate over 0-dim tensors, nor evaluate
    # the truth value of the rows of multi-dim tensors
    if not to.all(tau > 0):
        raise pyrado.ValueErr(given=tau, g_constraint="0")

    capacity = kwargs["capacity"]
    dist = h - p
    return (
        s
        + dist
        * (to.ones_like(p) - to.abs(dist) / capacity)
        * (to.ones_like(p) - 2 * to.abs(dist) / capacity)
    ) / tau
def dt(self, dt: Union[int, float]):
    """
    Set the time step size.

    :param dt: new time step size, must be a positive int or float; it is stored as a float
    :raises pyrado.TypeErr: if `dt` is neither a float nor an int
    :raises pyrado.ValueErr: if `dt` is not positive
    """
    # Check the type first, such that unsupported inputs are reported as a type error instead of failing in
    # the comparison below
    if not isinstance(dt, (float, int)):
        raise pyrado.TypeErr(given=dt, expected_type=[float, int])
    if not dt > 0:
        raise pyrado.ValueErr(given=dt, g_constraint='0')
    self._dt = float(dt)
def rollout_lengths(self):
    """
    Lengths of the sub-rollouts, computed as the differences of consecutive entries of `self._rollout_bounds`.

    :return: one length per sub-rollout
    :raises pyrado.ValueErr: if `self.continuous` is false
    """
    if self.continuous:
        limits = self._rollout_bounds
        # Every sub-rollout spans from one bound to the next one
        return limits[1:] - limits[:-1]

    raise pyrado.ValueErr(msg='Sub-rollouts are only supported on continuous data.')
def scale_to(self, data: Union[np.ndarray, to.Tensor]) -> Union[np.ndarray, to.Tensor]:
    r"""
    Map the input data linearly into $[a, b]$, where $a$ and $b$ are the bounds returned by
    `self._convert_bounds()`. The minimum and the span of the data are stored in `self._data_min` and
    `self._data_span`.

    :param data: unscaled input ndarray or Tensor
    :return: ndarray or Tensor scaled to be in $[a, b]$
    :raises pyrado.ValueErr: if the lower bound is not strictly smaller than the upper bound
    :raises pyrado.TypeErr: if `data` is neither an ndarray nor a Tensor
    """
    # Let the bounds match the type of the data
    bound_lo, bound_up = self._convert_bounds(data)
    if not (bound_lo < bound_up).all():
        raise pyrado.ValueErr(given_name="lower bound", l_constraint="upper bound")

    # Global extrema over all elements of the data
    if isinstance(data, np.ndarray):
        smallest, largest = np.min(data), np.max(data)
    elif isinstance(data, to.Tensor):
        smallest, largest = to.min(data), to.max(data)
    else:
        raise pyrado.TypeErr(given=data, expected_type=[np.ndarray, to.Tensor])

    self._data_min = smallest
    self._data_span = largest - smallest

    # First normalize to [0, 1], then stretch and shift to [a, b]
    unit_data = (data - self._data_min) / self._data_span
    return unit_data * (bound_up - bound_lo) + bound_lo
def curr_step(self, curr_step: int):
    """
    Set the number of the current replay step (0 for the initial step).

    :param curr_step: step index, must be an `int` in `[0, N)` where `N` is the length of the currently selected
                      recording `self._act_rec_buffer[self._curr_rec]`
    :raises pyrado.ValueErr: if `curr_step` is not an `int` or lies outside of the valid range
    """
    num_steps = len(self._act_rec_buffer[self._curr_rec])

    if isinstance(curr_step, int) and 0 <= curr_step < num_steps:
        self._curr_step = curr_step
        return

    raise pyrado.ValueErr(given=curr_step, ge_constraint="0 (int)", l_constraint=num_steps)
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask:
    """
    Build a task that penalizes the instantaneous collision cost, i.e. the entry labeled `CollCost`.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification, its state space must contain the label `CollCost`
    :param factor: non-negative scaling factor of the cost / reward function
    :return: masked task wrapping a desired state task that only considers the collision cost entry
    :raises pyrado.ValueErr: if `factor` is negative
    """
    if not factor >= 0:
        raise pyrado.ValueErr(given=factor, ge_constraint="0")

    # Labels used for the selection, they need to match the observations' names in RcsPySim
    coll_labels = ["CollCost"]

    # Specification in which the state space is reduced to the selected entries
    masked_spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(coll_labels)),
    )

    # Absolute error reward on the collision cost only, the actions are not weighted
    state_weights = np.array([factor])
    act_weights = np.zeros(masked_spec.act_space.shape)
    rew_fcn = AbsErrRewFcn(q=state_weights, r=act_weights)

    # Endlessly running desired state task, the desired collision cost is zero
    zero_cost = np.zeros(masked_spec.state_space.shape)
    inner_task = DesStateTask(masked_spec, zero_cost, rew_fcn, never_succeeded)

    # Restrict the task to the selected collision cost entry
    return MaskedTask(env_spec, inner_task, coll_labels)
def ensure_math_mode(inp: [str, Sequence[str]]) -> [str, list]:
    """
    Naive way to ensure that a string is compatible with LaTeX math mode for printing.

    A string without any `$` is wrapped into a pair of `$`. A string with an even number of `$` is assumed to be
    correct and returned unchanged. Empty strings are returned unchanged, since there is nothing to typeset.
    Iterables are processed element-wise (recursively), `None` entries are passed through.

    :param inp: input string, iterable of strings, or `None` (e.g. for a space without labels)
    :return: string in math mode, list of such strings, or `None` if the input was `None`
    :raises pyrado.ValueErr: if a string contains an odd number of `$` symbols
    :raises pyrado.TypeErr: if the input is neither a string, an iterable, nor `None`
    """
    if inp is None:
        # In case a space has labels missing
        return None

    if isinstance(inp, str):
        if not inp:
            # Nothing to wrap, and indexing into an empty string would fail
            return inp

        num_dollars = inp.count("$")
        if num_dollars == 0:
            # There are no $ symbols yet, so neither the first nor the last character can be one
            return "$" + inp + "$"
        if num_dollars % 2 == 0:
            # There is an even number of $ symbols, so we assume they are correct and do nothing
            return inp
        raise pyrado.ValueErr(msg=f"The string {inp} must contain an even number of '$' symbols!")

    if isinstance(inp, Iterable):
        # Do it recursively, None entries are handled by the guard at the top
        return [ensure_math_mode(s) for s in inp]

    raise pyrado.TypeErr(given=inp, expected_type=[str, list])
def override_bounds(bounds: np.ndarray, override: Optional[Mapping[str, float]], bound_label: str, names: np.ndarray) -> np.ndarray:
    """
    Override entries of a given bound.
    This function is useful if some entries of the observation space have an infinite bound and/or you want to
    specify a certain bound.

    .. note::
        If `override` is `None` or empty, the input `bounds` is returned as is (no copy), and infinite entries
        are not reported.

    :param bounds: bound to override
    :param override: mapping from entry name to the value to override with, entries without a (non-`None`) value
                     are kept
    :param bound_label: label of the bound, e.g. lower or upper, only used for the error message
    :param names: names of the entries, indexed like `bounds`, used as keys for `override`
    :return: new bound created from a copy of the old bound, or the old bound itself if there is nothing to override
    :raises pyrado.ValueErr: if an entry is infinite and not overridden
    """
    if not override:
        return bounds

    # Override in copy of bounds
    bc = bounds.copy()
    for idx, name in np.ndenumerate(names):
        ov = override.get(name)
        if ov is not None:
            # Apply override
            bc[idx] = ov
        elif np.isinf(bc[idx]):
            # Report unbounded entry
            raise pyrado.ValueErr(
                msg=f"{name} entry of {bound_label} bound is infinite and not overridden. "
                f"Cannot apply normalization."
            )
    return bc