def plot_observations(ro: StepSequence, idcs_sel: Optional[Sequence[int]] = None):
    """
    Plot the observation trajectories of the given rollout, one subplot per selected observation dimension.

    Nothing is plotted if the rollout has no `observations` attribute or if the selection is empty.

    :param ro: input rollout, its observations must be a numpy array
    :param idcs_sel: indices of the selected observations, if `None` plot all
    :raises pyrado.TypeErr: if the observations of the rollout are not a numpy array
    """
    if not hasattr(ro, "observations"):
        return
    if not isinstance(ro.observations, np.ndarray):
        raise pyrado.TypeErr(given=ro.observations, expected_type=np.ndarray)

    # Select dimensions to plot
    dim_obs = range(ro.observations.shape[1]) if idcs_sel is None else idcs_sel
    num_obs = len(dim_obs)
    if num_obs == 0:
        # An empty selection would otherwise lead to a division by zero when computing the grid
        return

    # Use recorded time stamps if possible, otherwise fall back to the step indices
    t = getattr(ro, "time", np.arange(0, ro.length + 1))

    # The more trajectories there are, the more rows are used per column
    if num_obs <= 6:
        divisor = 2
    elif num_obs <= 12:
        divisor = 4
    else:
        divisor = 8
    num_cols = int(np.ceil(num_obs / divisor))
    num_rows = int(np.ceil(num_obs / num_cols))

    fig, axs = plt.subplots(num_rows, num_cols, figsize=(num_cols * 5, num_rows * 3), tight_layout=True)
    axs = np.atleast_2d(axs)
    axs = correct_atleast_2d(axs)
    fig.canvas.manager.set_window_title("Observations over Time")
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, num_obs))

    if num_obs == 1:
        # Draw the single trajectory exactly once and describe it in a legend
        axs[0, 0].plot(t, ro.observations[:, dim_obs[0]], label=_get_obs_label(ro, dim_obs[0]))
        axs[0, 0].legend()
        return

    for i in range(num_rows):
        for j in range(num_cols):
            idx_plot = j + i * num_cols
            if idx_plot < num_obs:
                # Map the running subplot index to the selected observation index
                idx_o = dim_obs[idx_plot]
                axs[i, j].plot(t, ro.observations[:, idx_o], c=colors[idx_plot])
                axs[i, j].set_ylabel(_get_obs_label(ro, idx_o))
            else:
                # We might create more subplots than there are observations
                axs[i, j].remove()
def plot_observations_actions_rewards(ro: StepSequence):
    """
    Plot the observation, action, and reward trajectories of the given rollout in one figure.

    Nothing is plotted unless the rollout has the attributes `observations`, `actions`, and `env_infos`.

    :param ro: input rollout, its observations and actions must be numpy arrays
    :raises pyrado.TypeErr: if the observations or the actions of the rollout are not numpy arrays
    """
    if not all(hasattr(ro, attr) for attr in ("observations", "actions", "env_infos")):
        return

    if not isinstance(ro.observations, np.ndarray):
        raise pyrado.TypeErr(given=ro.observations, expected_type=np.ndarray)
    if not isinstance(ro.actions, np.ndarray):
        raise pyrado.TypeErr(given=ro.actions, expected_type=np.ndarray)

    num_obs = ro.observations.shape[1]
    num_act = ro.actions.shape[1]

    # Use recorded time stamps if possible, otherwise fall back to the step indices
    t = getattr(ro, "time", np.arange(0, ro.length + 1))

    # One subplot per observation, one per action, and one for the reward
    num_rows, num_cols = num_rows_cols_from_length(num_obs + num_act + 1, transposed=True)
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(14, 10), tight_layout=True)
    axs = correct_atleast_2d(np.atleast_2d(axs))
    fig.canvas.manager.set_window_title("Observations, Actions, and Reward over Time")
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, max(num_obs, num_act)))

    def _axis_at(idx_flat):
        # Translate a running (row-major) subplot index into the axis of the grid
        if isinstance(axs, np.ndarray):
            return axs[idx_flat // num_cols, idx_flat % num_cols]
        return axs

    # Observations
    for idx_o in range(num_obs):
        ax = _axis_at(idx_o)
        ax.plot(t, ro.observations[:, idx_o], c=colors[idx_o])
        ax.set_ylabel(_get_obs_label(ro, idx_o))

    # Actions, placed after the observations, the time axis is cut to the length of the action trajectory
    for idx_a in range(num_act):
        ax = _axis_at(num_obs + idx_a)
        act_traj = ro.actions[:, idx_a]
        ax.plot(t[: len(act_traj)], act_traj, c=colors[idx_a])
        ax.set_ylabel(_get_act_label(ro, idx_a))

    # Rewards, always placed in the last subplot of the grid
    ax = _axis_at(num_rows * num_cols - 1)
    rewards = ro.rewards
    ax.plot(t[: len(rewards)], rewards, c="k")
    ax.set_ylabel("reward")
    ax.set_xlabel("time")

    plt.subplots_adjust(hspace=0.5)
def plot_features(ro: StepSequence, policy: Policy):
    """
    Plot the feature values which the policy computes from the observation trajectories of the rollout.

    A message is printed and nothing is plotted if the policy is not a `LinearPolicy`. Nothing is plotted if the
    rollout has no `observations` attribute.

    :param ro: input rollout
    :param policy: linear policy used during the rollout
    """
    if not isinstance(policy, LinearPolicy):
        print_cbt("Plotting of the feature values is only supports linear policies!", "r")
        return
    if not hasattr(ro, "observations"):
        return

    # Use recorded time stamps if possible, and drop the last time step
    t = getattr(ro, "time", np.arange(0, ro.length + 1))[:-1]

    # Recover the features from the observations
    feat_vals = policy.eval_feats(to.from_numpy(ro.observations))
    num_feat = feat_vals.shape[1]

    # The more features there are, the more rows are used per column
    divisor = 2 if num_feat <= 6 else (4 if num_feat <= 12 else 8)
    num_cols = int(np.ceil(num_feat / divisor))
    num_rows = int(np.ceil(num_feat / num_cols))

    fig, axs = plt.subplots(num_rows, num_cols, figsize=(num_cols * 5, num_rows * 3), tight_layout=True)
    axs = correct_atleast_2d(np.atleast_2d(axs))
    fig.canvas.manager.set_window_title("Feature Values over Time")
    plt.subplots_adjust(hspace=0.5)
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, num_feat))

    if num_feat == 1:
        # Single feature: one labeled curve with a legend, the last feature value is dropped to match the time axis
        axs[0, 0].plot(t, feat_vals[:-1, 0], label=_get_obs_label(ro, 0))
        axs[0, 0].legend()
        return

    # Walk over the grid in row-major order
    for idx_plot in range(num_rows * num_cols):
        ax = axs[idx_plot // num_cols, idx_plot % num_cols]
        if idx_plot >= num_feat:
            # We might create more subplots than there are features
            ax.remove()
            continue
        # The last feature value is dropped to match the time axis
        ax.plot(t, feat_vals[:-1, idx_plot], c=colors[idx_plot])
        ax.set_ylabel(rf"$\phi_{{{idx_plot}}}$")
def _plot_mean_std_grid(data: pd.DataFrame, x_grid, x_label: str, window_title: str):
    """
    Create one figure with a subplot per unique column label of `data`, showing mean, std, and the raw curves.

    :param data: data frame with one column per rollout and quantity, columns of the same quantity share a label
    :param x_grid: values for the x-axis, shared by all subplots
    :param x_label: label of the x-axis
    :param window_title: title of the figure window
    """
    labels = data.columns.unique()

    # Compute the statistics across all columns sharing a label. Grouping the transposed frame by its index is
    # equivalent to `groupby(by=data.columns, axis=1)`, but does not rely on the deprecated `axis` argument.
    by_label = data.T.groupby(level=0)
    means = by_label.mean().T
    stds = by_label.std().T

    # Size the grid and the color map by what is actually plotted
    num_rows, num_cols = num_rows_cols_from_length(len(labels), transposed=True)
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(18, 9), tight_layout=True)
    axs = correct_atleast_2d(np.atleast_2d(axs))
    fig.canvas.manager.set_window_title(window_title)
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, len(labels)))

    for idx, label in enumerate(labels):
        ax = axs[idx // num_cols, idx % num_cols]

        # Plot means and stds
        draw_curve(
            "mean_std",
            ax,
            pd.DataFrame(dict(mean=means[label], std=stds[label])),
            x_grid=x_grid,
            show_legend=False,
            x_label=x_label,
            y_label=str(label),
            plot_kwargs=dict(color=colors[idx]),
        )

        # Plot individual rollouts, selecting by label yields the columns of all rollouts at once
        ax.plot(x_grid, data[label], c="gray", ls="--")


def plot_mean_std_across_rollouts(
    rollouts: Sequence[StepSequence],
    idcs_obs: Optional[Sequence[int]] = None,
    idcs_act: Optional[Sequence[int]] = None,
    show_applied_actions: bool = True,
):
    """
    Plot the mean and standard deviation across a selection of rollouts.

    One figure is created for the observations and one for the actions.

    .. note::
        Every rollout is converted in-place by calling its `numpy()` method.

    :param rollouts: list of rollouts, they can be of unequal length but are assumed to be from the same type of env
    :param idcs_obs: indices of the observations to process and plot, pass `None` to select all
    :param idcs_act: indices of the actions to process and plot, pass `None` to select all
    :param show_applied_actions: if `True` show the actions applied to the environment instead of the commanded ones
    """
    act_key = "actions_applied" if show_applied_actions else "actions"

    dim_obs = rollouts[0].observations.shape[1]  # assuming same for all rollouts
    dim_act = rollouts[0].actions.shape[1]  # assuming same for all rollouts
    if idcs_obs is None:
        idcs_obs = np.arange(dim_obs)
    if idcs_act is None:
        idcs_act = np.arange(dim_act)

    max_len = 0
    time = None
    frames_obs = []
    frames_act = []
    for ro in rollouts:
        ro.numpy()

        if len(ro) > max_len:
            # Extract the time stamps from the longest rollout (if recorded)
            max_len = len(ro)
            time = getattr(ro, "time", None)

        # Extract observations
        frames_obs.append(
            pd.DataFrame(ro.observations[:, idcs_obs], columns=[_get_obs_label(ro, i) for i in idcs_obs])
        )

        # Extract actions
        frames_act.append(
            pd.DataFrame(ro.get_data_values(act_key)[:, idcs_act], columns=[_get_act_label(ro, i) for i in idcs_act])
        )

    # Concatenate once instead of growing the frames in every iteration, rows of shorter rollouts are padded
    data_obs = pd.concat(frames_obs, axis=1)
    data_act = pd.concat(frames_act, axis=1)

    x_label = "time [s]" if time is not None else "steps [-]"

    # Plot observations
    _plot_mean_std_grid(
        data_obs,
        x_grid=time if time is not None else np.arange(len(data_obs)),
        x_label=x_label,
        window_title="Mean And 2 Standard Deviations of the Observations over Time",
    )

    # Plot actions, the last time stamp is dropped for the actions
    _plot_mean_std_grid(
        data_act,
        x_grid=time[:-1] if time is not None else np.arange(len(data_act)),
        x_label=x_label,
        window_title="Mean And 2 Standard Deviations of the Actions over Time",
    )
def plot_actions(ro: StepSequence, env: Env):
    """
    Plot the action trajectories of the given rollout together with their clipped counterparts.

    Nothing is plotted if the rollout has no `actions` attribute.

    :param ro: input rollout, its actions must be a numpy array
    :param env: environment (used for getting the clipped action values)
    :raises pyrado.TypeErr: if the actions of the rollout are not a numpy array
    """
    if not hasattr(ro, "actions"):
        return
    if not isinstance(ro.actions, np.ndarray):
        raise pyrado.TypeErr(given=ro.actions, expected_type=np.ndarray)

    num_act = ro.actions.shape[1]

    # Use recorded time stamps if possible, and drop the last time step
    time_stamps = getattr(ro, "time", np.arange(0, ro.length + 1))
    t = time_stamps[:-1]

    num_rows, num_cols = num_rows_cols_from_length(num_act, transposed=True)
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(10, 8), tight_layout=True)
    fig.canvas.manager.set_window_title("Actions over Time")
    axs = correct_atleast_2d(np.atleast_2d(axs))
    colors = plt.get_cmap("tab20")(np.linspace(0, 1, num_act))

    if typed_env(env, ActNormWrapper) is None:
        # No action normalization in the wrapper chain, the recorded actions are used as they are
        limiter = env
        act_denorm = ro.actions
    else:
        # Map the recorded actions from [-1, 1] to the bounds of the inner environment's action space
        limiter = inner_env(env)
        lb, ub = limiter.act_space.bounds
        act_denorm = lb + (ro.actions + 1.0) * (ub - lb) / 2
    act_clipped = np.array([limiter.limit_act(a) for a in act_denorm])

    if num_act == 1:
        ax = axs[0, 0]
        ax.plot(t, act_denorm, label="to env")
        ax.plot(t, act_clipped, label="clipped", c="k", ls="--")
        ax.legend(ncol=2)
        ax.set_ylabel(_get_act_label(ro, 0))
    else:
        for idx_a in range(num_act):
            ax = axs[idx_a // num_cols, idx_a % num_cols]
            ax.plot(t, act_denorm[:, idx_a], label="to env", c=colors[idx_a])
            ax.plot(t, act_clipped[:, idx_a], label="clipped", c="k", ls="--")
            ax.legend(ncol=2)
            ax.set_ylabel(_get_act_label(ro, idx_a))

    # Create the legends on all axes of the figure, skipped for many actions since it gets too cluttered
    if num_act < 8:
        for ax in fig.get_axes():
            ax.legend(ncol=2)

    plt.subplots_adjust(hspace=0.2)
def __init__(
    self,
    spec: EnvSpec,
    dt: float,
    t_end: float,
    cond_lvl: str,
    cond_final: Optional[Union[to.Tensor, List[float], List[List[float]]]] = None,
    cond_init: Optional[Union[to.Tensor, List[float], List[List[float]]]] = None,
    t_init: float = 0.0,
    overtime_behavior: str = "hold",
    init_param_kwargs: Optional[dict] = None,
    use_cuda: bool = False,
):
    """
    Constructor

    :param spec: environment specification
    :param dt: time step [s]
    :param t_end: final time [s], must be greater than `t_init`
    :param cond_lvl: highest level of the condition, so far, only velocity 'vel' and acceleration 'acc' level
                     conditions on the polynomial are supported. These need to be consistent with the actions.
    :param cond_final: final condition for the least squares problem, needs to be of shape [X, dim_act] where X
                       is 2 if `cond_lvl == 'vel'` and 3 if `cond_lvl == 'acc'`. If `None`, the final condition
                       is left uninitialized and the parameters are initialized by calling `init_param()`.
    :param cond_init: initial condition for the least squares problem, needs to be of shape [X, dim_act] where X
                      is 2 if `cond_lvl == 'vel'` and 3 if `cond_lvl == 'acc'`. If `None`, zeros are used.
    :param t_init: initial time [s], also used on calling `reset()`
    :param overtime_behavior: determines how the policy acts when `t > t_end`, either 'hold' or 'zero', e.g.
                              'hold' to keep the last action
    :param init_param_kwargs: additional keyword arguments for the policy parameter initialization
    :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
    """
    if t_end <= t_init:
        raise pyrado.ValueErr(given=t_end, g_constraint=t_init)
    behavior = overtime_behavior.lower()
    if behavior not in ("hold", "zero"):
        raise pyrado.ValueErr(given=overtime_behavior, eq_constraint=("hold", "zero"))

    # Call Policy's constructor
    super().__init__(spec, use_cuda)

    self._dt = float(dt)
    self._t_end = float(t_end)
    self._t_init = float(t_init)
    self._t_curr = float(t_init)
    self._overtime_behavior = behavior

    # The level of the conditions determines the order of the polynomials
    lvl = cond_lvl.lower()
    if lvl == "vel":
        self._order = 3
    elif lvl == "acc":
        self._order = 5
    else:
        raise pyrado.ValueErr(given=cond_lvl, eq_constraint="'vel' or 'acc'")

    # Half of the conditions are imposed at the initial time, the other half at the final time
    num_cond = (self._order + 1) // 2
    flat_dim = spec.act_space.flat_dim
    cond_shape = (num_cond, flat_dim)

    def _as_cond(cond):
        # Convert the given condition to a 2-dim tensor and make sure it has the expected shape
        cond = to.as_tensor(cond, dtype=to.get_default_dtype())
        cond = correct_atleast_2d(to.atleast_2d(cond))
        if cond.shape != cond_shape:
            raise pyrado.ShapeErr(given=cond, expected_match=cond_shape)
        return cond

    # Without a given final condition, the parameters are initialized by the custom initialization function
    rand_init = cond_final is None
    cond_final = to.empty(num_cond, flat_dim) if rand_init else _as_cond(cond_final)

    # Without a given initial condition, all initial conditions are zero
    cond_init = to.zeros(num_cond, flat_dim) if cond_init is None else _as_cond(cond_init)

    conds = to.cat([cond_init, cond_final], dim=0)
    assert conds.shape[0] in [4, 6]

    # Define the policy parameters
    self.conds = nn.Parameter(conds, requires_grad=False)

    # Store the polynomial coefficients for each output dimension in a matrix
    self.coeffs = to.empty(self._order + 1, flat_dim, device=self.device)

    if rand_init:
        # Call custom initialization function after PyTorch network parameter initialization
        if init_param_kwargs is None:
            init_param_kwargs = dict()
        self.init_param(None, **init_param_kwargs)
    else:
        # Compute the coefficients to match the given (initial and) final conditions
        self._compute_coefficients()

    self.to(self.device)
def __init__(
    self,
    spec: EnvSpec,
    dt: float,
    t_end: float,
    cond_lvl: str,
    cond_final: Union[to.Tensor, List[float], List[List[float]]],
    cond_init: Union[to.Tensor, List[float], List[List[float]]],
    t_init: float = 0.0,
    overtime_behavior: str = "hold",
):
    """
    In contrast to PolySplineTimePolicy, this constructor needs to be called with learned / working values for
    `cond_final` and `cond_init`.

    :param spec: environment specification
    :param dt: time step [s]
    :param t_end: final time [s]
    :param cond_lvl: highest level of the condition, so far, only velocity 'vel' and acceleration 'acc' level
                     conditions on the polynomial are supported. These need to be consistent with the actions.
    :param cond_final: final condition for the least squares problem, needs to be of shape [X, dim_act] where X
                       is 2 if `cond_lvl == 'vel'` and 3 if `cond_lvl == 'acc'`
    :param cond_init: initial condition for the least squares problem, needs to be of shape [X, dim_act] where X
                      is 2 if `cond_lvl == 'vel'` and 3 if `cond_lvl == 'acc'`
    :param t_init: initial time [s], also used on calling `reset()`
    :param overtime_behavior: determines how the policy acts when `t > t_end`, e.g. 'hold' to keep the last action
    """
    super().__init__()

    # Setup attributes
    flat_dim = spec.act_space.flat_dim
    self.input_size = spec.obs_space.flat_dim
    self.output_size = flat_dim
    self.dt = float(dt)
    self.t_end = float(t_end)
    self.t_init = float(t_init)
    self.t_curr = float(t_init)
    self.overtime_behavior = overtime_behavior.lower()

    # Could not be converted
    self.act_space_shape = spec.act_space.shape
    self.act_space_flat_dim = flat_dim

    # The level of the conditions determines the order of the polynomials
    order = {"vel": 3, "acc": 5}.get(cond_lvl.lower())
    if order is None:
        raise pyrado.ValueErr(given=cond_lvl, eq_constraint="'vel' or 'acc'")
    self.order = order

    # Half of the conditions are imposed at the initial time, the other half at the final time
    num_cond = (self.order + 1) // 2
    expected_shape = (num_cond, flat_dim)
    dtype = to.get_default_dtype()

    # Convert the final condition to a 2-dim tensor and validate its shape
    cond_final = correct_atleast_2d(to.atleast_2d(to.as_tensor(cond_final, dtype=dtype)))
    if cond_final.shape != expected_shape:
        raise pyrado.ShapeErr(given=cond_final, expected_match=expected_shape)

    # Convert the initial condition to a 2-dim tensor and validate its shape
    cond_init = correct_atleast_2d(to.atleast_2d(to.as_tensor(cond_init, dtype=dtype)))
    if cond_init.shape != expected_shape:
        raise pyrado.ShapeErr(given=cond_init, expected_match=expected_shape)

    # Stack the initial conditions on top of the final conditions
    self.conds = to.cat([cond_init, cond_final], dim=0)
    assert self.conds.shape[0] in [4, 6]

    # Store the polynomial coefficients for each output dimension in a matrix
    self.coeffs = to.empty(self.order + 1, flat_dim)
    self.compute_coefficients()
def test_correct_atleast_2d(x):
    """Check that the first dimension of the corrected input equals the length of the input."""
    num_rows = correct_atleast_2d(x).shape[0]
    assert num_rows == len(x)