def make_proba_distribution(
    action_space: gym.spaces.Space,
    use_sde: bool = False,
    dist_kwargs: Optional[Dict[str, Any]] = None,
) -> Distribution:
    """
    Return an instance of Distribution for the correct type of action space

    :param action_space: (gym.spaces.Space) the input action space
    :param use_sde: (bool) Force the use of StateDependentNoiseDistribution
        instead of DiagGaussianDistribution (only consulted for Box spaces)
    :param dist_kwargs: (Optional[Dict[str, Any]]) Keyword arguments to pass to the probability distribution
    :return: (Distribution) the appropriate Distribution object
    :raises AssertionError: if a Box action space is not one-dimensional
    :raises NotImplementedError: if the action space is neither Box nor Discrete
    """
    if dist_kwargs is None:
        dist_kwargs = {}

    if isinstance(action_space, spaces.Box):
        assert len(action_space.shape) == 1, "Error: the action space must be a vector"
        if use_sde:
            return StateDependentNoiseDistribution(get_action_dim(action_space), **dist_kwargs)
        return DiagGaussianDistribution(get_action_dim(action_space), **dist_kwargs)

    if isinstance(action_space, spaces.Discrete):
        return CategoricalDistribution(action_space.n, **dist_kwargs)

    # NOTE: MultiDiscrete (MultiCategoricalDistribution) and MultiBinary
    # (BernoulliDistribution) are not enabled here, so they fall through to
    # the error below like any other unsupported space.
    raise NotImplementedError(
        "Error: probability distribution, not implemented for action space "
        f"of type {type(action_space)}."
        " Must be of type Gym Spaces: Box or Discrete."
    )
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    device: Union[th.device, str] = 'auto',
):
    """
    Build two independent Q-value MLPs (``q1_net`` and ``q2_net``).

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space, forwarded to the parent constructor
    :param net_arch: layer sizes passed to ``create_mlp`` for both networks
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features
    :param activation_fn: activation class passed to ``create_mlp``
    :param normalize_images: forwarded to the parent constructor
    :param device: forwarded to the parent constructor
    """
    super(Critic, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        device=device,
    )

    # Both networks take (features_dim + action_dim) inputs and emit one value.
    q_input_dim = features_dim + get_action_dim(self.action_space)

    self.q1_net = nn.Sequential(*create_mlp(q_input_dim, 1, net_arch, activation_fn))
    self.q2_net = nn.Sequential(*create_mlp(q_input_dim, 1, net_arch, activation_fn))
    self.q_networks = [self.q1_net, self.q2_net]
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
):
    """
    Build the actor: a latent MLP (``latent_pi``) followed by a linear head (``mu``).

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space; its dimension sizes the ``mu`` head
    :param net_arch: hidden layer sizes of the latent MLP
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features (input of the latent MLP)
    :param activation_fn: activation class passed to ``create_mlp``
    :param normalize_images: forwarded to the parent constructor
    """
    super(Actor, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        squash_output=True,
    )

    # Keep the constructor arguments so the object can be re-created at loading
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    self.action_dim = get_action_dim(self.action_space)

    # An output size of -1 is passed to create_mlp; the head is added separately below.
    self.latent_pi = nn.Sequential(*create_mlp(features_dim, -1, net_arch, activation_fn))

    # With no hidden layers the head reads the features directly.
    if len(net_arch) > 0:
        head_input_dim = net_arch[-1]
    else:
        head_input_dim = features_dim
    self.mu = nn.Linear(head_input_dim, self.action_dim)
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    n_quantiles: int = 25,
    n_critics: int = 2,
    share_features_extractor: bool = True,
):
    """
    Build ``n_critics`` MLPs, each with ``n_quantiles`` outputs.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space, forwarded to the parent constructor
    :param net_arch: hidden layer sizes shared by every critic network
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features
    :param activation_fn: activation class passed to ``create_mlp``
    :param normalize_images: forwarded to the parent constructor
    :param n_quantiles: number of outputs of each critic network
    :param n_critics: number of critic networks to create
    :param share_features_extractor: stored on the instance; not otherwise used here
    """
    super().__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )

    self.share_features_extractor = share_features_extractor
    self.n_quantiles = n_quantiles
    self.n_critics = n_critics
    self.quantiles_total = n_quantiles * n_critics
    self.q_networks = []

    critic_input_dim = features_dim + get_action_dim(self.action_space)
    for critic_idx in range(n_critics):
        layers = create_mlp(critic_input_dim, n_quantiles, net_arch, activation_fn)
        critic = nn.Sequential(*layers)
        # Register under qf0, qf1, ... and keep a plain list for iteration.
        self.add_module(f"qf{critic_idx}", critic)
        self.q_networks.append(critic)
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    n_critics: int = 2,
    film_critic: bool = False,
    num_env_params: int = 0,
    share_features_extractor: bool = True,
):
    """
    Build ``n_critics`` single-output Q-value MLPs.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space, forwarded to the parent constructor
    :param net_arch: hidden layer sizes shared by every Q-network
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features
    :param activation_fn: activation class passed to ``create_mlp``
    :param normalize_images: forwarded to the parent constructor
    :param n_critics: number of Q-networks to create
    :param film_critic: stored as ``self.film``; does not change the networks built here
    :param num_env_params: accepted but not used in this constructor
    :param share_features_extractor: stored on the instance; not otherwise used here
    """
    super().__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )

    self.share_features_extractor = share_features_extractor
    self.n_critics = n_critics
    self.film = film_critic
    self.q_networks = []

    q_input_dim = features_dim + get_action_dim(self.action_space)
    for critic_idx in range(n_critics):
        critic = nn.Sequential(*create_mlp(q_input_dim, 1, net_arch, activation_fn))
        # Register under qf0, qf1, ... and keep a plain list for iteration.
        self.add_module(f"qf{critic_idx}", critic)
        self.q_networks.append(critic)
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    device: Union[th.device, str] = "auto",
    n_critics: int = 2,
):
    """
    Build ``n_critics`` single-output Q-value MLPs.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space, forwarded to the parent constructor
    :param net_arch: hidden layer sizes shared by every Q-network
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features
    :param activation_fn: activation class passed to ``create_mlp``
    :param normalize_images: forwarded to the parent constructor
    :param device: forwarded to the parent constructor
    :param n_critics: number of Q-networks to create
    """
    super().__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        device=device,
    )

    self.n_critics = n_critics
    self.q_networks = []

    q_input_dim = features_dim + get_action_dim(self.action_space)
    for critic_idx in range(n_critics):
        critic = nn.Sequential(*create_mlp(q_input_dim, 1, net_arch, activation_fn))
        # Register under qf0, qf1, ... and keep a plain list for iteration.
        self.add_module(f"qf{critic_idx}", critic)
        self.q_networks.append(critic)
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    features_extractor: nn.Module,
    features_dim: int,
    latent_dim: int,
    hidden_dim: int,
    net_arch: Optional[List[int]] = None,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
):
    """
    Build the controller: one linear layer from ``latent_dim + hidden_dim`` inputs to the action dimension.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space; its dimension is the output size of ``fc``
    :param features_extractor: module forwarded to the parent constructor and stored on the instance
    :param features_dim: stored on the instance
    :param latent_dim: first part of the input width of ``fc``
    :param hidden_dim: second part of the input width of ``fc``
    :param net_arch: stored on the instance; defaults to ``[64]`` when ``None``
    :param activation_fn: stored on the instance
    :param normalize_images: forwarded to the parent constructor and stored on the instance
    """
    super(Controller, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )

    self.net_arch = [64] if net_arch is None else net_arch
    self.activation_fn = activation_fn
    self.features_extractor = features_extractor
    self.features_dim = features_dim
    self.latent_dim = latent_dim
    self.hidden_dim = hidden_dim
    self.normalize_images = normalize_images

    fc_input_dim = self.latent_dim + self.hidden_dim
    self.fc = nn.Linear(fc_input_dim, get_action_dim(self.action_space))
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
):
    """
    Build the deterministic actor network ``mu``.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space; its dimension is the output size of ``mu``
    :param net_arch: hidden layer sizes of the actor MLP
    :param features_extractor: module forwarded to the parent constructor and stored on the instance
    :param features_dim: width of the extracted features (input of the actor MLP)
    :param activation_fn: activation class passed to ``create_mlp``
    :param normalize_images: forwarded to the parent constructor and stored on the instance
    """
    super(Actor, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        squash_output=True,
    )

    self.features_extractor = features_extractor
    self.normalize_images = normalize_images
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn

    # Deterministic action
    layers = create_mlp(
        features_dim,
        get_action_dim(self.action_space),
        net_arch,
        activation_fn,
        squash_output=True,
    )
    self.mu = nn.Sequential(*layers)
def _initialize(self):
    """
    Create the three dense layers: 400 and 300 ReLU units, then one linear output.

    The first layer's input width is the flattened observation size plus the
    action dimension of ``TaskEnv``.
    """
    input_width = (get_flattened_obs_dim(TaskEnv.observation_space)
                   + get_action_dim(TaskEnv.action_space))

    self.dense1 = tf.keras.layers.Dense(
        400, input_shape=(input_width, ), activation='relu')
    self.dense2 = tf.keras.layers.Dense(300, activation='relu')
    self.dense3 = tf.keras.layers.Dense(1)
def __init__(self,
             observation_space: gym.spaces.Space,
             action_space: gym.spaces.Space,
             net_arch: List[int],
             features_extractor: nn.Module,
             features_dim: int,
             activation_fn: Type[nn.Module] = nn.ReLU,
             use_sde: bool = False,
             log_std_init: float = -3,
             full_std: bool = True,
             sde_net_arch: Optional[List[int]] = None,
             use_expln: bool = False,
             clip_mean: float = 2.0,
             normalize_images: bool = True,
             device: Union[th.device, str] = 'auto'):
    """
    Build the actor: a latent MLP plus mean / log-std heads, with or without gSDE.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space; its dimension sizes the output heads
    :param net_arch: hidden layer sizes of the latent MLP
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features
    :param activation_fn: activation class passed to ``create_mlp``
    :param use_sde: if True use ``StateDependentNoiseDistribution``,
        otherwise ``SquashedDiagGaussianDistribution``
    :param log_std_init: passed to ``proba_distribution_net`` when ``use_sde`` is True
    :param full_std: passed to ``StateDependentNoiseDistribution``
    :param sde_net_arch: if not None (and ``use_sde`` is True), a separate
        features extractor is created with ``create_sde_features_extractor``
    :param use_expln: passed to ``StateDependentNoiseDistribution``
    :param clip_mean: when positive (and ``use_sde`` is True), ``mu`` is followed
        by a ``Hardtanh`` limiting it to ``[-clip_mean, clip_mean]``
    :param normalize_images: forwarded to the parent constructor
    :param device: forwarded to the parent constructor
    """
    super(Actor, self).__init__(observation_space, action_space,
                                features_extractor=features_extractor,
                                normalize_images=normalize_images,
                                device=device,
                                squash_output=True)

    # Save arguments to re-create object at loading
    self.use_sde = use_sde
    self.sde_features_extractor = None
    self.sde_net_arch = sde_net_arch
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    self.log_std_init = log_std_init
    self.use_expln = use_expln
    self.full_std = full_std
    self.clip_mean = clip_mean

    action_dim = get_action_dim(self.action_space)
    latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
    self.latent_pi = nn.Sequential(*latent_pi_net)
    # With no hidden layers the heads read the features directly.
    last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

    if self.use_sde:
        latent_sde_dim = last_layer_dim
        # Separate feature extractor for gSDE
        if sde_net_arch is not None:
            self.sde_features_extractor, latent_sde_dim = create_sde_features_extractor(
                features_dim, sde_net_arch, activation_fn)

        self.action_dist = StateDependentNoiseDistribution(action_dim, full_std=full_std,
                                                           use_expln=use_expln,
                                                           learn_features=True,
                                                           squash_output=True)
        self.mu, self.log_std = self.action_dist.proba_distribution_net(
            latent_dim=last_layer_dim,
            latent_sde_dim=latent_sde_dim,
            log_std_init=log_std_init)
        # Avoid numerical issues by limiting the mean of the Gaussian
        # to be in [-clip_mean, clip_mean]
        if clip_mean > 0.0:
            self.mu = nn.Sequential(self.mu, nn.Hardtanh(min_val=-clip_mean, max_val=clip_mean))
    else:
        self.action_dist = SquashedDiagGaussianDistribution(action_dim)
        self.mu = nn.Linear(last_layer_dim, action_dim)
        self.log_std = nn.Linear(last_layer_dim, action_dim)
def process(file, case_number, runs, verbose):
    """
    Load the agent saved at ``file`` and run it through ``test_agent``.

    The agent is named after the file's base name up to its first dot, and its
    policy's action distribution is replaced with a
    ``SquashedDiagGaussianDistribution`` sized for the environment's action space.

    :param file: path of the saved agent ('/'-separated)
    :param case_number: passed to ``get_env`` to select the environment
    :param runs: passed to ``test_agent``
    :param verbose: passed to ``test_agent``
    """
    print(f"File: {file}")
    agent = load_agent(file)

    base_name = file.split('/')[-1]
    name = base_name.split('.')[0]
    print(f"Name: {name}")
    agent.name = name

    env = Monitor(get_env(case_number))
    action_dim = get_action_dim(env.action_space)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(action_dim)

    print(" --> Testing...")
    test_agent(agent, env, runs, verbose)
def create_agent(env, name, case_number, layers, verbose):
    """
    Create an A2C agent on ``env`` and log its policy graph to TensorBoard.

    :param env: environment to train on; ``n_steps`` is read from
        ``env.unwrapped.envs[0].unwrapped._max_steps``
    :param name: stored as ``agent.name`` and used in the TensorBoard log directory
    :param case_number: used as the agent's random seed
    :param layers: layer sizes used for both the value (``vf``) and policy (``pi``) networks
    :param verbose: verbosity level forwarded to ``A2C``
    :return: the configured agent
    """
    optimizer_settings = {
        "betas": (0.9, 0.999),
        "eps": 1e-8,
        "weight_decay": 0,
        "amsgrad": False,
    }
    policy_settings = {
        "net_arch": [{"vf": layers, "pi": layers}],
        "activation_fn": th.nn.LeakyReLU,
        "ortho_init": True,
        "log_std_init": 1.0,
        "full_std": True,
        "sde_net_arch": None,
        "use_expln": False,
        "squash_output": True,
        "features_extractor_class": FlattenExtractor,
        "features_extractor_kwargs": {},
        "normalize_images": False,
        "optimizer_class": th.optim.Adam,
        "optimizer_kwargs": optimizer_settings,
    }

    agent = A2C(
        policy=MlpPolicy,
        env=env,
        learning_rate=1.0e-3,
        n_steps=env.unwrapped.envs[0].unwrapped._max_steps,
        gamma=1.0,
        gae_lambda=1.0,
        ent_coef=0.0,
        vf_coef=0.5,
        max_grad_norm=0.5,
        rms_prop_eps=1e-5,
        use_rms_prop=True,
        use_sde=False,
        sde_sample_freq=-1,
        normalize_advantage=False,
        tensorboard_log="./tblog",
        create_eval_env=True,
        policy_kwargs=policy_settings,
        verbose=verbose,
        seed=case_number,
        device="cpu",
        _init_setup_model=True,
    )
    agent.name = name

    action_dim = get_action_dim(env.action_space)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(action_dim)

    # Trace the policy once on an all-zero input so its graph shows in TensorBoard.
    # NOTE(review): the dummy input is hard-coded to shape (1, 8) - presumably the
    # observation size of the target environment; confirm before reusing elsewhere.
    log_dir = agent.tensorboard_log + "/" + agent.name + "_1"
    dummy_input = th.as_tensor(np.zeros((1, 8))).to(agent.policy.device)
    writer = SummaryWriter(log_dir=log_dir)
    writer.add_graph(agent.policy, dummy_input)
    writer.close()

    return agent
def __init__(self):
    """
    Create the layers, run one call on all-zero inputs, and set the loss id.

    The zero inputs have batch size 1 and widths taken from ``TaskEnv``'s
    flattened observation size and action dimension.
    """
    super().__init__()
    self._initialize()

    # Initialize parameters by calling
    zero_obs = tf.constant(
        np.zeros(shape=(1, get_flattened_obs_dim(TaskEnv.observation_space))))
    zero_action = tf.constant(
        np.zeros(shape=(1, get_action_dim(TaskEnv.action_space))))
    self.call(zero_obs, zero_action)

    self.loss_function_id = CRITIC_LOSS
def __init__(
    self,
    buffer_size: int,
    observation_space: spaces.Space,
    action_space: spaces.Space,
    device: Union[th.device, str] = 'cpu',
    n_envs: int = 1,
):
    """
    Store the buffer configuration and reset the write position.

    :param buffer_size: stored as ``self.buffer_size``
    :param observation_space: observation space; its shape is cached in ``obs_shape``
    :param action_space: action space; its dimension is cached in ``action_dim``
    :param device: stored as ``self.device``
    :param n_envs: stored as ``self.n_envs``
    """
    super(BaseBuffer, self).__init__()

    # Static configuration
    self.buffer_size = buffer_size
    self.n_envs = n_envs
    self.device = device

    # Spaces and the sizes derived from them
    self.observation_space = observation_space
    self.action_space = action_space
    self.obs_shape = get_obs_shape(observation_space)
    self.action_dim = get_action_dim(action_space)

    # The buffer starts at position 0 and not full
    self.pos = 0
    self.full = False
def __init__(self,
             observation_space: gym.spaces.Space,
             action_space: gym.spaces.Space,
             net_arch: List[int],
             features_extractor: nn.Module,
             features_dim: int,
             activation_fn: Type[nn.Module] = nn.ReLU,
             use_sde: bool = False,
             log_std_init: float = -3,
             full_std: bool = True,
             sde_net_arch: Optional[List[int]] = None,
             use_expln: bool = False,
             clip_mean: float = 2.0,
             normalize_images: bool = True,
             device: Union[th.device, str] = 'auto'):
    """
    Build the actor: a latent MLP followed by linear ``mu`` and ``log_std`` heads.

    The gSDE-related arguments (``use_sde``, ``log_std_init``, ``full_std``,
    ``sde_net_arch``, ``use_expln``, ``clip_mean``) are only stored on the
    instance; they do not change the layers built here.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space; its dimension sizes the output heads
    :param net_arch: hidden layer sizes of the latent MLP
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features
    :param activation_fn: activation class passed to ``create_mlp``
    :param use_sde: stored on the instance
    :param log_std_init: stored on the instance
    :param full_std: stored on the instance
    :param sde_net_arch: stored on the instance
    :param use_expln: stored on the instance
    :param clip_mean: stored on the instance
    :param normalize_images: forwarded to the parent constructor
    :param device: forwarded to the parent constructor
    """
    super(Actor, self).__init__(observation_space, action_space,
                                features_extractor=features_extractor,
                                normalize_images=normalize_images,
                                device=device,
                                squash_output=True)

    # Save arguments to re-create object at loading
    self.use_sde = use_sde
    self.sde_features_extractor = None
    self.sde_net_arch = sde_net_arch
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    self.log_std_init = log_std_init
    self.use_expln = use_expln
    self.full_std = full_std
    self.clip_mean = clip_mean

    action_dim = get_action_dim(self.action_space)
    latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
    self.latent_pi = nn.Sequential(*latent_pi_net)
    # With no hidden layers the heads read the features directly.
    last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

    self.mu = nn.Linear(last_layer_dim, action_dim)
    self.log_std = nn.Linear(last_layer_dim, action_dim)
def __init__(
    self,
    observation_space: spaces.Space,
    action_space: spaces.Space,
    device: Union[th.device, str] = "cpu",
    gae_lambda: float = 1,
    gamma: float = 0.99,
    num_trajectories: int = 20  # TODO: put as a parameter
):
    """
    Store the buffer configuration and start with no trajectories.

    :param observation_space: observation space; its shape is cached in ``obs_shape``
    :param action_space: action space; its dimension is cached in ``action_dim``
    :param device: stored as ``self.device``
    :param gae_lambda: stored as ``self.gae_lambda``
    :param gamma: stored as ``self.gamma``
    :param num_trajectories: stored as ``self.num_trajectories``
    """
    # Spaces and the sizes derived from them
    self.observation_space = observation_space
    self.action_space = action_space
    self.obs_shape = get_obs_shape(observation_space)
    self.action_dim = get_action_dim(action_space)

    # Hyper-parameters
    self.device = device
    self.gae_lambda = gae_lambda
    self.gamma = gamma
    self.num_trajectories = num_trajectories

    # Bookkeeping: nothing collected yet
    self.full = False
    self.traj_idx = 0
    self.num_done_trajectories = 0
    self.live_agents : Dict[int, int] = {}  # env agent-id -> buffer-unique
    self.trajectories : Dict[int, TrajectoryBufferSamples] = {}  # buffer-unique id -> trajectory
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    share_features_extractor: bool = True,
    action_dist_num=32,
):
    """
    Build a critic split into a head, a cosine-embedding branch and an output tail.

    One MLP is created with ``create_mlp``; its first two modules become
    ``net_head`` and the rest ``net_out``. ``net_cos_embedding`` maps ``n_cos``
    (64) inputs to the first hidden width, ``net_arch[0]``.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space, forwarded to the parent constructor
    :param net_arch: hidden layer sizes; must contain at least one entry
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features
    :param activation_fn: activation class used in the MLP and in the embedding branch
    :param normalize_images: forwarded to the parent constructor
    :param share_features_extractor: stored on the instance; not otherwise used here
    :param action_dist_num: stored on the instance; not otherwise used here
    """
    super().__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )

    self.share_features_extractor = share_features_extractor
    self.n_cos = 64
    self.action_dist_num = action_dist_num
    self.first_hidden_size = net_arch[0]

    # pi, 2*pi, ..., n_cos*pi as a (1, 1, n_cos) tensor on the module's device
    pi_multiples = [np.pi * k for k in range(1, self.n_cos + 1)]
    self.pis = th.FloatTensor(pi_multiples).view(1, 1, self.n_cos).to(self.device)

    q_input_dim = features_dim + get_action_dim(self.action_space)
    layers = create_mlp(q_input_dim, 1, net_arch, activation_fn)

    self.net_head = nn.Sequential(*layers[0:2])
    self.net_cos_embedding = nn.Sequential(
        nn.Linear(self.n_cos, self.first_hidden_size),
        activation_fn(),
    )
    self.net_out = nn.Sequential(*layers[2:])

    # Explicit registration under the same names.
    # NOTE(review): presumably redundant if the base class is an nn.Module,
    # since attribute assignment already registers sub-modules - confirm.
    for module_name in ("net_head", "net_cos_embedding", "net_out"):
        self.add_module(module_name, getattr(self, module_name))
def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    mask_policy: int = 0,
    normalize_images: bool = True,
):
    """
    Build the deterministic actor network ``mu`` and a feature mask.

    ``policy_mask`` has one entry per feature: True everywhere except the
    last ``mask_policy`` entries, which are False. It is stored as a tensor.

    :param observation_space: observation space, forwarded to the parent constructor
    :param action_space: action space; its dimension is the output size of ``mu``
    :param net_arch: hidden layer sizes of the actor MLP
    :param features_extractor: module forwarded to the parent constructor
    :param features_dim: width of the extracted features and length of the mask
    :param activation_fn: activation class passed to ``create_mlp``
    :param mask_policy: number of trailing features marked False in the mask
    :param normalize_images: forwarded to the parent constructor
    """
    super(Actor, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        squash_output=True,
    )

    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn

    # Deterministic action
    layers = create_mlp(
        features_dim,
        get_action_dim(self.action_space),
        net_arch,
        activation_fn,
        squash_output=True,
    )
    self.mu = nn.Sequential(*layers)

    # Start with every feature enabled, then switch off the trailing ones.
    mask_flags = [True] * features_dim
    for steps_from_end in range(1, mask_policy + 1):
        mask_flags[-steps_from_end] = False
    self.policy_mask = th.Tensor(mask_flags)
def process(file):
    """
    Run a saved A2C agent for one episode and plot the resulting trajectory.

    The agent is evaluated on ``PerigeeRaising-Continuous3D-v0`` (seed 42) with
    observations divided by the observation space's upper bound. Four PDF
    figures are written to the current directory: ``plan_ra.pdf``,
    ``plan_rp.pdf``, ``plan_m.pdf`` and ``plan_action.pdf``.

    :param file: path of the saved agent, passed to ``A2C.load``
    """
    env = gym.make('PerigeeRaising-Continuous3D-v0')
    # The lambda looks `env` up at call time, i.e. after it has been rebound to
    # the wrappers below; `.unwrapped` is used to reach the underlying env.
    env = NormalizeObservationSpace(env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)

    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)

    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action

    start_date = hist_sc_state[0].getDate()
    hours = np.array([s.getDate().durationFrom(start_date) for s in hist_sc_state]) / 3600.0  # Convert to hours
    a = np.array([s.getA() for s in hist_sc_state]) / 1000.0  # Convert to km
    e = np.array([s.getE() for s in hist_sc_state])
    mass = np.array([s.getMass() for s in hist_sc_state])
    ra = a * (1.0 + e)
    rp = a * (1.0 - e)

    def _open_figure(ylabel, ylim, offset=None):
        # Create one axes with the layout shared by all four figures.
        fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
        if offset is None:
            axs.ticklabel_format(axis='y', style='plain')
        else:
            axs.ticklabel_format(axis='y', style='plain', useOffset=offset)
        axs.set_xlim(hours[0], hours[-1])
        axs.set_ylim(*ylim)
        axs.grid(True)
        axs.set_xlabel("time (h)")
        axs.set_ylabel(ylabel)
        return fig, axs

    def _save_figure(fig, filename):
        # Tighten the layout, write the PDF and release the figure.
        plt.tight_layout()
        fig.savefig(filename, format="pdf")
        plt.close(fig)

    fig, axs = _open_figure("ra (km)", (ra[0] - 20.0, ra[0] + 20.0), offset=ra[0])
    axs.plot(hours, ra, "k")
    _save_figure(fig, "plan_ra.pdf")

    fig, axs = _open_figure("rp (km)", (rp[0] - 5.0, rp[0] + 35.0), offset=rp[0])
    axs.plot(hours, rp, "k")
    _save_figure(fig, "plan_rp.pdf")

    fig, axs = _open_figure("mass (kg)", (mass[0] - 0.04, mass[0]), offset=mass[0])
    axs.plot(hours, mass, "k")
    _save_figure(fig, "plan_m.pdf")

    fig, axs = _open_figure("action", (-1.3, 1.3))
    # The action history is plotted against all time samples but the last.
    # The three-way unpacking requires exactly three action components.
    l1, l2, l3 = axs.plot(hours[0:-1], hist_action, "k")
    l1.set_color("#000000")
    l2.set_color("#777777")
    l3.set_color("#BBBBBB")
    axs.legend(["Act1", "Act2", "Act3"], loc='upper left')
    _save_figure(fig, "plan_action.pdf")
def process(file):
    """
    Fly a saved A2C agent from two reference states and plot both orbits.

    Each rollout uses a fresh ``PerigeeRaising-Continuous3D-v0`` environment
    (seed 42) and a freshly loaded agent. The first rollout zeroes entries
    2-4 of the environment's ``_ref_sv``; the second also overrides entries 0
    and 1. The X/Y positions (km) of both rollouts are drawn over
    ``earth.png`` and saved as ``orbit.pdf``. The first rollout is labelled
    "Before" and the second "After".

    :param file: path of the saved agent, passed to ``A2C.load``
    """

    def _fly(ref_sv_overrides):
        # Run one evaluation episode and return the X and Y positions in km.
        env = gym.make('PerigeeRaising-Continuous3D-v0')
        for index, value in ref_sv_overrides:
            env.unwrapped._ref_sv[index] = value
        # The lambda looks `env` up at call time, i.e. after it has been
        # rebound to the wrappers below; `.unwrapped` reaches the underlying env.
        env = NormalizeObservationSpace(
            env, lambda o: o / env.unwrapped.observation_space.high)
        env = Monitor(env)
        env.seed(42)

        agent = A2C.load(file)
        agent.policy.action_dist = SquashedDiagGaussianDistribution(
            get_action_dim(env.action_space))
        evaluate_policy(agent, env, n_eval_episodes=1)

        positions = [
            sc_state.getPVCoordinates().getPosition()
            for sc_state in env.unwrapped.hist_sc_state
        ]
        xs = np.array([p.getX() for p in positions]) / 1000.0  # Convert to km
        ys = np.array([p.getY() for p in positions]) / 1000.0  # Convert to km
        return xs, ys

    x, y = _fly([(2, 0.0), (3, 0.0), (4, 0.0)])
    x2, y2 = _fly([
        (0, 11000000.0 / 1.05),
        (1, 0.05),
        (2, 0.0),
        (3, 0.0),
        (4, 0.0),
    ])

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.set_xlim(-12000, 12000)
    axs.set_ylim(-12000, 12000)
    axs.grid(False)
    # Legend labels follow plotting order: first rollout, then second.
    axs.plot(x, y, "k", zorder=2)
    l2, = axs.plot(x2, y2, zorder=1)
    l2.set_color("#777777")
    axs.legend(["Before", "After"],
               loc='upper right',
               frameon=False,
               bbox_to_anchor=(0.0, 1.0))
    im = mpimg.imread('earth.png')
    plt.imshow(im, extent=[-6400, 6400, -6400, 6400], interpolation="none")
    axs.set_aspect('equal')
    plt.text(11000, 0, "Pericenter")
    plt.text(-18500, 0, "Apocenter")
    plt.axis('off')
    plt.tight_layout()
    fig.savefig("orbit.pdf", format="pdf")
    plt.close(fig)
def process(file):
    """
    Compare a saved A2C agent's perturbed rollout with its unperturbed one.

    The agent is evaluated for one episode (seed 42) on
    ``PerigeeRaising-Continuous3D-v0`` twice, each time with a freshly loaded
    agent. The first run passes ``use_perturbations=True, perturb_action=True``
    and is labelled "Real"; the second uses the default settings and is
    labelled "Planned". Four PDF figures are written to the current directory:
    ``real_ra.pdf``, ``real_rp.pdf``, ``real_m.pdf`` and ``real_action.pdf``.
    The action figure shows the first run only.

    :param file: path of the saved agent, passed to ``A2C.load``
    """

    def _fly(**make_kwargs):
        # Run one evaluation episode and return the recorded time series.
        env = gym.make('PerigeeRaising-Continuous3D-v0', **make_kwargs)
        # The lambda looks `env` up at call time, i.e. after it has been
        # rebound to the wrappers below; `.unwrapped` reaches the underlying env.
        env = NormalizeObservationSpace(
            env, lambda o: o / env.unwrapped.observation_space.high)
        env = Monitor(env)
        env.seed(42)

        agent = A2C.load(file)
        agent.policy.action_dist = SquashedDiagGaussianDistribution(
            get_action_dim(env.action_space))
        evaluate_policy(agent, env, n_eval_episodes=1)

        states = env.unwrapped.hist_sc_state
        start_date = states[0].getDate()
        hours = np.array(
            [s.getDate().durationFrom(start_date) for s in states]) / 3600.0  # Convert to hours
        sma = np.array([s.getA() for s in states]) / 1000.0  # Convert to km
        ecc = np.array([s.getE() for s in states])
        return {
            "time": hours,
            "ra": sma * (1.0 + ecc),
            "rp": sma * (1.0 - ecc),
            "mass": np.array([s.getMass() for s in states]),
            "action": env.unwrapped.hist_action,
        }

    real = _fly(use_perturbations=True, perturb_action=True)
    planned = _fly()

    def _open_figure(ylabel, ylim, offset=None):
        # Create one axes with the layout shared by all four figures.
        fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
        if offset is None:
            axs.ticklabel_format(axis='y', style='plain')
        else:
            axs.ticklabel_format(axis='y', style='plain', useOffset=offset)
        axs.set_xlim(real["time"][0], real["time"][-1])
        axs.set_ylim(*ylim)
        axs.grid(True)
        axs.set_xlabel("time (h)")
        axs.set_ylabel(ylabel)
        return fig, axs

    def _save_figure(fig, filename):
        # Tighten the layout, write the PDF and release the figure.
        plt.tight_layout()
        fig.savefig(filename, format="pdf")
        plt.close(fig)

    def _plot_comparison(key, ylabel, ylim, legend_loc, filename):
        # Draw the planned series (grey dashed) under the real one (black).
        fig, axs = _open_figure(ylabel, ylim, offset=real[key][0])
        l2, = axs.plot(planned["time"], planned[key], "--")
        l2.set_color("#777777")
        axs.plot(real["time"], real[key], "k")
        axs.legend(["Planned", "Real"], loc=legend_loc)
        _save_figure(fig, filename)

    ra0 = real["ra"][0]
    rp0 = real["rp"][0]
    mass0 = real["mass"][0]
    _plot_comparison("ra", "ra (km)", (ra0 - 20.0, ra0 + 20.0), 'upper left', "real_ra.pdf")
    _plot_comparison("rp", "rp (km)", (rp0 - 5.0, rp0 + 35.0), 'upper left', "real_rp.pdf")
    _plot_comparison("mass", "mass (kg)", (mass0 - 0.04, mass0), 'upper right', "real_m.pdf")

    fig, axs = _open_figure("action", (-1.3, 1.3))
    # The action history is plotted against all time samples but the last.
    # The three-way unpacking requires exactly three action components.
    l1, l2, l3 = axs.plot(real["time"][0:-1], real["action"])
    l1.set_color("#000000")
    l2.set_color("#777777")
    l3.set_color("#BBBBBB")
    axs.legend(["Act1", "Act2", "Act3"], loc='upper left')
    _save_figure(fig, "real_action.pdf")