def __init__(self, observation_space: gym.spaces.Space, action_space: gym.spaces.Space,
             net_arch: List[int], features_extractor: nn.Module, features_dim: int,
             activation_fn: Type[nn.Module] = nn.ReLU, use_sde: bool = False,
             log_std_init: float = -3, full_std: bool = True,
             sde_net_arch: Optional[List[int]] = None, use_expln: bool = False,
             clip_mean: float = 2.0, normalize_images: bool = True,
             device: Union[th.device, str] = 'auto'):
    super(Actor, self).__init__(observation_space, action_space,
                                features_extractor=features_extractor,
                                normalize_images=normalize_images,
                                device=device, squash_output=True)

    # Save arguments to re-create object at loading
    self.use_sde = use_sde
    self.sde_features_extractor = None
    self.sde_net_arch = sde_net_arch
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    self.log_std_init = log_std_init
    self.use_expln = use_expln
    self.full_std = full_std
    self.clip_mean = clip_mean

    action_dim = get_action_dim(self.action_space)
    latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
    self.latent_pi = nn.Sequential(*latent_pi_net)
    last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

    if self.use_sde:
        latent_sde_dim = last_layer_dim
        # Separate feature extractor for gSDE
        if sde_net_arch is not None:
            self.sde_features_extractor, latent_sde_dim = create_sde_features_extractor(
                features_dim, sde_net_arch, activation_fn)
        self.action_dist = StateDependentNoiseDistribution(
            action_dim, full_std=full_std, use_expln=use_expln,
            learn_features=True, squash_output=True)
        self.mu, self.log_std = self.action_dist.proba_distribution_net(
            latent_dim=last_layer_dim, latent_sde_dim=latent_sde_dim,
            log_std_init=log_std_init)
        # Avoid numerical issues by limiting the mean of the Gaussian
        # to be in [-clip_mean, clip_mean]
        if clip_mean > 0.0:
            self.mu = nn.Sequential(self.mu, nn.Hardtanh(min_val=-clip_mean,
                                                         max_val=clip_mean))
    else:
        self.action_dist = SquashedDiagGaussianDistribution(action_dim)
        self.mu = nn.Linear(last_layer_dim, action_dim)
        self.log_std = nn.Linear(last_layer_dim, action_dim)
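# Hedged sketch (not part of the original class): how the modules built above
# combine at inference time for the non-gSDE branch. `actor` is an Actor
# instance and `features` a (batch, features_dim) float tensor; the helper
# name and wiring are illustrative, not the library's actual forward() code.
def _sketch_actor_forward(actor, features):
    latent_pi = actor.latent_pi(features)   # MLP trunk
    mean_actions = actor.mu(latent_pi)      # Gaussian mean head
    log_std = actor.log_std(latent_pi)      # per-dimension log std head
    # Sampling through SquashedDiagGaussianDistribution applies tanh,
    # so the returned action always lies in [-1, 1].
    return actor.action_dist.actions_from_params(mean_actions, log_std)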
def process(file, case_number, runs, verbose):
    print(f"File: {file}")
    agent = load_agent(file)
    # Use the file stem (basename without extension) as the agent name
    name = file.split('/')[-1].split('.')[0]
    print(f"Name: {name}")
    agent.name = name
    env = Monitor(get_env(case_number))
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    print(" --> Testing...")
    test_agent(agent, env, runs, verbose)
def create_agent(env, name, case_number, layers, verbose):
    # return DQN.load("./saved_agents/d1a_DQN_0.zip", env=env)
    agent = A2C(
        policy=MlpPolicy,
        env=env,
        learning_rate=1.0e-3,
        n_steps=env.unwrapped.envs[0].unwrapped._max_steps,
        gamma=1.0,
        gae_lambda=1.0,
        ent_coef=0.0,
        vf_coef=0.5,
        max_grad_norm=0.5,
        rms_prop_eps=1e-5,
        use_rms_prop=True,
        use_sde=False,
        sde_sample_freq=-1,
        normalize_advantage=False,
        tensorboard_log="./tblog",
        create_eval_env=True,
        policy_kwargs=dict(
            net_arch=[dict(vf=layers, pi=layers)],
            activation_fn=th.nn.LeakyReLU,
            ortho_init=True,
            log_std_init=1.0,
            full_std=True,
            sde_net_arch=None,
            use_expln=False,
            squash_output=True,
            features_extractor_class=FlattenExtractor,
            features_extractor_kwargs=dict(),
            normalize_images=False,
            optimizer_class=th.optim.Adam,
            optimizer_kwargs=dict(
                betas=(0.9, 0.999),
                eps=1e-8,
                weight_decay=0,
                amsgrad=False,
            ),
        ),
        verbose=verbose,
        seed=case_number,
        device="cpu",
        _init_setup_model=True,
    )
    agent.name = name
    # Replace the default Gaussian with a squashed (tanh) Gaussian so that
    # sampled actions always stay within the action-space bounds.
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    # Log the policy graph to TensorBoard using a dummy 8-dim observation
    writer = SummaryWriter(log_dir=agent.tensorboard_log + "/" + agent.name + "_1")
    writer.add_graph(agent.policy,
                     th.as_tensor(np.zeros((1, 8))).to(agent.policy.device))
    writer.close()
    return agent
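# Hedged usage sketch (not in the original source): one way create_agent might
# be wired up. `get_env` is the helper used by process() above; DummyVecEnv is
# assumed to come from stable_baselines3.common.vec_env, and the agent name,
# case number, and layer sizes below are illustrative placeholders.
def _sketch_train(case_number=0):
    from stable_baselines3.common.vec_env import DummyVecEnv
    # create_agent reads _max_steps through env.unwrapped.envs[0], so the env
    # must be vectorized before it is passed in
    env = DummyVecEnv([lambda: Monitor(get_env(case_number))])
    agent = create_agent(env, name="a2c_case0", case_number=case_number,
                         layers=[64, 64], verbose=1)
    agent.learn(total_timesteps=100_000)
    return agent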
def test_squashed_gaussian(model_class):
    """
    Test run with squashed Gaussian (notably entropy computation)
    """
    model = model_class('MlpPolicy', 'Pendulum-v0', use_sde=True, n_steps=100,
                        policy_kwargs=dict(squash_output=True))
    model.learn(500)

    gaussian_mean = th.rand(N_SAMPLES, N_ACTIONS)
    dist = SquashedDiagGaussianDistribution(N_ACTIONS)
    _, log_std = dist.proba_distribution_net(N_FEATURES)
    dist = dist.proba_distribution(gaussian_mean, log_std)
    actions = dist.get_actions()
    assert th.max(th.abs(actions)) <= 1.0
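# A self-contained sketch (not part of the original tests) of why the squashed
# Gaussian needs a log-probability correction: squashing u ~ N(mu, sigma)
# through a = tanh(u) adds a change-of-variables term -sum_i log(1 - a_i^2)
# to the Gaussian log-density. The epsilon below is an illustrative guard
# against log(0), analogous to the one used internally.
def _sketch_squashed_log_prob(mean, log_std, eps=1e-6):
    import torch as th
    base = th.distributions.Normal(mean, log_std.exp())
    u = base.rsample()                      # pre-squash Gaussian sample
    a = th.tanh(u)                          # squashed action in (-1, 1)
    log_prob = base.log_prob(u).sum(dim=-1)
    log_prob -= th.log(1.0 - a ** 2 + eps).sum(dim=-1)  # tanh correction
    return a, log_prob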
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0',
                   use_perturbations=True, perturb_action=True)
    env = NormalizeObservationSpace(
        env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)
    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    time = np.array(list(map(
        lambda sc_state: sc_state.getDate().durationFrom(hist_sc_state[0].getDate()),
        hist_sc_state))) / 3600.0  # Convert to hours
    a = np.array(list(map(lambda sc_state: sc_state.getA(),
                          hist_sc_state))) / 1000.0  # Convert to km
    e = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state)))
    mass = np.array(list(map(lambda sc_state: sc_state.getMass(), hist_sc_state)))
    ra = a * (1.0 + e)  # apocenter radius
    rp = a * (1.0 - e)  # pericenter radius

    env2 = gym.make('PerigeeRaising-Continuous3D-v0')
    env2 = NormalizeObservationSpace(
        env2, lambda o: o / env2.unwrapped.observation_space.high)
    env2 = Monitor(env2)
    env2.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env2, n_eval_episodes=1)
    hist_sc_state2 = env2.unwrapped.hist_sc_state
    hist_action2 = env2.unwrapped.hist_action
    time2 = np.array(list(map(
        lambda sc_state: sc_state.getDate().durationFrom(hist_sc_state2[0].getDate()),
        hist_sc_state2))) / 3600.0  # Convert to hours
    a2 = np.array(list(map(lambda sc_state: sc_state.getA(),
                           hist_sc_state2))) / 1000.0  # Convert to km
    e2 = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state2)))
    mass2 = np.array(list(map(lambda sc_state: sc_state.getMass(), hist_sc_state2)))
    ra2 = a2 * (1.0 + e2)
    rp2 = a2 * (1.0 - e2)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=ra[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(ra[0] - 20.0, ra[0] + 20.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("ra (km)")
    l2, = axs.plot(time2, ra2, "--")
    l2.set_color("#777777")
    axs.plot(time, ra, "k")
    axs.legend(["Planned", "Real"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_ra.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=rp[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(rp[0] - 5.0, rp[0] + 35.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("rp (km)")
    l2, = axs.plot(time2, rp2, "--")
    l2.set_color("#777777")
    axs.plot(time, rp, "k")
    axs.legend(["Planned", "Real"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_rp.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=mass[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(mass[0] - 0.04, mass[0])
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("mass (kg)")
    l2, = axs.plot(time2, mass2, "--")
    l2.set_color("#777777")
    axs.plot(time, mass, "k")
    axs.legend(["Planned", "Real"], loc='upper right')
    plt.tight_layout()
    fig.savefig("real_m.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain')
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(-1.3, 1.3)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("action")
    l1, l2, l3 = axs.plot(time[0:-1], hist_action)
    l1.set_color("#000000")
    l2.set_color("#777777")
    l3.set_color("#BBBBBB")
    axs.legend(["Act1", "Act2", "Act3"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_action.pdf", format="pdf")
    plt.close(fig)
    assert th.allclose(entropy.mean(), -log_prob.mean(), rtol=5e-3)


@pytest.mark.parametrize(
    "dist_type",
    [
        BernoulliDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS)),
        CategoricalDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS)),
        DiagGaussianDistribution(N_ACTIONS).proba_distribution(
            th.rand(N_ACTIONS), th.rand(N_ACTIONS)),
        MultiCategoricalDistribution([N_ACTIONS, N_ACTIONS]).proba_distribution(
            th.rand(1, sum([N_ACTIONS, N_ACTIONS]))),
        SquashedDiagGaussianDistribution(N_ACTIONS).proba_distribution(
            th.rand(N_ACTIONS), th.rand(N_ACTIONS)),
        StateDependentNoiseDistribution(N_ACTIONS).proba_distribution(
            th.rand(N_ACTIONS), th.rand([N_ACTIONS, N_ACTIONS]),
            th.rand([N_ACTIONS, N_ACTIONS])),
    ],
)
def test_kl_divergence(dist_type):
    set_random_seed(8)
    # Test 1: same distribution should have KL Div = 0
    dist1 = dist_type
    dist2 = dist_type
    # PyTorch implementation of kl_divergence doesn't sum across dimensions
    assert th.allclose(kl_divergence(dist1, dist2).sum(), th.tensor(0.0))
    # Test 2: KL Div = E(Unbiased approx KL Div)
    if isinstance(dist_type, CategoricalDistribution):
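# A self-contained sketch (not part of the original tests) of the unbiased
# estimator that Test 2 refers to: KL(p || q) = E_{x ~ p}[log p(x) - log q(x)],
# so averaging the log-ratio over samples from p approximates the exact value.
# The two Normal distributions below are arbitrary illustrative choices.
def _sketch_unbiased_kl(n_samples=100_000):
    import torch as th
    from torch.distributions import Normal, kl_divergence
    p = Normal(th.tensor(0.0), th.tensor(1.0))
    q = Normal(th.tensor(0.5), th.tensor(2.0))
    x = p.sample((n_samples,))                       # samples from p
    approx = (p.log_prob(x) - q.log_prob(x)).mean()  # Monte Carlo estimate
    exact = kl_divergence(p, q)
    return approx, exact  # approx converges to exact as n_samples grows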
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0')
    env.unwrapped._ref_sv[2] = 0.0
    env.unwrapped._ref_sv[3] = 0.0
    env.unwrapped._ref_sv[4] = 0.0
    env = NormalizeObservationSpace(
        env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)
    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    x = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getPosition().getX(),
        hist_sc_state))) / 1000.0  # Convert to km
    y = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getPosition().getY(),
        hist_sc_state))) / 1000.0  # Convert to km

    env2 = gym.make('PerigeeRaising-Continuous3D-v0')
    env2.unwrapped._ref_sv[0] = 11000000.0 / 1.05
    env2.unwrapped._ref_sv[1] = 0.05
    env2.unwrapped._ref_sv[2] = 0.0
    env2.unwrapped._ref_sv[3] = 0.0
    env2.unwrapped._ref_sv[4] = 0.0
    env2 = NormalizeObservationSpace(
        env2, lambda o: o / env2.unwrapped.observation_space.high)
    env2 = Monitor(env2)
    env2.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env2, n_eval_episodes=1)
    hist_sc_state2 = env2.unwrapped.hist_sc_state
    hist_action2 = env2.unwrapped.hist_action
    x2 = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getPosition().getX(),
        hist_sc_state2))) / 1000.0  # Convert to km
    y2 = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getPosition().getY(),
        hist_sc_state2))) / 1000.0  # Convert to km

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.set_xlim(-12000, 12000)
    axs.set_ylim(-12000, 12000)
    axs.grid(False)
    axs.plot(x, y, "k", zorder=2)
    l2, = axs.plot(x2, y2, zorder=1)
    l2.set_color("#777777")
    axs.legend(["Before", "After"], loc='upper right', frameon=False,
               bbox_to_anchor=(0.0, 1.0))
    im = mpimg.imread('earth.png')
    plt.imshow(im, extent=[-6400, 6400, -6400, 6400], interpolation="none")
    axs.set_aspect('equal')
    plt.text(11000, 0, "Pericenter")
    plt.text(-18500, 0, "Apocenter")
    plt.axis('off')
    plt.tight_layout()
    fig.savefig("orbit.pdf", format="pdf")
    plt.close(fig)
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0')
    env = NormalizeObservationSpace(
        env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)
    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    time = np.array(list(map(
        lambda sc_state: sc_state.getDate().durationFrom(hist_sc_state[0].getDate()),
        hist_sc_state))) / 3600.0  # Convert to hours
    a = np.array(list(map(lambda sc_state: sc_state.getA(),
                          hist_sc_state))) / 1000.0  # Convert to km
    e = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state)))
    mass = np.array(list(map(lambda sc_state: sc_state.getMass(), hist_sc_state)))
    ra = a * (1.0 + e)  # apocenter radius
    rp = a * (1.0 - e)  # pericenter radius
    v = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getVelocity().toArray(),
        hist_sc_state)))
    h = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getMomentum().toArray(),
        hist_sc_state)))
    # Angle between each thrust action and the velocity vector
    angle_f_v = list(map(
        lambda q: np.degrees(np.arccos(
            np.dot(q[0], q[1]) / np.linalg.norm(q[0])
            / (np.linalg.norm(q[1]) + 1e-10))),
        zip(v, hist_action)))
    # Project each thrust action onto the orbital plane
    # (remove its component along the momentum vector h)
    hist_action_plane = list(map(
        lambda q: q[1] - np.dot(q[1], q[0]) * q[0] / (np.linalg.norm(q[0]) ** 2),
        zip(h, hist_action)))
    angle_fp_v = list(map(
        lambda q: np.degrees(np.arccos(
            np.dot(q[0], q[1] * [1, 1, 0]) / np.linalg.norm(q[0])
            / (np.linalg.norm(q[1] * [1, 1, 0]) + 1e-10))),
        zip(v, hist_action_plane)))

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=ra[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(ra[0] - 20.0, ra[0] + 20.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("ra (km)")
    axs.plot(time, ra, "k")
    plt.tight_layout()
    fig.savefig("plan_ra.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=rp[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(rp[0] - 5.0, rp[0] + 35.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("rp (km)")
    axs.plot(time, rp, "k")
    plt.tight_layout()
    fig.savefig("plan_rp.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=mass[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(mass[0] - 0.04, mass[0])
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("mass (kg)")
    axs.plot(time, mass, "k")
    plt.tight_layout()
    fig.savefig("plan_m.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain')
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(-1.3, 1.3)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("action")
    l1, l2, l3 = axs.plot(time[0:-1], hist_action, "k")
    l1.set_color("#000000")
    l2.set_color("#777777")
    l3.set_color("#BBBBBB")
    axs.legend(["Act1", "Act2", "Act3"], loc='upper left')
    plt.tight_layout()
    fig.savefig("plan_action.pdf", format="pdf")
    plt.close(fig)
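# A self-contained sanity check (not in the original source) for the in-plane
# projection used above: removing the component of a thrust vector f along the
# momentum vector h leaves a vector orthogonal to h, so np.dot(f_plane, h) is
# approximately 0. The random vectors stand in for real h and action values.
def _sketch_check_plane_projection():
    import numpy as np
    rng = np.random.default_rng(0)
    h = rng.normal(size=3)       # stand-in for the orbital momentum vector
    f = rng.normal(size=3)       # stand-in for a thrust action
    f_plane = f - np.dot(f, h) * h / (np.linalg.norm(h) ** 2)
    assert abs(np.dot(f_plane, h)) < 1e-12
    return f_plane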