def __init__(self, observation_space: gym.spaces.Space, action_space: gym.spaces.Space,
             net_arch: List[int], features_extractor: nn.Module, features_dim: int,
             activation_fn: Type[nn.Module] = nn.ReLU, use_sde: bool = False,
             log_std_init: float = -3, full_std: bool = True,
             sde_net_arch: Optional[List[int]] = None, use_expln: bool = False,
             clip_mean: float = 2.0, normalize_images: bool = True,
             device: Union[th.device, str] = 'auto'):
    super(Actor, self).__init__(observation_space, action_space,
                                features_extractor=features_extractor,
                                normalize_images=normalize_images,
                                device=device, squash_output=True)

    # Save arguments to re-create object at loading
    self.use_sde = use_sde
    self.sde_features_extractor = None
    self.sde_net_arch = sde_net_arch
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    self.log_std_init = log_std_init
    self.use_expln = use_expln
    self.full_std = full_std
    self.clip_mean = clip_mean

    action_dim = get_action_dim(self.action_space)
    latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
    self.latent_pi = nn.Sequential(*latent_pi_net)
    last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

    if self.use_sde:
        latent_sde_dim = last_layer_dim
        # Separate feature extractor for gSDE
        if sde_net_arch is not None:
            self.sde_features_extractor, latent_sde_dim = create_sde_features_extractor(
                features_dim, sde_net_arch, activation_fn)
        self.action_dist = StateDependentNoiseDistribution(
            action_dim, full_std=full_std, use_expln=use_expln,
            learn_features=True, squash_output=True)
        self.mu, self.log_std = self.action_dist.proba_distribution_net(
            latent_dim=last_layer_dim, latent_sde_dim=latent_sde_dim,
            log_std_init=log_std_init)
        # Avoid numerical issues by limiting the mean of the Gaussian
        # to be in [-clip_mean, clip_mean]
        if clip_mean > 0.0:
            self.mu = nn.Sequential(self.mu, nn.Hardtanh(min_val=-clip_mean,
                                                         max_val=clip_mean))
    else:
        self.action_dist = SquashedDiagGaussianDistribution(action_dim)
        self.mu = nn.Linear(last_layer_dim, action_dim)
        self.log_std = nn.Linear(last_layer_dim, action_dim)
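# Hedged sketch (not part of the original class): how the modules built above
# combine at inference time for the non-gSDE branch. `actor` is an Actor
# instance and `features` a (batch, features_dim) float tensor; the helper
# name and wiring are illustrative, not the library's actual forward() code.
def _sketch_actor_forward(actor, features):
    latent_pi = actor.latent_pi(features)   # MLP trunk
    mean_actions = actor.mu(latent_pi)      # Gaussian mean head
    log_std = actor.log_std(latent_pi)      # per-dimension log std head
    # Sampling through SquashedDiagGaussianDistribution applies tanh,
    # so the returned action always lies in [-1, 1].
    return actor.action_dist.actions_from_params(mean_actions, log_std)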
def process(file, case_number, runs, verbose):
    print(f"File: {file}")
    agent = load_agent(file)
    # Use the file stem (basename without extension) as the agent name
    name = file.split('/')[-1].split('.')[0]
    print(f"Name: {name}")
    agent.name = name
    env = Monitor(get_env(case_number))
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    print(" --> Testing...")
    test_agent(agent, env, runs, verbose)
def create_agent(env, name, case_number, layers, verbose):
    # return DQN.load("./saved_agents/d1a_DQN_0.zip", env=env)
    agent = A2C(
        policy=MlpPolicy,
        env=env,
        learning_rate=1.0e-3,
        n_steps=env.unwrapped.envs[0].unwrapped._max_steps,
        gamma=1.0,
        gae_lambda=1.0,
        ent_coef=0.0,
        vf_coef=0.5,
        max_grad_norm=0.5,
        rms_prop_eps=1e-5,
        use_rms_prop=True,
        use_sde=False,
        sde_sample_freq=-1,
        normalize_advantage=False,
        tensorboard_log="./tblog",
        create_eval_env=True,
        policy_kwargs=dict(
            net_arch=[dict(vf=layers, pi=layers)],
            activation_fn=th.nn.LeakyReLU,
            ortho_init=True,
            log_std_init=1.0,
            full_std=True,
            sde_net_arch=None,
            use_expln=False,
            squash_output=True,
            features_extractor_class=FlattenExtractor,
            features_extractor_kwargs=dict(),
            normalize_images=False,
            optimizer_class=th.optim.Adam,
            optimizer_kwargs=dict(
                betas=(0.9, 0.999),
                eps=1e-8,
                weight_decay=0,
                amsgrad=False,
            ),
        ),
        verbose=verbose,
        seed=case_number,
        device="cpu",
        _init_setup_model=True,
    )
    agent.name = name
    # Replace the default Gaussian with a squashed (tanh) Gaussian so that
    # sampled actions always stay within the action-space bounds.
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    # Log the policy graph to TensorBoard using a dummy 8-dim observation
    writer = SummaryWriter(log_dir=agent.tensorboard_log + "/" + agent.name + "_1")
    writer.add_graph(agent.policy,
                     th.as_tensor(np.zeros((1, 8))).to(agent.policy.device))
    writer.close()
    return agent
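# Hedged usage sketch (not in the original source): one way create_agent might
# be wired up. `get_env` is the helper used by process() above; DummyVecEnv is
# assumed to come from stable_baselines3.common.vec_env, and the agent name,
# case number, and layer sizes below are illustrative placeholders.
def _sketch_train(case_number=0):
    from stable_baselines3.common.vec_env import DummyVecEnv
    # create_agent reads _max_steps through env.unwrapped.envs[0], so the env
    # must be vectorized before it is passed in
    env = DummyVecEnv([lambda: Monitor(get_env(case_number))])
    agent = create_agent(env, name="a2c_case0", case_number=case_number,
                         layers=[64, 64], verbose=1)
    agent.learn(total_timesteps=100_000)
    return agent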
def test_squashed_gaussian(model_class):
    """
    Test run with squashed Gaussian (notably entropy computation)
    """
    model = model_class('MlpPolicy', 'Pendulum-v0', use_sde=True, n_steps=100,
                        policy_kwargs=dict(squash_output=True))
    model.learn(500)

    gaussian_mean = th.rand(N_SAMPLES, N_ACTIONS)
    dist = SquashedDiagGaussianDistribution(N_ACTIONS)
    _, log_std = dist.proba_distribution_net(N_FEATURES)
    dist = dist.proba_distribution(gaussian_mean, log_std)
    actions = dist.get_actions()
    assert th.max(th.abs(actions)) <= 1.0
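# A self-contained sketch (not part of the original tests) of why the squashed
# Gaussian needs a log-probability correction: squashing u ~ N(mu, sigma)
# through a = tanh(u) adds a change-of-variables term -sum_i log(1 - a_i^2)
# to the Gaussian log-density. The epsilon below is an illustrative guard
# against log(0), analogous to the one used internally.
def _sketch_squashed_log_prob(mean, log_std, eps=1e-6):
    import torch as th
    base = th.distributions.Normal(mean, log_std.exp())
    u = base.rsample()                      # pre-squash Gaussian sample
    a = th.tanh(u)                          # squashed action in (-1, 1)
    log_prob = base.log_prob(u).sum(dim=-1)
    log_prob -= th.log(1.0 - a ** 2 + eps).sum(dim=-1)  # tanh correction
    return a, log_prob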
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0',
                   use_perturbations=True, perturb_action=True)
    env = NormalizeObservationSpace(
        env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)
    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    time = np.array(list(map(
        lambda sc_state: sc_state.getDate().durationFrom(hist_sc_state[0].getDate()),
        hist_sc_state))) / 3600.0  # Convert to hours
    a = np.array(list(map(lambda sc_state: sc_state.getA(),
                          hist_sc_state))) / 1000.0  # Convert to km
    e = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state)))
    mass = np.array(list(map(lambda sc_state: sc_state.getMass(), hist_sc_state)))
    ra = a * (1.0 + e)  # apocenter radius
    rp = a * (1.0 - e)  # pericenter radius

    env2 = gym.make('PerigeeRaising-Continuous3D-v0')
    env2 = NormalizeObservationSpace(
        env2, lambda o: o / env2.unwrapped.observation_space.high)
    env2 = Monitor(env2)
    env2.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env2, n_eval_episodes=1)
    hist_sc_state2 = env2.unwrapped.hist_sc_state
    hist_action2 = env2.unwrapped.hist_action
    time2 = np.array(list(map(
        lambda sc_state: sc_state.getDate().durationFrom(hist_sc_state2[0].getDate()),
        hist_sc_state2))) / 3600.0  # Convert to hours
    a2 = np.array(list(map(lambda sc_state: sc_state.getA(),
                           hist_sc_state2))) / 1000.0  # Convert to km
    e2 = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state2)))
    mass2 = np.array(list(map(lambda sc_state: sc_state.getMass(), hist_sc_state2)))
    ra2 = a2 * (1.0 + e2)
    rp2 = a2 * (1.0 - e2)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=ra[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(ra[0] - 20.0, ra[0] + 20.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("ra (km)")
    l2, = axs.plot(time2, ra2, "--")
    l2.set_color("#777777")
    axs.plot(time, ra, "k")
    axs.legend(["Planned", "Real"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_ra.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=rp[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(rp[0] - 5.0, rp[0] + 35.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("rp (km)")
    l2, = axs.plot(time2, rp2, "--")
    l2.set_color("#777777")
    axs.plot(time, rp, "k")
    axs.legend(["Planned", "Real"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_rp.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=mass[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(mass[0] - 0.04, mass[0])
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("mass (kg)")
    l2, = axs.plot(time2, mass2, "--")
    l2.set_color("#777777")
    axs.plot(time, mass, "k")
    axs.legend(["Planned", "Real"], loc='upper right')
    plt.tight_layout()
    fig.savefig("real_m.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain')
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(-1.3, 1.3)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("action")
    l1, l2, l3 = axs.plot(time[0:-1], hist_action)
    l1.set_color("#000000")
    l2.set_color("#777777")
    l3.set_color("#BBBBBB")
    axs.legend(["Act1", "Act2", "Act3"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_action.pdf", format="pdf")
    plt.close(fig)
    assert th.allclose(entropy.mean(), -log_prob.mean(), rtol=5e-3)


@pytest.mark.parametrize(
    "dist_type",
    [
        BernoulliDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS)),
        CategoricalDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS)),
        DiagGaussianDistribution(N_ACTIONS).proba_distribution(
            th.rand(N_ACTIONS), th.rand(N_ACTIONS)),
        MultiCategoricalDistribution([N_ACTIONS, N_ACTIONS]).proba_distribution(
            th.rand(1, sum([N_ACTIONS, N_ACTIONS]))),
        SquashedDiagGaussianDistribution(N_ACTIONS).proba_distribution(
            th.rand(N_ACTIONS), th.rand(N_ACTIONS)),
        StateDependentNoiseDistribution(N_ACTIONS).proba_distribution(
            th.rand(N_ACTIONS), th.rand([N_ACTIONS, N_ACTIONS]),
            th.rand([N_ACTIONS, N_ACTIONS])),
    ],
)
def test_kl_divergence(dist_type):
    set_random_seed(8)
    # Test 1: same distribution should have KL Div = 0
    dist1 = dist_type
    dist2 = dist_type
    # PyTorch implementation of kl_divergence doesn't sum across dimensions
    assert th.allclose(kl_divergence(dist1, dist2).sum(), th.tensor(0.0))
    # Test 2: KL Div = E(Unbiased approx KL Div)
    if isinstance(dist_type, CategoricalDistribution):
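# A self-contained sketch (not part of the original tests) of the unbiased
# estimator that Test 2 refers to: KL(p || q) = E_{x ~ p}[log p(x) - log q(x)],
# so averaging the log-ratio over samples from p approximates the exact value.
# The two Normal distributions below are arbitrary illustrative choices.
def _sketch_unbiased_kl(n_samples=100_000):
    import torch as th
    from torch.distributions import Normal, kl_divergence
    p = Normal(th.tensor(0.0), th.tensor(1.0))
    q = Normal(th.tensor(0.5), th.tensor(2.0))
    x = p.sample((n_samples,))                       # samples from p
    approx = (p.log_prob(x) - q.log_prob(x)).mean()  # Monte Carlo estimate
    exact = kl_divergence(p, q)
    return approx, exact  # approx converges to exact as n_samples grows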
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0')
    env.unwrapped._ref_sv[2] = 0.0
    env.unwrapped._ref_sv[3] = 0.0
    env.unwrapped._ref_sv[4] = 0.0
    env = NormalizeObservationSpace(
        env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)
    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    x = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getPosition().getX(),
        hist_sc_state))) / 1000.0  # Convert to km
    y = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getPosition().getY(),
        hist_sc_state))) / 1000.0  # Convert to km

    env2 = gym.make('PerigeeRaising-Continuous3D-v0')
    env2.unwrapped._ref_sv[0] = 11000000.0 / 1.05
    env2.unwrapped._ref_sv[1] = 0.05
    env2.unwrapped._ref_sv[2] = 0.0
    env2.unwrapped._ref_sv[3] = 0.0
    env2.unwrapped._ref_sv[4] = 0.0
    env2 = NormalizeObservationSpace(
        env2, lambda o: o / env2.unwrapped.observation_space.high)
    env2 = Monitor(env2)
    env2.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env2, n_eval_episodes=1)
    hist_sc_state2 = env2.unwrapped.hist_sc_state
    hist_action2 = env2.unwrapped.hist_action
    x2 = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getPosition().getX(),
        hist_sc_state2))) / 1000.0  # Convert to km
    y2 = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getPosition().getY(),
        hist_sc_state2))) / 1000.0  # Convert to km

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.set_xlim(-12000, 12000)
    axs.set_ylim(-12000, 12000)
    axs.grid(False)
    axs.plot(x, y, "k", zorder=2)
    l2, = axs.plot(x2, y2, zorder=1)
    l2.set_color("#777777")
    axs.legend(["Before", "After"], loc='upper right', frameon=False,
               bbox_to_anchor=(0.0, 1.0))
    im = mpimg.imread('earth.png')
    plt.imshow(im, extent=[-6400, 6400, -6400, 6400], interpolation="none")
    axs.set_aspect('equal')
    plt.text(11000, 0, "Pericenter")
    plt.text(-18500, 0, "Apocenter")
    plt.axis('off')
    plt.tight_layout()
    fig.savefig("orbit.pdf", format="pdf")
    plt.close(fig)
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0')
    env = NormalizeObservationSpace(
        env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)
    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    time = np.array(list(map(
        lambda sc_state: sc_state.getDate().durationFrom(hist_sc_state[0].getDate()),
        hist_sc_state))) / 3600.0  # Convert to hours
    a = np.array(list(map(lambda sc_state: sc_state.getA(),
                          hist_sc_state))) / 1000.0  # Convert to km
    e = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state)))
    mass = np.array(list(map(lambda sc_state: sc_state.getMass(), hist_sc_state)))
    ra = a * (1.0 + e)  # apocenter radius
    rp = a * (1.0 - e)  # pericenter radius
    v = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getVelocity().toArray(),
        hist_sc_state)))
    h = np.array(list(map(
        lambda sc_state: sc_state.getPVCoordinates().getMomentum().toArray(),
        hist_sc_state)))
    # Angle between each thrust action and the velocity vector
    angle_f_v = list(map(
        lambda q: np.degrees(np.arccos(
            np.dot(q[0], q[1]) / np.linalg.norm(q[0])
            / (np.linalg.norm(q[1]) + 1e-10))),
        zip(v, hist_action)))
    # Project each thrust action onto the orbital plane
    # (remove its component along the momentum vector h)
    hist_action_plane = list(map(
        lambda q: q[1] - np.dot(q[1], q[0]) * q[0] / (np.linalg.norm(q[0]) ** 2),
        zip(h, hist_action)))
    angle_fp_v = list(map(
        lambda q: np.degrees(np.arccos(
            np.dot(q[0], q[1] * [1, 1, 0]) / np.linalg.norm(q[0])
            / (np.linalg.norm(q[1] * [1, 1, 0]) + 1e-10))),
        zip(v, hist_action_plane)))

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=ra[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(ra[0] - 20.0, ra[0] + 20.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("ra (km)")
    axs.plot(time, ra, "k")
    plt.tight_layout()
    fig.savefig("plan_ra.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=rp[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(rp[0] - 5.0, rp[0] + 35.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("rp (km)")
    axs.plot(time, rp, "k")
    plt.tight_layout()
    fig.savefig("plan_rp.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=mass[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(mass[0] - 0.04, mass[0])
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("mass (kg)")
    axs.plot(time, mass, "k")
    plt.tight_layout()
    fig.savefig("plan_m.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain')
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(-1.3, 1.3)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("action")
    l1, l2, l3 = axs.plot(time[0:-1], hist_action, "k")
    l1.set_color("#000000")
    l2.set_color("#777777")
    l3.set_color("#BBBBBB")
    axs.legend(["Act1", "Act2", "Act3"], loc='upper left')
    plt.tight_layout()
    fig.savefig("plan_action.pdf", format="pdf")
    plt.close(fig)
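# A self-contained sanity check (not in the original source) for the in-plane
# projection used above: removing the component of a thrust vector f along the
# momentum vector h leaves a vector orthogonal to h, so np.dot(f_plane, h) is
# approximately 0. The random vectors stand in for real h and action values.
def _sketch_check_plane_projection():
    import numpy as np
    rng = np.random.default_rng(0)
    h = rng.normal(size=3)       # stand-in for the orbital momentum vector
    f = rng.normal(size=3)       # stand-in for a thrust action
    f_plane = f - np.dot(f, h) * h / (np.linalg.norm(h) ** 2)
    assert abs(np.dot(f_plane, h)) < 1e-12
    return f_plane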