Example #1
# Bootstrap percentile interval; assumes `resample` and `pi` come from pyro.ops.stats
import torch
from pyro.ops.stats import pi, resample


def _(samples, estimator, repetitions=1000, probs=0.68):
    estimand = torch.zeros(repetitions)
    for i in range(repetitions):
        # draw a bootstrap sample of the same size, with replacement
        bootstrap_values = resample(samples,
                                    num_samples=len(samples),
                                    replacement=True)
        estimand[i] = estimator(bootstrap_values)
    # percentile interval (default 68%) over the bootstrap estimates
    return pi(estimand, probs)
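A minimal usage sketch (the data and the `torch.mean` estimator here are illustrative, not part of the original snippet):

samples = torch.randn(500) * 2.0 + 1.0   # hypothetical 1-D sample
interval = _(samples, torch.mean)        # 68% bootstrap interval for the mean
print(interval)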
Example #2
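# Parametrized test (replacement=True/False); `resample` is pyro.ops.stats.resample
# and `assert_equal` is Pyro's test helper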
def test_resample(replacement):
    x = torch.empty(10000, 2)
    x[:, 0].normal_(3, 4)
    x[:, 1].normal_(5, 6)

    num_samples = 5000
    y = resample(x, num_samples=num_samples, replacement=replacement)
    z = resample(x.t(),
                 num_samples=num_samples,
                 dim=1,
                 replacement=replacement)
    if not replacement:
        assert_equal(torch.unique(y.reshape(-1)).numel(), y.numel())
        assert_equal(torch.unique(z.reshape(-1)).numel(), z.numel())
    assert_equal(y.shape, torch.Size([num_samples, 2]))
    assert_equal(z.shape, torch.Size([2, num_samples]))
    assert_equal(y.mean(dim=0), torch.tensor([3.0, 5.0]), prec=0.2)
    assert_equal(z.mean(dim=1), torch.tensor([3.0, 5.0]), prec=0.2)
    assert_equal(y.std(dim=0), torch.tensor([4.0, 6.0]), prec=0.2)
    assert_equal(z.std(dim=1), torch.tensor([4.0, 6.0]), prec=0.2)
Example #3
# %%
# Assumed imports for this snippet (not shown in the original):
import matplotlib.pyplot as plt
import torch
from torch.nn import Parameter

import pyro
import pyro.contrib.gp as gp
import pyro.distributions as dist
import pyro.ops.stats as stats

# Put the embeddings into a tensor (the pandas DataFrame version is left commented out)
# y = pd.DataFrame(msg_embeddings)
y = torch.tensor(msg_embeddings, dtype=torch.get_default_dtype())

# Prior over X
k = 2  # Number of dimensions for latent space
# one latent point per column of y (SparseGPRegression takes y with shape (output_dim, N))
X_prior_mean = torch.zeros(y.size(1), k)

# Kernel definition
kernel = gp.kernels.RBF(input_dim=k, lengthscale=torch.ones(2))

# Clone the prior mean so the prior itself doesn't change during training
X = Parameter(X_prior_mean.clone())

# Initialize 32 inducing inputs by resampling rows of the prior mean
Xu = stats.resample(X_prior_mean.clone(), 32)
gplvm = gp.models.SparseGPRegression(X,
                                     y,
                                     kernel,
                                     Xu,
                                     noise=torch.tensor(0.01),
                                     jitter=1e-5)

# prior p(X) = Normal(X_prior_mean, 0.1) on the latent inputs, with a Normal autoguide for q(X)
gplvm.X = pyro.nn.PyroSample(dist.Normal(X_prior_mean, 0.1).to_event())
gplvm.autoguide("X", dist.Normal)

# %%
losses = gp.util.train(gplvm, num_steps=4000)
plt.plot(losses)
plt.show()
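After training, the learned latent coordinates can be read off the guide, following the same pattern used in Example #4 below (a sketch; `X_loc` is the variational mean that `autoguide` registers for `X`):

# %%
gplvm.mode = "guide"                      # switch from the model to the variational guide
X_latent = gplvm.X_loc.detach().numpy()   # mean of q(X), one 2-D point per observation
plt.scatter(X_latent[:, 0], X_latent[:, 1])
plt.show()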
Example #4
def run_gplvm(y, informative_prior=True):
    pyro.set_rng_seed(1)

    # the latent variables are X (called the "Latent Space" in the tutorial)
    # dim(X) = 2 to describe 2 aspects:
    #   + capture-time (1,2,4,8,32,64) (6 stages)
    #   + cell-branching types (TE, ICM, PE, EPI)

    # pin the capture-time feature to the x-axis
    # note that we are using the supervised information here
    capture_time = y.new_tensor([
        int(cell_name.split(" ")[0]) for cell_name in df.index.values
    ])  # in [1, 2, 4, 8, 32, 64]

    capture_time_normalized = capture_time.log2() / 6  # in range [0, 1]

    # optionally corrupt this supervised info, e.g., keep only 10% of it
    # print(capture_time_normalized.shape)
    # mask = torch.randint(
    #     low=0,
    #     high=capture_time_normalized.size(0),
    #     size=(int(0.9 * capture_time_normalized.size(0)),),
    # )
    # capture_time_normalized[mask] = -0.1

    # set up the mean of the prior over X
    X_prior_mean = torch.zeros(y.size(1), 2)  # n_observations x latent_dim
    if informative_prior:
        X_prior_mean[:, 0] = capture_time_normalized

    # note that X has 2 features:
    # the prior of the first feature is set to capture_time_normalized (this is only
    # the prior; it will change in the posterior), while the second feature has a
    # zero-mean prior and is inferred "from scratch"

    # construction of a Sparse Gaussian Process

    # RBF kernel
    kernel = gp.kernels.RBF(input_dim=2, lengthscale=torch.ones(2))

    # define X as a Parameter so it can be optimized and we can set a prior and guide on it
    X = Parameter(X_prior_mean.clone())

    # build a sparse GP with 32 inducing points
    Xu = stats.resample(X_prior_mean.clone(), 32)
    gplvm = gp.models.SparseGPRegression(X,
                                         y,
                                         kernel,
                                         Xu=Xu,
                                         noise=torch.tensor(0.01),
                                         jitter=1e-5)

    # set prior and guide for the GP-LVM
    gplvm.set_prior("X", dist.Normal(X_prior_mean, 0.1).to_event())
    gplvm.autoguide("X", dist.Normal)

    # Inference: train the GP with gp.util.train, which uses VI with Adam (lr=0.01)
    t = time.time()
    print("Start training")
    losses = gp.util.train(gplvm, num_steps=4000)
    print(f"Training GP-LVM in {time.time() - t} seconds")

    plt.plot(losses)
    plt.savefig("./plots/gplvm_losses.png")

    # now the mean and std of X (in q(X) ~ p(X|y)) are stored in X_loc and X_scale
    # important: to draw samples from q(X), set the `mode` of `gplvm` to "guide"
    gplvm.mode = "guide"  # default: "model"
    X = gplvm.X_loc.detach().numpy()

    viz(X, name="gplvm")
    # viz_bokeh(X, name=("gplvm_with_prior" if informative_prior
    #                    else "gplvm_non_informative_prior"))
    return X
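A minimal usage sketch (assuming `y` is the expression tensor with one column per cell, i.e. `y.size(1)` matches the rows of `df`, as the function expects):

X_informative = run_gplvm(y, informative_prior=True)
X_uninformative = run_gplvm(y, informative_prior=False)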