Example #1
def initialise_using_kernel_fun(
        kernel_fun: Callable[[tf.Tensor, tf.Tensor], tf.Tensor], Z: tf.Tensor,
        initial_mu: Optional[tf.Tensor] = None) \
        -> InducingPointGPSpecification:

    initial_L = get_initial_values_from_kernel(Z, kernel_fun)

    if initial_mu is None:
        initial_mu = tf.zeros(Z.shape[0])

    return InducingPointGPSpecification(mu=initial_mu,
                                        L_elts=initial_L,
                                        kernel_fun=kernel_fun,
                                        Z=Z)
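
Every example here revolves around get_initial_values_from_kernel, whose definition is not shown. A plausible sketch of such a helper, assuming it returns the lower-triangular Cholesky elements of the (jittered) kernel matrix at the inducing points Z; the signature and jitter value are guesses, not the project's actual code:

import tensorflow as tf

def get_initial_values_from_kernel(Z, kernel_fun, lo_tri=True, jitter=1e-5):
    # Kernel matrix at the inducing points, jittered for numerical stability.
    K = kernel_fun(Z, Z) + jitter * tf.eye(Z.shape[0], dtype=Z.dtype)
    L = tf.linalg.cholesky(K)
    if lo_tri:
        # Flatten the lower-triangular entries row by row (the L_elts format).
        mask = tf.linalg.band_part(tf.ones_like(L), -1, 0) > 0
        return tf.boolean_mask(L, mask)
    return L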
Example #2
def initialise_covariance_entries(kernel_creation_fun, flat_kernel_params,
                                  start_z):

    init_kernels = kernel_creation_fun(
        tf.constant(flat_kernel_params, dtype=DTYPE))
    start_cov_elts = list()

    for cur_kernel_fun in init_kernels:
        # Get the initial values
        cur_vals = get_initial_values_from_kernel(tf.constant(start_z,
                                                              dtype=DTYPE),
                                                  cur_kernel_fun,
                                                  lo_tri=True)
        start_cov_elts.append(cur_vals)

    start_cov_elts = tf.stack(start_cov_elts, axis=0).numpy()

    return start_cov_elts
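
A hedged usage sketch for the function above, assuming DTYPE = np.float32 and a kernel_creation_fun that maps a flat parameter vector to a list of kernel closures; make_rbf_kernels is illustrative, not part of the original code:

import numpy as np
import tensorflow as tf

DTYPE = np.float32

def make_rbf_kernels(flat_params):
    # One squared-exponential kernel closure per lengthscale in the vector.
    def rbf(a, b, lengthscale):
        sq = tf.reduce_sum((a[:, None, :] - b[None, :, :]) ** 2, axis=-1)
        return tf.exp(-0.5 * sq / lengthscale ** 2)
    return [lambda a, b, l=l: rbf(a, b, l) for l in tf.unstack(flat_params)]

start_z = np.random.randn(10, 2)
elts = initialise_covariance_entries(make_rbf_kernels,
                                     np.array([1.0, 2.0]), start_z)
print(elts.shape)  # (2, 55): one row of lower-triangular elements per kernel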
Example #3
def get_mogp_initial_values(n_cov, n_latent, n_inducing, n_species, Z):

    ms = tf.zeros((n_latent, n_inducing))

    start_lscales = tf.random.uniform((n_latent, n_cov), minval=np.sqrt(2),
                                      maxval=np.sqrt(4))
    # start_alphas = tf.random.uniform((n_latent,), minval=0.1, maxval=1.)
    start_alphas = tf.ones((n_latent,)) * tf.sqrt(0.1)

    start_kerns = [partial(matern_kernel_32, alpha=alpha, lengthscales=lscale,
                           jitter=JITTER)
                   for alpha, lscale in zip(start_alphas, start_lscales**2)]

    w_means = tf.random.normal((n_latent, n_species), stddev=0.01)
    w_vars = tf.ones((n_latent, n_species))

    init_ls = [tf.constant(get_initial_values_from_kernel(cur_z, cur_kern)) for
               cur_z, cur_kern in zip(Z, start_kerns)]

    return (ms, start_lscales, start_alphas, start_kerns, w_means, w_vars,
            init_ls)
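
The closures above wrap matern_kernel_32, which is defined elsewhere in the project. A hypothetical sketch of a Matern-3/2 kernel with that signature; the real implementation may handle jitter and broadcasting differently:

import tensorflow as tf

def matern_kernel_32(x1, x2, alpha, lengthscales, jitter=1e-5):
    # Scale each input dimension by its lengthscale, then take distances.
    diff = (x1[:, None, :] - x2[None, :, :]) / lengthscales
    r = tf.sqrt(tf.reduce_sum(diff ** 2, axis=-1) + 1e-12)
    sqrt3_r = 3.0 ** 0.5 * r
    K = alpha ** 2 * (1.0 + sqrt3_r) * tf.exp(-sqrt3_r)
    # Jitter the diagonal; the examples only rely on this for K(Z, Z).
    if x1.shape[0] == x2.shape[0]:
        K += jitter * tf.eye(x1.shape[0], dtype=K.dtype)
    return K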
Example #4
def fit(X: np.ndarray,
        y: np.ndarray,
        n_inducing: int = 100,
        n_latent: int = 10,
        kernel: str = 'matern_3/2',
        random_seed: int = 2):

    # TODO: This is copied from the mogp_classifier.
    # Maybe instead make it a function of some sort?
    np.random.seed(random_seed)

    # Note that input _must_ be scaled. Some way to enforce that?
    kernel_fun = kern_lookup[kernel]

    n_cov = X.shape[1]
    n_out = y.shape[1]

    # Set initial values
    start_lengthscales = np.random.uniform(2., 4., size=(n_latent, n_cov))

    Z = find_starting_z(X, n_inducing)
    Z = np.tile(Z, (n_latent, 1, 1))

    start_kernel_funs = get_kernel_funs(kernel_fun, start_lengthscales)

    init_Ls = np.stack([
        get_initial_values_from_kernel(cur_z, cur_kernel_fun)
        for cur_z, cur_kernel_fun in zip(Z, start_kernel_funs)
    ])

    init_ms = np.zeros((n_latent, n_inducing))

    start_prior_cov = np.eye(n_latent)
    start_prior_mean = np.zeros(n_latent)
    start_prior_cov_elts = corr_mogp.get_initial_w_elements(
        start_prior_mean, start_prior_cov, n_out)

    start_w_cov_elts = rep_vector(start_prior_cov_elts, n_out)

    init_w_means = np.random.randn(n_out, n_latent)

    start_theta = {
        'mu': init_ms,
        'L_elts': init_Ls,
        'w_means': init_w_means,
        'w_cov_elts': start_w_cov_elts,
        'lengthscales': np.sqrt(start_lengthscales),
        'w_prior_cov_elts': start_prior_cov_elts,
        'w_prior_mean': start_prior_mean,
        'Z': Z
    }

    flat_start_theta, summary = flatten_and_summarise_tf(**start_theta)

    X_tf = tf.constant(X.astype(np.float32))
    y_tf = tf.constant(y.astype(np.float32))

    def extract_cov_matrices(theta):

        w_covs = create_pos_def_mat_from_elts_batch(theta['w_cov_elts'],
                                                    n_latent,
                                                    n_out,
                                                    jitter=JITTER)

        Ls = mogp.create_ls(theta['L_elts'], n_inducing, n_latent)

        w_prior_cov = create_pos_def_mat_from_elts(theta['w_prior_cov_elts'],
                                                   n_latent,
                                                   jitter=JITTER)

        return w_covs, Ls, w_prior_cov

    def calculate_objective(theta):

        w_covs, Ls, w_prior_cov = extract_cov_matrices(theta)

        # Debug output: current prior correlation matrix and lengthscales.
        print(np.round(covar_to_corr(w_prior_cov.numpy()), 2))
        print(np.round(theta['lengthscales'].numpy()**2, 2))

        kernel_funs = get_kernel_funs(kernel_fun, theta['lengthscales']**2)

        cur_objective = corr_mogp.compute_default_objective(
            X_tf, y_tf, theta['Z'], theta['mu'], Ls, theta['w_means'], w_covs,
            kernel_funs, bernoulli_probit_lik, theta['w_prior_mean'],
            w_prior_cov)

        # Add prior
        lscale_prior = tfp.distributions.Gamma(3, 1 / 3).log_prob(
            theta['lengthscales']**2)

        return cur_objective + tf.reduce_sum(lscale_prior)

    def to_minimize(flat_theta):

        flat_theta = tf.constant(flat_theta)
        flat_theta = tf.cast(flat_theta, tf.float32)

        with tf.GradientTape() as tape:

            tape.watch(flat_theta)

            theta = reconstruct_tf(flat_theta, summary)

            objective = -calculate_objective(theta)

            grad = tape.gradient(objective, flat_theta)

        # Progress: objective value and gradient norm at each evaluation.
        print(objective, np.linalg.norm(grad.numpy()))

        return (objective.numpy().astype(np.float64),
                grad.numpy().astype(np.float64))

    result = minimize(to_minimize,
                      flat_start_theta,
                      jac=True,
                      method='L-BFGS-B')

    final_theta = reconstruct_tf(result.x.astype(np.float32), summary)

    w_covs, Ls, w_prior_cov = extract_cov_matrices(final_theta)

    return CorrelatedMOGPResult(
        Ls=Ls,
        mu=final_theta['mu'].numpy(),
        kernel=kernel,
        lengthscales=final_theta['lengthscales'].numpy()**2,
        w_means=final_theta['w_means'].numpy(),
        w_cov=w_covs.numpy(),
        Z=final_theta['Z'].numpy(),
        w_prior_means=final_theta['w_prior_mean'].numpy(),
        w_prior_cov=w_prior_cov.numpy())
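
This fit function (and the ones after it) round-trips parameters between a named dict and the flat vector scipy's L-BFGS-B works on via flatten_and_summarise_tf / reconstruct_tf. A hypothetical sketch of that pair, assuming the summary is just the stored shapes and that all tensors share a dtype:

import numpy as np
import tensorflow as tf

def flatten_and_summarise_tf(**tensors):
    # Record each array's shape, then concatenate everything into one vector.
    summary = {name: tf.convert_to_tensor(t).shape
               for name, t in tensors.items()}
    flat = tf.concat([tf.reshape(tf.convert_to_tensor(t), [-1])
                      for t in tensors.values()], axis=0)
    return flat, summary

def reconstruct_tf(flat, summary):
    # Slice the flat vector back into named tensors of the recorded shapes.
    flat = tf.convert_to_tensor(flat)
    out, start = {}, 0
    for name, shape in summary.items():
        size = int(np.prod(shape))
        out[name] = tf.reshape(flat[start:start + size], shape)
        start += size
    return out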
Example #5
def fit(
    X: np.ndarray,
    y: np.ndarray,
    n_inducing: int = 100,
    n_latent: int = 10,
    kernel: str = "matern_3/2",
    # Gamma priors (note tfp uses "concentration rate" parameterisation):
    kernel_lengthscale_prior: Tuple[float, float] = (3, 1 / 3),
    bias_variance_prior: Tuple[float, float] = (3 / 2, 3 / 2),
    w_variance_prior: Tuple[float, float] = (3 / 2, 3 / 2),
    # Normal priors
    w_mean_prior: Tuple[float, float] = (0, 1),
    bias_mean_prior: Tuple[float, float] = (0, 1),
    random_seed: int = 2,
    test_run: bool = False,
    total_kernel_variance=6.0,
    verbose=False,
) -> MOGPResult:

    np.random.seed(random_seed)

    # Note that input _must_ be scaled. Some way to enforce that?
    kernel_fun = kern_lookup[kernel]

    n_cov = X.shape[1]
    n_out = y.shape[1]

    # Set initial values
    start_lengthscales = np.random.uniform(2.0, 4.0, size=(n_latent, n_cov)).astype(
        np.float32
    )

    Z = find_starting_z(X, n_inducing)
    Z = np.tile(Z, (n_latent, 1, 1))
    Z = Z.astype(np.float32)

    start_kernel_funs = get_kernel_funs(
        kernel_fun,
        tf.constant(start_lengthscales),
        total_variance=tf.constant(total_kernel_variance),
    )

    init_Ls = np.stack(
        [
            get_initial_values_from_kernel(tf.constant(cur_z), cur_kernel_fun)
            for cur_z, cur_kernel_fun in zip(Z, start_kernel_funs)
        ]
    )

    init_ms = np.zeros((n_latent, n_inducing))
    w_prior_var_init = np.ones((n_latent, 1)) * 1.0
    w_prior_mean_init = np.zeros((n_latent, 1))

    start_intercept_means = np.zeros(n_out)
    start_intercept_var = np.ones(n_out)
    intercept_prior_var_init = np.array(0.4)

    init_theta = {
        "L_elts": init_Ls,
        "mu": init_ms,
        "w_prior_var": w_prior_var_init,
        "w_prior_mean": w_prior_mean_init,
        "intercept_means": start_intercept_means,
        "intercept_vars": start_intercept_var,
        "intercept_prior_var": intercept_prior_var_init,
        "intercept_prior_mean": np.array(0.0),
        "w_means": np.random.randn(n_latent, n_out) * 0.01,
        "w_vars": np.ones((n_latent, n_out)),
        "lscales": np.sqrt(start_lengthscales),
        "Z": Z,
    }

    # Cast everything to float32 tensors so the flat vector has one dtype
    init_theta = {k: tf.constant(v.astype(np.float32))
                  for k, v in init_theta.items()}

    flat_theta, summary = flatten_and_summarise_tf(**init_theta)

    X = tf.constant(X.astype(np.float32))
    y = tf.constant(y.astype(np.float32))

    lscale_prior = tfp.distributions.Gamma(*kernel_lengthscale_prior)
    bias_var_prior = tfp.distributions.Gamma(*bias_variance_prior)
    w_var_prior = tfp.distributions.Gamma(*w_variance_prior)

    w_m_prior = tfp.distributions.Normal(*w_mean_prior)
    bias_m_prior = tfp.distributions.Normal(*bias_mean_prior)

    # TODO: Think about priors for W?

    def to_minimize_with_grad(x):

        with tf.GradientTape() as tape:

            x_tf = tf.constant(x)
            x_tf = tf.cast(x_tf, tf.float32)

            tape.watch(x_tf)

            theta = reconstruct_tf(x_tf, summary)

            # Square the important parameters
            (lscales, w_prior_var, intercept_vars, intercept_prior_var, w_vars) = (
                theta["lscales"] ** 2,
                theta["w_prior_var"] ** 2,
                theta["intercept_vars"] ** 2,
                theta["intercept_prior_var"] ** 2,
                theta["w_vars"] ** 2,
            )

            if verbose:
                print(lscales)
                print(intercept_prior_var)
                print(w_prior_var)
                print(theta["w_prior_mean"])
                print(theta["intercept_prior_mean"])

            Ls = create_ls(theta["L_elts"], n_inducing, n_latent)

            kern_funs = get_kernel_funs(
                kernel_fun,
                lscales,
                total_variance=tf.constant(total_kernel_variance, dtype=tf.float32),
            )

            kl = compute_kl_term(
                theta["mu"],
                Ls,
                kern_funs,
                theta["Z"],
                theta["w_means"],
                w_vars,
                theta["w_prior_mean"],
                w_prior_var,
                theta["intercept_means"],
                intercept_vars,
                theta["intercept_prior_mean"],
                intercept_prior_var,
            )

            lik = compute_likelihood_term(
                X,
                y,
                theta["Z"],
                theta["mu"],
                Ls,
                kern_funs,
                theta["w_means"],
                w_vars,
                theta["intercept_means"],
                intercept_vars,
            )

            objective = -(lik - kl)

            objective = objective - (
                tf.reduce_sum(lscale_prior.log_prob(lscales))
                + bias_var_prior.log_prob(intercept_prior_var)
                + tf.reduce_sum(w_var_prior.log_prob(w_prior_var))
                + bias_m_prior.log_prob(theta["intercept_prior_mean"])
                + tf.reduce_sum(w_m_prior.log_prob(theta["w_prior_mean"]))
            )

            grad = tape.gradient(objective, x_tf)

        if verbose:
            print(objective, np.linalg.norm(grad.numpy()))

        return (objective.numpy().astype(np.float64), grad.numpy().astype(np.float64))

    if test_run:
        additional_args = {"tol": 1}
    else:
        additional_args = {}

    result = minimize(
        to_minimize_with_grad,
        flat_theta,
        jac=True,
        method="L-BFGS-B",
        **additional_args
    )

    final_theta = reconstruct_tf(result.x, summary)
    final_theta = {k: tf.cast(v, tf.float32) for k, v in final_theta.items()}

    # Build the results
    fit_result = MOGPResult(
        L_elts=final_theta["L_elts"],
        mu=final_theta["mu"],
        kernel=kernel,
        lengthscales=final_theta["lscales"] ** 2,
        intercept_means=final_theta["intercept_means"],
        intercept_vars=final_theta["intercept_vars"] ** 2,
        w_means=final_theta["w_means"],
        w_vars=final_theta["w_vars"] ** 2,
        Z=final_theta["Z"],
        w_prior_means=final_theta["w_prior_mean"],
        w_prior_vars=final_theta["w_prior_var"] ** 2,
        intercept_prior_mean=final_theta["intercept_prior_mean"],
        intercept_prior_var=final_theta["intercept_prior_var"] ** 2,
        total_kernel_variance=tf.constant(total_kernel_variance, tf.float32),
    )

    return fit_result
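
A recurring trick in these fit functions: positive quantities (lengthscales, variances) are stored as unconstrained square roots and squared inside the objective, so L-BFGS-B can search freely without bound constraints. A minimal, self-contained illustration of that reparameterisation:

import tensorflow as tf

raw = tf.sqrt(tf.constant(3.0))  # raw**2 == 3.0, the initial lengthscale
with tf.GradientTape() as tape:
    tape.watch(raw)
    lengthscale = raw ** 2               # guaranteed non-negative
    loss = (lengthscale - 2.0) ** 2      # toy objective
grad = tape.gradient(loss, raw)          # gradients flow through the square
print(lengthscale.numpy(), grad.numpy())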
Example #6
def fit(
    X: np.ndarray,
    y: np.ndarray,
    n_inducing: int = 100,
    kernel: str = "matern_3/2",
    # Gamma priors (note tfp uses "concentration rate" parameterisation):
    kernel_variance_prior: Tuple[float, float] = (3 / 2, 3 / 2),
    kernel_lengthscale_prior: Tuple[float, float] = (3, 1 / 3),
    bias_variance_prior: Tuple[float, float] = (3 / 2, 3 / 2),
    random_seed: int = 2,
    verbose: bool = False,
) -> SOGPResult:

    np.random.seed(random_seed)

    assert kernel in [
        "matern_3/2",
        "matern_1/2",
        "rbf",
    ], "Only these three kernels are currently supported!"

    # Note that input _must_ be scaled. Some way to enforce that?

    kernel_fun = kern_lookup[kernel]

    n_cov = X.shape[1]

    # Set initial values
    start_alpha = np.array(1.0, dtype=np.float32)
    start_lengthscales = np.random.uniform(2.0, 4.0,
                                           size=n_cov).astype(np.float32)
    start_bias_sd = np.array(1.0, dtype=np.float32)

    Z = find_starting_z(X, n_inducing).astype(np.float32)

    start_kernel_fun = get_kernel_fun(kernel_fun, start_alpha,
                                      start_lengthscales, start_bias_sd)

    init_L = get_initial_values_from_kernel(Z, start_kernel_fun)
    init_mu = np.zeros(n_inducing, dtype=np.float32)

    init_theta = {
        "L_elts": init_L,
        "mu": init_mu,
        "alpha": start_alpha,
        "lscales": np.sqrt(start_lengthscales),
        "Z": Z,
        "bias_sd": start_bias_sd,
    }

    flat_theta, summary = flatten_and_summarise_tf(**init_theta)

    X = tf.constant(X.astype(np.float32))
    y = tf.constant(y.astype(np.float32))

    lscale_prior = tfp.distributions.Gamma(*kernel_lengthscale_prior)
    kernel_var_prior = tfp.distributions.Gamma(*kernel_variance_prior)
    bias_var_prior = tfp.distributions.Gamma(*bias_variance_prior)

    def to_minimize_with_grad(x):

        with tf.GradientTape() as tape:

            x_tf = tf.constant(x)
            x_tf = tf.cast(x_tf, tf.float32)

            tape.watch(x_tf)

            theta = reconstruct_tf(x_tf, summary)

            alpha, lscales, bias_sd = (
                theta["alpha"]**2,
                theta["lscales"]**2,
                theta["bias_sd"]**2,
            )

            L_cov = lo_tri_from_elements(theta["L_elts"], n_inducing)

            kern_fun = get_kernel_fun(kernel_fun, alpha, lscales, bias_sd)

            objective = -compute_objective(X, y, theta["mu"], L_cov,
                                           theta["Z"], bernoulli_probit_lik,
                                           kern_fun)

            objective = objective - (
                tf.reduce_sum(lscale_prior.log_prob(lscales))
                + kernel_var_prior.log_prob(alpha**2)
                + bias_var_prior.log_prob(bias_sd**2))

            grad = tape.gradient(objective, x_tf)

        if verbose:
            print(objective, np.linalg.norm(grad.numpy()))

        return (objective.numpy().astype(np.float64),
                grad.numpy().astype(np.float64))

    result = minimize(to_minimize_with_grad,
                      flat_theta,
                      jac=True,
                      method="L-BFGS-B")

    final_theta = reconstruct_tf(result.x, summary)
    final_theta = {
        k: v.numpy().astype(np.float32)
        for k, v in final_theta.items()
    }

    # Build the results
    fit_result = SOGPResult(
        L_elts=final_theta["L_elts"],
        mu=final_theta["mu"],
        kernel=kernel,
        lengthscales=final_theta["lscales"]**2,
        alpha=final_theta["alpha"]**2,
        bias_sd=final_theta["bias_sd"]**2,
        Z=final_theta["Z"],
    )

    return fit_result
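
A hedged smoke test for the single-output fit above, assuming the module-level helpers it uses (kern_lookup, find_starting_z, compute_objective, bernoulli_probit_lik) are importable and that y is a flat vector of binary labels; the exact label encoding depends on bernoulli_probit_lik:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 2)).astype(np.float32)  # inputs must be scaled
y = (X[:, 0] + 0.5 * X[:, 1] > 0).astype(np.float32)

res = fit(X, y, n_inducing=20, kernel="rbf", verbose=True)
print(res.lengthscales, res.alpha, res.bias_sd)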