def param_maximum_likelihood(gpy_gp: GPy.models.GPRegression,
                             test_model: bool = True,
                             test_X: np.ndarray = None,
                             test_Y: np.ndarray = None,
                             variance_prior: GPy.priors.Prior = None,
                             lengthscale_prior: GPy.priors.Prior = None,
                             noise_prior: GPy.priors.Prior = None,
                             fix_noise_params=None):
    r"""
    Optimise the hyperparameters of a GP regression model.

    Without priors the optimised hyperparameters are the maximum likelihood
    estimate
    $
    \theta^* = argmin(-log(P(Y|\theta)))
    $
    If any of ``variance_prior``, ``lengthscale_prior`` or ``noise_prior`` is
    given, it is attached to the corresponding parameter before optimising and
    the optimised hyperparameters correspond to the MAP-II estimate
    $
    \theta^* = argmin(-log(P(Y|\theta)) - log(P(\theta)))
    $

    :param gpy_gp: An initialised GPy GPRegression object (modified in place)
    :param test_model: toggle whether to display the fitted model and evaluate
        it on ``test_X`` / ``test_Y`` with ``test_gp``
    :param test_X: test inputs, required when ``test_model`` is True
    :param test_Y: test targets, required when ``test_model`` is True
    :param variance_prior: optional prior set on the kernel variance
    :param lengthscale_prior: optional prior set on the kernel lengthscale
    :param noise_prior: optional prior set on the Gaussian noise variance
    :param fix_noise_params: if not None, this value is assigned to the
        Gaussian noise parameters and the noise variance is then fixed
    :return: tuple ``(gpy_gp, res, rmse, ll, elapsed)``. ``res`` is the list
        ``[kernel variance, kernel lengthscale, Gaussian noise variance]`` of
        the model's own parameter objects (not copies); the last element is
        the Gaussian noise hyperparameter. ``rmse`` and ``ll`` are whatever
        ``test_gp`` returns, or NaN when ``test_model`` is False. ``elapsed``
        is the wall-clock time in seconds spent optimising.
    :raises ValueError: if ``test_model`` is True but ``test_X`` or ``test_Y``
        is missing
    """
    if test_model and (test_X is None or test_Y is None):
        raise ValueError(
            "test_X and test_Y must both be provided when test_model is True")
    if variance_prior is not None:
        gpy_gp.kern.variance.set_prior(variance_prior)
    if lengthscale_prior is not None:
        gpy_gp.kern.lengthscale.set_prior(lengthscale_prior)
    if noise_prior is not None:
        gpy_gp.Gaussian_noise.variance.set_prior(noise_prior)
    if fix_noise_params is not None:
        gpy_gp.Gaussian_noise[:] = fix_noise_params
        gpy_gp.Gaussian_noise.variance.fix()

    start = time.time()
    # One plain optimisation run followed by restarts of the optimiser.
    gpy_gp.optimize(messages=True, max_iters=MLE_optimisation_iteration)
    gpy_gp.optimize_restarts(num_restarts=MLE_optimisation_restart)
    res = [
        gpy_gp.kern.variance, gpy_gp.kern.lengthscale,
        gpy_gp.Gaussian_noise.variance
    ]
    end = time.time()
    rmse, ll = np.nan, np.nan
    if test_model:
        # Any prior passed in (not only the noise prior) makes this a MAP fit.
        uses_prior = any(
            prior is not None
            for prior in (variance_prior, lengthscale_prior, noise_prior))
        model = 'MAP' if uses_prior else 'MLE'
        print("----------------- Testing " + model + " -------------------")
        print("Fix noise hyperparameter ?", fix_noise_params)
        print("Clock time: ", end - start)
        display(gpy_gp)
        rmse, ll = test_gp(
            gpy_gp,
            test_X,
            test_Y,
        )
    return gpy_gp, res, rmse, ll, end - start
def lin_shrinkage(gpy_gp: GPy.models.GPRegression,
                  test_model: bool = True,
                  test_X: np.ndarray = None,
                  test_Y: np.ndarray = None,
                  c: float = 1e-6,
                  plot_stuff=False):
    """
    Use linear shrinkage method to estimate the Gaussian noise hyperparameter.

    LogGaussian(mu=0, sigma=2) priors are placed on the kernel variance and
    lengthscale and the Gaussian noise is fixed. Each restart draws random
    starting kernel hyperparameters and then repeatedly:

    1. runs one optimiser iteration on the model,
    2. computes the eigenvalues of the kernel matrix on the training inputs,
    3. takes the median ``lambda_b`` of the eigenvalues kept as the "bulk",
    4. sets ``alpha = 1 / (1 + lambda_b)``, assigns ``1 - alpha`` to the noise
       variance and scales the kernel variance by ``alpha``.

    The parameters of the restart with the highest log likelihood are written
    back to the model. Before returning, the noise is unfixed and the priors
    are removed.

    :param gpy_gp: An initialised GPy GPRegression object (modified in place)
    :param test_model: toggle whether to display the fitted model and evaluate
        it on ``test_X`` / ``test_Y`` with ``test_gp``
    :param test_X: test inputs passed to ``test_gp``
    :param test_Y: test targets passed to ``test_gp``
    :param c: threshold on the absolute gap between consecutive sorted
        eigenvalues used when choosing where the bulk starts
    :param plot_stuff: if True, plot the log eigenvalues and a histogram of
        the eigenvalues after each restart
    :return: tuple ``(gpy_gp, final_params, rmse, ll, elapsed)``.
        ``final_params`` is the model parameter array of the best restart;
        ``rmse`` and ``ll`` are whatever ``test_gp`` returns, or NaN when
        ``test_model`` is False; ``elapsed`` is wall-clock seconds.
    """
    n_restarts = 1  # random restarts of the kernel hyperparameters
    n_inner_iters = 1000  # optimise / shrink rounds per restart

    start = time.time()
    train_x = gpy_gp.X
    priors = GPy.priors.LogGaussian(mu=0., sigma=2.)
    gpy_gp.kern.variance.set_prior(priors)
    gpy_gp.kern.lengthscale.set_prior(priors)
    kern_dim = 1 + gpy_gp.input_dim
    # One row per restart: [model parameter array..., log likelihood].
    # The table must have exactly as many rows as restarts: np.empty leaves
    # unfilled rows uninitialised, and the argmax below would otherwise be
    # free to pick such a row and push arbitrary values into the model.
    metrics = np.empty((n_restarts, 3 + gpy_gp.input_dim))
    gpy_gp.Gaussian_noise.fix()
    lml = -np.inf

    for j in range(n_restarts):
        # Random log-normal starting point for the kernel hyperparameters.
        theta = np.exp(
            multivariate_normal(mean=np.zeros((kern_dim, )),
                                cov=4 * np.eye(kern_dim)).rvs())
        for _ in range(n_inner_iters):
            gpy_gp.optimize(start=theta, max_iters=1)
            K = gpy_gp.kern.K(train_x)
            # Eigenvalues in descending order.
            eig = np.sort(np.linalg.eigvals(K).real)[::-1]

            delta_lambda = np.diff(eig)
            # NOTE(review): argmax is taken over the *filtered* gaps, so the
            # index is relative to the filtered array, not to ``eig`` --
            # confirm this is the intended outlier count.
            try:
                n_outlier = delta_lambda[np.abs(delta_lambda) > c].argmax()
            except ValueError:
                # No gap exceeds the threshold: keep the whole spectrum.
                n_outlier = 0
            eig_bulk = eig[n_outlier:]

            lambda_b = np.median(eig_bulk)
            alpha = 1. / (1. + lambda_b)
            gpy_gp.Gaussian_noise.variance[:] = (1 - alpha)
            gpy_gp.kern.variance[:] *= alpha

            # Next round starts from the current parameters minus the last
            # entry of the parameter array.
            theta = gpy_gp.param_array[:-1]
            lml = gpy_gp.log_likelihood()

        if plot_stuff:
            plt.subplot(211)
            plt.axvline(int(len(eig) / 2))
            plt.plot(np.log(eig), marker='.', linestyle='None')
            plt.subplot(212)
            plt.hist(eig[int(len(eig) / 5):], bins=80)
            plt.axvline(lambda_b)
            plt.show()

        print('LML After Optimisation Restart', gpy_gp.log_likelihood())
        print('Eig lambda_b/avg', lambda_b)
        metrics[j, :-1] = gpy_gp.param_array
        metrics[j, -1] = lml

    # Restore the parameters of the restart with the highest log likelihood.
    best_iter = metrics[:, -1].argmax()
    gpy_gp.kern.variance[:] = metrics[best_iter, 0]
    gpy_gp.kern.lengthscale[:] = metrics[best_iter, 1:-2]
    gpy_gp.Gaussian_noise.variance[:] = metrics[best_iter, -2]
    final_params = metrics[best_iter, :-1]
    end = time.time()
    rmse, ll = np.nan, np.nan
    if test_model:
        print("----------------- Testing Linear Shrinkage -------------------")
        display(gpy_gp)
        print("Clock time: ", end - start)
        rmse, ll = test_gp(
            gpy_gp,
            test_X,
            test_Y,
        )
    gpy_gp.Gaussian_noise.unfix()
    gpy_gp.unset_priors()
    return gpy_gp, final_params, rmse, ll, end - start
def param_thikonov(gpy_gp: GPy.models.GPRegression,
                   test_model: bool = True,
                   test_X: np.ndarray = None,
                   test_Y: np.ndarray = None,
                   c: float = 1e-3,
                   plot_stuff=False,
                   save_stuff=True):
    """
    Use Tikhonov regularisation to estimate the Gaussian noise hyperparameter.

    LogGaussian(mu=0, sigma=2) priors are placed on the kernel variance and
    lengthscale and the Gaussian noise is fixed. Each restart draws random
    starting kernel hyperparameters and then repeatedly:

    1. runs up to 20 optimiser iterations on the model,
    2. computes the eigenvalues of the K matrix on the training inputs,
    3. picks where the bulk of the eigenspectrum starts from the gaps between
       consecutive eigenvalues (normalised by the largest eigenvalue),
    4. assigns the median of the bulk eigenvalues to the noise variance.

    The parameters of the restart with the highest log likelihood are written
    back to the model. Before returning, the noise is unfixed and the priors
    are removed.

    :param gpy_gp: An initialised GPy GPRegression object (modified in place)
    :param test_model: toggle whether to display the fitted model and evaluate
        it on ``test_X`` / ``test_Y`` with ``test_gp``
    :param test_X: test inputs passed to ``test_gp``
    :param test_Y: test targets passed to ``test_gp``
    :param c: threshold on the absolute normalised eigenvalue gap used when
        choosing where the bulk starts
    :param plot_stuff: if True, plot the log eigenvalues and a histogram of
        the eigenvalues after each restart
    :param save_stuff: if True, record the bulk median of every inner
        iteration and write the table to ``output/7Mar/beta_.txt``
    :return: tuple ``(gpy_gp, final_params, rmse, ll, elapsed)``.
        ``final_params`` is the model parameter array of the best restart;
        ``rmse`` and ``ll`` are whatever ``test_gp`` returns, or NaN when
        ``test_model`` is False; ``elapsed`` is wall-clock seconds.
    """
    import os  # local import: only needed to create the output directory

    n_restarts = 10  # random restarts of the kernel hyperparameters
    n_inner_iters = 25  # optimise / re-estimate rounds per restart

    start = time.time()
    train_x = gpy_gp.X
    priors = GPy.priors.LogGaussian(mu=0., sigma=2.)
    gpy_gp.kern.variance.set_prior(priors)
    gpy_gp.kern.lengthscale.set_prior(priors)
    kern_dim = 1 + gpy_gp.input_dim
    # One row per restart: [model parameter array..., log likelihood].
    metrics = np.empty((n_restarts, 3 + gpy_gp.input_dim))
    gpy_gp.Gaussian_noise.fix()
    lml = -np.inf
    # Bulk median recorded for every (restart, inner iteration) pair.
    C = np.zeros((n_restarts, n_inner_iters))

    for j in range(n_restarts):
        # Random log-normal starting point for the kernel hyperparameters.
        theta = np.exp(
            multivariate_normal(mean=np.zeros((kern_dim, )),
                                cov=4 * np.eye(kern_dim)).rvs())
        for i in range(n_inner_iters):
            gpy_gp.optimize(start=theta, max_iters=20)
            K = gpy_gp.kern.K(train_x)
            # Eigenvalues in descending order.
            eig = np.sort(np.linalg.eigvals(K).real)[::-1]

            # Gaps between consecutive eigenvalues, relative to the largest.
            delta_lambda = np.diff(eig) / eig[0]
            # NOTE(review): argmax is taken over the *filtered* gaps, so the
            # index is relative to the filtered array, not to ``eig`` --
            # confirm this is the intended outlier count.
            try:
                n_outlier = delta_lambda[np.abs(delta_lambda) > c].argmax()
            except ValueError:
                # No gap exceeds the threshold: keep the whole spectrum.
                n_outlier = 0
            eig_bulk = eig[n_outlier:]

            med = np.median(eig_bulk)
            gpy_gp.Gaussian_noise.variance[:] = med

            # Next round starts from the current parameters minus the last
            # entry of the parameter array.
            theta = gpy_gp.param_array[:-1]
            lml = gpy_gp.log_likelihood()

            if save_stuff:
                C[j, i] = med

        if plot_stuff:
            plt.subplot(211)
            plt.axvline(int(len(eig) / 2))
            plt.plot(np.log(eig), marker='.', linestyle='None')
            plt.subplot(212)
            plt.hist(eig[int(len(eig) / 5):], bins=80)
            plt.axvline(med)
            plt.show()

        print('LML After Optimisation Restart', gpy_gp.log_likelihood(), j)
        print('Eig med/avg', med)
        metrics[j, :-1] = gpy_gp.param_array
        metrics[j, -1] = lml

    # Restore the parameters of the restart with the highest log likelihood.
    best_iter = metrics[:, -1].argmax()
    gpy_gp.kern.variance[:] = metrics[best_iter, 0]
    gpy_gp.kern.lengthscale[:] = metrics[best_iter, 1:-2]
    gpy_gp.Gaussian_noise.variance[:] = metrics[best_iter, -2]
    if save_stuff:
        # Only touch the disk when asked to, and make sure the target
        # directory exists so a finished run is not lost to a missing folder.
        out_path = 'output/7Mar/beta_.txt'
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        np.savetxt(out_path, C)
    final_params = metrics[best_iter, :-1]
    end = time.time()
    rmse, ll = np.nan, np.nan
    if test_model:
        print("----------------- Testing Thikonov -------------------")
        display(gpy_gp)
        print("Clock time: ", end - start)
        rmse, ll = test_gp(
            gpy_gp,
            test_X,
            test_Y,
        )
    gpy_gp.Gaussian_noise.unfix()
    gpy_gp.unset_priors()
    return gpy_gp, final_params, rmse, ll, end - start