def _brute_fit(self, data_points, target_values, max_iter=None):
        """
        Optimizes covariance hyper-parameters
        :param data_points: an array of data points
        :param target_values: target values' vector
        :return:
        """
        if not (isinstance(data_points, np.ndarray)
                and isinstance(target_values, np.ndarray)):
            raise TypeError("The operands must be of type numpy array")

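        # The oracle returns the objective to be maximized and its gradient;
        # negate both, since L-BFGS-B minimizes.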
        def loc_fun(w):
            loss, grad = self._oracle(data_points, target_values, w)
            return -loss, -grad

        bnds = self.covariance_obj.get_bounds()
        if max_iter is None:
            max_iter = np.inf
        res, w_list, time_list = minimize_wrapper(
            loc_fun,
            self.covariance_obj.get_params(),
            method='L-BFGS-B',
            mydisp=False,
            bounds=bnds,
            options={
                'gtol': 1e-8,
                'ftol': 0,
                'maxiter': max_iter
            })
        optimal_params = res.x
        self.covariance_obj.set_params(optimal_params)
        return GPRes(deepcopy(w_list), time_lst=deepcopy(time_list))

    def _svi_fit(self,
                 data_points,
                 target_values,
                 num_inputs=0,
                 inputs=None,
                 optimizer_options=None):
        """
        A method for optimizing hyper-parameters (for fixed inducing points), based on stochastic variational inference
        :param data_points: training set objects
        :param target_values: training set answers
        :param inputs: inducing inputs
        :param num_inputs: number of inducing points to generate. If inducing points are provided, this parameter is
        ignored
        :param optimizer_options: options for the optimizer specified by self.optimizer
        :return: GPRes object, containing the iteration-wise parameter values and timings of the optimization
        """
        # Avoid a mutable default argument: the options dict is modified below.
        if optimizer_options is None:
            optimizer_options = {}

        # if no inducing inputs are provided, we use K-Means cluster centers as inducing inputs
        if inputs is None:
            means = KMeans(n_clusters=num_inputs)
            means.fit(data_points.T)
            inputs = means.cluster_centers_.T

        # Initializing required variables
        y = target_values
        m = num_inputs
        n = y.size

        # Initializing variational (normal) distribution parameters
        mu = np.zeros((m, 1))
        sigma_n = self.covariance_obj.get_params()[-1]

        theta = self.covariance_obj.get_params()
        if self.parametrization == 'natural':

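            # Initialize q(u) = N(mu, sigma) at its optimum for the current
            # hyper-parameters (cf. Titsias, 2009):
            #   sigma = (K_mm^{-1} K_mn K_nm K_mm^{-1} / sigma_n^2 + K_mm^{-1})^{-1}
            #   mu = sigma K_mm^{-1} K_mn y / sigma_n^2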
            cov_fun = self.covariance_obj.covariance_function
            K_mn = cov_fun(inputs, data_points)
            K_mm = cov_fun(inputs, inputs)
            K_mm_inv = np.linalg.inv(K_mm)

            sigma_inv = K_mm_inv.dot(K_mn.dot(
                K_mn.T.dot(K_mm_inv))) / sigma_n**2 + K_mm_inv
            sigma = np.linalg.inv(sigma_inv)
            mu = sigma.dot(K_mm_inv.dot((K_mn.dot(y)))) / sigma_n**2

            # Canonical parameters initialization
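            # For a Gaussian, eta_1 = sigma^{-1} mu and eta_2 = -sigma^{-1} / 2.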
            eta_1 = sigma_inv.dot(mu)
            eta_2 = -sigma_inv / 2
            param_vec = self._svi_get_parameter_vector(theta, eta_1, eta_2)

        elif self.parametrization == 'cholesky':

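            # Same optimal-posterior initialization as in the natural branch.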
            cov_fun = self.covariance_obj.covariance_function
            K_mn = cov_fun(inputs, data_points)
            K_mm = cov_fun(inputs, inputs)
            K_mm_inv = np.linalg.inv(K_mm)
            sigma = np.linalg.inv(
                K_mm_inv.dot(K_mn.dot(K_mn.T.dot(K_mm_inv))) / sigma_n**2 +
                K_mm_inv)
            mu = sigma.dot(K_mm_inv.dot((K_mn.dot(y)))) / sigma_n**2

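            # Parametrize sigma by its Cholesky factor, which keeps it
            # symmetric positive definite during the optimization.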
            sigma_L = np.linalg.cholesky(sigma)
            param_vec = self._svi_get_parameter_vector(theta, mu, sigma_L)
        else:
            raise ValueError('Unknown parametrization: ' +
                             str(self.parametrization))

        bnds = self._svi_get_bounds(m)

        if self.parametrization == 'natural':

            # Optional multiplier for the variational part of the gradient,
            # acting as a separate step size for the natural parameters.
            nat_mult = optimizer_options.pop('nat_mult', 1.)

            def stoch_fun(x, i):
                grad = -self._svi_elbo_batch_approx_oracle(data_points,
                                                           target_values,
                                                           inputs,
                                                           parameter_vec=x,
                                                           indices=i)[1]
                grad[self.covariance_obj.get_params().size:] *= nat_mult
                return grad

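            # The climin-based optimizers pass explicit mini-batches; N=n
            # rescales the stochastic estimate to the full data set.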
            def adadelta_fun(x, train_points, train_targets):
                _, grad = self._svi_elbo_batch_approx_oracle(
                    train_points,
                    train_targets,
                    inputs,
                    parameter_vec=x,
                    indices=range(train_targets.size),
                    N=n)
                grad[self.covariance_obj.get_params().size:] *= nat_mult
                return -grad

            if self.optimizer == 'SG':
                res, w_list, time_list = stochastic_gradient_descent(
                    oracle=stoch_fun,
                    n=n,
                    point=param_vec,
                    bounds=bnds,
                    options=optimizer_options)
            elif self.optimizer == 'AdaDelta':
                res, w_list, time_list = climin_wrapper(
                    oracle=adadelta_fun,
                    w0=param_vec,
                    train_points=data_points,
                    train_targets=target_values,
                    options=optimizer_options,
                    method='AdaDelta')
            elif self.optimizer == 'climinSG':
                res, w_list, time_list = climin_wrapper(
                    oracle=adadelta_fun,
                    w0=param_vec,
                    train_points=data_points,
                    train_targets=target_values,
                    options=optimizer_options,
                    method='SG')
            else:
                raise ValueError('Unknown optimizer')

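            # Recover (mu, sigma) from the optimized natural parameters:
            # sigma = (-2 * eta_2)^{-1}, mu = sigma eta_1.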
            theta, eta_1, eta_2 = self._svi_get_parameters(res)
            sigma_inv = -2 * eta_2
            sigma = np.linalg.inv(sigma_inv)
            mu = sigma.dot(eta_1)

        elif self.parametrization == 'cholesky':

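            # Negated-ELBO oracles in the signatures expected by each
            # optimizer: full batch (fun), index batches (sag_oracle,
            # stoch_fun), and explicit mini-batches (adadelta_fun).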
            def fun(x):
                fun, grad = self._svi_elbo_batch_approx_oracle(data_points,
                                                               target_values,
                                                               inputs,
                                                               parameter_vec=x,
                                                               indices=list(
                                                                   range(n)))
                return -fun, -grad

            def sag_oracle(x, i):
                fun, grad = self._svi_elbo_batch_approx_oracle(data_points,
                                                               target_values,
                                                               inputs,
                                                               parameter_vec=x,
                                                               indices=i)
                return -fun, -grad

            def adadelta_fun(x, train_points, train_targets):
                fun, grad = self._svi_elbo_batch_approx_oracle(
                    train_points,
                    train_targets,
                    inputs,
                    parameter_vec=x,
                    indices=range(train_targets.size),
                    N=n)
                return -grad

            def stoch_fun(x, i):
                return -self._svi_elbo_batch_approx_oracle(data_points,
                                                           target_values,
                                                           inputs,
                                                           parameter_vec=x,
                                                           indices=i)[1]

            if self.optimizer == 'AdaDelta':
                res, w_list, time_list = climin_wrapper(
                    oracle=adadelta_fun,
                    w0=param_vec,
                    train_points=data_points,
                    train_targets=target_values,
                    options=optimizer_options,
                    method='AdaDelta')
            elif self.optimizer == 'climinSG':
                res, w_list, time_list = climin_wrapper(
                    oracle=adadelta_fun,
                    w0=param_vec,
                    train_points=data_points,
                    train_targets=target_values,
                    options=optimizer_options,
                    method='SG')

            elif self.optimizer == 'SG':
                res, w_list, time_list = stochastic_gradient_descent(
                    oracle=stoch_fun,
                    n=n,
                    point=param_vec,
                    bounds=bnds,
                    options=optimizer_options)

            elif self.optimizer == 'SAG':
                res, w_list, time_list = stochastic_average_gradient(
                    oracle=sag_oracle,
                    n=n,
                    point=param_vec,
                    bounds=bnds,
                    options=optimizer_options)

            elif self.optimizer == 'FG':
                res, w_list, time_list = gradient_descent(
                    oracle=fun,
                    point=param_vec,
                    bounds=bnds,
                    options=optimizer_options)
            elif self.optimizer == 'L-BFGS-B':
                mydisp = optimizer_options.pop('mydisp', False)
                print_freq = optimizer_options.pop('print_freq', 1)
                res, w_list, time_list = minimize_wrapper(
                    fun,
                    param_vec,
                    method='L-BFGS-B',
                    mydisp=mydisp,
                    print_freq=print_freq,
                    bounds=bnds,
                    jac=True,
                    options=optimizer_options)
                res = res['x']
            else:
                raise ValueError('Wrong optimizer for svi method: ' +
                                 self.optimizer)

            theta, mu, sigma_L = self._svi_get_parameters(res)
            sigma = sigma_L.dot(sigma_L.T)

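        # Store the fitted hyper-parameters and the variational distribution
        # q(u) = N(mu, sigma) at the inducing inputs.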
        self.covariance_obj.set_params(theta)
        self.inducing_inputs = (inputs, mu, sigma)
        return GPRes(deepcopy(w_list), time_lst=deepcopy(time_list))

    def _vi_means_fit(self,
                      data_points,
                      target_values,
                      num_inputs,
                      inputs=None,
                      optimizer_options=None):
        """
        A procedure, fitting hyper-parameters and inducing points for both the 'means' and the 'vi' methods.
        :param data_points: data points
        :param target_values: target values at data points
        :param num_inputs: number of inducing inputs to be found
        :param inputs: initial inducing inputs (used by the 'means' method)
        :param optimizer_options: options for the optimizer specified by self.optimizer
        :return: GPRes object with the iteration-wise values of the hyper-parameters and times, for evaluating the
        optimization
        """
        if not (isinstance(data_points, np.ndarray)
                and isinstance(target_values, np.ndarray)):
            raise TypeError("The operands must be of type numpy array")
        # Avoid a mutable default argument: the options dict is copied and
        # modified below.
        if optimizer_options is None:
            optimizer_options = {}

        dim = data_points.shape[0]
        param_len = self.covariance_obj.get_params().size

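        # Local objectives negate the oracle, since the optimizers minimize.
        # For the 'vi' method, w stacks the covariance parameters and the
        # flattened inducing points.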
        def _vi_loc_fun(w):
            # Has to be rewritten for the multidimensional case.
            ind_points = w[param_len:].reshape((dim, num_inputs))
            loss, grad = self._vi_means_oracle(data_points, target_values,
                                               w[:param_len], ind_points)
            return -loss, -grad

        def _means_loc_fun(w):
            loss, grad = self._vi_means_oracle(data_points, target_values, w,
                                               inputs)
            return -loss, -grad

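        # Fixed seed, so the random initialization of the inducing points is
        # reproducible.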
        np.random.seed(15)
        if self.method == 'vi':
            inputs = data_points[:, :num_inputs] + np.random.normal(
                0, 0.1, (dim, num_inputs))
            loc_fun = _vi_loc_fun
            w0 = np.concatenate(
                (self.covariance_obj.get_params(), inputs.ravel()))
            bnds = tuple(
                list(self.covariance_obj.get_bounds()) +
                [(1e-2, 1)] * num_inputs * dim)

        elif self.method == 'means':
            if inputs is None:
                inputs = self._k_means_cluster_centers(data_points, num_inputs)
            loc_fun = _means_loc_fun
            w0 = self.covariance_obj.get_params()
            bnds = self.covariance_obj.get_bounds()
        else:
            raise ValueError('Unknown method: ' + str(self.method))

        if self.optimizer == 'L-BFGS-B':
            options = copy.deepcopy(optimizer_options)
            mydisp = options.pop('mydisp', False)
            res, w_list, time_list = minimize_wrapper(loc_fun,
                                                      w0,
                                                      method='L-BFGS-B',
                                                      mydisp=mydisp,
                                                      bounds=bnds,
                                                      options=options)
            res = res.x
        elif self.optimizer == 'Projected Newton':
            res, w_list, time_list = projected_newton(
                loc_fun, w0, bounds=bnds, options=optimizer_options)

        else:
            raise ValueError('Wrong optimizer for vi/means method: ' +
                             self.optimizer)

        if self.method == 'vi':
            optimal_params = res[:-num_inputs * dim]
            inducing_points = res[-num_inputs * dim:]
            inducing_points = inducing_points.reshape((dim, num_inputs))
        elif self.method == 'means':
            optimal_params = res
            inducing_points = inputs
        self.covariance_obj.set_params(optimal_params)

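        # Recover the optimal variational mean and covariance for the fitted
        # hyper-parameters and inducing points.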
        mu, Sigma = self._vi_get_optimal_meancov(optimal_params,
                                                 inducing_points, data_points,
                                                 target_values)
        self.inducing_inputs = (inducing_points, mu, Sigma)
        return GPRes(deepcopy(w_list), time_lst=deepcopy(time_list))