Пример #1
0
def get_kernel_inverse(
        X_train: np.ndarray,
        hyps: dict,
        str_cov: str,
        fix_noise: bool = constants.FIX_GP_NOISE,
        use_gradient: bool = False,
        debug: bool = False) -> constants.TYPING_TUPLE_THREE_ARRAYS:
    """
    Build the noisy kernel matrix over `X_train` and invert it directly
    with `numpy.linalg.inv`, i.e., without any matrix decomposition.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param hyps: dictionary of hyperparameters for Gaussian process.
        The key `'noise'` is read here.
    :type hyps: dict.
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param fix_noise: flag for fixing a noise. It is only forwarded to
        the kernel-gradient computation.
    :type fix_noise: bool., optional
    :param use_gradient: flag for computing and returning gradients of
        kernel matrix.
    :type use_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, kernel matrix
        inverse, and gradients of kernel matrix. If `use_gradient` is False,
        gradients of kernel matrix would be None.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError

    """

    assert isinstance(X_train, np.ndarray)
    assert isinstance(hyps, dict)
    assert isinstance(str_cov, str)
    assert isinstance(use_gradient, bool)
    assert isinstance(fix_noise, bool)
    assert isinstance(debug, bool)
    utils_covariance.check_str_cov(
        'get_kernel_inverse', str_cov, X_train.shape)

    # Noise-free kernel plus the noise variance on the diagonal.
    kernel = cov_main(str_cov, X_train, X_train, hyps, True)
    noisy_kernel = kernel + hyps['noise']**2 * np.eye(X_train.shape[0])

    # Averaging with the transpose removes any asymmetry before inverting.
    cov_X_X = (noisy_kernel + noisy_kernel.T) / 2.0
    inv_cov_X_X = np.linalg.inv(cov_X_X)

    grad_cov_X_X = None
    if use_gradient:
        grad_cov_X_X = grad_cov_main(
            str_cov, X_train, X_train, hyps, fix_noise, same_X_Xp=True)

    return cov_X_X, inv_cov_X_X, grad_cov_X_X
Пример #2
0
def predict_with_cov(X_train: np.ndarray, Y_train: np.ndarray, X_test: np.ndarray,
    cov_X_X: np.ndarray, inv_cov_X_X: np.ndarray, hyps: dict,
    str_cov: str=constants.STR_COV,
    prior_mu: constants.TYPING_UNION_CALLABLE_NONE=None,
    debug: bool=False
) -> constants.TYPING_TUPLE_THREE_ARRAYS:
    """
    Compute the posterior mean, posterior standard deviation, and posterior
    covariance over `X_test` from a precomputed kernel matrix inverse.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param X_test: inputs. Shape: (l, d) or (l, m, d).
    :type X_test: numpy.ndarray
    :param cov_X_X: kernel matrix over `X_train`. Shape: (n, n). It is only
        validated here; the computation uses `inv_cov_X_X`.
    :type cov_X_X: numpy.ndarray
    :param inv_cov_X_X: kernel matrix inverse over `X_train`. Shape: (n, n).
    :type inv_cov_X_X: numpy.ndarray
    :param hyps: dictionary of hyperparameters for Gaussian process.
    :type hyps: dict.
    :param str_cov: the name of covariance function.
    :type str_cov: str., optional
    :param prior_mu: None, or prior mean function.
    :type prior_mu: NoneType, or callable, optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of posterior mean function over `X_test`, posterior
        standard deviation function over `X_test`, and posterior covariance
        matrix over `X_test`. Shape: ((l, 1), (l, 1), (l, l)).
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError

    """

    utils_gp.validate_common_args(X_train, Y_train, str_cov, prior_mu, debug, X_test)
    assert isinstance(cov_X_X, np.ndarray)
    assert isinstance(inv_cov_X_X, np.ndarray)
    assert isinstance(hyps, dict)
    assert len(cov_X_X.shape) == 2
    assert len(inv_cov_X_X.shape) == 2
    assert (np.array(cov_X_X.shape) == np.array(inv_cov_X_X.shape)).all()
    utils_covariance.check_str_cov(
        'predict_with_cov', str_cov, X_train.shape, shape_X2=X_test.shape)

    prior_mu_train = utils_gp.get_prior_mu(prior_mu, X_train)
    prior_mu_test = utils_gp.get_prior_mu(prior_mu, X_test)

    # Cross-covariance (train vs. test) and the symmetrized test covariance.
    cov_X_Xs = covariance.cov_main(str_cov, X_train, X_test, hyps, False)
    cov_Xs_Xs = covariance.cov_main(str_cov, X_test, X_test, hyps, True)
    cov_Xs_Xs = (cov_Xs_Xs + cov_Xs_Xs.T) / 2.0

    # K_*^T K^{-1} appears in both the mean and the covariance expression.
    projection = np.dot(cov_X_Xs.T, inv_cov_X_X)

    mu_Xs = np.dot(projection, Y_train - prior_mu_train) + prior_mu_test
    Sigma_Xs = cov_Xs_Xs - np.dot(projection, cov_X_Xs)

    # Negative diagonal entries are clipped to zero before the square root.
    variances = np.maximum(np.diag(Sigma_Xs), 0.0)
    sigma_Xs = np.expand_dims(np.sqrt(variances), axis=1)

    return mu_Xs, sigma_Xs, Sigma_Xs
Пример #3
0
def predict_with_optimized_hyps(X_train: np.ndarray, Y_train: np.ndarray, X_test: np.ndarray,
    str_cov: str=constants.STR_COV,
    str_optimizer_method: str=constants.STR_OPTIMIZER_METHOD_GP,
    prior_mu: constants.TYPING_UNION_CALLABLE_NONE=None,
    fix_noise: bool=constants.FIX_GP_NOISE,
    debug: bool=False
) -> constants.TYPING_TUPLE_THREE_ARRAYS:
    """
    This function returns posterior mean and posterior standard deviation
    functions over `X_test`, computed by the Gaussian process regression
    optimized with `X_train` and `Y_train`.

    The hyperparameters are first optimized with
    `gp_kernel.get_optimized_kernel`, and the resulting kernel matrix,
    its inverse, and the hyperparameters are passed to `predict_with_cov`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param X_test: inputs. Shape: (l, d) or (l, m, d).
    :type X_test: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str., optional
    :param str_optimizer_method: the name of optimization method. It must be
        one of `constants.ALLOWED_OPTIMIZER_METHOD_GP`.
    :type str_optimizer_method: str., optional
    :param prior_mu: None, or prior mean function.
    :type prior_mu: NoneType, or callable, optional
    :param fix_noise: flag for fixing a noise.
    :type fix_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of posterior mean function over `X_test`, posterior
        standard deviation function over `X_test`, and posterior covariance
        matrix over `X_test`. Shape: ((l, 1), (l, 1), (l, l)).
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError

    """

    utils_gp.validate_common_args(X_train, Y_train, str_cov, prior_mu, debug, X_test)
    assert isinstance(str_optimizer_method, str)
    assert isinstance(fix_noise, bool)
    # The first argument is the caller's name; keep it in sync with this
    # function's actual name.
    utils_covariance.check_str_cov('predict_with_optimized_hyps', str_cov,
        X_train.shape, shape_X2=X_test.shape)
    assert str_optimizer_method in constants.ALLOWED_OPTIMIZER_METHOD_GP

    time_start = time.time()

    cov_X_X, inv_cov_X_X, hyps = gp_kernel.get_optimized_kernel(X_train, Y_train,
        prior_mu, str_cov, str_optimizer_method=str_optimizer_method,
        fix_noise=fix_noise, debug=debug)
    mu_Xs, sigma_Xs, Sigma_Xs = predict_with_cov(X_train, Y_train, X_test,
        cov_X_X, inv_cov_X_X, hyps, str_cov=str_cov, prior_mu=prior_mu,
        debug=debug)

    time_end = time.time()
    if debug:
        logger.debug('time consumed to construct gpr: %.4f sec.', time_end - time_start)
    return mu_Xs, sigma_Xs, Sigma_Xs
Пример #4
0
def predict_with_hyps(X_train: np.ndarray, Y_train: np.ndarray, X_test: np.ndarray, hyps: dict,
    str_cov: str=constants.STR_COV,
    prior_mu: constants.TYPING_UNION_CALLABLE_NONE=None,
    debug: bool=False
) -> constants.TYPING_TUPLE_THREE_ARRAYS:
    """
    Compute the posterior mean, posterior standard deviation, and posterior
    covariance over `X_test` for given hyperparameters `hyps`.

    The kernel matrix and its inverse are obtained from
    `covariance.get_kernel_inverse`, and the prediction itself is delegated
    to `predict_with_cov`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param X_test: inputs. Shape: (l, d) or (l, m, d).
    :type X_test: numpy.ndarray
    :param hyps: dictionary of hyperparameters for Gaussian process.
    :type hyps: dict.
    :param str_cov: the name of covariance function.
    :type str_cov: str., optional
    :param prior_mu: None, or prior mean function.
    :type prior_mu: NoneType, or callable, optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of posterior mean function over `X_test`, posterior
        standard deviation function over `X_test`, and posterior covariance
        matrix over `X_test`. Shape: ((l, 1), (l, 1), (l, l)).
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError

    """

    utils_gp.validate_common_args(X_train, Y_train, str_cov, prior_mu, debug, X_test)
    assert isinstance(hyps, dict)
    utils_covariance.check_str_cov(
        'predict_with_hyps', str_cov, X_train.shape, shape_X2=X_test.shape)

    # The kernel gradients (third element) are not needed for prediction.
    kernel_results = covariance.get_kernel_inverse(
        X_train, hyps, str_cov, debug=debug)
    cov_X_X, inv_cov_X_X = kernel_results[0], kernel_results[1]

    mu_Xs, sigma_Xs, Sigma_Xs = predict_with_cov(
        X_train, Y_train, X_test, cov_X_X, inv_cov_X_X, hyps,
        str_cov=str_cov, prior_mu=prior_mu, debug=debug)

    return mu_Xs, sigma_Xs, Sigma_Xs
Пример #5
0
def get_optimized_kernel(
        X_train: np.ndarray,
        Y_train: np.ndarray,
        prior_mu: constants.TYPING_UNION_CALLABLE_NONE,
        str_cov: str,
        str_optimizer_method: str = constants.STR_OPTIMIZER_METHOD_GP,
        str_modelselection_method: str = constants.STR_MODELSELECTION_METHOD,
        use_ard: bool = constants.USE_ARD,
        fix_noise: bool = constants.FIX_GP_NOISE,
        debug: bool = False) -> constants.TYPING_TUPLE_TWO_ARRAYS_DICT:
    """
    This function computes the kernel matrix optimized by optimization
    method specified, its inverse matrix, and the optimized hyperparameters.

    The objective is selected by `str_modelselection_method` (`'ml'` or
    `'loocv'`) and minimized with `scipy.optimize.minimize`. Gradients are
    passed to the optimizer only for the `'ml'` objective, with an optimizer
    other than `'Nelder-Mead'`, and a covariance function in
    `constants.ALLOWED_COV_BASE`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param prior_mu: prior mean function or None.
    :type prior_mu: callable or NoneType
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param str_optimizer_method: the name of optimization method.
    :type str_optimizer_method: str., optional
    :param str_modelselection_method: the name of model selection method.
    :type str_modelselection_method: str., optional
    :param use_ard: flag for using automatic relevance determination.
    :type use_ard: bool., optional
    :param fix_noise: flag for fixing a noise.
    :type fix_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, kernel matrix
        inverse, and dictionary of hyperparameters.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, dict.)

    :raises: AssertionError, ValueError

    """

    # TODO: check to input same fix_noise to convert_hyps and restore_hyps
    utils_gp.validate_common_args(X_train, Y_train, str_cov, prior_mu, debug)
    assert isinstance(str_optimizer_method, str)
    assert isinstance(str_modelselection_method, str)
    assert isinstance(use_ard, bool)
    assert isinstance(fix_noise, bool)
    utils_covariance.check_str_cov('get_optimized_kernel', str_cov,
                                   X_train.shape)
    assert str_optimizer_method in constants.ALLOWED_OPTIMIZER_METHOD_GP
    assert str_modelselection_method in constants.ALLOWED_MODELSELECTION_METHOD

    time_start = time.time()

    prior_mu_train = utils_gp.get_prior_mu(prior_mu, X_train)

    # Nelder-Mead is the only supported optimizer that takes no gradient.
    use_gradient = bool(str_optimizer_method != 'Nelder-Mead')

    if str_cov in constants.ALLOWED_COV_BASE:
        num_dim = X_train.shape[1]
    elif str_cov in constants.ALLOWED_COV_SET:
        num_dim = X_train.shape[2]
        # Gradients are disabled for set covariance functions.
        use_gradient = False
    else:  # pragma: no cover
        # Fail explicitly instead of hitting an unbound `num_dim` below.
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_cov.')

    if str_modelselection_method == 'ml':
        def neg_log_ml_(hyps):
            return gp_likelihood.neg_log_ml(
                X_train,
                Y_train,
                hyps,
                str_cov,
                prior_mu_train,
                use_ard=use_ard,
                fix_noise=fix_noise,
                use_gradient=use_gradient,
                debug=debug)
    elif str_modelselection_method == 'loocv':
        # TODO: add use_ard.
        def neg_log_ml_(hyps):
            return gp_likelihood.neg_log_pseudo_l_loocv(
                X_train,
                Y_train,
                hyps,
                str_cov,
                prior_mu_train,
                fix_noise=fix_noise,
                debug=debug)
        # The LOO-CV objective returns no gradient.
        use_gradient = False
    else:  # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_modelselection_method.'
        )

    if debug:
        # Logged only after `use_gradient` has reached its final value.
        logger.debug('str_optimizer_method: %s', str_optimizer_method)
        logger.debug('str_modelselection_method: %s',
                     str_modelselection_method)
        logger.debug('use_gradient: %s', use_gradient)

    hyps_converted = utils_covariance.convert_hyps(
        str_cov,
        utils_covariance.get_hyps(str_cov, num_dim, use_ard=use_ard),
        fix_noise=fix_noise)

    # Only the keyword arguments differ between the optimizer families, so
    # they are assembled here and `minimize` is called once below.
    if str_optimizer_method in ['BFGS', 'SLSQP']:
        kwargs_minimize = {
            'method': str_optimizer_method,
            'jac': use_gradient,
        }
    elif str_optimizer_method in ['L-BFGS-B', 'SLSQP-Bounded']:
        # 'SLSQP-Bounded' is this module's alias for scipy's 'SLSQP' run
        # with bounds.
        if str_optimizer_method == 'SLSQP-Bounded':
            scipy_method = 'SLSQP'
        else:
            scipy_method = str_optimizer_method

        kwargs_minimize = {
            'method': scipy_method,
            'bounds': utils_covariance.get_range_hyps(str_cov,
                                                      num_dim,
                                                      use_ard=use_ard,
                                                      fix_noise=fix_noise),
            'jac': use_gradient,
        }
    elif str_optimizer_method in ['Nelder-Mead']:
        kwargs_minimize = {'method': str_optimizer_method}
    else:  # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_optimizer_method'
        )

    result_minimize = scipy.optimize.minimize(neg_log_ml_,
                                              hyps_converted,
                                              options={'disp': False},
                                              **kwargs_minimize)

    if debug:
        logger.debug('negative log marginal likelihood: %.6f',
                     result_minimize.fun)
        logger.debug('scipy message: %s', result_minimize.message)

    result_optimized = result_minimize.x

    hyps = utils_covariance.restore_hyps(str_cov,
                                         result_optimized,
                                         use_ard=use_ard,
                                         fix_noise=fix_noise)

    hyps = utils_covariance.validate_hyps_dict(hyps, str_cov, num_dim)
    cov_X_X, inv_cov_X_X, _ = covariance.get_kernel_inverse(
        X_train, hyps, str_cov, fix_noise=fix_noise, debug=debug)
    time_end = time.time()

    if debug:
        logger.debug('hyps optimized: %s', utils_logger.get_str_hyps(hyps))
        logger.debug('time consumed to construct gpr: %.4f sec.',
                     time_end - time_start)
    return cov_X_X, inv_cov_X_X, hyps
Пример #6
0
def neg_log_ml(X_train: np.ndarray,
               Y_train: np.ndarray,
               hyps: np.ndarray,
               str_cov: str,
               prior_mu_train: np.ndarray,
               use_ard: bool = constants.USE_ARD,
               fix_noise: bool = constants.FIX_GP_NOISE,
               use_gradient: bool = True,
               debug: bool = False) -> constants.TYPING_UNION_FLOAT_FA:
    """
    This function computes a negative log marginal likelihood.

    The likelihood depends on a degrees-of-freedom hyperparameter
    `hyps['dof']` (restored with `use_gp=False`), and the returned value is
    divided by the number of training points. The expression takes
    `log((dof - 2) * pi)`, so it is only finite for `dof > 2`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu(). Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param use_ard: flag for automatic relevance determination.
    :type use_ard: bool., optional
    :param fix_noise: flag for fixing a noise.
    :type fix_noise: bool., optional
    :param use_gradient: flag for computing and returning gradients of
        negative log marginal likelihood.
    :type use_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log marginal likelihood, or (negative log marginal
        likelihood, gradients of the likelihood).
    :rtype: float, or tuple of (float, np.ndarray)

    :raises: AssertionError

    """

    utils_gp.validate_common_args(X_train, Y_train, str_cov, None, debug)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(use_ard, bool)
    assert isinstance(fix_noise, bool)
    assert isinstance(use_gradient, bool)
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    utils_covariance.check_str_cov('neg_log_ml', str_cov, X_train.shape)

    num_X = float(X_train.shape[0])
    hyps = utils_covariance.restore_hyps(str_cov,
                                         hyps,
                                         use_ard=use_ard,
                                         fix_noise=fix_noise,
                                         use_gp=False)
    new_Y_train = Y_train - prior_mu_train
    nu = hyps['dof']

    cov_X_X, inv_cov_X_X, grad_cov_X_X = covariance.get_kernel_inverse(
        X_train,
        hyps,
        str_cov,
        fix_noise=fix_noise,
        use_gradient=use_gradient,
        debug=debug)

    alpha = np.dot(inv_cov_X_X, new_Y_train)
    beta = np.squeeze(np.dot(np.dot(new_Y_train.T, inv_cov_X_X), new_Y_train))

    first_term = -0.5 * num_X * np.log((nu - 2.0) * np.pi)
    sign_second_term, second_term = np.linalg.slogdet(cov_X_X)

    # TODO: it should be checked.
    if sign_second_term <= 0:  # pragma: no cover
        second_term = 0.0

    second_term = -0.5 * second_term

    # log(Gamma(a) / Gamma(b)) is evaluated as gammaln(a) - gammaln(b).
    # Gamma itself overflows to inf for arguments above roughly 171, which
    # would turn this term into inf or nan for a few hundred data points.
    third_term = scipy.special.gammaln((nu + num_X) / 2.0) \
        - scipy.special.gammaln(nu / 2.0)
    fourth_term = -0.5 * (nu + num_X) * np.log(1.0 + beta / (nu - 2.0))

    log_ml_ = np.squeeze(first_term + second_term + third_term + fourth_term)
    log_ml_ /= num_X

    if not use_gradient:
        return -1.0 * log_ml_

    assert grad_cov_X_X is not None
    # One slot per kernel-gradient slice plus one for the degrees of freedom.
    grad_log_ml_ = np.zeros(grad_cov_X_X.shape[2] + 1)

    first_term_grad = ((nu + num_X) /
                       (nu + beta - 2.0) * np.dot(alpha, alpha.T) -
                       inv_cov_X_X)
    nu_grad = -num_X / (2.0 * (nu - 2.0))\
        + scipy.special.digamma((nu + num_X) / 2.0)\
        - scipy.special.digamma(nu / 2.0)\
        - 0.5 * np.log(1.0 + beta / (nu - 2.0))\
        + (nu + num_X) * beta / (2.0 * (nu - 2.0)**2 + 2.0 * beta * (nu - 2.0))

    # The degrees-of-freedom gradient sits at index 0 when the noise is
    # fixed and at index 1 otherwise.
    if fix_noise:
        grad_log_ml_[0] = nu_grad
    else:
        grad_log_ml_[1] = nu_grad

    for ind in range(0, grad_cov_X_X.shape[2]):
        cur_grad = 0.5 * np.trace(
            np.dot(first_term_grad, grad_cov_X_X[:, :, ind]))

        # Kernel gradients fill the remaining slots: slice `ind` goes to
        # `ind + 1`, except that slice 0 goes to index 0 when the noise is
        # not fixed (index 1 then holds the degrees-of-freedom gradient).
        if not fix_noise and ind == 0:
            cur_ind = 0
        else:
            cur_ind = ind + 1

        grad_log_ml_[cur_ind] = cur_grad

    return -1.0 * log_ml_, -1.0 * grad_log_ml_ / num_X
Пример #7
0
def get_kernel_cholesky(
        X_train: np.ndarray,
        hyps: dict,
        str_cov: str,
        fix_noise: bool = constants.FIX_GP_NOISE,
        use_gradient: bool = False,
        debug: bool = False) -> constants.TYPING_TUPLE_THREE_ARRAYS:
    """
    Build the noisy kernel matrix over `X_train` and factorize it with a
    Cholesky decomposition, adding increasing diagonal jitter until the
    factorization succeeds.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param hyps: dictionary of hyperparameters for Gaussian process.
        The key `'noise'` is read here.
    :type hyps: dict.
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param fix_noise: flag for fixing a noise. It is only forwarded to
        the kernel-gradient computation.
    :type fix_noise: bool., optional
    :param use_gradient: flag for computing and returning gradients of
        kernel matrix.
    :type use_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train` (including the jitter
        that made the factorization succeed), lower matrix computed
        by Cholesky decomposition, and gradients of kernel matrix. If
        `use_gradient` is False, gradients of kernel matrix would be None.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError, ValueError

    """

    assert isinstance(X_train, np.ndarray)
    assert isinstance(hyps, dict)
    assert isinstance(str_cov, str)
    assert isinstance(fix_noise, bool)
    assert isinstance(use_gradient, bool)
    assert isinstance(debug, bool)
    utils_covariance.check_str_cov(
        'get_kernel_cholesky', str_cov, X_train.shape)

    kernel = cov_main(str_cov, X_train, X_train, hyps, True)
    noisy_kernel = kernel + hyps['noise']**2 * np.eye(X_train.shape[0])
    cov_X_X = (noisy_kernel + noisy_kernel.T) / 2.0

    identity = np.eye(X_train.shape[0])

    # Try the factorization with progressively larger jitter; the first
    # success wins.
    for jitter_cov in [0.0, 1e-4, 1e-2, 1e-1, 1e0, 1e1, 1e2]:
        jittered = cov_X_X + jitter_cov * identity
        try:
            lower = scipy.linalg.cholesky(jittered, lower=True)
        except np.linalg.LinAlgError:  # pragma: no cover
            continue

        # TODO: check this.
        cov_X_X = jittered
        break
    else:  # pragma: no cover
        # Every jitter level failed.
        raise ValueError('jitter_cov is not large enough.')

    grad_cov_X_X = None
    if use_gradient:
        grad_cov_X_X = grad_cov_main(
            str_cov, X_train, X_train, hyps, fix_noise, same_X_Xp=True)

    return cov_X_X, lower, grad_cov_X_X
Пример #8
0
def neg_log_ml(X_train: np.ndarray, Y_train: np.ndarray, hyps: np.ndarray,
    str_cov: str, prior_mu_train: np.ndarray,
    use_ard: bool=constants.USE_ARD,
    fix_noise: bool=constants.FIX_GP_NOISE,
    use_cholesky: bool=True,
    use_gradient: bool=True,
    debug: bool=False
) -> constants.TYPING_UNION_FLOAT_FA:
    """
    Compute the negative log marginal likelihood, divided by the number of
    training points, and optionally its gradients.

    Gradients are only available with `use_cholesky=True`; without Cholesky
    decomposition `use_gradient` is forced to False and a single value is
    returned.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu(). Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param use_ard: flag for automatic relevance determination.
    :type use_ard: bool., optional
    :param fix_noise: flag for fixing a noise.
    :type fix_noise: bool., optional
    :param use_cholesky: flag for using a cholesky decomposition.
    :type use_cholesky: bool., optional
    :param use_gradient: flag for computing and returning gradients of
        negative log marginal likelihood.
    :type use_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log marginal likelihood, or (negative log marginal
        likelihood, gradients of the likelihood).
    :rtype: float, or tuple of (float, np.ndarray)

    :raises: AssertionError

    """

    # TODO: add use_ard.
    utils_gp.validate_common_args(X_train, Y_train, str_cov, None, debug)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(use_ard, bool)
    assert isinstance(fix_noise, bool)
    assert isinstance(use_cholesky, bool)
    assert isinstance(use_gradient, bool)
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    utils_covariance.check_str_cov('neg_log_ml', str_cov, X_train.shape)

    hyps = utils_covariance.restore_hyps(
        str_cov, hyps, use_ard=use_ard, fix_noise=fix_noise)
    new_Y_train = Y_train - prior_mu_train

    if not use_cholesky:
        # TODO: use_gradient is fixed.
        use_gradient = False
        cov_X_X, inv_cov_X_X, grad_cov_X_X = covariance.get_kernel_inverse(
            X_train, hyps, str_cov, fix_noise=fix_noise,
            use_gradient=use_gradient, debug=debug)

        # Data-fit term.
        first_term = -0.5 * np.dot(np.dot(new_Y_train.T, inv_cov_X_X), new_Y_train)

        # Log-determinant term.
        sign_logdet, logdet = np.linalg.slogdet(cov_X_X)

        # TODO: It should be checked.
        if sign_logdet <= 0: # pragma: no cover
            logdet = 0.0

        second_term = -0.5 * logdet
    else:
        cov_X_X, lower, grad_cov_X_X = covariance.get_kernel_cholesky(
            X_train, hyps, str_cov, fix_noise=fix_noise,
            use_gradient=use_gradient, debug=debug)
        chol_factor = (lower, True)

        alpha = scipy.linalg.cho_solve(chol_factor, new_Y_train)

        # Data-fit term, and the log-determinant term taken from the
        # diagonal of the Cholesky factor.
        first_term = -0.5 * np.dot(new_Y_train.T, alpha)
        diag_lower = np.diagonal(lower)
        second_term = -1.0 * np.sum(np.log(diag_lower + constants.JITTER_LOG))

        if use_gradient:
            assert grad_cov_X_X is not None

            # alpha alpha^T - K^{-1}, with a trailing axis for contraction
            # against the kernel-gradient slices.
            inv_via_chol = scipy.linalg.cho_solve(
                chol_factor, np.eye(cov_X_X.shape[0]))
            first_term_grad = np.einsum("ik,jk->ijk", alpha, alpha)
            first_term_grad -= np.expand_dims(inv_via_chol, axis=2)

            grad_log_ml_ = 0.5 * np.einsum("ijl,ijk->kl", first_term_grad, grad_cov_X_X)
            grad_log_ml_ = np.sum(grad_log_ml_, axis=1)

    # Normalization constant.
    third_term = -float(X_train.shape[0]) / 2.0 * np.log(2.0 * np.pi)

    log_ml_ = np.squeeze(first_term + second_term + third_term)
    log_ml_ /= X_train.shape[0]

    if not use_gradient:
        return -1.0 * log_ml_

    return -1.0 * log_ml_, -1.0 * grad_log_ml_ / X_train.shape[0]
Пример #9
0
def neg_log_pseudo_l_loocv(X_train: np.ndarray, Y_train: np.ndarray, hyps: np.ndarray,
    str_cov: str, prior_mu_train: np.ndarray,
    fix_noise: bool=constants.FIX_GP_NOISE,
    debug: bool=False
) -> float:
    """
    It computes a negative log pseudo-likelihood using leave-one-out cross-validation.

    The leave-one-out predictive mean and variance of every training point
    are read off the kernel matrix inverse, so the model is not refitted per
    held-out point. The prior mean `prior_mu_train` is subtracted from
    `Y_train` before the inverse is applied, as the marginal-likelihood
    objectives do. The result is averaged over the training points.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu(). Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param fix_noise: flag for fixing a noise.
    :type fix_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log pseudo-likelihood.
    :rtype: float

    :raises: AssertionError

    """

    # TODO: add use_ard.
    utils_gp.validate_common_args(X_train, Y_train, str_cov, None, debug)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(fix_noise, bool)
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    utils_covariance.check_str_cov('neg_log_pseudo_l_loocv', str_cov, X_train.shape)

    num_data = X_train.shape[0]
    hyps = utils_covariance.restore_hyps(str_cov, hyps, fix_noise=fix_noise)

    _, inv_cov_X_X, _ = covariance.get_kernel_inverse(X_train, hyps,
        str_cov, fix_noise=fix_noise, debug=debug)

    # K^{-1} (Y - prior mean) does not depend on the held-out index, so it
    # is computed once instead of once per loop iteration.
    new_Y_train = Y_train - prior_mu_train
    alpha = np.dot(inv_cov_X_X, new_Y_train)

    log_pseudo_l_ = 0.0
    for ind_data in range(0, num_data):
        cur_Y_test = Y_train[ind_data]
        cur_inv_diag = inv_cov_X_X[ind_data, ind_data]

        # Leave-one-out predictive mean and standard deviation.
        cur_mu = np.squeeze(cur_Y_test) - alpha[ind_data] / cur_inv_diag
        cur_sigma = np.sqrt(1.0 / (cur_inv_diag + constants.JITTER_COV))

        first_term = -0.5 * np.log(cur_sigma**2)
        second_term = -0.5 * (np.squeeze(cur_Y_test - cur_mu))**2 / (cur_sigma**2)
        third_term = -0.5 * np.log(2.0 * np.pi)
        log_pseudo_l_ += first_term + second_term + third_term

    log_pseudo_l_ /= num_data
    log_pseudo_l_ *= -1.0

    return log_pseudo_l_
Пример #10
0
def test_check_str_cov():
    """
    Check that `check_str_cov` rejects wrongly typed arguments and
    mismatched shapes with AssertionError, and an unknown covariance name
    with ValueError.
    """

    # Each entry is (positional arguments, keyword arguments).
    cases_assertion = [
        # Wrongly typed arguments.
        ((1, 'se', (2, 1)), {}),
        (('test', 1, (2, 1)), {}),
        (('test', 'se', 1), {}),
        # Shapes that do not fit the covariance function.
        (('test', 'se', (2, 100, 100)), {}),
        (('test', 'se', (2, 100)), {'shape_X2': (2, 100, 100)}),
        (('test', 'set_se', (2, 100)), {'shape_X2': (2, 100, 100)}),
        (('test', 'set_se', (2, 100, 100)), {'shape_X2': (2, 100)}),
        # Wrongly typed second shape.
        (('test', 'se', (2, 1)), {'shape_X2': 1}),
    ]

    for args, kwargs in cases_assertion:
        with pytest.raises(AssertionError):
            package_target.check_str_cov(*args, **kwargs)

    with pytest.raises(ValueError):
        package_target.check_str_cov('test', 'abc', (2, 1))