Example No. 1
    def __init__(self, x, y, alpha=0., sigma=None, lamb=None, kernel_num=100):

        self.__x = transform_data(x)
        self.__y = transform_data(y)

        if self.__x.shape[1] != self.__y.shape[1]:
            raise ValueError("x and y must be same dimentions.")

        if sigma is None:
            sigma = np.logspace(-4, 9, 14)

        if lamb is None:
            lamb = np.logspace(-4, 9, 14)

        self.__x_num_row = self.__x.shape[0]
        self.__y_num_row = self.__y.shape[0]
        self.__kernel_num = min(
            [kernel_num, self.__x_num_row]
        )  # the kernel number is the smaller of kernel_num and the number of rows of x.
        self.__centers = np.array(
            rand.sample(list(self.__x), k=self.__kernel_num)
        )  # randomly choose RBF kernel centroids from the rows of x.
        self.__n_minimum = min(self.__x_num_row, self.__y_num_row)
        self.__kernel = jit(partial(gauss_kernel, centers=self.__centers))

        self._RuLSIF(
            x=self.__x,
            y=self.__y,
            alpha=alpha,
            s_sigma=np.atleast_1d(sigma),
            s_lambda=np.atleast_1d(lamb),
        )
Example No. 2
    def __init__(self, x, y, alpha=0., sigma=None, lamb=None, kernel_num=100):
        """[summary]

        Args:
            x (array-like of float): 
                Numerator samples array. x is generated from p(x).
            y (array-like of float): 
                Denumerator samples array. y is generated from q(x).
            alpha (float or array-like, optional): 
                The alpha is a parameter that can adjust the mixing ratio r(x) = p(x)/(alpha*p(x)+(1-alpha)q(x))
                , and is set in the range of 0-1. 
                Defaults to 0.
            sigma (float or array-like, optional): 
                Bandwidth of kernel. If a value is set for sigma, that value is used for kernel bandwidth
                , and if a numerical array is set for sigma, Densratio selects the optimum value by using CV.
                Defaults to array of 10e-4 to 10e+9 divided into 14 on the log scale.
            lamb (float or array-like, optional): 
                Regularization parameter. If a value is set for lamb, that value is used for hyperparameter
                , and if a numerical array is set for lamb, Densratio selects the optimum value by using CV.
                Defaults to array of 10e-4 to 10e+9 divided into 14 on the log scale.
            kernel_num (int, optional): The number of kernels in the linear model. Defaults to 100.

        Raises:
            ValueError: [description]
        """        

        self.__x = transform_data(x)
        self.__y = transform_data(y)

        if self.__x.shape[1] != self.__y.shape[1]:
            raise ValueError("x and y must be same dimentions.")

        if sigma is None:
            sigma = np.logspace(-3, 1, 9)

        if lamb is None:
            lamb = np.logspace(-3, 1, 9)

        self.__x_num_row = self.__x.shape[0]
        self.__y_num_row = self.__y.shape[0]
        self.__kernel_num = np.min(np.array([kernel_num, self.__x_num_row])).item()  # the kernel number is the smaller of kernel_num and the number of rows of x.
        self.__centers = np.array(rand.sample(list(self.__x), k=self.__kernel_num))  # randomly choose RBF kernel centroids from the rows of x.
        self.__n_minimum = min(self.__x_num_row, self.__y_num_row)
        # self.__kernel  = jit(partial(gauss_kernel,centers=self.__centers))

        self._RuLSIF(x = self.__x,
                     y = self.__y,
                     alpha = alpha,
                     s_sigma = np.atleast_1d(sigma),
                     s_lambda = np.atleast_1d(lamb),
                    )
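A hypothetical usage sketch for the constructor above. The class name is not shown in the snippet; the docstring mentions "Densratio", so that name is assumed here, and the data are synthetic:

# Assumed usage: `Densratio` is the (unconfirmed) class this __init__ belongs to.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(0.0, 1.0, size=(500, 1))   # numerator samples, drawn from p(x)
y = rng.normal(0.5, 1.2, size=(500, 1))   # denominator samples, drawn from q(x)

# Passing arrays for sigma/lamb makes the estimator pick the best values by CV.
dr = Densratio(x, y, alpha=0.,
               sigma=np.logspace(-1, 1, 5),
               lamb=np.logspace(-1, 1, 5),
               kernel_num=100)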
Example No. 3
def cts_lr_fields(mdp):
    """
    How does the learning rate change the vector field?
    """
    n = 3
    lrs = np.logspace(-5, 0, n * n)

    pis = gen_grid_policies(N=31)
    vs = polytope(mdp.P, mdp.r, mdp.discount, pis)
    qs = [np.einsum('ijk,i->jk', mdp.P, v) for v in vs]
    many_cores = fitted_cores(mdp, qs)

    plt.figure(figsize=(16, 16))
    plt.title('PVI')
    for i, lr in enumerate(lrs):

        dpvis = pvi_vector_field(mdp, many_cores, lr)
        # don't expect VI to change with the lr
        # dvis = vi_vector_field(mdp, qs, lr)

        plt.subplot(n, n, i + 1)
        plt.title('lr: {:.3f}'.format(lr))
        plt_field(vs, dpvis)

        # plt.title('Parameterised VI')
    # plt.savefig('figs/lr_limit_{:.3f}.png'.format(lr))
    plt.savefig('traj-figs/lr_limit_pvi.png', dpi=300)
Example No. 4
def pressure_layer(logPtop=-8., logPbtm=2., NP=20, mode='ascending'):
    """generating the pressure layer.

    Args:
       logPtop: log10(P[bar]) at the top layer
       logPbtm: log10(P[bar]) at the bottom layer
       NP: the number of the layers

    Returns:
         Parr: pressure layer
         dParr: delta pressure layer
         k: k-factor, P[i-1] = k*P[i]

    Note:
        dParr[i] = Parr[i] - Parr[i-1], dParr[0] = (1-k) Parr[0] for ascending mode
    """
    dlogP = (logPbtm - logPtop) / (NP - 1)
    k = 10**-dlogP
    Parr = jnp.logspace(logPtop, logPbtm, NP)
    dParr = (1.0 - k) * Parr
    if mode == 'descending':
        Parr = Parr[::-1]
        dParr = dParr[::-1]

    return jnp.array(Parr), jnp.array(dParr), k
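A small sanity check of the relations stated in the docstring, calling the pressure_layer defined above (assumes jax.numpy is imported as jnp, as in the snippet):

# minimal sketch: verify P[i-1] = k*P[i] and dParr[i] = Parr[i] - Parr[i-1]
Parr, dParr, k = pressure_layer(logPtop=-8., logPbtm=2., NP=20)
assert jnp.allclose(Parr[:-1], k * Parr[1:])          # adjacent layers differ by the factor k
assert jnp.allclose(dParr[1:], Parr[1:] - Parr[:-1])  # dParr is the layer-to-layer difference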
Example No. 5
def _growth_factor_gamma(cosmo, a, log10_amin=-3, steps=128):
    r""" Computes growth factor by integrating the growth rate provided by the
    \gamma parametrization. Normalized such that D( a=1) =1

    Parameters
    ----------
    a: array_like
      Scale factor

    amin: float
      Mininum scale factor, default 1e-3

    Returns
    -------
    D:  ndarray, or float if input scalar
        Growth factor computed at requested scale factor

    """
    # Check if growth has already been computed, if not, compute it
    if not "background.growth_factor" in cosmo._workspace.keys():
        # Compute tabulated array
        atab = np.logspace(log10_amin, 0.0, steps)

        def integrand(y, loga):
            xa = np.exp(loga)
            return _growth_rate_gamma(cosmo, xa)

        gtab = np.exp(odeint(integrand, np.log(atab[0]), np.log(atab)))
        gtab = gtab / gtab[-1]  # Normalize to a=1.
        cache = {"a": atab, "g": gtab}
        cosmo._workspace["background.growth_factor"] = cache
    else:
        cache = cosmo._workspace["background.growth_factor"]
    return np.clip(interp(a, cache["a"], cache["g"]), 0.0, 1.0)
Example No. 6
def tune_lr(method_id, method_params, problem_id, problem_params):
    #print("Learning Rate Tuning not yet available!")
    #return method_params
    loss = lambda a, b: np.sum((a - b)**2)
    optimizer = method_params['optimizer']
    search_space = {'optimizer': []}  # search space over optimizer instances
    lr_start, lr_stop = -1, -3  # search learning rates from 10^start to 10^stop
    learning_rates = np.logspace(lr_start, lr_stop,
                                 1 + 2 * np.abs(lr_start - lr_stop))
    for lr in learning_rates:
        search_space['optimizer'].append(
            optimizer(learning_rate=lr))  # create instance and append
    trials, min_steps = None, 100
    hpo = GridSearch()  # hyperparameter optimizer
    optimal_params, optimal_loss = hpo.search(
        method_id,
        method_params,
        problem_id,
        problem_params,
        loss,
        search_space,
        trials=trials,
        smoothing=10,
        min_steps=min_steps,
        verbose=0)  # run each model at least 100 steps
    return optimal_params
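For reference, the 1 + 2*|lr_start - lr_stop| point count above yields a half-decade grid; a quick illustration (np as imported in the snippet):

# 5 learning rates between 10^-1 and 10^-3, spaced half a decade apart
lrs = np.logspace(-1, -3, 1 + 2 * abs(-1 - (-3)))
# -> approximately 0.1, 0.0316, 0.01, 0.00316, 0.001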
Example No. 7
def test_gaussian_log_likelihood():
    n_ell = 5
    ell = jnp.logspace(1, 3, n_ell)
    nz1 = smail_nz(1.0, 2.0, 1.0)
    nz2 = smail_nz(1.0, 2.0, 0.5)
    n_cls = 3
    P = [probes.NumberCounts([nz1, nz2], constant_linear_bias(1.0))]
    cosmo = Planck15()
    mu, cov_sparse = gaussian_cl_covariance_and_mean(cosmo,
                                                     ell,
                                                     P,
                                                     sparse=True)
    cov_dense = to_dense(cov_sparse)
    data = 1.1 * mu
    for include_logdet in (True, False):
        loglike_sparse = gaussian_log_likelihood(data,
                                                 mu,
                                                 cov_sparse,
                                                 include_logdet=include_logdet)
        for method in "inverse", "cholesky":
            loglike_dense = gaussian_log_likelihood(
                data,
                mu,
                cov_dense,
                include_logdet=include_logdet,
                inverse_method=method,
            )
            assert_allclose(loglike_sparse, loglike_dense, rtol=1e-6)
Example No. 8
def test_grid_search_lstm(show=False):
    problem_id = "SP500-v0"
    method_id = "LSTM"
    problem_params = {}  # {'p':4, 'q':1} # params for ARMA problem
    method_params = {'n': 1, 'm': 1}
    loss = lambda a, b: np.sum((a - b)**2)
    search_space = {
        'l': [3, 4, 5, 6],
        'h': [2, 5, 8],
        'optimizer': []
    }  # parameters for LSTM method
    opts = [Adam, Adagrad, ONS, OGD]
    lr_start, lr_stop = -1, -3  # search learning rates from 10^start to 10^stop
    learning_rates = np.logspace(lr_start, lr_stop,
                                 1 + 2 * np.abs(lr_start - lr_stop))
    for opt, lr in itertools.product(opts, learning_rates):
        search_space['optimizer'].append(
            opt(learning_rate=lr))  # create instance and append

    trials, min_steps = 10, 100
    hpo = GridSearch()  # hyperparameter optimizer
    optimal_params, optimal_loss = hpo.search(
        method_id,
        method_params,
        problem_id,
        problem_params,
        loss,
        search_space,
        trials=trials,
        smoothing=10,
        min_steps=min_steps,
        verbose=show)  # run each model at least 100 steps

    if show:
        print("optimal params: ", optimal_params)
        print("optimal loss: ", optimal_loss)

    # test resulting method params
    method = tigerforecast.method(method_id)
    method.initialize(**optimal_params)
    problem = tigerforecast.problem(problem_id)
    x = problem.initialize(**problem_params)
    loss = []
    if show:
        print("run final test with optimal parameters")
    for t in range(5000):
        y_pred = method.predict(x)
        y_true = problem.step()
        loss.append(mse(y_pred, y_true))
        method.update(y_true)
        x = y_true

    if show:
        print("plot results")
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(10)
        plt.close()
Example No. 9
def test_comparison_hjert_scipy():

    Na = 300
    vl = -3
    vm = 5
    xarrv = jnp.logspace(vl, vm, Na)
    xarr = xarrv[:, None] * jnp.ones((Na, Na))
    aarrv = jnp.logspace(vl, vm, Na)
    aarr = aarrv[None, :] * jnp.ones((Na, Na))

    # scipy
    def H(a, x):
        z = x + (1j) * a
        w = sc_wofz(z)
        return w.real

    # hjert
    def vhjert(a):
        return vmap(hjert, (0, None), 0)(xarrv, a)

    vvhjert = jit(vmap(vhjert, 0, 0))
    diffarr = (vvhjert(aarrv).T - H(aarr, xarr)) / H(aarr, xarr)
    print('MEDIAN=', np.median(diffarr), 'MAX=', np.max(diffarr))

    # figure
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MultipleLocator, FormatStrFormatter
    fig = plt.figure()
    ax = fig.add_subplot(111)
    c = ax.imshow((vvhjert(aarrv).T - H(aarr, xarr)) / H(aarr, xarr),
                  vmin=-1.e-6,
                  vmax=1.e-6,
                  cmap='RdBu',
                  extent=([vl, vm, vm, vl]),
                  rasterized=True)
    plt.gca().invert_yaxis()
    plt.ylabel(r'$\log_{10}(x)$')
    plt.xlabel(r'$\log_{10}(a)$')
    cb = plt.colorbar(c)
    cb.formatter.set_powerlimits((0, 0))
    cb.set_label('(hjert - scipy)/scipy', size=14)
    plt.savefig('hjert.png', bbox_inches='tight', pad_inches=0.0)
    plt.savefig('hjert.pdf', bbox_inches='tight', pad_inches=0.0)

    assert np.max(diffarr) < 1.e-6
Example No. 10
def test_vterm():
    g = 980.
    drho = 1.0
    rho = 1.29*1.e-3  # g/cm3
    vfactor, Tr = viscosity.calc_vfactor(atm='Air')
    eta = viscosity.eta_Rosner(300.0, vfactor)
    r = jnp.logspace(-5, 0, 70)
    vfall = vterm.vf(r, g, eta, drho, rho)
    assert jnp.abs(jnp.mean(vfall) - 328.12296) < 1.e-5
Example No. 11
def ell_binning():
    # we put this here to make sure it's used consistently
    # plausible limits I guess
    ell_max = 2000
    n_ell = 100
    # choose ell bins from 100 .. 2000, log spaced
    ell_edges = np.logspace(2, np.log10(ell_max), n_ell + 1)
    ell = 0.5 * (ell_edges[1:] + ell_edges[:-1])
    delta_ell = ell_edges[1:] - ell_edges[:-1]
    return ell, delta_ell
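A quick check that only exercises the function above (np as imported in the snippet):

ell, delta_ell = ell_binning()
assert len(ell) == 100
# edges run from 10^2 = 100 up to ell_max = 2000, so the bin widths sum to 1900
assert np.isclose(delta_ell.sum(), 2000 - 100)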
Example No. 12
def _halofit_parameters(cosmo, a, transfer_fn):
    r""" Computes the non linear scale,
         effective spectral index,
         spectral curvature
    """
    # Step 1: Finding the non linear scale for which sigma(R)=1
    # That's our search range for the non linear scale
    r = np.logspace(-3, 1, 256)

    @jax.vmap
    def R_nl(a):
        def int_sigma(logk):
            k = np.exp(logk)
            y = np.outer(k, r)
            pk = linear_matter_power(cosmo, k, transfer_fn=transfer_fn)
            g = bkgrd.growth_factor(cosmo, np.atleast_1d(a))
            return (
                np.expand_dims(pk * k ** 3, axis=1)
                * np.exp(-(y ** 2))
                / (2.0 * np.pi ** 2)
                * g ** 2
            )

        sigma = simps(int_sigma, np.log(1e-4), np.log(1e4), 256)
        root = interp(np.atleast_1d(1.0), sigma, r)
        return root

    # Compute non linear scale
    k_nl = 1.0 / R_nl(np.atleast_1d(a)).squeeze()

    # Step 2: Retrieve the spectral index and spectral curvature
    def integrand(logk):
        k = np.exp(logk)
        y = np.outer(k, 1.0 / k_nl)
        pk = linear_matter_power(cosmo, k, transfer_fn=transfer_fn)
        g = np.expand_dims(bkgrd.growth_factor(cosmo, np.atleast_1d(a)), 0)
        res = (
            np.expand_dims(pk * k ** 3, axis=1)
            * np.exp(-(y ** 2))
            * g ** 2
            / (2.0 * np.pi ** 2)
        )
        dneff_dlogk = 2 * res * y ** 2
        dC_dlogk = 4 * res * (y ** 2 - y ** 4)
        return np.stack([dneff_dlogk, dC_dlogk], axis=1)

    res = simps(integrand, np.log(1e-4), np.log(1e4), 256)

    n_eff = res[0] - 3.0
    C = res[0] ** 2 + res[1]

    return k_nl, n_eff, C
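For readability, the integrals above appear to implement the standard halofit definitions; a sketch of the correspondence (with D(a) the linear growth factor, not taken verbatim from the source):

\sigma^2(R, a) = \int d\ln k \, \frac{k^3 P_{\rm lin}(k)\, D^2(a)}{2\pi^2}\, e^{-k^2 R^2},
\qquad k_{\rm nl} = 1/R_{\rm nl} \ \text{with} \ \sigma(R_{\rm nl}, a) = 1,

n_{\rm eff} = -3 - \left.\frac{d\ln\sigma^2}{d\ln R}\right|_{R_{\rm nl}},
\qquad C = -\left.\frac{d^2\ln\sigma^2}{d\ln R^2}\right|_{R_{\rm nl}},

which, evaluated at \sigma = 1, reduces to the res[0] - 3 and res[0]**2 + res[1] expressions returned above.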
Example No. 13
def test_comparison_hjert_scipy():

    Na = 300
    vl = -3
    vm = 5
    xarrv = jnp.logspace(vl, vm, Na)
    xarr = xarrv[:, None] * jnp.ones((Na, Na))
    aarrv = jnp.logspace(vl, vm, Na)
    aarr = aarrv[None, :] * jnp.ones((Na, Na))

    # scipy
    def H(a, x):
        z = x + (1j) * a
        w = sc_wofz(z)
        return w.real

    # hjert
    def vhjert(a):
        return vmap(hjert, (0, None), 0)(xarrv, a)

    vvhjert = jit(vmap(vhjert, 0, 0))
    diffarr = (vvhjert(aarrv).T - H(aarr, xarr)) / H(aarr, xarr)
    assert np.max(diffarr) < 1.e-6
Example No. 14
def test_cubic_spline():
    # We sample some irregularly sampled points
    x = np.logspace(-2, 1, 64)
    y = _testing_function(x)

    spl = InterpolatedUnivariateSpline(x, y, k=3)
    spl_ref = RefSpline(x, y, k=3)

    # Vector of points at which to interpolate, note that this goes outside of
    # the interpolation data, so we are also testing extrapolation
    t = np.linspace(-1, 11, 128)

    assert_allclose(spl_ref(t), spl(t), rtol=1e-10)

    # Test the antiderivative, up to integration constant
    a = spl_ref.antiderivative()(t) - spl_ref.antiderivative()(0.01)
    b = spl.antiderivative(t) - spl.antiderivative(0.01)
    assert_allclose(a, b, rtol=1e-10)
Example No. 15
def radial_comoving_distance(cosmo, a, log10_amin=-3, steps=256):
    r"""Radial comoving distance in [Mpc/h] for a given scale factor.

    Parameters
    ----------
    a : array_like
        Scale factor

    Returns
    -------
    chi : ndarray, or float if input scalar
        Radial comoving distance corresponding to the specified scale
        factor.

    Notes
    -----
    The radial comoving distance is computed by performing the following
    integration:

    .. math::

        \chi(a) =  R_H \int_a^1 \frac{da^\prime}{{a^\prime}^2 E(a^\prime)}
    """
    # Check if distances have already been computed
    if not "background.radial_comoving_distance" in cosmo._workspace.keys():
        # Compute tabulated array
        atab = np.logspace(log10_amin, 0.0, steps)

        def dchioverdlna(y, x):
            xa = np.exp(x)
            return dchioverda(cosmo, xa) * xa

        chitab = odeint(dchioverdlna, 0.0, np.log(atab))
        # np.clip(- 3000*np.log(atab), 0, 10000)#odeint(dchioverdlna, 0., np.log(atab), cosmo)
        chitab = chitab[-1] - chitab

        cache = {"a": atab, "chi": chitab}
        cosmo._workspace["background.radial_comoving_distance"] = cache
    else:
        cache = cosmo._workspace["background.radial_comoving_distance"]

    a = np.atleast_1d(a)
    # Return the results as an interpolation of the table
    return np.clip(interp(a, cache["a"], cache["chi"]), 0.0)
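A brief note on how the ODE above realizes the documented integral (a sketch of the change of variables, not part of the original source):

With x = \ln a', \quad \frac{d\chi}{dx} = a' \frac{d\chi}{da'} = \frac{R_H}{a' E(a')},

so odeint accumulates \tilde\chi(a) = R_H \int_{a_{\min}}^{a} \frac{da'}{a'^2 E(a')} on the log-spaced grid, and the line chitab = chitab[-1] - chitab turns it into \chi(a) = \tilde\chi(1) - \tilde\chi(a) = R_H \int_a^1 \frac{da'}{a'^2 E(a')}.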
Example No. 16
    def testNTKPredCovPosDef(self, train_shape, test_shape, network,
                             out_logits):
        key = random.PRNGKey(0)

        key, split = random.split(key)
        x_train = np.cos(random.normal(split, train_shape))

        key, split = random.split(key)
        y_train = np.array(
            random.bernoulli(split, shape=(train_shape[0], out_logits)),
            np.float32)

        key, split = random.split(key)
        x_test = np.cos(random.normal(split, test_shape))
        _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

        reg = 1e-7
        ntk_predictions = predict.gradient_descent_mse_gp(ker_fun,
                                                          x_train,
                                                          y_train,
                                                          x_test,
                                                          diag_reg=reg,
                                                          get='ntk',
                                                          compute_cov=True)

        ts = np.logspace(-2, 8, 10)

        ntk_cov_predictions = [ntk_predictions(t).covariance for t in ts]

        if xla_bridge.get_backend().platform == 'tpu':
            eigh = np.onp.linalg.eigh
        else:
            eigh = np.linalg.eigh

        check_symmetric = np.array(
            [np.max(np.abs(cov - cov.T)) for cov in ntk_cov_predictions])
        check_pos_evals = np.min(
            np.array([eigh(cov)[0] + 1e-10 for cov in ntk_cov_predictions]))

        self.assertAllClose(check_symmetric, np.zeros_like(check_symmetric),
                            True)
        self.assertGreater(check_pos_evals, 0., True)
Example No. 17
def test_grid_search_arma(show=False):
    environment_id = "LDS"
    controller_id = "GPC"
    environment_params = {'n':3, 'm':2}
    controller_params = {}
    loss = lambda a, b: np.sum((a-b)**2)
    search_space = {'optimizer': []}  # parameters for GPC controller
    opts = [Adam, Adagrad, ONS, OGD]
    lr_start, lr_stop = 0, -4 # search learning rates from 10^start to 10^stop 
    learning_rates = np.logspace(lr_start, lr_stop, 1+2*np.abs(lr_start - lr_stop))
    for opt, lr in itertools.product(opts, learning_rates):
        search_space['optimizer'].append(opt(learning_rate=lr)) # create instance and append

    trials = 15
    hpo = GridSearch() # hyperparameter optimizer
    optimal_params, optimal_loss = hpo.search(controller_id, controller_params, environment_id, environment_params, loss, 
        search_space, trials=trials, smoothing=10, start_steps=100, verbose=show)

    if show:
        print("optimal loss: ", optimal_loss)
        print("optimal params: ", optimal_params)

    # test resulting controller params
    controller = tigercontrol.controllers(controller_id)
    controller.initialize(**optimal_params)
    environment = tigercontrol.environment(environment_id)
    x = environment.reset(**environment_params)
    loss = []
    if show:
        print("run final test with optimal parameters")
    for t in range(5000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(10)
        plt.close()
Example No. 18
  def testPredCovPosDef(self, train_shape, test_shape, network, out_logits):
    _, x_test, x_train, y_train = self._get_inputs(out_logits, test_shape,
                                                   train_shape)
    _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

    ts = np.logspace(-3, 3, 10)
    predict_fn_mse_ens = predict.gradient_descent_mse_ensemble(
        ker_fun, x_train, y_train)

    for get in ('nngp', 'ntk'):
      for x in (None, 'x_test'):
        for t in (None, 'ts'):
          with self.subTest(get=get, x=x, t=t):
            cov = predict_fn_mse_ens(t=t if t is None else ts,
                                     get=get,
                                     x_test=x if x is None else x_test,
                                     compute_cov=True).covariance

            self.assertAllClose(cov, np.moveaxis(cov, -1, -2))
            self.assertGreater(np.min(np.linalg.eigh(cov)[0]), -1e-4)
Example No. 19
def growth_factor(cosmo, a, log10_amin=-3, steps=100, eps=1e-4):
    """ Compute Growth factor at a given scale factor, normalised such
  that G(a=1) = 1.

  Parameters
  ----------
  a: array_like
    Scale factor

  log10_amin: float
    log10 of the minimum scale factor, default -3

  Returns
  -------
  G:  ndarray, or float if input scalar
      Growth factor computed at requested scale factor
  """
    # Check if growth has already been computed
    if 'background.growth_factor' not in cosmo._workspace:
        # Compute tabulated array
        atab = np.logspace(log10_amin, 0., steps)

        def D_derivs(y, x, cosmo):
            q = (2.0 - 0.5 *
                 (Omega_m_a(cosmo, x) +
                  (1.0 + 3.0 * w(cosmo, x)) * Omega_de_a(cosmo, x))) / x
            r = 1.5 * Omega_m_a(cosmo, x) / x / x
            return [y[1], -q * y[1] + r * y[0]]

        y0 = [atab[0], 1.0]
        y1, y2 = odeint(D_derivs, y0, atab, cosmo)

        gtab = y1 / y1[-1]

        cache = {'a': atab, 'g': gtab}
        cosmo._workspace['background.growth_factor'] = cache
    else:
        cache = cosmo._workspace['background.growth_factor']

    a = np.clip(np.atleast_1d(a), 10.**log10_amin, 1.0 - eps)
    return np.clip(interp(a, cache['a'], cache['g']), 0., 1.0)
Example No. 20
def _growth_factor_ODE(cosmo, a, log10_amin=-3, steps=128, eps=1e-4):
    """ Compute linear growth factor D(a) at a given scale factor,
    normalised such that D(a=1) = 1.

    Parameters
    ----------
    a: array_like
      Scale factor

    log10_amin: float
      log10 of the minimum scale factor, default -3

    Returns
    -------
    D:  ndarray, or float if input scalar
        Growth factor computed at requested scale factor
    """
    # Check if growth has already been computed
    if not "background.growth_factor" in cosmo._workspace.keys():
        # Compute tabulated array
        atab = np.logspace(log10_amin, 0.0, steps)

        def D_derivs(y, x):
            q = (2.0 - 0.5 *
                 (Omega_m_a(cosmo, x) +
                  (1.0 + 3.0 * w(cosmo, x)) * Omega_de_a(cosmo, x))) / x
            r = 1.5 * Omega_m_a(cosmo, x) / x / x
            return np.array([y[1], -q * y[1] + r * y[0]])

        y0 = np.array([atab[0], 1.0])
        y = odeint(D_derivs, y0, atab)
        y1 = y[:, 0]
        gtab = y1 / y1[-1]
        # To transform from dD/da to dlnD/dlna: dlnD/dlna = a / D dD/da
        ftab = y[:, 1] / y1[-1] * atab / gtab

        cache = {"a": atab, "g": gtab, "f": ftab}
        cosmo._workspace["background.growth_factor"] = cache
    else:
        cache = cosmo._workspace["background.growth_factor"]
    return np.clip(interp(a, cache["a"], cache["g"]), 0.0, 1.0)
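For reference, D_derivs above appears to integrate the standard linear growth equation written in the scale factor (a sketch, with y = (D, dD/da)):

\frac{d^2 D}{da^2} + \frac{1}{a}\left[2 - \frac{1}{2}\Big(\Omega_m(a) + \big(1 + 3w(a)\big)\,\Omega_{de}(a)\Big)\right]\frac{dD}{da} = \frac{3}{2}\,\frac{\Omega_m(a)}{a^2}\,D,

with the growth rate read off from the second component as f = \frac{d\ln D}{d\ln a} = \frac{a}{D}\frac{dD}{da}, which is exactly the ftab line above.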
Example No. 21
def test_sparse_cov():
    n_ell = 25
    ell = jnp.logspace(1, 3, n_ell)
    nz1 = smail_nz(1.0, 2.0, 1.0)
    nz2 = smail_nz(1.0, 2.0, 0.5)
    n_cls = 3
    P = [probes.NumberCounts([nz1, nz2], constant_linear_bias(1.0))]
    cl_signal = jnp.ones((n_cls, n_ell))
    cl_noise = jnp.ones_like(cl_signal)
    cov_dense = gaussian_cl_covariance(ell,
                                       P,
                                       cl_signal,
                                       cl_noise,
                                       sparse=False)
    cov_sparse = gaussian_cl_covariance(ell,
                                        P,
                                        cl_signal,
                                        cl_noise,
                                        sparse=True)
    assert cov_sparse.shape == (n_cls, n_cls, n_ell)
    assert_array_equal(to_dense(cov_sparse), cov_dense)
Example No. 22
N = 1500
nus, wav, res = nugrid(22900, 22960, N, unit='AA')
# mdbM=moldb.MdbExomol('.database/CO/12C-16O/Li2015',nus)
# loading molecular database
# molmass=molinfo.molmass("CO") #molecular mass (CO)
mdbM = moldb.MdbExomol('.database/H2O/1H2-16O/POKAZATEL', nus,
                       crit=1.e-45)  # loading molecular database
molmassM = molinfo.molmass('H2O')  # molecular mass (H2O)

q = mdbM.qr_interp(1500.0)
S = SijT(1500.0, mdbM.logsij0, mdbM.nu_lines, mdbM.elower, q)
mask = S > 1.e-25
mdbM.masking(mask)

Tarr = jnp.logspace(jnp.log10(800), jnp.log10(1600), 100)
qt = vmap(mdbM.qr_interp)(Tarr)
SijM = jit(vmap(SijT,
                (0, None, None, None, 0)))(Tarr, mdbM.logsij0, mdbM.nu_lines,
                                           mdbM.elower, qt)

imax = jnp.argmax(SijM, axis=0)
Tmax = Tarr[imax]
print(jnp.min(Tmax))

pl = planck.piBarr(jnp.array([1100.0, 1000.0]), nus)
print(pl[1] / pl[0])

pl = planck.piBarr(jnp.array([1400.0, 1200.0]), nus)
print(pl[1] / pl[0])
Example No. 23
def logspace_epsilons(num_epsilons: int,
                      epsilon: float = 0.017) -> Sequence[float]:
    """`num_epsilons` of logspace-distributed values, with median `epsilon`."""
    if num_epsilons <= 1:
        return (epsilon, )
    return jnp.logspace(1, 8, num_epsilons, base=epsilon**(2. / 9.))
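Why the median is epsilon: jnp.logspace(1, 8, n, base=b) returns b**1 through b**8 geometrically, and with b = epsilon**(2/9) the middle exponent 4.5 gives b**4.5 = epsilon. A minimal check (jnp as imported in the snippet):

eps = logspace_epsilons(5, epsilon=0.017)
assert jnp.isclose(jnp.median(eps), 0.017, rtol=1e-4)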
Example No. 24
    def testNTK_NTKNNGPAgreement(self, train_shape, test_shape, network,
                                 out_logits):
        key = random.PRNGKey(0)

        key, split = random.split(key)
        x_train = np.cos(random.normal(split, train_shape))

        key, split = random.split(key)
        y_train = np.array(
            random.bernoulli(split, shape=(train_shape[0], out_logits)),
            np.float32)

        key, split = random.split(key)
        x_test = np.cos(random.normal(split, test_shape))
        _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

        reg = 1e-7
        prediction = predict.gradient_descent_mse_gp(ker_fun,
                                                     x_train,
                                                     y_train,
                                                     x_test,
                                                     diag_reg=reg,
                                                     get='NTK',
                                                     compute_cov=True)

        ts = np.logspace(-2, 8, 10)
        ntk_predictions = [prediction(t).mean for t in ts]

        # Create a hacked kernel function that always returns the ntk kernel
        def always_ntk(x1, x2, get=('nngp', 'ntk')):
            out = ker_fun(x1, x2, get=('nngp', 'ntk'))
            if get == 'nngp' or get == 'ntk':
                return out.ntk
            else:
                return out._replace(nngp=out.ntk)

        ntk_nngp_prediction = predict.gradient_descent_mse_gp(always_ntk,
                                                              x_train,
                                                              y_train,
                                                              x_test,
                                                              diag_reg=reg,
                                                              get='NNGP',
                                                              compute_cov=True)

        ntk_nngp_predictions = [ntk_nngp_prediction(t).mean for t in ts]

        # Test if you use the nngp equations with the ntk, you get the same mean
        self.assertAllClose(ntk_predictions, ntk_nngp_predictions, True)

        # Next test that if you go through the NTK code path, but with only
        # the NNGP kernel, we recreate the NNGP dynamics.
        reg = 1e-7
        nngp_prediction = predict.gradient_descent_mse_gp(ker_fun,
                                                          x_train,
                                                          y_train,
                                                          x_test,
                                                          diag_reg=reg,
                                                          get='NNGP',
                                                          compute_cov=True)

        # Create a hacked kernel function that always returns the nngp kernel
        def always_nngp(x1, x2, get=('nngp', 'ntk')):
            out = ker_fun(x1, x2, get=('nngp', 'ntk'))
            if get == 'nngp' or get == 'ntk':
                return out.nngp
            else:
                return out._replace(ntk=out.nngp)

        nngp_ntk_prediction = predict.gradient_descent_mse_gp(always_nngp,
                                                              x_train,
                                                              y_train,
                                                              x_test,
                                                              diag_reg=reg,
                                                              get='NTK',
                                                              compute_cov=True)

        nngp_cov_predictions = [nngp_prediction(t).covariance for t in ts]
        nngp_ntk_cov_predictions = [
            nngp_ntk_prediction(t).covariance for t in ts
        ]

        # Test if you use the ntk equations with the nngp, you get the same cov
        # Although, due to accumulation of numerical errors, only roughly.
        self.assertAllClose(nngp_cov_predictions, nngp_ntk_cov_predictions,
                            True)
Example No. 25
def _newton_update(weights_0,
                   X,
                   XX_T,
                   target,
                   k,
                   method_,
                   maxiter=int(1024),
                   ftol=1e-12,
                   gtol=1e-8,
                   reg_lambda=0.0,
                   reg_mu=None,
                   ref_row=True,
                   initializer=None,
                   reg_format=None):

    L_list = [
        float(
            _objective(weights_0, X, XX_T, target, k, method_, reg_lambda,
                       reg_mu, ref_row, initializer, reg_format))
    ]

    weights = weights_0.copy()

    # TODO move this to the initialization
    if method_ is None:
        weights = jax_np.zeros_like(weights)

    for i in range(0, maxiter):

        gradient = _gradient(weights, X, XX_T, target, k, method_, reg_lambda,
                             reg_mu, ref_row, initializer, reg_format)

        if jax_np.abs(gradient).sum() < gtol:
            break

        # FIXME hessian is occasionally NaN
        hessian = _hessian(weights, X, XX_T, target, k, method_, reg_lambda,
                           reg_mu, ref_row, initializer, reg_format)

        if method_ == 'FixDiag':
            updates = gradient / hessian
        else:
            try:
                inverse = scipy.linalg.pinv2(hessian)
                updates = jax_np.matmul(inverse, gradient)
            except (np.linalg.LinAlgError, ValueError) as err:
                logging.error(err)
                updates = gradient

        for step_size in jax_np.hstack(
            (jax_np.linspace(1, 0.1, 10), jax_np.logspace(-2, -32, 31))):

            tmp_w = weights - (updates * step_size).ravel()

            if jax_np.any(jax_np.isnan(tmp_w)):
                logging.debug("{}: There are NaNs in tmp_w".format(method_))

            L = _objective(tmp_w, X, XX_T, target, k, method_, reg_lambda,
                           reg_mu, ref_row, initializer, reg_format)

            if (L - L_list[-1]) < 0:
                break

        L_list.append(float(L))

        logging.debug(
            "{}: after {} iterations log-loss = {:.7e}, sum_grad = {:.7e}".
            format(method_, i, L,
                   jax_np.abs(gradient).sum()))

        if jax_np.isnan(L):
            logging.error("{}: log-loss is NaN".format(method_))
            break

        if i >= 5:
            if (float(np.min(np.diff(L_list[-5:]))) > -ftol) & \
               (float(np.sum(np.diff(L_list[-5:])) > 0) == 0):
                weights = tmp_w.copy()
                logging.debug(
                    '{}: Terminate as there is not enough changes on loss.'.
                    format(method_))
                break

        if (L_list[-1] - L_list[-2]) > 0:
            logging.debug('{}: Terminate as the loss increased {}.'.format(
                method_, jax_np.diff(L_list[-2:])))
            break
        else:
            weights = tmp_w.copy()

    L = _objective(weights, X, XX_T, target, k, method_, reg_lambda, reg_mu,
                   ref_row, initializer, reg_format)

    logging.debug(
        "{}: after {} iterations final log-loss = {:.7e}, sum_grad = {:.7e}".
        format(method_, i, L,
               jax_np.abs(gradient).sum()))

    return weights
Example No. 26
def logspace(*args, **kwargs):
  return JaxArray(jnp.logspace(*args, **kwargs))
Example No. 27
from functools import partial

import jax.numpy as jnp
import numpy as onp
import json_tricks as json
from jax import grad, jit, random

import utils
import stein
import kernels
import distributions
import models
import config as cfg

key = random.PRNGKey(0)

# Poorly conditioned Gaussian
d = 50
variances = jnp.logspace(-5, 0, num=d)
target = distributions.Gaussian(jnp.zeros(d), variances)
proposal = distributions.Gaussian(jnp.zeros(d), jnp.ones(d))


@partial(jit, static_argnums=1)
def get_sd(samples, fun):
    """Compute SD(samples, p) given witness function fun"""
    return stein.stein_discrepancy(samples, target.logpdf, fun)


def kl_gradient(x):
    """Optimal witness function."""
    return grad(lambda x: target.logpdf(x) - proposal.logpdf(x))(x)

Example No. 28
def main(_):
    b_mode = False

    std1 = jnp.expand_dims(fits.getdata(FLAGS.std1).astype('float32'), -1)
    std2 = jnp.expand_dims(fits.getdata(FLAGS.std2).astype('float32'), -1)
    sigma_gamma = jnp.concatenate([std1, std2], axis=-1)

    #fits.writeto("./sigma_gamma.fits", onp.array(sigma_gamma), overwrite=False)
    def log_likelihood(x, sigma, meas_shear, mask, sigma_mask):
        """ Likelihood function at the level of the measured shear
    """
        if b_mode:
            x = x.reshape((360, 360, 2))
            ke = x[..., 0]
            kb = x[..., 1]
        else:
            ke = x.reshape((360, 360))
            kb = jnp.zeros(ke.shape)

        model_shear = jnp.stack(ks93inv(ke, kb), axis=-1)

        return -jnp.sum((model_shear - meas_shear)**2 /
                        ((sigma_gamma)**2 + sigma**2 + sigma_mask)) / 2.

    likelihood_score = jax.vmap(jax.grad(log_likelihood),
                                in_axes=[0, 0, None, None, None])

    map_size = fits.getdata(FLAGS.mask).astype('float32').shape[0]

    # Make the network
    #model = hk.transform_with_state(forward_fn)
    model = hk.without_apply_rng(hk.transform_with_state(forward_fn))

    rng_seq = hk.PRNGSequence(42)
    params, state = model.init(next(rng_seq),
                               jnp.zeros((1, map_size, map_size, 2)),
                               jnp.zeros((1, 1, 1, 1)),
                               is_training=True)

    # Load the weights of the neural network
    if not FLAGS.gaussian_only:
        with open(FLAGS.model_weights, 'rb') as file:
            params, state, sn_state = pickle.load(file)
        residual_prior_score = partial(model.apply,
                                       params,
                                       state,
                                       next(rng_seq),
                                       is_training=True)

    pixel_size = jnp.pi * FLAGS.resolution / 180. / 60.  #rad/pixel
    # Load prior power spectrum
    ps_data = onp.load(FLAGS.gaussian_path).astype('float32')
    ell = jnp.array(ps_data[0, :])
    # 4th channel for massivenu
    ps_halofit = jnp.array(ps_data[1, :] /
                           pixel_size**2)  # normalisation by pixel size
    # convert to pixel units of our simple power spectrum calculator
    kell = ell / 2 / jnp.pi * 360 * pixel_size / map_size
    # Interpolate the Power Spectrum in Fourier Space
    power_map = jnp.array(make_power_map(ps_halofit, map_size, kps=kell))

    # Load the noiseless convergence map
    if not FLAGS.COSMOS:
        print('i am here')
        convergence = fits.getdata(FLAGS.convergence).astype('float32')

        # Get the corresponding shear
        gamma1, gamma2 = ks93inv(convergence, onp.zeros_like(convergence))

        if not FLAGS.no_cluster:
            print('adding a cluster')
            # Compute NFW profile shear map
            g1_NFW, g2_NFW = gen_nfw_shear(x_cen=FLAGS.x_cluster,
                                           y_cen=FLAGS.y_cluster,
                                           resolution=FLAGS.resolution,
                                           nx=map_size,
                                           ny=map_size,
                                           z=FLAGS.z_halo,
                                           m=FLAGS.mass_halo,
                                           zs=FLAGS.zs)
            # Shear with added NFW cluster
            gamma1 += g1_NFW
            gamma2 += g2_NFW

            # Target convergence map with the added cluster
            #ke_cluster, kb_cluster = ks93(g1_cluster, g2_cluster)

        # Add noise to the shear map
        if FLAGS.cosmos_noise_realisation:
            print('cosmos noise real')
            gamma1 += fits.getdata(FLAGS.cosmos_noise_e1).astype('float32')
            gamma2 += fits.getdata(FLAGS.cosmos_noise_e2).astype('float32')

        else:
            gamma1 += std1[..., 0] * jax.random.normal(
                jax.random.PRNGKey(42),
                gamma1.shape)  #onp.random.randn(map_size,map_size)
            gamma2 += std2[..., 0] * jax.random.normal(
                jax.random.PRNGKey(43),
                gamma2.shape)  #onp.random.randn(map_size,map_size)

        # Load the shear maps and corresponding mask
        gamma = onp.stack(
            [gamma1, gamma2],
            -1)  # Shear is expected in the format [map_size,map_size,2]

    else:

        # Load the shear maps and corresponding mask
        g1 = fits.getdata('../data/COSMOS/cosmos_full_e1_0.29arcmin360.fits'
                          ).astype('float32').reshape([map_size, map_size, 1])
        g2 = fits.getdata('../data/COSMOS/cosmos_full_e2_0.29arcmin360.fits'
                          ).astype('float32').reshape([map_size, map_size, 1])
        gamma = onp.concatenate([g1, g2], axis=-1)

    mask = jnp.expand_dims(fits.getdata(FLAGS.mask).astype('float32'),
                           -1)  # has shape [map_size,map_size,1]

    masked_true_shear = gamma * mask
    #fits.writeto("./input_shear.fits", onp.array(masked_true_shear), overwrite=False)

    sigma_mask = (1 - mask) * 1e10

    def score_fn(params, state, x, sigma, is_training=False):
        if b_mode:
            x = x.reshape((-1, 360, 360, 2))
            ke = x[..., 0]
            kb = x[..., 1]
        else:
            ke = x.reshape((-1, 360, 360))

        if FLAGS.gaussian_prior:
            # If requested, first compute the Gaussian prior
            gs = gaussian_prior_score(ke, sigma.reshape((-1, 1, 1)), power_map)
            gs = jnp.expand_dims(gs, axis=-1)
            #print((jnp.abs(sigma.reshape((-1,1,1,1)))**2).shape, (gs).shape)
            net_input = jnp.concatenate([
                ke.reshape((-1, 360, 360, 1)),
                jnp.abs(sigma.reshape((-1, 1, 1, 1)))**2 * gs
            ],
                                        axis=-1)
            res, state = model.apply(params,
                                     state,
                                     net_input,
                                     sigma.reshape((-1, 1, 1, 1)),
                                     is_training=is_training)
            if b_mode:
                gsb = gaussian_prior_score_b(kb, sigma.reshape((-1, 1, 1)))
                gsb = jnp.expand_dims(gsb, axis=-1)
            else:
                gsb = jnp.zeros_like(res)
        else:
            res, state = model.apply(params,
                                     state,
                                     ke.reshape((-1, 360, 360, 1)),
                                     sigma.reshape((-1, 1, 1, 1)),
                                     is_training=is_training)
            gs = jnp.zeros_like(res)
            gsb = jnp.zeros_like(res)
        return _, res, gs, gsb

    score_fn = partial(score_fn, params, state)

    def score_prior(x, sigma):
        if b_mode:
            _, res, gaussian_score, gsb = score_fn(x.reshape(-1, 360, 360, 2),
                                                   sigma.reshape(-1, 1, 1, 1))
        else:
            _, res, gaussian_score, gsb = score_fn(x.reshape(-1, 360, 360),
                                                   sigma.reshape(-1, 1, 1))
        ke = (res[..., 0:1] + gaussian_score).reshape(-1, 360 * 360)
        kb = gsb[..., 0].reshape(-1, 360 * 360)
        if b_mode:
            return jnp.stack([ke, kb], axis=-1)
        else:
            return ke

    def total_score_fn(x, sigma):
        if b_mode:
            sl = likelihood_score(x, sigma, masked_true_shear, mask,
                                  sigma_mask).reshape(-1, 360 * 360, 2)
        else:
            sl = likelihood_score(x, sigma, masked_true_shear, mask,
                                  sigma_mask).reshape(-1, 360 * 360)
        sp = score_prior(x, sigma)
        if b_mode:
            return (sl + sp).reshape(-1, 360 * 360 * 2)
        else:
            return (sl + sp).reshape(-1, 360 * 360)
        #return (sp).reshape(-1, 360*360,2)

    # Prepare the input with a high noise level map

    initial_temperature = FLAGS.initial_temperature
    delta_tmp = initial_temperature  #onp.sqrt(initial_temperature**2 - 0.148**2)
    initial_step_size = FLAGS.initial_step_size  #0.018
    min_steps_per_temp = FLAGS.min_steps_per_temp  #10
    init_image, _ = ks93(mask[..., 0] * masked_true_shear[..., 0],
                         mask[..., 0] * masked_true_shear[..., 1])
    init_image = jnp.expand_dims(init_image, axis=0)
    init_image = jnp.repeat(init_image, FLAGS.batch_size, axis=0)
    init_image += (delta_tmp * onp.random.randn(FLAGS.batch_size, 360, 360))

    def make_kernel_fn(target_log_prob_fn, target_score_fn, sigma):
        return ScoreHamiltonianMonteCarlo(
            target_log_prob_fn=target_log_prob_fn,
            target_score_fn=target_score_fn,
            step_size=initial_step_size *
            (jnp.max(sigma) / initial_temperature)**0.5,
            num_leapfrog_steps=3,
            num_delta_logp_steps=4)

    tmc = TemperedMC(
        target_score_fn=total_score_fn,  #score_prior,
        inverse_temperatures=initial_temperature *
        jnp.ones([FLAGS.batch_size]),
        make_kernel_fn=make_kernel_fn,
        gamma=0.98,
        min_temp=8e-3,
        min_steps_per_temp=min_steps_per_temp,
        num_delta_logp_steps=4)

    num_burnin_steps = int(0)

    samples, trace = tfp.mcmc.sample_chain(
        num_results=2,  #FLAGS.num_steps,
        current_state=init_image.reshape([FLAGS.batch_size, -1]),
        kernel=tmc,
        num_burnin_steps=num_burnin_steps,
        num_steps_between_results=6000,  #num_results//FLAGS.num_steps,
        trace_fn=lambda _, pkr:
        (pkr.pre_tempering_results.is_accepted, pkr.
         post_tempering_inverse_temperatures, pkr.tempering_log_accept_ratio),
        seed=jax.random.PRNGKey(int(time.time())))

    sol = samples[-1, ...].reshape(-1, 360, 360)

    from scipy import integrate

    @jax.jit
    def dynamics(t, x):
        if b_mode:
            x = x.reshape([-1, 360, 360, 2])
            return -0.5 * total_score_fn(
                x, sigma=jnp.ones(
                    (FLAGS.batch_size, 1, 1, 1)) * jnp.sqrt(t)).reshape([-1])
        else:
            x = x.reshape([-1, 360, 360])
            return -0.5 * total_score_fn(
                x, sigma=jnp.ones(
                    (FLAGS.batch_size, 1, 1)) * jnp.sqrt(t)).reshape([-1])

    init_ode = sol

    last_trace = jnp.mean(trace[1][-1])
    noise = last_trace
    start_and_end_times = jnp.logspace(jnp.log10(0.99 * noise**2), -5, num=50)

    solution = integrate.solve_ivp(dynamics, [noise**2, (1e-5)],
                                   init_ode.flatten(),
                                   t_eval=start_and_end_times)

    denoised = solution.y[:, -1].reshape([FLAGS.batch_size, 360, 360])

    fits.writeto("./results/" + FLAGS.output_folder + "/samples_hmc_" +
                 FLAGS.output_file + ".fits",
                 onp.array(sol),
                 overwrite=False)
    fits.writeto("./results/" + FLAGS.output_folder + "/samples_denoised_" +
                 FLAGS.output_file + ".fits",
                 onp.array(denoised),
                 overwrite=False)

    print('end of sampling')
Example No. 29
vmap_ts_batch = 1
vmap_beta_batch = 1
vmap_sig_batch = 1
vmap_diag_batch = 1
num_of_gpus = -1
b_std = 0.
diag_min = -3
diag_max = 1
num_diag = 100
dig_reg = 1e-4
save_path = '/Volumes/ravidziv/info_ntk/logs/{}_results.csv'
run_metrics = ['losses', 'ixt', 'dkl_output']

train_images, train_labels, test_images, test_labels = load_data(
    dataset=dataset, train_size=train_size, test_size=test_size)
ts = np.logspace(ts_min, ts_max, num_ts)
# ts = np.array([1e20])
sigs = np.logspace(sigs_min, sigs_max, num_sigs)
# diag_regs =  np.logspace(diag_min, diag_max, num_diag)
betas = np.logspace(beta_min, beta_max, num_betas)
# betas = np.array([0.])
metrics = {
    'losses':
    MetricsTuple(tuple(loss_metrics_name), get_losses, args=tuple({})),
    'ixt':
    MetricsTuple(tuple(ixt_metrics_name),
                 get_info_nec,
                 args=tuple({'num_of_samples': 2})),
    'dkl_output':
    MetricsTuple(tuple(dkl_metrics_name),
                 get_kl_posterior_prior,
Example No. 30
    def testNTK_NTKNNGPAgreement(self, train_shape, test_shape, network,
                                 out_logits):
        _, x_test, x_train, y_train = self._get_inputs(out_logits, test_shape,
                                                       train_shape)
        _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

        reg = 1e-7
        predictor = predict.gradient_descent_mse_ensemble(ker_fun,
                                                          x_train,
                                                          y_train,
                                                          diag_reg=reg)

        ts = np.logspace(-2, 8, 10).reshape((5, 2))

        for t in (None, 'ts'):
            for x in (None, 'x_test'):
                with self.subTest(t=t, x=x):
                    x = x if x is None else x_test
                    t = t if t is None else ts

                    ntk = predictor(t=t, get='ntk', x_test=x)

                    # Test time broadcasting
                    if t is not None:
                        ntk_ind = np.array([
                            predictor(t=t, get='ntk', x_test=x)
                            for t in t.ravel()
                        ]).reshape(t.shape + ntk.shape[2:])
                        self.assertAllClose(ntk_ind, ntk)

                    # Create a hacked kernel function that always returns the ntk kernel
                    def always_ntk(x1, x2, get=('nngp', 'ntk')):
                        out = ker_fun(x1, x2, get=('nngp', 'ntk'))
                        if get == 'nngp' or get == 'ntk':
                            return out.ntk
                        else:
                            return out._replace(nngp=out.ntk)

                    predictor_ntk = predict.gradient_descent_mse_ensemble(
                        always_ntk, x_train, y_train, diag_reg=reg)

                    ntk_nngp = predictor_ntk(t=t, get='nngp', x_test=x)

                    # Test if you use nngp equations with ntk, you get the same mean
                    self.assertAllClose(ntk, ntk_nngp)

                    # Next test that if you go through the NTK code path, but with only
                    # the NNGP kernel, we recreate the NNGP dynamics.
                    # Create a hacked kernel function that always returns the nngp kernel
                    def always_nngp(x1, x2, get=('nngp', 'ntk')):
                        out = ker_fun(x1, x2, get=('nngp', 'ntk'))
                        if get == 'nngp' or get == 'ntk':
                            return out.nngp
                        else:
                            return out._replace(ntk=out.nngp)

                    predictor_nngp = predict.gradient_descent_mse_ensemble(
                        always_nngp, x_train, y_train, diag_reg=reg)

                    nngp_cov = predictor(t=t,
                                         get='nngp',
                                         x_test=x,
                                         compute_cov=True).covariance

                    # test time broadcasting for covariance
                    nngp_ntk_cov = predictor_nngp(t=t,
                                                  get='ntk',
                                                  x_test=x,
                                                  compute_cov=True).covariance
                    if t is not None:
                        nngp_ntk_cov_ind = np.array([
                            predictor_nngp(t=t,
                                           get='ntk',
                                           x_test=x,
                                           compute_cov=True).covariance
                            for t in t.ravel()
                        ]).reshape(t.shape + nngp_cov.shape[2:])
                        self.assertAllClose(nngp_ntk_cov_ind, nngp_ntk_cov)

                    # Test if you use ntk equations with nngp, you get the same cov
                    # Although, due to accumulation of numerical errors, only roughly.
                    self.assertAllClose(nngp_cov, nngp_ntk_cov)