def test_flat():
    """Check that dict, array and scalar hyperpriors give similar fits.

    The same hyperparameter is passed to ``empbayes_fit`` as a
    ``BufferDict``, as its flat buffer, and as the single element of that
    buffer; the fitted parameter is then compared across the three fits.
    """
    hyperprior = gvar.BufferDict({'log(sdev)': gvar.log(gvar.gvar(1, 1))})
    grid = np.linspace(0, 5, 10)

    def build_gp(amplitude):
        # Shared construction: ExpQuad kernel scaled by amplitude squared,
        # evaluated on the common grid under the key 'x'.
        process = lgp.GP(lgp.ExpQuad() * amplitude**2)
        process.addx(grid, 'x')
        return process

    def factory_dict(hp):
        # Dictionary hyperparameters: 'sdev' is looked up by name.
        return build_gp(hp['sdev'])

    def factory_array(hp):
        # Array hyperparameters: element 0 is exponentiated by hand.
        return build_gp(jnp.exp(hp[0]))

    def factory_scalar(hp):
        # Scalar hyperparameter: the value itself is exponentiated by hand.
        return build_gp(jnp.exp(hp))

    # Draw true hyperparameters first, then data from the matching GP prior.
    sampled_hp = gvar.sample(hyperprior)
    data = gvar.sample(factory_dict(sampled_hp).prior())

    fit_dict = lgp.empbayes_fit(hyperprior, factory_dict, data)
    fit_array = lgp.empbayes_fit(hyperprior.buf, factory_array, data)
    fit_scalar = lgp.empbayes_fit(hyperprior.buf[0], factory_scalar, data)

    util.assert_similar_gvars(fit_dict.p.buf[0], fit_array.p[0], fit_scalar.p)
def test_checks():
    """Check the errors raised by ``empbayes_fit`` on invalid usage.

    An unknown ``method`` must raise ``KeyError``; a GP factory that puts a
    NaN input into the GP (with the GP's own checks switched off) must
    raise ``RuntimeError`` with a 'minimization failed: ' message.
    """
    def nan_gp(_hp):
        # All GP-side checks are disabled so that the failure is reported
        # by the fit rather than by the GP object.
        process = lgp.GP(lgp.ExpQuad(), checkfinite=False, checksym=False, checkpos=False)
        process.addx(np.nan, 0)
        return process

    with pytest.raises(KeyError):
        lgp.empbayes_fit(gvar.gvar(0, 1), lambda: None, lambda: None, method='cippa')

    with pytest.raises(RuntimeError) as excinfo:
        lgp.empbayes_fit(gvar.gvar(0, 1), nan_gp, {0: 0})
    assert 'minimization failed: ' in str(excinfo.value)
def test_data():
    """Check that equivalent ways of passing the data give the same fit.

    Two groups of data specifications are built, one without errors and one
    with errors. Within each group every specification (values, gvars,
    tuples, and callables returning those) must lead to the same minimizer
    solution within ``atol=1e-6``.
    """
    hyperprior = gvar.BufferDict({'log(sdev)': gvar.log(gvar.gvar(1, 1))})
    grid = np.linspace(0, 5, 10)

    def gpfactory(hp):
        process = lgp.GP(lgp.ExpQuad() * hp['sdev']**2)
        process.addx(grid, 'x')
        return process

    def constant_like(bd, value):
        # BufferDict with the layout of `bd` and every entry set to `value`.
        filled = np.full_like(bd.buf, value)
        return gvar.BufferDict(bd, buf=filled)

    # Draw true hyperparameters first, then data from the matching GP prior.
    sampled_hp = gvar.sample(hyperprior)
    data_noerr = gvar.sample(gpfactory(sampled_hp).prior())

    error = constant_like(data_noerr, 0.1)
    zeroerror = constant_like(data_noerr, 0)
    zerocov = gvar.evalcov(gvar.gvar(data_noerr, zeroerror))
    data_err = gvar.make_fake_data(gvar.gvar(data_noerr, error))

    # Specifications of the error-free data.
    exact_variants = [
        data_noerr,
        gvar.gvar(data_noerr),
        (data_noerr, ),
        (data_noerr, zerocov),
        lambda _: data_noerr,
        lambda _: gvar.gvar(data_noerr),
        lambda _: (data_noerr, ),
        lambda _: (data_noerr, zerocov),
    ]
    # Specifications of the data with errors.
    noisy_variants = [
        data_err,
        (data_err, ),
        (gvar.mean(data_err), gvar.evalcov(data_err)),
        lambda _: data_err,
        lambda _: (data_err, ),
        lambda _: (gvar.mean(data_err), gvar.evalcov(data_err)),
    ]

    for variants in (exact_variants, noisy_variants):
        solutions = [
            lgp.empbayes_fit(hyperprior, gpfactory, variant).minresult.x
            for variant in variants
        ]
        # The first specification of each group serves as the reference.
        reference, *others = solutions
        for solution in others:
            np.testing.assert_allclose(solution, reference, atol=1e-6)
def check_fit(hyperprior, gpfactory, dataerr=None, alpha=1e-5):
    """Fit with empbayes_fit and test the result against the true values.

    Hyperparameters are sampled from `hyperprior`, data is sampled from the
    prior of the GP built with them, and the fitted hyperparameters are
    compared to the sampled ones with `chisq_test` at level `alpha`.

    If `dataerr` is truthy, a random offset of standard deviation `dataerr`
    is added to the data, together with an error of the same size.
    """
    # sample the "true" hyperparameters
    sampled_hp = gvar.sample(hyperprior)

    # sample data from the prior of the corresponding GP
    data = gvar.sample(gpfactory(sampled_hp).prior())

    if dataerr:
        offsets = dataerr * np.random.randn(len(data.buf))
        widths = np.full_like(offsets, dataerr)
        noise = gvar.BufferDict(data, buf=gvar.gvar(offsets, widths))
        data += noise

    # fit without raising on minimizer failure
    result = lgp.empbayes_fit(hyperprior, gpfactory, data, raises=False)

    # the residual w.r.t. the true hyperparameters must pass the test
    residual = result.p - sampled_hp
    chisq_test(residual, alpha)
def test_method():
    """Check that all minimization methods reach the same solution.

    The fit is repeated with each `method` option, once with fixed data and
    once with data given as a function of the hyperparameters. Every fit
    starts from the true hyperparameters, and all solutions must agree with
    the first one within ``atol=1e-5``.
    """
    hp = gvar.BufferDict({'log(sdev)': gvar.log(gvar.gvar(1, 1))})
    x = np.linspace(0, 5, 10)

    def gpfactory(hp):
        gp = lgp.GP(lgp.ExpQuad() * hp['sdev']**2)
        gp.addx(x, 'x')
        return gp

    truehp = gvar.sample(hp)
    truegp = gpfactory(truehp)
    trueprior = truegp.prior()

    data_fixed = gvar.sample(trueprior)

    def data_variable(hp):
        # Data that depends on the hyperparameters: the fixed data shifted
        # by the 'log(sdev)' hyperparameter.
        return {k: v + hp['log(sdev)'] for k, v in data_fixed.items()}

    # One entry per method; each method is listed exactly once so that no
    # fit is run twice with identical settings.
    method_options = [
        dict(method='nograd', minkw=dict(options=dict(xatol=1e-6))),
        dict(method='gradient'),
        dict(method='hessian'),
        dict(method='fisher'),
        dict(method='hessmod'),
    ]

    for data in [data_fixed, data_variable]:
        fits = []
        for kw in method_options:
            kwargs = dict(data=data)
            kwargs.update(kw)
            # Build a fresh minkw instead of updating the one stored in
            # method_options, so the shared configuration is never mutated.
            kwargs['minkw'] = dict(kw.get('minkw', {}), x0=truehp.buf)
            fit = lgp.empbayes_fit(hp, gpfactory, **kwargs)
            fits.append(fit)

        p = fits[0].minresult.x
        for fit in fits[1:]:
            np.testing.assert_allclose(fit.minresult.x, p, atol=1e-5)
# Value of the ExpQuad kernel between inputs 0 and 1 at the fixed
# label_scale, printed for reference.
corr = lgp.ExpQuad(scale=label_scale)(0, 1)
print(f'corr = {corr:.3g}')

def makegp(params):
    # Build the GP for the given hyperparameters. The kernel is a product of
    # an ExpQuad over the 'time' field, whose scale comes from params, and
    # an ExpQuad over the 'label' field, whose scale is the enclosing-scope
    # label_scale.
    kernel_time = lgp.ExpQuad(scale=params['time_scale'], dim='time')
    kernel_label = lgp.ExpQuad(scale=label_scale, dim='label')
    gp = lgp.GP(kernel_time * kernel_label)
    # Data points, added with the derivative specification
    # (data_deriv, 'time') -- presumably a derivative order along 'time';
    # confirm against the definition of data_deriv.
    gp.addx(x, 'data', deriv=(data_deriv, 'time'))
    # One extra point given by the record (0, 0) in x's dtype
    # (assumes the dtype has exactly two fields -- TODO confirm).
    gp.addx(np.array([(0, 0)], dtype=x.dtype), 'fixed_point')
    return gp

# The hyperparameter is specified under the key 'log(time_scale)' and read
# back from the fit result as 'time_scale' (gvar's 'log(...)' key convention).
prior = {'log(time_scale)': gvar.log(gvar.gvar(3, 2))}
# Information to condition on: the data, plus a value 0 with standard
# deviation 1e2 at the fixed point.
datadict = {'data': data, 'fixed_point': [gvar.gvar(0, 1e2)]}
# raises=False: do not raise if the minimization fails.
params = lgp.empbayes_fit(prior, makegp, datadict, raises=False, jit=True).p
print('time_scale:', params['time_scale'])

# GP at the mean of the fitted hyperparameters.
gp = makegp(gvar.mean(params))

# Prediction points: two rows sharing the same times, row 0 with label 0
# and row 1 with label 1.
time_pred = np.linspace(-10, 10, 100)
xpred = np.empty((2, len(time_pred)), dtype=x.dtype)
xpred['time'] = time_pred
xpred['label'][0] = 0
xpred['label'][1] = 1
gp.addx(xpred[0], 0)
# Row 1 is added with the derivative specification (1, 'time').
gp.addx(xpred[1], 1, deriv=(1, 'time'))

# Predictions at keys 0 and 1, conditioned on datadict.
pred = gp.predfromdata(datadict, [0, 1])

fig, ax = plt.subplots(num='u', clear=True)
for q in 'ducs': idx = indices[q] qx = x[idx] qy = y[idx] checksum = np.sum(qdiff * (qy[:, 1:] + qy[:, :-1]) / 2 * np.diff(qx, axis=1)) print(f'sum_i={q}{q}bar int dx f_i(x) =', checksum) print('check integrals in fake data:') check_integrals(xdata['x'], priorsample['xdata']) #### FIT #### information = dict(constraints) information.update({ 'data': data, }) fit = lgp.empbayes_fit(hyperprior, makegp, information, raises=False, jit=True) print('hyperparameters:') for k in fit.p.all_keys(): d = fit.p[k] - hpsample[k] print(f'{k:10}: fit {fit.p[k]} true {hpsample[k]:5.2g} diff {d}') gp = makegp(gvar.mean(fit.p)) pred = gp.predfromdata(information, ['data', 'xdata']) print('check integrals in fit:') check_integrals(xdata['x'], pred['xdata']) #### PLOT RESULTS #### fig, axs = plt.subplots(1, 2, num='pdf4', clear=True, figsize=[9, 4.5])
# Wrap x in lsqfitgp's StructuredArray; makegp below assigns into its
# 'time' field.
x = lgp.StructuredArray(x)
def makegp(params):
    # Build the GP for the given hyperparameters: a Cauchy kernel
    # (beta=2) over the 'time' field times an ExpQuad kernel over the
    # 'label' field, both with scales taken from params.
    kernel = lgp.Cauchy(scale=params['time_scale'], dim='time', beta=2)
    kernel *= lgp.ExpQuad(scale=params['label_scale'], dim='label')
    gp = lgp.GP(kernel)
    # Two rows of times: the original ones and the ones shifted back by the
    # 'delay' hyperparameter.
    # NOTE(review): this assigns into the enclosing-scope x on every call,
    # so x is left holding the values of the last call (possibly traced
    # values when the fit runs with jit=True) -- confirm this is intended.
    x['time'] = jnp.array([time, time - params['delay']])
    gp.addx(x, 'A')
    return gp

# Time the hyperparameter fit.
start = systime.time()
# The two scales are specified through 'log(...)' keys and read back below
# as 'time_scale' and 'label_scale' (gvar's 'log(...)' key convention);
# 'delay' is specified directly.
hyperprior = gvar.BufferDict({
    'log(time_scale)': gvar.log(gvar.gvar(10, 10)),
    'log(label_scale)': gvar.log(gvar.gvar(10, 10)),
    'delay': gvar.gvar(10, 20)
})
# raises=False: do not raise if the minimization fails.
params = lgp.empbayes_fit(hyperprior, makegp, {'A': data}, raises=False, jit=True).p
end = systime.time()

print('minimization time = {:.2g} sec'.format(end - start))
print('time scale = {}'.format(params['time_scale']))
# Value of the ExpQuad kernel between inputs 0 and 1 at the mean fitted
# label scale.
corr = lgp.ExpQuad(scale=gvar.mean(params['label_scale']))(0, 1)
print('correlation = {:.3g} (equiv. scale = {})'.format(corr, params['label_scale']))
print('delay = {}'.format(params['delay']))

# GP at the mean of the fitted hyperparameters.
gp = makegp(gvar.mean(params))

# Prediction points: two rows of 100 points each, in x's dtype.
xpred = np.empty((2, 100), dtype=x.dtype)
# Times run from min(time) to max(time) plus 1.5 times the span of time.
time_pred = np.linspace(np.min(time), np.max(time) + 1.5 * (np.max(time) - np.min(time)), xpred.shape[1])
xpred['time'][0] = time_pred
# Row 1 uses the times shifted back by the mean fitted delay, mirroring the
# shift applied in makegp.
xpred['time'][1] = time_pred - gvar.mean(params['delay'])
xpred['label'][0] = 0