def test_multidimensionality(self):
    """Check that N-dimensional data passes through Data1D/Model/Objective.

    The synthetic "observations" are produced by the model function itself,
    so the fit quality metrics are expected to be exactly zero; the point of
    the test is that nothing in the pipeline trips over 2D arrays.
    """
    points = np.random.default_rng().uniform(size=100).reshape(50, 2)

    expected = line_ND(points, self.p)
    assert expected.shape == (50, 2)
    dataset = Data1D((points, expected))

    nd_model = Model(self.p, fitfunc=line_ND)
    assert_allclose(nd_model(points), expected)

    nd_objective = Objective(nd_model, dataset)
    assert_allclose(nd_objective.chisqr(), 0)
    assert_allclose(nd_objective.generative(), expected)
    assert_allclose(nd_objective.residuals(), 0)
    assert nd_objective.residuals().shape == (50, 2)

    # smoke tests: these only need to run without raising
    nd_objective.logl()
    nd_objective.logpost()

    # one row/column per parameter
    assert nd_objective.covar().shape == (2, 2)
class TestFitterGauss(object):
    """Test CurveFitter with a noisy gaussian, weighted and unweighted.

    The fitted parameters and their uncertainties are compared against
    reference values stored on the instance by the setup fixture.
    """

    @pytest.fixture(autouse=True)
    def setup_method(self, tmpdir):
        """Load the gaussian dataset and build the objective for each test.

        Parameters
        ----------
        tmpdir
            pytest ``tmpdir`` fixture; its path is kept in ``self.tmpdir``
            and used for checkpoint files.
        """
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.tmpdir = tmpdir.strpath

        theoretical = np.loadtxt(os.path.join(self.path, "gauss_data.txt"))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        # these best weighted values and uncertainties obtained with Igor
        self.best_weighted = [-0.00246095, 19.5299, -8.28446e-2, 1.24692]

        self.best_weighted_errors = [
            0.0220313708486,
            1.12879436221,
            0.0447659158681,
            0.0412022938883,
        ]

        self.best_weighted_chisqr = 77.6040960351

        self.best_unweighted = [
            -0.10584111872702096,
            19.240347049328989,
            0.0092623066070940396,
            1.501362314145845,
        ]

        self.best_unweighted_errors = [
            0.34246565477,
            0.689820935208,
            0.0411243173041,
            0.0693429375282,
        ]

        self.best_unweighted_chisqr = 497.102084956

        # starting values, names and allowed ranges for the four parameters
        self.p0 = np.array([0.1, 20.0, 0.1, 0.1])
        self.names = ["bkg", "A", "x0", "width"]
        self.bounds = [(-1, 1), (0, 30), (-5.0, 5.0), (0.001, 2)]

        self.params = Parameters(name="gauss_params")
        for p, name, bound in zip(self.p0, self.names, self.bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            self.params.append(param)

        self.model = Model(self.params, fitfunc=gauss)
        self.data = Data1D((xvals, yvals, evals))
        self.objective = Objective(self.model, self.data)

    def test_pickle(self):
        """A CurveFitter survives a pickle/unpickle round trip."""
        f = CurveFitter(self.objective)
        pkl = pickle.dumps(f)
        g = pickle.loads(pkl)
        g._check_vars_unchanged()

    def test_best_weighted(self):
        """Weighted fit and sampling reproduce the reference values.

        Also exercises chain checkpointing to a file, reloading that file,
        ``Objective.pgen`` and sampling with parallel tempering.
        """
        assert_equal(len(self.objective.varying_parameters()), 4)
        self.objective.setp(self.p0)

        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit("least_squares", jac="3-point")

        output = res.x
        assert_almost_equal(output, self.best_weighted, 3)
        assert_almost_equal(
            self.objective.chisqr(), self.best_weighted_chisqr, 5
        )

        # compare the residuals
        expected_residuals = (
            self.data.y - self.model(self.data.x)
        ) / self.data.y_err
        assert_equal(self.objective.residuals(), expected_residuals)

        # compare objective.covar to the best_weighted_errors
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.005)

        # we're also going to try the checkpointing here.
        checkpoint = os.path.join(self.tmpdir, "checkpoint.txt")

        # compare samples to best_weighted_errors
        np.random.seed(1)
        f.sample(steps=201, random_state=1, verbose=False, f=checkpoint)
        process_chain(self.objective, f.chain, nburn=50, nthin=10)
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

        # test that the checkpoint worked
        check_array = np.loadtxt(checkpoint)
        check_array = check_array.reshape(201, f._nwalkers, f.nvary)
        assert_allclose(check_array, f.chain)

        # test loading the checkpoint
        chain = load_chain(checkpoint)
        assert_allclose(chain, f.chain)

        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        assert_equal(f.chain.shape[0], 2)

        # we should be able to produce 2 * 100 steps from the generator,
        # no matter how many more are requested
        g = self.objective.pgen(ngen=20000000000)
        n_generated = sum(1 for _ in g)
        assert_equal(n_generated, 200)

        g = self.objective.pgen(ngen=200)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # check that all the parameters are returned via pgen, not only those
        # being varied.
        self.params[0].vary = False
        f = CurveFitter(self.objective, nwalkers=100)
        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        g = self.objective.pgen(ngen=100)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # the following test won't work because of emcee/gh226.
        # chain = load_chain(checkpoint)
        # assert_(chain.shape == f.chain.shape)
        # assert_allclose(chain, f.chain)

        # try reproducing best fit with parallel tempering
        self.params[0].vary = True
        f = CurveFitter(self.objective, nwalkers=100, ntemps=10)
        f.fit("differential_evolution", seed=1)

        f.sample(steps=201, random_state=1, verbose=False)
        process_chain(self.objective, f.chain, nburn=50, nthin=15)

        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

    def test_best_unweighted(self):
        """Unweighted fit reproduces the reference values and uncertainties."""
        self.objective.weighted = False
        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit()

        output = res.x
        assert_almost_equal(
            self.objective.chisqr(), self.best_unweighted_chisqr
        )
        assert_almost_equal(output, self.best_unweighted, 5)

        # compare the residuals
        expected_residuals = self.data.y - self.model(self.data.x)
        assert_equal(self.objective.residuals(), expected_residuals)

        # compare objective._covar to the best_unweighted_errors
        uncertainties = np.array([param.stderr for param in self.params])
        assert_almost_equal(uncertainties, self.best_unweighted_errors, 3)

        # the samples won't compare to the covariance matrix...
        # f.sample(nsteps=150, nburn=20, nthin=30, random_state=1)
        # uncertainties = [param.stderr for param in self.params]
        # assert_allclose(uncertainties, self.best_unweighted_errors,
        #                 rtol=0.15)

    def test_all_minimisers(self):
        """test minimisers against the Gaussian fit"""
        f = CurveFitter(self.objective)
        methods = ["differential_evolution", "L-BFGS-B", "least_squares"]
        # only exercise the optimisers that the installed scipy provides
        if hasattr(sciopt, "shgo"):
            methods.append("shgo")
        if hasattr(sciopt, "dual_annealing"):
            methods.append("dual_annealing")

        for method in methods:
            self.objective.setp(self.p0)
            res = f.fit(method=method)
            assert_almost_equal(res.x, self.best_weighted, 3)

        # smoke test to check that we can use nlpost
        self.objective.setp(self.p0)
        logp0 = self.objective.logp()

        # check that probabilities are calculated correctly
        assert_allclose(
            self.objective.logpost(),
            self.objective.logp() + self.objective.logl(),
        )
        assert_allclose(self.objective.nlpost(), -self.objective.logpost())
        assert_allclose(
            self.objective.nlpost(self.p0), -self.objective.logpost(self.p0)
        )

        # if the priors are all uniform then the only difference between
        # logpost and logl is a constant. A minimiser should converge on the
        # same answer. The following tests examine that.
        # The test works for dual_annealing, but not for differential
        # evolution, not sure why that is.
        self.objective.setp(self.p0)
        res1 = f.fit(method="dual_annealing", seed=1)
        assert_almost_equal(res1.x, self.best_weighted, 3)
        nll1 = self.objective.nll()
        nlpost1 = self.objective.nlpost()

        self.objective.setp(self.p0)
        res2 = f.fit(method="dual_annealing", target="nlpost", seed=1)
        assert_almost_equal(res2.x, self.best_weighted, 3)
        nll2 = self.objective.nll()
        nlpost2 = self.objective.nlpost()

        assert_allclose(nlpost1, nlpost2, atol=0.001)
        assert_allclose(nll1, nll2, atol=0.001)

        # these two priors are calculated for different parameter values
        # (before and after the fit) they should be the same because all
        # the parameters have uniform priors.
        assert_almost_equal(self.objective.logp(), logp0)

    def test_pymc3_sample(self):
        """Smoke-test NUTS sampling of the objective through pymc3.

        Skipped (rather than silently passing) when pymc3/theano or the
        refnx pymc3 bridge cannot be imported.
        """
        try:
            import pymc3 as pm
            from refnx.analysis import pymc3_model
        except (ImportError, AttributeError):
            # ModuleNotFoundError is a subclass of ImportError
            pytest.skip("pymc3/theano not installed")

        with pymc3_model(self.objective):
            s = pm.NUTS()
            pm.sample(
                200,
                tune=100,
                step=s,
                discard_tuned_samples=True,
                compute_convergence_checks=False,
                random_seed=1,
            )
class TestObjective(object):
    """Tests for Objective/BaseObjective using a synthetic straight line.

    The synthetic dataset follows the emcee "fitting a line" example.
    """

    def setup_method(self):
        """Create the synthetic dataset, line model and objective."""
        # Choose the "true" parameters.

        # Reproducible results!
        np.random.seed(123)

        self.m_true = -0.9594
        self.b_true = 4.294
        self.f_true = 0.534
        self.m_ls = -1.1040757010910947
        self.b_ls = 5.4405552502319505

        # Generate some synthetic data from the model.
        # NOTE: the order of the np.random calls determines the data values.
        N = 50
        x = np.sort(10 * np.random.rand(N))
        y_err = 0.1 + 0.5 * np.random.rand(N)
        y = self.m_true * x + self.b_true
        y += np.abs(self.f_true * y) * np.random.randn(N)
        y += y_err * np.random.randn(N)

        self.data = Data1D(data=(x, y, y_err))

        self.p = Parameter(self.b_ls, 'b') | Parameter(self.m_ls, 'm')
        self.model = Model(self.p, fitfunc=line)
        self.objective = Objective(self.model, self.data)

        # want b and m
        self.p[0].vary = True
        self.p[1].vary = True

        # reference model values used by test_model and test_residuals
        mod = np.array([
            4.78166609, 4.42364699, 4.16404064, 3.50343504, 3.4257084,
            2.93594347, 2.92035638, 2.67533842, 2.28136038, 2.19772983,
            1.99295496, 1.93748334, 1.87484436, 1.65161016, 1.44613461,
            1.11128101, 1.04584535, 0.86055984, 0.76913963, 0.73906649,
            0.73331407, 0.68350418, 0.65216599, 0.59838566, 0.13070299,
            0.10749131, -0.01010195, -0.10010155, -0.29495372, -0.42817431,
            -0.43122391, -0.64637715, -1.30560686, -1.32626428, -1.44835768,
            -1.52589881, -1.56371158, -2.12048349, -2.24899179, -2.50292682,
            -2.53576659, -2.55797996, -2.60870542, -2.7074727, -3.93781479,
            -4.12415366, -4.42313742, -4.98368609, -5.38782395, -5.44077086
        ])
        self.mod = mod

    def test_model(self):
        """The model reproduces the stored reference line values."""
        # test that the line data produced by our model is the same as the
        # test data
        assert_almost_equal(self.model(self.data.x), self.mod)

    def test_synthetic_data(self):
        """A weighted linear least squares fit recovers b_ls and m_ls."""
        # test that we create the correct synthetic data by performing a least
        # squares fit on it
        assert_(self.data.y_err is not None)

        x, y, y_err, _ = self.data.data

        A = np.vstack((np.ones_like(x), x)).T
        C = np.diag(y_err * y_err)
        cov = np.linalg.inv(np.dot(A.T, np.linalg.solve(C, A)))
        b_ls, m_ls = np.dot(cov, np.dot(A.T, np.linalg.solve(C, y)))

        assert_almost_equal(b_ls, self.b_ls)
        assert_almost_equal(m_ls, self.m_ls)

    def test_setp(self):
        """setp accepts either the varying or the full parameter vector."""
        # check that we can set parameters
        self.p[0].vary = False

        assert_(len(self.objective.varying_parameters()) == 1)
        self.objective.setp(np.array([1.23]))
        assert_equal(self.p[1].value, 1.23)
        self.objective.setp(np.array([1.234, 1.23]))
        assert_equal(np.array(self.p), [1.234, 1.23])

    def test_pvals(self):
        """parameters.pvals can be read and assigned."""
        assert_equal(self.objective.parameters.pvals, [self.b_ls, self.m_ls])
        self.objective.parameters.pvals = [1, 2]
        assert_equal(self.objective.parameters.pvals, [1, 2.])

    def test_logp(self):
        """logp reflects the parameter range and sets supplied values."""
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.logp(), np.log(0.1))

        # logp should set parameters
        self.objective.logp([8, 2])
        assert_equal(np.array(self.objective.parameters), [8, 2])

        # if we supply a value outside the range it should return -inf
        assert_equal(self.objective.logp([-1, 2]), -np.inf)

    def test_logpost(self):
        """logl/logpost agree with the emcee line example reference value."""
        # http://dan.iel.fm/emcee/current/user/line/
        assert_almost_equal(self.objective.logp(), 0)

        assert_almost_equal(self.objective.nlpost(),
                            -self.objective.logpost())

        # the uncertainties are underestimated in this example...
        # amendment factor because dfm emcee example does not include 2pi
        amend = 0.5 * self.objective.npoints * np.log(2 * np.pi)
        assert_almost_equal(self.objective.logl() + amend,
                            -559.01078135444595)
        assert_almost_equal(self.objective.logpost() + amend,
                            -559.01078135444595)

    def test_chisqr(self):
        """chisqr matches the stored reference value."""
        assert_almost_equal(self.objective.chisqr(), 1231.1096772954229)

    def test_residuals(self):
        """Residuals are correct weighted/unweighted, with/without transform."""
        # weighted, with and without transform
        assert_almost_equal(self.objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        objective = Objective(self.model, self.data,
                              transform=Transform('lin'))
        assert_almost_equal(objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        # unweighted, with and without transform
        objective = Objective(self.model, self.data, use_weights=False)
        assert_almost_equal(objective.residuals(),
                            self.data.y - self.mod)

        objective = Objective(self.model, self.data, use_weights=False,
                              transform=Transform('lin'))
        assert_almost_equal(objective.residuals(),
                            self.data.y - self.mod)

    def test_masked_dataset(self):
        """Masking one data point removes one residual."""
        residuals = self.objective.residuals()

        mask = np.full_like(self.objective.data.y, True, bool)
        mask[1] = False
        self.objective.data.mask = mask

        assert_equal(self.objective.residuals().size, residuals.size - 1)

    def test_logp_extra(self):
        """A user supplied logp_extra term is added to logp."""
        self.objective.logp_extra = logp_extra

        # repeat logp test
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.logp(), np.log(0.1) + 1)

    def test_objective_pickle(self):
        """The objective pickles, with and without a logp_extra function."""
        # can you pickle the objective function?
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, 'dumps'):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

        # can you pickle with an extra function present?
        self.objective.logp_extra = logp_extra
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, 'dumps'):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

    def test_transform_pickle(self):
        """A Transform object can be pickled and unpickled."""
        # can you pickle the Transform object?
        pkl = pickle.dumps(Transform('logY'))
        pickle.loads(pkl)

    def test_transform(self):
        """Transform('logY') matches np.log10 and EP.EPlog10."""
        pth = os.path.dirname(os.path.abspath(__file__))

        fname = os.path.join(pth, 'c_PLP0011859_q.txt')
        data = ReflectDataset(fname)
        t = Transform('logY')

        yt, et = t(data.x, data.y, y_err=data.y_err)
        assert_equal(yt, np.log10(data.y))

        yt, _ = t(data.x, data.y, y_err=None)
        assert_equal(yt, np.log10(data.y))

        EPy, EPe = EP.EPlog10(data.y, data.y_err)
        assert_equal(yt, EPy)
        assert_equal(et, EPe)

    def test_repr_transform(self):
        """repr(Transform) evaluates back to an equivalent Transform."""
        p = Transform(None)
        q = eval(repr(p))
        assert p.form == q.form

        p = Transform('logY')
        q = eval(repr(p))
        assert p.form == q.form

    def test_lnsigma(self):
        """Objective.lnsigma reproduces the emcee line example likelihood."""
        # check that lnsigma works correctly, by using the emcee line fit
        # example
        def logp(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def logl(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr))

        lnsigma = Parameter(np.log(self.f_true), 'lnsigma',
                            bounds=(-10, 1), vary=True)
        # note the parameter order of the objective is (b, m)
        self.objective.setp(np.array([self.b_true, self.m_true]))
        self.objective.lnsigma = lnsigma

        # amendment factor because dfm emcee example does not include 2pi
        amend = 0.5 * self.objective.npoints * np.log(2 * np.pi)

        assert_allclose(self.objective.logl() + amend, bo.logl())

    def test_base_emcee(self):
        """BaseObjective reproduces the emcee line example when sampled."""
        # check that the base objective works against the emcee example.
        def logp(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def logl(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr))

        # test that the wrapper gives the same logl as the direct function
        assert_almost_equal(bo.logl(theta), logl(theta, x, y, yerr))
        assert_almost_equal(bo.logl(theta), -bo.nll(theta))
        assert_almost_equal(bo.nll(theta), 12.8885352412)

        # Find the maximum likelihood value.
        result = minimize(bo.nll, theta)

        # for repeatable sampling
        np.random.seed(1)

        ndim, nwalkers = 3, 100
        pos = [
            result["x"] + 1e-4 * np.random.randn(ndim)
            for _ in range(nwalkers)
        ]

        sampler = emcee.EnsembleSampler(nwalkers, ndim, bo.logpost)
        state = emcee.State(pos, random_state=np.random.get_state())
        sampler.run_mcmc(state, 800)

        burnin = 200
        samples = sampler.get_chain()[burnin:, :, :].reshape((-1, ndim))
        # third sampled quantity is ln(f); convert to f
        samples[:, 2] = np.exp(samples[:, 2])
        # (median, upper error, lower error) for each parameter
        m_mc, b_mc, f_mc = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samples, [16, 50, 84], axis=0)))
        assert_allclose(m_mc, (-1.0071664, 0.0809444, 0.0784894), rtol=0.04)

        assert_allclose(b_mc, (4.5428107, 0.3549174, 0.3673304), rtol=0.04)

        assert_allclose(f_mc, (0.4610898, 0.0823304, 0.0640812), rtol=0.06)

        # # smoke test for covariance matrix
        bo.parameters = np.array(result['x'])
        covar1 = bo.covar()
        uncertainties = np.sqrt(np.diag(covar1))

        # covariance from objective._covar should be almost equal to
        # the covariance matrix from sampling
        covar2 = np.cov(samples.T)
        assert_almost_equal(np.sqrt(np.diag(covar2))[:2],
                            uncertainties[:2],
                            2)

        # check covariance of self.objective
        # TODO
        # NOTE(review): if result['x'] is an ndarray, `[:]` gives a view, so
        # this swap reorders result['x'] in place. Nothing below reads it
        # while the follow-up checks remain commented out.
        var_arr = result['x'][:]
        var_arr[0], var_arr[1], var_arr[2] = var_arr[2], var_arr[1], var_arr[0]

        # assert_(self.objective.data.weighted)
        # self.objective.parameters.pvals = var_arr
        # covar3 = self.objective.covar()
        # uncertainties3 = np.sqrt(np.diag(covar3))
        # assert_almost_equal(uncertainties3, uncertainties)
        # assert(False)

    def test_covar(self):
        """objective.covar agrees with the least_squares covariance."""
        # checks objective.covar against optimize.least_squares covariance.
        path = os.path.dirname(os.path.abspath(__file__))

        theoretical = np.loadtxt(os.path.join(path, 'gauss_data.txt'))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        p0 = np.array([0.1, 20., 0.1, 0.1])
        names = ['bkg', 'A', 'x0', 'width']
        bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)]

        params = Parameters(name="gauss_params")
        for p, name, bound in zip(p0, names, bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            params.append(param)

        model = Model(params, fitfunc=gauss)
        data = Data1D((xvals, yvals, evals))
        objective = Objective(model, data)

        # first calculate least_squares jac/hess/covariance matrices
        res = least_squares(objective.residuals, np.array(params),
                            jac='3-point')

        hess_least_squares = np.matmul(res.jac.T, res.jac)
        covar_least_squares = np.linalg.inv(hess_least_squares)

        # now calculate corresponding matrices by hand, to see if the approach
        # concurs with least_squares
        objective.setp(res.x)
        _pvals = np.array(res.x)

        def residuals_scaler(vals):
            return np.squeeze(objective.residuals(_pvals * vals))

        jac = approx_derivative(residuals_scaler, np.ones_like(_pvals))
        hess = np.matmul(jac.T, jac)
        covar = np.linalg.inv(hess)

        # undo the scaling applied in residuals_scaler
        covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T

        assert_allclose(covar, covar_least_squares)

        # check that objective.covar corresponds to the least_squares
        # covariance matrix
        objective.setp(res.x)
        _pvals = np.array(res.x)
        covar_objective = objective.covar()
        assert_allclose(covar_objective, covar_least_squares)

        # now see what happens with a parameter that has no effect on residuals
        param = Parameter(1.234, name='dummy')
        param.vary = True
        params.append(param)

        with pytest.raises(LinAlgError):
            objective.covar()

    @pytest.mark.xfail
    def test_pymc3(self):
        """Objective logl and parameter priors agree with pymc3.

        Skipped (rather than silently returning) when pymc3 is missing.
        """
        # test objective logl against pymc3

        # don't run this test if pymc3 is not installed
        try:
            import pymc3 as pm
        except ImportError:
            pytest.skip("pymc3 not installed")

        logl = self.objective.logl()

        from refnx.analysis import pymc_objective
        from refnx.analysis.objective import _to_pymc3_distribution

        mod = pymc_objective(self.objective)
        with mod:
            pymc_logl = mod.logp({
                'p0': self.p[0].value,
                'p1': self.p[1].value
            })

        assert_allclose(logl, pymc_logl)

        # now check some of the distributions
        with pm.Model():
            p = Parameter(1, bounds=(1, 10))
            d = _to_pymc3_distribution('a', p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))
            assert_(np.isneginf(d.distribution.logp(-1).eval()))

            q = Parameter(1, bounds=PDF(stats.uniform(1, 9)))
            d = _to_pymc3_distribution('b', q)
            assert_almost_equal(d.distribution.logp(2).eval(), q.logp(2))
            assert_(np.isneginf(d.distribution.logp(-1).eval()))

            p = Parameter(1, bounds=PDF(stats.uniform))
            d = _to_pymc3_distribution('c', p)
            assert_almost_equal(d.distribution.logp(0.5).eval(), p.logp(0.5))

            p = Parameter(1, bounds=PDF(stats.norm))
            d = _to_pymc3_distribution('d', p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))

            p = Parameter(1, bounds=PDF(stats.norm(1, 10)))
            d = _to_pymc3_distribution('e', p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))