def test_residuals(self):
    """Residuals are (data - model), divided by y_err when weighted.

    Each case is checked both without a transform and with a 'lin'
    transform; the expected residuals are the same in both cases.
    """
    plain_diff = self.data.y - self.mod
    scaled_diff = plain_diff / self.data.y_err

    # weighted, with and without transform
    assert_almost_equal(self.objective.residuals(), scaled_diff)
    weighted_lin = Objective(
        self.model, self.data, transform=Transform('lin')
    )
    assert_almost_equal(weighted_lin.residuals(), scaled_diff)

    # unweighted, with and without transform
    for extra_kwds in ({}, {'transform': Transform('lin')}):
        unweighted = Objective(
            self.model, self.data, use_weights=False, **extra_kwds
        )
        assert_almost_equal(unweighted.residuals(), plain_diff)
def test_multidimensionality(self):
    """ND data can be pushed through the Data1D/Model/Objective chain.

    The data is generated from the model itself, so chi2 and the residuals
    are expected to be zero, and all arrays keep their (50, 2) shape.
    """
    # Check that ND data can be used with an objective/model/data
    # (or at least it doesn't stand in the way)
    generator = np.random.default_rng()
    points = generator.uniform(size=100).reshape(50, 2)

    expected = line_ND(points, self.p)
    assert expected.shape == (50, 2)

    dataset = Data1D((points, expected))
    nd_model = Model(self.p, fitfunc=line_ND)
    assert_allclose(nd_model(points), expected)

    nd_objective = Objective(nd_model, dataset)
    assert_allclose(nd_objective.chisqr(), 0)
    assert_allclose(nd_objective.generative(), expected)
    assert_allclose(nd_objective.residuals(), 0)
    assert nd_objective.residuals().shape == (50, 2)

    # smoke tests: these only need to run without raising
    nd_objective.logl()
    nd_objective.logpost()

    covariance = nd_objective.covar()
    assert covariance.shape == (2, 2)
def test_reflectivity_emcee(self):
    """Medians from a short MCMC run stay within 5% of the least-squares fit."""
    refl_model = self.model361
    refl_model.dq = 5.

    dataset = (self.qvals361, self.rvals361, self.evals361)
    objective = Objective(refl_model, dataset, transform=Transform('logY'))
    fitter = CurveFitter(objective, nwalkers=100)

    # generative/residuals output should be 1-dimensional
    generative_shape = objective.generative().shape
    assert_(len(generative_shape) == 1)
    residuals_shape = objective.residuals().shape
    assert_(len(residuals_shape) == 1)

    lsq_result = fitter.fit('least_squares')
    sampled = fitter.sample(steps=5, nthin=10, random_state=1, verbose=False)

    medians = []
    for parameter_result in sampled:
        medians.append(parameter_result.median)
    assert_allclose(medians, lsq_result.x, rtol=0.05)
def test_multipledataset_corefinement(self):
    """Co-refine three datasets whose structures share layers.

    Checks the number of varying parameters, that the global chi2 is the
    sum of the individual chi2 values, that shared layers give identical
    slab entries, and that the global residuals are the concatenation of
    the individual residuals.
    """
    # test corefinement of three datasets
    data361, data365, data366 = (
        ReflectDataset(os.path.join(self.pth, fname))
        for fname in ('e361r.txt', 'e365r.txt', 'e366r.txt')
    )

    si = SLD(2.07, name='Si')
    sio2 = SLD(3.47, name='SiO2')
    d2o = SLD(6.36, name='d2o')
    h2o = SLD(-0.56, name='h2o')
    cm3 = SLD(3.47, name='cm3')
    polymer = SLD(1, name='polymer')

    structure361 = si | sio2(10, 4) | polymer(200, 3) | d2o(0, 3)
    # the oxide and polymer layers are the same objects in all structures
    oxide_layer = structure361[1]
    polymer_layer = structure361[2]
    structure365 = si | oxide_layer | polymer_layer | cm3(0, 3)
    structure366 = si | oxide_layer | polymer_layer | h2o(0, 3)

    # the solvent roughness is held in common as well
    for linked in (structure365, structure366):
        linked[-1].rough = structure361[-1].rough

    oxide_layer.thick.setp(vary=True, bounds=(0, 20))
    polymer_layer.thick.setp(value=200., bounds=(200., 250.), vary=True)
    polymer_layer.sld.real.setp(vary=True, bounds=(0, 2))
    polymer_layer.vfsolv.setp(value=5., bounds=(0., 100.), vary=True)

    models = [
        ReflectModel(structure, bkg=2e-5)
        for structure in (structure361, structure365, structure366)
    ]
    for refl_model in models:
        refl_model.bkg.setp(vary=True, bounds=(1e-6, 5e-5))

    objectives = [
        Objective(refl_model, dataset)
        for refl_model, dataset in zip(models, (data361, data365, data366))
    ]
    objective361, objective365, objective366 = objectives
    global_objective = GlobalObjective(objectives)

    # are the right numbers of parameters varying?
    assert_equal(len(global_objective.varying_parameters()), 7)

    # can we set the parameters?
    global_objective.setp(np.array([1e-5, 10, 212, 1, 10, 1e-5, 1e-5]))

    fitter = CurveFitter(global_objective)
    fitter.fit()

    # the overall chi2 should be sum of individual chi2
    summed_chisqr = np.sum(
        [member.chisqr() for member in global_objective.objectives]
    )
    assert_almost_equal(global_objective.chisqr(), summed_chisqr)

    # now check that the parameters were held in common correctly.
    slabs361 = structure361.slabs()
    slabs365 = structure365.slabs()
    slabs366 = structure366.slabs()
    for other_slabs in (slabs365, slabs366):
        assert_equal(other_slabs[0:2, 0:5], slabs361[0:2, 0:5])
    for other_slabs in (slabs365, slabs366):
        assert_equal(other_slabs[-1, 3], slabs361[-1, 3])

    # check that the residuals are the correct lengths
    res361 = objective361.residuals()
    res365 = objective365.residuals()
    res366 = objective366.residuals()
    res_global = global_objective.residuals()

    first_stop = len(res361)
    second_stop = first_stop + len(res365)
    assert_allclose(res_global[0:first_stop], res361, rtol=1e-5)
    assert_allclose(res_global[first_stop:second_stop], res365, rtol=1e-5)
    assert_allclose(res_global[second_stop:], res366, rtol=1e-5)

    # smoke test, repr should not raise
    repr(global_objective)
class TestFitterGauss(object):
    """Test CurveFitter with a noisy gaussian, weighted and unweighted.

    The tests check whether the fitted parameters and uncertainties agree
    with the reference values stored in ``setup_method``.
    """

    @pytest.fixture(autouse=True)
    def setup_method(self, tmpdir):
        """Load ``gauss_data.txt`` and build the model, data and objective.

        Parameters
        ----------
        tmpdir
            pytest temporary-directory fixture; its path is stored in
            ``self.tmpdir`` and used for MCMC checkpoint files.
        """
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.tmpdir = tmpdir.strpath

        theoretical = np.loadtxt(os.path.join(self.path, "gauss_data.txt"))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        # these best weighted values and uncertainties obtained with Igor
        self.best_weighted = [-0.00246095, 19.5299, -8.28446e-2, 1.24692]

        self.best_weighted_errors = [
            0.0220313708486,
            1.12879436221,
            0.0447659158681,
            0.0412022938883,
        ]

        self.best_weighted_chisqr = 77.6040960351

        self.best_unweighted = [
            -0.10584111872702096,
            19.240347049328989,
            0.0092623066070940396,
            1.501362314145845,
        ]

        self.best_unweighted_errors = [
            0.34246565477,
            0.689820935208,
            0.0411243173041,
            0.0693429375282,
        ]

        self.best_unweighted_chisqr = 497.102084956

        self.p0 = np.array([0.1, 20.0, 0.1, 0.1])
        self.names = ["bkg", "A", "x0", "width"]
        self.bounds = [(-1, 1), (0, 30), (-5.0, 5.0), (0.001, 2)]

        # every parameter is bounded and allowed to vary
        self.params = Parameters(name="gauss_params")
        for p, name, bound in zip(self.p0, self.names, self.bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            self.params.append(param)

        self.model = Model(self.params, fitfunc=gauss)
        self.data = Data1D((xvals, yvals, evals))
        self.objective = Objective(self.model, self.data)
        return 0

    def test_pickle(self):
        """A CurveFitter survives a pickle/unpickle round trip."""
        # tests if a CurveFitter can be pickled/unpickled.
        f = CurveFitter(self.objective)
        pkl = pickle.dumps(f)
        g = pickle.loads(pkl)
        g._check_vars_unchanged()

    def test_best_weighted(self):
        """Weighted fit, sampling, checkpointing and pgen.

        Least-squares values, chi2 and uncertainties are compared with the
        reference values; the same uncertainties are then compared (more
        loosely) with those obtained from MCMC sampling, with and without
        parallel tempering.
        """
        assert_equal(len(self.objective.varying_parameters()), 4)
        self.objective.setp(self.p0)

        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit("least_squares", jac="3-point")

        output = res.x
        assert_almost_equal(output, self.best_weighted, 3)
        assert_almost_equal(self.objective.chisqr(),
                            self.best_weighted_chisqr, 5)

        # compare the residuals
        res = (self.data.y - self.model(self.data.x)) / self.data.y_err
        assert_equal(self.objective.residuals(), res)

        # compare objective.covar to the best_weighted_errors
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.005)

        # we're also going to try the checkpointing here.
        checkpoint = os.path.join(self.tmpdir, "checkpoint.txt")

        # compare samples to best_weighted_errors
        np.random.seed(1)
        f.sample(steps=201, random_state=1, verbose=False, f=checkpoint)
        process_chain(self.objective, f.chain, nburn=50, nthin=10)
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

        # test that the checkpoint worked
        check_array = np.loadtxt(checkpoint)
        check_array = check_array.reshape(201, f._nwalkers, f.nvary)
        assert_allclose(check_array, f.chain)

        # test loading the checkpoint
        chain = load_chain(checkpoint)
        assert_allclose(chain, f.chain)

        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        assert_equal(f.chain.shape[0], 2)

        # we should be able to produce 2 * 100 steps from the generator
        g = self.objective.pgen(ngen=20000000000)
        s = [i for i, a in enumerate(g)]
        assert_equal(np.max(s), 200 - 1)
        g = self.objective.pgen(ngen=200)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # check that all the parameters are returned via pgen, not only those
        # being varied.
        self.params[0].vary = False
        f = CurveFitter(self.objective, nwalkers=100)
        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        g = self.objective.pgen(ngen=100)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # the following test won't work because of emcee/gh226.
        # chain = load_chain(checkpoint)
        # assert_(chain.shape == f.chain.shape)
        # assert_allclose(chain, f.chain)

        # try reproducing best fit with parallel tempering
        self.params[0].vary = True
        f = CurveFitter(self.objective, nwalkers=100, ntemps=10)
        f.fit("differential_evolution", seed=1)

        f.sample(steps=201, random_state=1, verbose=False)
        process_chain(self.objective, f.chain, nburn=50, nthin=15)

        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

    def test_best_unweighted(self):
        """Unweighted fit reproduces the reference values, chi2 and errors."""
        self.objective.weighted = False
        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit()

        output = res.x
        assert_almost_equal(self.objective.chisqr(),
                            self.best_unweighted_chisqr)
        assert_almost_equal(output, self.best_unweighted, 5)

        # compare the residuals
        res = self.data.y - self.model(self.data.x)
        assert_equal(self.objective.residuals(), res)

        # compare objective._covar to the best_unweighted_errors
        uncertainties = np.array([param.stderr for param in self.params])
        assert_almost_equal(uncertainties, self.best_unweighted_errors, 3)

        # the samples won't compare to the covariance matrix...
        # f.sample(nsteps=150, nburn=20, nthin=30, random_state=1)
        # uncertainties = [param.stderr for param in self.params]
        # assert_allclose(uncertainties, self.best_unweighted_errors,
        #                 rtol=0.15)

    def test_all_minimisers(self):
        """test minimisers against the Gaussian fit"""
        f = CurveFitter(self.objective)

        methods = ["differential_evolution", "L-BFGS-B", "least_squares"]
        # only exercise the optional minimisers that this scipy provides
        if hasattr(sciopt, "shgo"):
            methods.append("shgo")
        if hasattr(sciopt, "dual_annealing"):
            methods.append("dual_annealing")

        for method in methods:
            self.objective.setp(self.p0)
            res = f.fit(method=method)
            assert_almost_equal(res.x, self.best_weighted, 3)

        # smoke test to check that we can use nlpost
        self.objective.setp(self.p0)
        logp0 = self.objective.logp()

        # check that probabilities are calculated correctly
        assert_allclose(
            self.objective.logpost(),
            self.objective.logp() + self.objective.logl(),
        )
        assert_allclose(self.objective.nlpost(), -self.objective.logpost())
        assert_allclose(self.objective.nlpost(self.p0),
                        -self.objective.logpost(self.p0))

        # if the priors are all uniform then the only difference between
        # logpost and logl is a constant. A minimiser should converge on the
        # same answer. The following tests examine that.
        # The test works for dual_annealing, but not for differential
        # evolution, not sure why that is.
        self.objective.setp(self.p0)
        res1 = f.fit(method="dual_annealing", seed=1)
        assert_almost_equal(res1.x, self.best_weighted, 3)
        nll1 = self.objective.nll()
        nlpost1 = self.objective.nlpost()

        self.objective.setp(self.p0)
        res2 = f.fit(method="dual_annealing", target="nlpost", seed=1)
        assert_almost_equal(res2.x, self.best_weighted, 3)
        nll2 = self.objective.nll()
        nlpost2 = self.objective.nlpost()

        assert_allclose(nlpost1, nlpost2, atol=0.001)
        assert_allclose(nll1, nll2, atol=0.001)

        # these two priors are calculated for different parameter values
        # (before and after the fit) they should be the same because all
        # the parameters have uniform priors.
        assert_almost_equal(self.objective.logp(), logp0)

    def test_pymc3_sample(self):
        """Smoke test: sample the objective with pymc3's NUTS step method.

        The test is reported as skipped, rather than silently passing, when
        pymc3 or ``refnx.analysis.pymc3_model`` cannot be imported.
        """
        # test sampling with pymc3
        try:
            import pymc3 as pm
            from refnx.analysis import pymc3_model
        except (ModuleNotFoundError, ImportError, AttributeError) as exc:
            # can't run test if pymc3/theano not installed
            pytest.skip("pymc3/theano not available: {}".format(exc))

        with pymc3_model(self.objective):
            s = pm.NUTS()
            pm.sample(
                200,
                tune=100,
                step=s,
                discard_tuned_samples=True,
                compute_convergence_checks=False,
                random_seed=1,
            )
class TestObjective(object):
    """Tests for Objective/BaseObjective using a synthetic straight line.

    The synthetic data follows the emcee line-fitting example referenced
    in ``test_lnprob``.
    """

    def setup_method(self):
        """Create the synthetic line data, the model and the objective."""
        # Choose the "true" parameters.

        # Reproducible results!
        np.random.seed(123)

        self.m_true = -0.9594
        self.b_true = 4.294
        self.f_true = 0.534
        self.m_ls = -1.1040757010910947
        self.b_ls = 5.4405552502319505

        # Generate some synthetic data from the model.
        N = 50
        x = np.sort(10 * np.random.rand(N))
        y_err = 0.1 + 0.5 * np.random.rand(N)
        y = self.m_true * x + self.b_true
        y += np.abs(self.f_true * y) * np.random.randn(N)
        y += y_err * np.random.randn(N)

        self.data = Data1D(data=(x, y, y_err))

        self.p = Parameter(self.b_ls, 'b') | Parameter(self.m_ls, 'm')
        self.model = Model(self.p, fitfunc=line)
        self.objective = Objective(self.model, self.data)

        # want b and m
        self.p[0].vary = True
        self.p[1].vary = True

        # expected model output, compared with self.model(x) in test_model
        mod = np.array([
            4.78166609, 4.42364699, 4.16404064, 3.50343504, 3.4257084,
            2.93594347, 2.92035638, 2.67533842, 2.28136038, 2.19772983,
            1.99295496, 1.93748334, 1.87484436, 1.65161016, 1.44613461,
            1.11128101, 1.04584535, 0.86055984, 0.76913963, 0.73906649,
            0.73331407, 0.68350418, 0.65216599, 0.59838566, 0.13070299,
            0.10749131, -0.01010195, -0.10010155, -0.29495372, -0.42817431,
            -0.43122391, -0.64637715, -1.30560686, -1.32626428, -1.44835768,
            -1.52589881, -1.56371158, -2.12048349, -2.24899179, -2.50292682,
            -2.53576659, -2.55797996, -2.60870542, -2.7074727, -3.93781479,
            -4.12415366, -4.42313742, -4.98368609, -5.38782395, -5.44077086
        ])
        self.mod = mod

    def test_model(self):
        """The model evaluated at the data x-values matches ``self.mod``."""
        # test that the line data produced by our model is the same as the
        # test data
        assert_almost_equal(self.model(self.data.x), self.mod)

    def test_synthetic_data(self):
        """A weighted linear least-squares solve recovers b_ls and m_ls."""
        # test that we create the correct synthetic data by performing a least
        # squares fit on it
        assert_(self.data.y_err is not None)

        x, y, y_err, _ = self.data.data
        A = np.vstack((np.ones_like(x), x)).T
        C = np.diag(y_err * y_err)
        cov = np.linalg.inv(np.dot(A.T, np.linalg.solve(C, A)))
        b_ls, m_ls = np.dot(cov, np.dot(A.T, np.linalg.solve(C, y)))

        assert_almost_equal(b_ls, self.b_ls)
        assert_almost_equal(m_ls, self.m_ls)

    def test_setp(self):
        """setp accepts either the varying values or all values."""
        # check that we can set parameters
        self.p[0].vary = False

        assert_(len(self.objective.varying_parameters()) == 1)
        self.objective.setp(np.array([1.23]))
        assert_equal(self.p[1].value, 1.23)
        self.objective.setp(np.array([1.234, 1.23]))
        assert_equal(np.array(self.p), [1.234, 1.23])

    def test_pvals(self):
        """parameters.pvals can be read and assigned."""
        assert_equal(self.objective.parameters.pvals, [self.b_ls, self.m_ls])
        self.objective.parameters.pvals = [1, 2]
        assert_equal(self.objective.parameters.pvals, [1, 2.])

    def test_lnprior(self):
        """lnprior reflects parameter ranges and sets supplied values."""
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.lnprior(), np.log(0.1))

        # lnprior should set parameters
        self.objective.lnprior([8, 2])
        assert_equal(np.array(self.objective.parameters), [8, 2])

        # if we supply a value outside the range it should return -inf
        assert_equal(self.objective.lnprior([-1, 2]), -np.inf)

    def test_lnprob(self):
        """lnlike and lnprob agree with the stored reference value."""
        # http://dan.iel.fm/emcee/current/user/line/
        assert_almost_equal(self.objective.lnprior(), 0)
        # the uncertainties are underestimated in this example...
        assert_almost_equal(self.objective.lnlike(), -559.01078135444595)
        assert_almost_equal(self.objective.lnprob(), -559.01078135444595)

    def test_chisqr(self):
        """chisqr agrees with the stored reference value."""
        assert_almost_equal(self.objective.chisqr(), 1231.1096772954229)

    def test_residuals(self):
        """Residuals are (data - model), divided by y_err when weighted."""
        # weighted, with and without transform
        assert_almost_equal(self.objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        objective = Objective(self.model, self.data,
                              transform=Transform('lin'))
        assert_almost_equal(objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        # unweighted, with and without transform
        objective = Objective(self.model, self.data, use_weights=False)
        assert_almost_equal(objective.residuals(),
                            self.data.y - self.mod)

        objective = Objective(self.model, self.data, use_weights=False,
                              transform=Transform('lin'))
        assert_almost_equal(objective.residuals(),
                            self.data.y - self.mod)

    def test_lnprob_extra(self):
        """With lnprob_extra set, lnprior is expected to be 1 larger."""
        self.objective.lnprob_extra = lnprob_extra

        # repeat lnprior test
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.lnprior(), np.log(0.1) + 1)

    def test_objective_pickle(self):
        """The objective pickles, with and without lnprob_extra set."""
        # can you pickle the objective function?
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, 'dumps'):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

        # can you pickle with an extra function present?
        self.objective.lnprob_extra = lnprob_extra
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, 'dumps'):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

    def test_transform_pickle(self):
        """A Transform object can be pickled and unpickled."""
        # can you pickle the Transform object?
        pkl = pickle.dumps(Transform('logY'))
        pickle.loads(pkl)

    def test_transform(self):
        """The 'logY' transform gives log10(y) and matches EP.EPlog10."""
        pth = os.path.dirname(os.path.abspath(__file__))

        fname = os.path.join(pth, 'c_PLP0011859_q.txt')
        data = ReflectDataset(fname)
        t = Transform('logY')

        yt, et = t(data.x, data.y, y_err=data.y_err)
        assert_equal(yt, np.log10(data.y))

        yt, _ = t(data.x, data.y, y_err=None)
        assert_equal(yt, np.log10(data.y))

        EPy, EPe = EP.EPlog10(data.y, data.y_err)
        assert_equal(yt, EPy)
        assert_equal(et, EPe)

    def test_lnsigma(self):
        """lnlike with an lnsigma parameter matches a hand-written likelihood."""
        # check that lnsigma works correctly
        def lnprior(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def lnlike(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, lnlike, lnprior=lnprior,
                           fcn_args=(x, y, yerr))

        lnsigma = Parameter(np.log(self.f_true), 'lnsigma',
                            bounds=(-10, 1), vary=True)
        # the objective's parameters are ordered (b, m), theta is (m, b, lnf)
        self.objective.setp(np.array([self.b_true, self.m_true]))
        self.objective.lnsigma = lnsigma

        assert_allclose(self.objective.lnlike(), bo.lnlike())

    def test_base_emcee(self):
        """BaseObjective reproduces the emcee line-fitting example.

        Checks lnlike/nll against the direct function, the sampled
        percentiles against stored values, and the first two covariance
        uncertainties against the spread of the samples.
        """
        # check that the base objective works against the emcee example.
        def lnprior(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def lnlike(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, lnlike, lnprior=lnprior,
                           fcn_args=(x, y, yerr))

        # test that the wrapper gives the same lnlike as the direct function
        assert_almost_equal(bo.lnlike(theta), lnlike(theta, x, y, yerr))
        assert_almost_equal(bo.lnlike(theta), -bo.nll(theta))
        assert_almost_equal(bo.nll(theta), 12.8885352412)

        # Find the maximum likelihood value.
        result = minimize(bo.nll, theta)

        # for repeatable sampling
        np.random.seed(1)

        ndim, nwalkers = 3, 100
        pos = [
            result["x"] + 1e-4 * np.random.randn(ndim)
            for i in range(nwalkers)
        ]

        sampler = emcee.EnsembleSampler(nwalkers, ndim, bo.lnprob)
        sampler.run_mcmc(pos, 800, rstate0=np.random.get_state())

        burnin = 200
        samples = sampler.chain[:, burnin:, :].reshape((-1, ndim))
        # the third sampled quantity is lnf; convert it to f
        samples[:, 2] = np.exp(samples[:, 2])
        # (median, upper error, lower error) for each parameter
        m_mc, b_mc, f_mc = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samples, [16, 50, 84], axis=0)))
        assert_allclose(m_mc, (-1.0071664, 0.0809444, 0.0784894), rtol=0.04)
        assert_allclose(b_mc, (4.5428107, 0.3549174, 0.3673304), rtol=0.04)
        assert_allclose(f_mc, (0.4610898, 0.0823304, 0.0640812), rtol=0.06)

        # # smoke test for covariance matrix
        bo.parameters = np.array(result['x'])
        covar1 = bo.covar()
        uncertainties = np.sqrt(np.diag(covar1))

        # covariance from objective._covar should be almost equal to
        # the covariance matrix from sampling
        covar2 = np.cov(samples.T)
        assert_almost_equal(np.sqrt(np.diag(covar2))[:2],
                            uncertainties[:2],
                            2)

        # TODO: check covariance of self.objective.
        # An earlier draft swapped the first and last entries of result['x'],
        # assigned them to self.objective.parameters.pvals and compared
        # np.sqrt(np.diag(self.objective.covar())) with `uncertainties`.
        # That comparison was never enabled.

    def test_covar(self):
        """objective.covar agrees with the least_squares covariance.

        Also checks that covar raises LinAlgError once a varying parameter
        with no effect on the residuals has been added.
        """
        # checks objective.covar against optimize.least_squares covariance.
        path = os.path.dirname(os.path.abspath(__file__))

        theoretical = np.loadtxt(os.path.join(path, 'gauss_data.txt'))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        p0 = np.array([0.1, 20., 0.1, 0.1])
        names = ['bkg', 'A', 'x0', 'width']
        bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)]

        params = Parameters(name="gauss_params")
        for p, name, bound in zip(p0, names, bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            params.append(param)

        model = Model(params, fitfunc=gauss)
        data = Data1D((xvals, yvals, evals))
        objective = Objective(model, data)

        # first calculate least_squares jac/hess/covariance matrices
        res = least_squares(objective.residuals, np.array(params),
                            jac='3-point')

        hess_least_squares = np.matmul(res.jac.T, res.jac)
        covar_least_squares = np.linalg.inv(hess_least_squares)

        # now calculate corresponding matrices by hand, to see if the approach
        # concurs with least_squares
        objective.setp(res.x)
        _pvals = np.array(res.x)

        def residuals_scaler(vals):
            return np.squeeze(objective.residuals(_pvals * vals))

        jac = approx_derivative(residuals_scaler, np.ones_like(_pvals))
        hess = np.matmul(jac.T, jac)
        covar = np.linalg.inv(hess)

        # undo the scaling of the parameters used in residuals_scaler
        covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T

        assert_allclose(covar, covar_least_squares)

        # check that objective.covar corresponds to the least_squares
        # covariance matrix
        objective.setp(res.x)
        covar_objective = objective.covar()
        assert_allclose(covar_objective, covar_least_squares)

        # now see what happens with a parameter that has no effect on residuals
        param = Parameter(1.234, name='dummy')
        param.vary = True
        params.append(param)

        from pytest import raises
        with raises(LinAlgError):
            objective.covar()
class TestFitterGauss(object):
    """Fit a noisy gaussian with CurveFitter, weighted and unweighted.

    Fitted parameters and uncertainties are compared with the reference
    values stored by ``setup_method``.
    """

    @pytest.fixture(autouse=True)
    def setup_method(self, tmpdir):
        """Read ``gauss_data.txt`` and assemble the objective under test."""
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.tmpdir = tmpdir.strpath

        table = np.loadtxt(os.path.join(self.path, 'gauss_data.txt'))
        # split into three column blocks: x, y and the y-uncertainty
        xvals, yvals, evals = (
            block.flatten() for block in np.hsplit(table, 3)
        )

        # these best weighted values and uncertainties obtained with Igor
        self.best_weighted = [-0.00246095, 19.5299, -8.28446e-2, 1.24692]
        self.best_weighted_errors = [
            0.0220313708486,
            1.12879436221,
            0.0447659158681,
            0.0412022938883,
        ]
        self.best_weighted_chisqr = 77.6040960351

        self.best_unweighted = [
            -0.10584111872702096,
            19.240347049328989,
            0.0092623066070940396,
            1.501362314145845,
        ]
        self.best_unweighted_errors = [
            0.34246565477,
            0.689820935208,
            0.0411243173041,
            0.0693429375282,
        ]
        self.best_unweighted_chisqr = 497.102084956

        self.p0 = np.array([0.1, 20., 0.1, 0.1])
        self.names = ['bkg', 'A', 'x0', 'width']
        self.bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)]

        # all four parameters are bounded and free to vary
        self.params = Parameters(name="gauss_params")
        for start, label, limits in zip(self.p0, self.names, self.bounds):
            entry = Parameter(start, name=label)
            entry.range(*limits)
            entry.vary = True
            self.params.append(entry)

        self.model = Model(self.params, fitfunc=gauss)
        self.data = Data1D((xvals, yvals, evals))
        self.objective = Objective(self.model, self.data)
        return 0

    def _stderrs(self):
        """Return the current ``stderr`` of every parameter, in order."""
        return [entry.stderr for entry in self.params]

    def test_best_weighted(self):
        """Weighted least-squares fit, then MCMC sampling with a checkpoint."""
        n_varying = len(self.objective.varying_parameters())
        assert_equal(n_varying, 4)
        self.objective.setp(self.p0)

        fitter = CurveFitter(self.objective, nwalkers=100)
        fit_result = fitter.fit('least_squares', jac='3-point')

        assert_almost_equal(fit_result.x, self.best_weighted, 3)
        assert_almost_equal(
            self.objective.chisqr(), self.best_weighted_chisqr, 5
        )

        # compare the residuals
        expected_residuals = (
            (self.data.y - self.model(self.data.x)) / self.data.y_err
        )
        assert_equal(self.objective.residuals(), expected_residuals)

        # compare objective.covar to the best_weighted_errors
        assert_allclose(
            self._stderrs(), self.best_weighted_errors, rtol=0.005
        )

        # we're also going to try the checkpointing here.
        checkpoint = os.path.join(self.tmpdir, 'checkpoint.txt')

        # compare samples to best_weighted_errors
        np.random.seed(1)
        fitter.sample(steps=101, random_state=1, verbose=False, f=checkpoint)
        process_chain(self.objective, fitter.chain, nburn=50, nthin=10)
        assert_allclose(
            self._stderrs(), self.best_weighted_errors, rtol=0.07
        )

        # test that the checkpoint worked
        saved = np.loadtxt(checkpoint).reshape(
            101, fitter._nwalkers, fitter.nvary
        )
        assert_allclose(saved, fitter.chain)

        # test loading the checkpoint
        reloaded = load_chain(checkpoint)
        assert_allclose(reloaded, fitter.chain)

        fitter.initialise('jitter')
        fitter.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        assert_equal(fitter.chain.shape[0], 2)

        # the following test won't work because of emcee/gh226.
        # chain = load_chain(checkpoint)
        # assert_(chain.shape == f.chain.shape)
        # assert_allclose(chain, f.chain)

    def test_best_unweighted(self):
        """Unweighted fit is compared with the unweighted reference values."""
        self.objective.weighted = False
        fitter = CurveFitter(self.objective, nwalkers=100)
        fit_result = fitter.fit()

        assert_almost_equal(
            self.objective.chisqr(), self.best_unweighted_chisqr
        )
        assert_almost_equal(fit_result.x, self.best_unweighted, 5)

        # compare the residuals
        expected_residuals = self.data.y - self.model(self.data.x)
        assert_equal(self.objective.residuals(), expected_residuals)

        # compare objective._covar to the best_unweighted_errors
        stderrs = np.array(self._stderrs())
        assert_almost_equal(stderrs, self.best_unweighted_errors, 3)
class TestObjective(object): def setup_method(self): # Choose the "true" parameters. # Reproducible results! np.random.seed(123) self.m_true = -0.9594 self.b_true = 4.294 self.f_true = 0.534 self.m_ls = -1.1040757010910947 self.b_ls = 5.4405552502319505 # Generate some synthetic data from the model. N = 50 x = np.sort(10 * np.random.rand(N)) y_err = 0.1 + 0.5 * np.random.rand(N) y = self.m_true * x + self.b_true y += np.abs(self.f_true * y) * np.random.randn(N) y += y_err * np.random.randn(N) self.data = Data1D(data=(x, y, y_err)) self.p = Parameter(self.b_ls, "b") | Parameter(self.m_ls, "m") self.model = Model(self.p, fitfunc=line) self.objective = Objective(self.model, self.data) # want b and m self.p[0].vary = True self.p[1].vary = True mod = np.array([ 4.78166609, 4.42364699, 4.16404064, 3.50343504, 3.4257084, 2.93594347, 2.92035638, 2.67533842, 2.28136038, 2.19772983, 1.99295496, 1.93748334, 1.87484436, 1.65161016, 1.44613461, 1.11128101, 1.04584535, 0.86055984, 0.76913963, 0.73906649, 0.73331407, 0.68350418, 0.65216599, 0.59838566, 0.13070299, 0.10749131, -0.01010195, -0.10010155, -0.29495372, -0.42817431, -0.43122391, -0.64637715, -1.30560686, -1.32626428, -1.44835768, -1.52589881, -1.56371158, -2.12048349, -2.24899179, -2.50292682, -2.53576659, -2.55797996, -2.60870542, -2.7074727, -3.93781479, -4.12415366, -4.42313742, -4.98368609, -5.38782395, -5.44077086, ]) self.mod = mod def test_model(self): # test that the line data produced by our model is the same as the # test data assert_almost_equal(self.model(self.data.x), self.mod) def test_synthetic_data(self): # test that we create the correct synthetic data by performing a least # squares fit on it assert_(self.data.y_err is not None) x, y, y_err, _ = self.data.data A = np.vstack((np.ones_like(x), x)).T C = np.diag(y_err * y_err) cov = np.linalg.inv(np.dot(A.T, np.linalg.solve(C, A))) b_ls, m_ls = np.dot(cov, np.dot(A.T, np.linalg.solve(C, y))) assert_almost_equal(b_ls, self.b_ls) assert_almost_equal(m_ls, 
self.m_ls) def test_setp(self): # check that we can set parameters self.p[0].vary = False assert_(len(self.objective.varying_parameters()) == 1) self.objective.setp(np.array([1.23])) assert_equal(self.p[1].value, 1.23) self.objective.setp(np.array([1.234, 1.23])) assert_equal(np.array(self.p), [1.234, 1.23]) def test_pvals(self): assert_equal(self.objective.parameters.pvals, [self.b_ls, self.m_ls]) self.objective.parameters.pvals = [1, 2] assert_equal(self.objective.parameters.pvals, [1, 2.0]) def test_logp(self): self.p[0].range(0, 10) assert_almost_equal(self.objective.logp(), np.log(0.1)) # logp should set parameters self.objective.logp([8, 2]) assert_equal(np.array(self.objective.parameters), [8, 2]) # if we supply a value outside the range it should return -inf assert_equal(self.objective.logp([-1, 2]), -np.inf) def test_logpost(self): # http://dan.iel.fm/emcee/current/user/line/ assert_almost_equal(self.objective.logp(), 0) assert_almost_equal(self.objective.nlpost(), -self.objective.logpost()) # the uncertainties are underestimated in this example... 
# amendment factor because dfm emcee example does not include 2pi amend = 0.5 * self.objective.npoints * np.log(2 * np.pi) assert_almost_equal(self.objective.logl() + amend, -559.01078135444595) assert_almost_equal(self.objective.logpost() + amend, -559.01078135444595) def test_prior_transform(self): self.p[0].bounds = PDF(stats.uniform(-10, 20)) self.p[1].bounds = PDF(stats.norm(loc=5, scale=10)) x = self.objective.prior_transform([0.1, 0.9]) assert_allclose( x, stats.uniform.ppf(0.1, -10, 20), stats.norm.ppf(0.9, loc=5, scale=10), ) def test_chisqr(self): assert_almost_equal(self.objective.chisqr(), 1231.1096772954229) def test_residuals(self): # weighted, with and without transform assert_almost_equal( self.objective.residuals(), (self.data.y - self.mod) / self.data.y_err, ) objective = Objective(self.model, self.data, transform=Transform("lin")) assert_almost_equal(objective.residuals(), (self.data.y - self.mod) / self.data.y_err) # unweighted, with and without transform objective = Objective(self.model, self.data, use_weights=False) assert_almost_equal(objective.residuals(), self.data.y - self.mod) objective = Objective( self.model, self.data, use_weights=False, transform=Transform("lin"), ) assert_almost_equal(objective.residuals(), self.data.y - self.mod) def test_masked_dataset(self): residuals = self.objective.residuals() mask = np.full_like(self.objective.data.y, True, bool) mask[1] = False self.objective.data.mask = mask assert_equal(self.objective.residuals().size, residuals.size - 1) def test_logp_extra(self): original_logl = self.objective.logl() self.objective.logp_extra = logp_extra assert_almost_equal(self.objective.logl(), original_logl + 1) def test_objective_pickle(self): # can you pickle the objective function? pkl = pickle.dumps(self.objective) pickle.loads(pkl) # check the ForkingPickler as well. if hasattr(ForkingPickler, "dumps"): pkl = ForkingPickler.dumps(self.objective) pickle.loads(pkl) # can you pickle with an extra function present? 
self.objective.logp_extra = logp_extra pkl = pickle.dumps(self.objective) pickle.loads(pkl) # check the ForkingPickler as well. if hasattr(ForkingPickler, "dumps"): pkl = ForkingPickler.dumps(self.objective) pickle.loads(pkl) def test_transform_pickle(self): # can you pickle the Transform object? pkl = pickle.dumps(Transform("logY")) pickle.loads(pkl) def test_transform(self): pth = os.path.dirname(os.path.abspath(__file__)) fname = os.path.join(pth, "c_PLP0011859_q.txt") data = ReflectDataset(fname) t = Transform("logY") yt, et = t(data.x, data.y, y_err=data.y_err) assert_equal(yt, np.log10(data.y)) yt, _ = t(data.x, data.y, y_err=None) assert_equal(yt, np.log10(data.y)) EPy, EPe = EP.EPlog10(data.y, data.y_err) assert_equal(yt, EPy) assert_equal(et, EPe) def test_repr_transform(self): p = Transform(None) q = eval(repr(p)) assert p.form == q.form p = Transform("logY") q = eval(repr(p)) assert p.form == q.form def test_lnsigma(self): # check that lnsigma works correctly, by using the emcee line fit # example def logp(theta, x, y, yerr): m, b, lnf = theta if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0: return 0.0 return -np.inf def logl(theta, x, y, yerr): m, b, lnf = theta model = m * x + b inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf)) print(inv_sigma2) return -0.5 * (np.sum((y - model)**2 * inv_sigma2 - np.log(inv_sigma2))) x, y, yerr, _ = self.data.data theta = [self.m_true, self.b_true, np.log(self.f_true)] bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr)) lnsigma = Parameter(np.log(self.f_true), "lnsigma", bounds=(-10, 1), vary=True) self.objective.setp(np.array([self.b_true, self.m_true])) self.objective.lnsigma = lnsigma # amendment factor because dfm emcee example does not include 2pi amend = 0.5 * self.objective.npoints * np.log(2 * np.pi) assert_allclose(self.objective.logl() + amend, bo.logl()) def test_base_emcee(self): # check that the base objective works against the emcee example. 
def logp(theta, x, y, yerr): m, b, lnf = theta if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0: return 0.0 return -np.inf def logl(theta, x, y, yerr): m, b, lnf = theta model = m * x + b inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf)) return -0.5 * (np.sum((y - model)**2 * inv_sigma2 - np.log(inv_sigma2))) x, y, yerr, _ = self.data.data theta = [self.m_true, self.b_true, np.log(self.f_true)] bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr)) # test that the wrapper gives the same logl as the direct function assert_almost_equal(bo.logl(theta), logl(theta, x, y, yerr)) assert_almost_equal(bo.logl(theta), -bo.nll(theta)) assert_almost_equal(bo.nll(theta), 12.8885352412) # Find the maximum likelihood value. result = minimize(bo.nll, theta) # for repeatable sampling np.random.seed(1) ndim, nwalkers = 3, 100 pos = [ result["x"] + 1e-4 * np.random.randn(ndim) for i in range(nwalkers) ] sampler = emcee.EnsembleSampler(nwalkers, ndim, bo.logpost) state = emcee.State(pos, random_state=np.random.get_state()) sampler.run_mcmc(state, 800) burnin = 200 samples = sampler.get_chain()[burnin:, :, :].reshape((-1, ndim)) samples[:, 2] = np.exp(samples[:, 2]) m_mc, b_mc, f_mc = map( lambda v: (v[1], v[2] - v[1], v[1] - v[0]), zip(*np.percentile(samples, [16, 50, 84], axis=0)), ) assert_allclose(m_mc, (-1.0071664, 0.0809444, 0.0784894), rtol=0.04) assert_allclose(b_mc, (4.5428107, 0.3549174, 0.3673304), rtol=0.04) assert_allclose(f_mc, (0.4610898, 0.0823304, 0.0640812), rtol=0.06) # # smoke test for covariance matrix bo.parameters = np.array(result["x"]) covar1 = bo.covar() uncertainties = np.sqrt(np.diag(covar1)) # covariance from objective._covar should be almost equal to # the covariance matrix from sampling covar2 = np.cov(samples.T) assert_almost_equal(np.sqrt(np.diag(covar2))[:2], uncertainties[:2], 2) # check covariance of self.objective # TODO var_arr = result["x"][:] var_arr[0], var_arr[1], var_arr[2] = var_arr[2], var_arr[1], var_arr[0] # 
assert_(self.objective.data.weighted) # self.objective.parameters.pvals = var_arr # covar3 = self.objective.covar() # uncertainties3 = np.sqrt(np.diag(covar3)) # assert_almost_equal(uncertainties3, uncertainties) # assert(False) def test_covar(self): # checks objective.covar against optimize.least_squares covariance. path = os.path.dirname(os.path.abspath(__file__)) theoretical = np.loadtxt(os.path.join(path, "gauss_data.txt")) xvals, yvals, evals = np.hsplit(theoretical, 3) xvals = xvals.flatten() yvals = yvals.flatten() evals = evals.flatten() p0 = np.array([0.1, 20.0, 0.1, 0.1]) names = ["bkg", "A", "x0", "width"] bounds = [(-1, 1), (0, 30), (-5.0, 5.0), (0.001, 2)] params = Parameters(name="gauss_params") for p, name, bound in zip(p0, names, bounds): param = Parameter(p, name=name) param.range(*bound) param.vary = True params.append(param) model = Model(params, fitfunc=gauss) data = Data1D((xvals, yvals, evals)) objective = Objective(model, data) # first calculate least_squares jac/hess/covariance matrices res = least_squares(objective.residuals, np.array(params), jac="3-point") hess_least_squares = np.matmul(res.jac.T, res.jac) covar_least_squares = np.linalg.inv(hess_least_squares) # now calculate corresponding matrices by hand, to see if the approach # concurs with least_squares objective.setp(res.x) _pvals = np.array(res.x) def residuals_scaler(vals): return np.squeeze(objective.residuals(_pvals * vals)) jac = approx_derivative(residuals_scaler, np.ones_like(_pvals)) hess = np.matmul(jac.T, jac) covar = np.linalg.inv(hess) covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T assert_allclose(covar, covar_least_squares) # check that objective.covar corresponds to the least_squares # covariance matrix, J.T x J objective.setp(res.x) covar_objective = objective.covar() assert_allclose(covar_objective, covar_least_squares) # sometimes the residuals method may not be usable, see if # objective.covar calculated from a scalar works objective.setp(res.x) 
covar_objective = objective.covar("nll") assert_allclose( np.sqrt(np.diag(covar_objective)), np.sqrt(np.diag(covar_least_squares)), rtol=0.08, ) # now see what happens with a parameter that has no effect on residuals param = Parameter(1.234, name="dummy") param.vary = True params.append(param) from pytest import raises with raises(LinAlgError): objective.covar() @pytest.mark.xfail def test_pymc3(self): # test objective logl against pymc3 # don't run this test if pymc3 is not installed try: import pymc3 as pm except ImportError: return logl = self.objective.logl() from refnx.analysis import pymc3_model from refnx.analysis.objective import _to_pymc3_distribution mod = pymc3_model(self.objective) with mod: pymc_logl = mod.logp({ "p0": self.p[0].value, "p1": self.p[1].value }) assert_allclose(logl, pymc_logl) # now check some of the distributions with pm.Model(): p = Parameter(1, bounds=(1, 10)) d = _to_pymc3_distribution("a", p) assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2)) assert_(np.isneginf(d.distribution.logp(-1).eval())) q = Parameter(1, bounds=PDF(stats.uniform(1, 9))) d = _to_pymc3_distribution("b", q) assert_almost_equal(d.distribution.logp(2).eval(), q.logp(2)) assert_(np.isneginf(d.distribution.logp(-1).eval())) p = Parameter(1, bounds=PDF(stats.uniform)) d = _to_pymc3_distribution("c", p) assert_almost_equal(d.distribution.logp(0.5).eval(), p.logp(0.5)) p = Parameter(1, bounds=PDF(stats.norm)) d = _to_pymc3_distribution("d", p) assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2)) p = Parameter(1, bounds=PDF(stats.norm(1, 10))) d = _to_pymc3_distribution("e", p) assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))