def test_backwards_compatible_fitting(self):
    """
    In 0.4.2 we replaced the usage of inspect by automatically generated
    names. This can cause problems for users using named variables to
    call fit.
    """
    xdata = np.linspace(1, 10, 10)
    ydata = 3*xdata**2

    a = Parameter(value=1.0)
    b = Parameter(value=2.5)

    y = Variable('y')

    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        x = Variable()
        self.assertTrue(len(w) == 1)
        self.assertTrue(issubclass(w[-1].category, DeprecationWarning))

    model = {y: a*x**b}

    with self.assertRaises(TypeError):
        fit = Fit(model, x=xdata, y=ydata)
def setUpClass(cls):
    mean = (0.6, 0.4)  # x, y mean
    cov = [[0.2**2, 0],
           [0, 0.1**2]]
    data = np.random.multivariate_normal(mean, cov, 1000000)

    # Insert them as y,x here because np.histogram2d follows matrix
    # (row, column) conventions rather than Cartesian ones.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False)
    # xdata = np.dstack((xx, yy)).T

    x0 = Parameter('x0', value=0.6)
    sig_x = Parameter('sig_x', value=0.2, min=0.0)
    x = Variable('x')
    y0 = Parameter('y0', value=0.4)
    sig_y = Parameter('sig_y', value=0.1, min=0.0)
    A = Parameter('A')
    y = Variable('y')
    z = Variable('z')
    g = {z: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)}
    cls.g = g
    # cls.xdata = xdata
    # cls.ydata = ydata
    cls.guess = interactive_guess.InteractiveGuess(g, x=xx.flatten(),
                                                   y=yy.flatten(),
                                                   z=ydata.flatten())
def test_2D_fitting():
    """
    Makes sure that a scalar model with 2 independent variables has the
    proper signature, and that the fit result is of the correct type.
    """
    xdata = np.random.randint(-10, 11, size=(2, 400))
    zdata = 2.5 * xdata[0]**2 + 7.0 * xdata[1]**2

    a = Parameter('a')
    b = Parameter('b')
    x = Variable('x')
    y = Variable('y')
    new = a * x**2 + b * y**2

    fit = Fit(new, xdata[0], xdata[1], zdata)

    result = fit.model(xdata[0], xdata[1], 2, 3)
    assert isinstance(result, tuple)

    for arg_name, name in zip(('x', 'y', 'a', 'b'),
                              inspect_sig.signature(fit.model).parameters):
        assert arg_name == name

    fit_result = fit.execute()
    assert isinstance(fit_result, FitResults)
def model_gauss2d(a_val, x_mu_val, y_mu_val, sig_x_val, sig_y_val, base,
                  has_base=True):
    a = Parameter(name='a', value=a_val)
    sig_x = Parameter(name='sig_x', value=sig_x_val)
    sig_y = Parameter(name='sig_y', value=sig_y_val)
    x_mu = Parameter(name='x_mu', value=x_mu_val)
    y_mu = Parameter(name='y_mu', value=y_mu_val)
    if has_base:
        b = Parameter(name='b', value=base)
    else:
        b = base

    x_var = Variable(name='x_var')
    y_var = Variable(name='y_var')
    z_var = Variable(name='z_var')

    model = {
        z_var: a * exp(-(((x_var - x_mu)**2 / (2 * sig_x**2))
                         + ((y_var - y_mu)**2 / (2 * sig_y**2)))) + b
    }
    return model
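# A minimal usage sketch for the model_gauss2d factory above (illustration
# only, not part of the test suite): the grid, true parameter values, and
# noise level are assumptions chosen for this example.
def _example_fit_gauss2d():
    xx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))
    # Synthetic data: the same 2D gaussian plus base, with a little noise.
    zdata = 4.0 * np.exp(-(((xx - 0.5)**2 / (2 * 1.0**2))
                           + ((yy + 0.5)**2 / (2 * 1.5**2)))) + 1.0
    zdata += np.random.normal(0, 0.05, zdata.shape)
    model = model_gauss2d(a_val=4.0, x_mu_val=0.5, y_mu_val=-0.5,
                          sig_x_val=1.0, sig_y_val=1.5, base=1.0)
    fit = Fit(model, x_var=xx, y_var=yy, z_var=zdata)
    return fit.execute()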
def test_custom_objective(recwarn):
    """
    Compare the result of a custom objective with the symbolic result.
    """
    # Create test data
    xdata = np.linspace(0, 100, 25)  # From 0 to 100 in 25 steps
    a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
    b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
    # Points scattered around the line 15 * x + 100
    ydata = a_vec * xdata + b_vec

    # Normal symbolic fit
    a = Parameter('a', value=0, min=0.0, max=1000)
    b = Parameter('b', value=0, min=0.0, max=1000)
    x = Variable('x')
    y = Variable('y')
    model = {y: a * x + b}

    fit = Fit(model, xdata, ydata, minimizer=BFGS)
    fit_result = fit.execute()

    def f(x, a, b):
        return a * x + b

    def chi_squared(a, b):
        return np.sum((ydata - f(xdata, a, b))**2)

    # Should no longer raise warnings, because internally we practice
    # what we preach.
    fit_custom = BFGS(chi_squared, [a, b])
    assert len(recwarn) == 0

    fit_custom_result = fit_custom.execute()

    assert isinstance(fit_custom_result, FitResults)
    assert fit_custom_result.value(a) == pytest.approx(fit_result.value(a), 1e-5)
    assert fit_custom_result.value(b) == pytest.approx(fit_result.value(b), 1e-4)

    # New preferred usage, multi component friendly.
    with pytest.raises(TypeError):
        callable_model = CallableNumericalModel(
            chi_squared,
            connectivity_mapping={y: {a, b}}
        )
    callable_model = CallableNumericalModel(
        {y: chi_squared},
        connectivity_mapping={y: {a, b}}
    )
    assert callable_model.params == [a, b]
    assert callable_model.independent_vars == []
    assert callable_model.dependent_vars == [y]
    assert callable_model.interdependent_vars == []
    assert callable_model.connectivity_mapping == {y: {a, b}}

    fit_custom = BFGS(callable_model, [a, b])
    fit_custom_result = fit_custom.execute()

    assert isinstance(fit_custom_result, FitResults)
    assert fit_custom_result.value(a) == pytest.approx(fit_result.value(a), 1e-5)
    assert fit_custom_result.value(b) == pytest.approx(fit_result.value(b), 1e-4)
def test_2_gaussian_2d_fitting():
    """
    Tests fitting to a scalar gaussian with 2 independent variables with
    tight bounds.
    """
    mean = (0.3, 0.4)  # x, y mean of the first peak
    cov = [[0.01**2, 0], [0, 0.01**2]]
    # TODO: evaluate gaussian at 100x100 points and add appropriate noise
    data = np.random.multivariate_normal(mean, cov, 3000000)
    mean = (0.7, 0.8)  # x, y mean of the second peak
    cov = [[0.01**2, 0], [0, 0.01**2]]
    data_2 = np.random.multivariate_normal(mean, cov, 3000000)
    data = np.vstack((data, data_2))

    # Insert them as y,x here because np.histogram2d follows matrix
    # (row, column) conventions rather than Cartesian ones.
    ydata, xedges, yedges = np.histogram2d(data[:, 1], data[:, 0], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False)
    # xdata = np.dstack((xx, yy)).T

    x = Variable('x')
    y = Variable('y')

    x0_1 = Parameter('x0_1', value=0.7, min=0.6, max=0.9)
    sig_x_1 = Parameter('sig_x_1', value=0.1, min=0.0, max=0.2)
    y0_1 = Parameter('y0_1', value=0.8, min=0.6, max=0.9)
    sig_y_1 = Parameter('sig_y_1', value=0.1, min=0.0, max=0.2)
    A_1 = Parameter('A_1')
    g_1 = A_1 * Gaussian(x, x0_1, sig_x_1) * Gaussian(y, y0_1, sig_y_1)

    x0_2 = Parameter('x0_2', value=0.3, min=0.2, max=0.5)
    sig_x_2 = Parameter('sig_x_2', value=0.1, min=0.0, max=0.2)
    y0_2 = Parameter('y0_2', value=0.4, min=0.2, max=0.5)
    sig_y_2 = Parameter('sig_y_2', value=0.1, min=0.0, max=0.2)
    A_2 = Parameter('A_2')
    g_2 = A_2 * Gaussian(x, x0_2, sig_x_2) * Gaussian(y, y0_2, sig_y_2)

    model = GradientModel(g_1 + g_2)
    fit = Fit(model, xx, yy, ydata)
    fit_result = fit.execute()

    assert isinstance(fit.minimizer, LBFGSB)

    img = model(x=xx, y=yy, **fit_result.params)[0]
    img_g_1 = g_1(x=xx, y=yy, **fit_result.params)
    img_g_2 = g_2(x=xx, y=yy, **fit_result.params)
    assert img == pytest.approx(img_g_1 + img_g_2)

    # Equal up to some precision. Not much obviously.
    assert fit_result.value(x0_1) == pytest.approx(0.7, 1e-3)
    assert fit_result.value(y0_1) == pytest.approx(0.8, 1e-3)
    assert fit_result.value(x0_2) == pytest.approx(0.3, 1e-3)
    assert fit_result.value(y0_2) == pytest.approx(0.4, 1e-3)
def test_gaussian_2d_fitting():
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables. Very sensitive to initial guesses; if they are chosen too
    restrictively, Fit fails to converge. It therefore appears to be more
    sensitive than NumericalLeastSquares.
    """
    mean = (0.6, 0.4)  # x, y mean
    cov = [[0.2**2, 0], [0, 0.1**2]]

    np.random.seed(0)
    data = np.random.multivariate_normal(mean, cov, 100000)

    # Insert them as y,x here because np.histogram2d follows matrix
    # (row, column) conventions rather than Cartesian ones.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
    A = Parameter(value=np.mean(ydata), min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = GradientModel(
        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    assert fit_result.value(x0) == pytest.approx(np.mean(data[:, 0]), 1e-3)
    assert fit_result.value(y0) == pytest.approx(np.mean(data[:, 1]), 1e-3)
    assert np.abs(fit_result.value(sig_x)) == pytest.approx(
        np.std(data[:, 0]), 1e-2)
    assert np.abs(fit_result.value(sig_y)) == pytest.approx(
        np.std(data[:, 1]), 1e-2)
    assert fit_result.r_squared >= 0.96

    # Compare with industry standard MINPACK
    fit_std = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK)
    fit_std_result = fit_std.execute()

    assert fit_std_result.value(x0) == pytest.approx(fit_result.value(x0), 1e-4)
    assert fit_std_result.value(y0) == pytest.approx(fit_result.value(y0), 1e-4)
    assert fit_std_result.value(sig_x) == pytest.approx(
        fit_result.value(sig_x), 1e-4)
    assert fit_std_result.value(sig_y) == pytest.approx(
        fit_result.value(sig_y), 1e-4)
    assert fit_std_result.r_squared == pytest.approx(fit_result.r_squared, 1e-4)
def test_2_gaussian_2d_fitting(self):
    """
    Tests fitting to a scalar gaussian with 2 independent variables with
    tight bounds.
    """
    mean = (0.3, 0.4)  # x, y mean of the first peak
    cov = [[0.01**2, 0], [0, 0.01**2]]
    data = np.random.multivariate_normal(mean, cov, 3000000)
    mean = (0.7, 0.8)  # x, y mean of the second peak
    cov = [[0.01**2, 0], [0, 0.01**2]]
    data_2 = np.random.multivariate_normal(mean, cov, 3000000)
    data = np.vstack((data, data_2))

    # Insert them as y,x here because np.histogram2d follows matrix
    # (row, column) conventions rather than Cartesian ones.
    ydata, xedges, yedges = np.histogram2d(data[:, 1], data[:, 0], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False)
    # xdata = np.dstack((xx, yy)).T

    x = Variable()
    y = Variable()

    x0_1 = Parameter(0.7, min=0.6, max=0.9)
    sig_x_1 = Parameter(0.1, min=0.0, max=0.2)
    y0_1 = Parameter(0.8, min=0.6, max=0.9)
    sig_y_1 = Parameter(0.1, min=0.0, max=0.2)
    A_1 = Parameter()
    g_1 = A_1 * Gaussian(x, x0_1, sig_x_1) * Gaussian(y, y0_1, sig_y_1)

    x0_2 = Parameter(0.3, min=0.2, max=0.5)
    sig_x_2 = Parameter(0.1, min=0.0, max=0.2)
    y0_2 = Parameter(0.4, min=0.2, max=0.5)
    sig_y_2 = Parameter(0.1, min=0.0, max=0.2)
    A_2 = Parameter()
    g_2 = A_2 * Gaussian(x, x0_2, sig_x_2) * Gaussian(y, y0_2, sig_y_2)

    model = g_1 + g_2
    fit = Fit(model, xx, yy, ydata)
    fit_result = fit.execute()

    self.assertIsInstance(fit.fit, ConstrainedNumericalLeastSquares)

    img = model(x=xx, y=yy, **fit_result.params)
    img_g_1 = g_1(x=xx, y=yy, **fit_result.params)
    img_g_2 = g_2(x=xx, y=yy, **fit_result.params)
    np.testing.assert_array_equal(img, img_g_1 + img_g_2)

    # Equal up to some precision. Not much obviously.
    self.assertAlmostEqual(fit_result.value(x0_1), 0.7, 3)
    self.assertAlmostEqual(fit_result.value(y0_1), 0.8, 3)
    self.assertAlmostEqual(fit_result.value(x0_2), 0.3, 3)
    self.assertAlmostEqual(fit_result.value(y0_2), 0.4, 3)
def test_gaussian_2d_fitting_background():
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables to data with a background. Added after #149.
    """
    mean = (0.6, 0.4)  # x, y mean
    cov = [[0.2**2, 0], [0, 0.1**2]]
    background = 3.0

    # TODO: Since we bin this data later on in a 100 bins, just evaluate 100
    # points on a Gaussian, and add an appropriate amount of noise. This
    # burns CPU cycles without good reason.
    data = np.random.multivariate_normal(mean, cov, 500000)

    # Insert them as y,x here because np.histogram2d follows matrix
    # (row, column) conventions rather than Cartesian ones.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2
    ydata += background  # Background

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter('x0', value=1.1 * mean[0], min=0.0, max=1.0)
    sig_x = Parameter('sig_x', value=1.1 * 0.2, min=0.0, max=0.3)
    y0 = Parameter('y0', value=1.1 * mean[1], min=0.0, max=1.0)
    sig_y = Parameter('sig_y', value=1.1 * 0.1, min=0.0, max=0.3)
    A = Parameter('A', value=1.1 * np.mean(ydata), min=0.0)
    b = Parameter('b', value=1.2 * background, min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = GradientModel(
        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y) + b})
    # ydata, = model(x=xx, y=yy, x0=mean[0], y0=mean[1], sig_x=np.sqrt(cov[0][0]), sig_y=np.sqrt(cov[1][1]), A=1, b=3.0)
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    assert fit_result.value(x0) / np.mean(data[:, 0]) == pytest.approx(1.0, 1e-2)
    assert fit_result.value(y0) / np.mean(data[:, 1]) == pytest.approx(1.0, 1e-2)
    assert np.abs(fit_result.value(sig_x)) / np.std(data[:, 0]) == pytest.approx(1.0, 1e-2)
    assert np.abs(fit_result.value(sig_y)) / np.std(data[:, 1]) == pytest.approx(1.0, 1e-2)
    assert background / fit_result.value(b) == pytest.approx(1.0, 1e-1)
    assert fit_result.r_squared >= 0.96
def test_fitting_2(self):
    np.random.seed(4242)
    mean = (0.3, 0.3)  # x, y mean of the first peak
    cov = [[0.01**2, 0.4],
           [0.4, 0.01**2]]
    data = np.random.multivariate_normal(mean, cov, 1000000)
    mean = (0.7, 0.7)  # x, y mean of the second peak
    cov = [[0.01**2, 0],
           [0, 0.01**2]]
    data_2 = np.random.multivariate_normal(mean, cov, 1000000)
    data = np.vstack((data, data_2))

    # Insert them as y,x here because np.histogram2d follows matrix
    # (row, column) conventions rather than Cartesian ones.
    ydata, xedges, yedges = np.histogram2d(data[:, 1], data[:, 0], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False)
    # xdata = np.dstack((xx, yy)).T

    x = Variable()
    y = Variable()

    x0_1 = Parameter(0.7, min=0.6, max=0.8)
    sig_x_1 = Parameter(0.1, min=0.0, max=0.2)
    y0_1 = Parameter(0.7, min=0.6, max=0.8)
    sig_y_1 = Parameter(0.1, min=0.0, max=0.2)
    A_1 = Parameter()
    g_1 = A_1 * Gaussian(x, x0_1, sig_x_1) * Gaussian(y, y0_1, sig_y_1)

    x0_2 = Parameter(0.3, min=0.2, max=0.4)
    sig_x_2 = Parameter(0.1, min=0.0, max=0.2)
    y0_2 = Parameter(0.3, min=0.2, max=0.4)
    sig_y_2 = Parameter(0.1, min=0.0, max=0.2)
    A_2 = Parameter()
    g_2 = A_2 * Gaussian(x, x0_2, sig_x_2) * Gaussian(y, y0_2, sig_y_2)

    model = g_1 + g_2
    fit = Fit(model, xx, yy, ydata)
    fit_result = fit.execute()

    for param in fit.model.params:
        self.assertAlmostEqual(fit_result.stdev(param)**2,
                               fit_result.variance(param))

    # Covariance matrix should be symmetric
    for param_1 in fit.model.params:
        for param_2 in fit.model.params:
            self.assertAlmostEqual(fit_result.covariance(param_1, param_2),
                                   fit_result.covariance(param_2, param_1))
def setUpClass(cls):
    x = Variable()
    y1 = Variable()
    y2 = Variable()
    k = Parameter(900)
    x0 = Parameter(1.5)

    model = {y1: k * (x-x0)**2,
             y2: x - x0}
    x_data = np.linspace(0, 2.5, 50)
    y1_data = model[y1](x=x_data, k=1000, x0=1)
    y2_data = model[y2](x=x_data, k=1000, x0=1)

    cls.guess = interactive_guess.InteractiveGuess2D(model, x=x_data,
                                                     y1=y1_data,
                                                     y2=y2_data)
def setUp(self):
    x = Variable('x')
    y = Variable('y')
    xmin, xmax = -5, 5
    self.x0_1 = Parameter('x01', value=0, min=xmin, max=xmax)
    self.sig_x_1 = Parameter('sigx1', value=0, min=0.0, max=1)
    self.y0_1 = Parameter('y01', value=0, min=xmin, max=xmax)
    self.sig_y_1 = Parameter('sigy1', value=0, min=0.0, max=1)
    self.A_1 = Parameter('A1', min=0, max=1000)
    g_1 = self.A_1 * Gaussian(x, self.x0_1, self.sig_x_1) \
        * Gaussian(y, self.y0_1, self.sig_y_1)
    self.model = Model(g_1)
def test_gaussian_2d_fitting_background(self):
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables to data with a background. Added after #149.
    """
    mean = (0.6, 0.4)  # x, y mean
    cov = [[0.2**2, 0], [0, 0.1**2]]
    background = 3.0
    data = np.random.multivariate_normal(mean, cov, 500000)

    # Insert them as y,x here because np.histogram2d follows matrix
    # (row, column) conventions rather than Cartesian ones.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2
    ydata += background  # Background

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=1.1 * mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=1.1 * 0.2, min=0.0, max=0.3)
    y0 = Parameter(value=1.1 * mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=1.1 * 0.1, min=0.0, max=0.3)
    A = Parameter(value=1.1 * np.mean(ydata), min=0.0)
    b = Parameter(value=1.2 * background, min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')
    model = Model(
        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y) + b})
    # ydata, = model(x=xx, y=yy, x0=mean[0], y0=mean[1], sig_x=np.sqrt(cov[0][0]), sig_y=np.sqrt(cov[1][1]), A=1, b=3.0)

    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(x0) / np.mean(data[:, 0]), 1.0, 2)
    self.assertAlmostEqual(fit_result.value(y0) / np.mean(data[:, 1]), 1.0, 2)
    self.assertAlmostEqual(
        np.abs(fit_result.value(sig_x)) / np.std(data[:, 0]), 1.0, 2)
    self.assertAlmostEqual(
        np.abs(fit_result.value(sig_y)) / np.std(data[:, 1]), 1.0, 2)
    self.assertAlmostEqual(background / fit_result.value(b), 1.0, 1)
    self.assertGreaterEqual(fit_result.r_squared / 0.96, 1.0)
def test_fitting(self):
    """
    Tests fitting with NumericalLeastSquares. Makes sure that the resulting
    objects and values are of the right type, and that the fit_result does
    not have unexpected members.
    """
    xdata = np.linspace(1, 10, 10)
    ydata = 3*xdata**2

    a = Parameter()  # 3.1, min=2.5, max=3.5
    b = Parameter()
    x = Variable()
    new = a*x**b

    fit = Fit(new, xdata, ydata, minimizer=MINPACK)
    fit_result = fit.execute()

    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0)
    self.assertAlmostEqual(fit_result.value(b), 2.0)

    self.assertIsInstance(fit_result.stdev(a), float)
    self.assertIsInstance(fit_result.stdev(b), float)

    self.assertIsInstance(fit_result.r_squared, float)
    # by definition since there's no fuzziness
    self.assertEqual(fit_result.r_squared, 1.0)
def test_fixed_parameters_2():
    """
    Make sure parameter boundaries are respected.
    """
    x = Parameter('x', min=1)
    y = Variable('y')
    model = Model({y: x**2})

    bounded_minimizers = list(subclasses(BoundedMinimizer))
    for minimizer in bounded_minimizers:
        if minimizer is MINPACK:
            # Not a MINPACKable problem because it only has a param
            continue
        fit = Fit(model, minimizer=minimizer)
        assert isinstance(fit.objective, MinimizeModel)
        if minimizer is DifferentialEvolution:
            # Also needs a max
            x.max = 10
            fit_result = fit.execute()
            x.max = None
        else:
            fit_result = fit.execute()
        assert fit_result.value(x) >= 1.0
        assert fit_result.value(x) <= 2.0
        assert fit.minimizer.bounds == [(1, None)]
def test_likelihood_fitting_gaussian(self):
    """
    Fit using the likelihood method.
    """
    mu, sig = parameters('mu, sig')
    sig.min = 0.01
    sig.value = 3.0
    mu.value = 50.
    x = Variable()
    pdf = Gaussian(x, mu, sig)

    np.random.seed(10)
    xdata = np.random.normal(51., 3.5, 10000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev/np.sqrt(len(xdata))

    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(mu) / mean, 1, 6)
    self.assertAlmostEqual(fit_result.stdev(mu) / mean_stdev, 1, 3)
    self.assertAlmostEqual(fit_result.value(sig) / np.std(xdata), 1, 6)
def test_likelihood_fitting_gaussian():
    """
    Fit using the likelihood method.
    """
    mu, sig = parameters('mu, sig')
    sig.min = 0.01
    sig.value = 3.0
    mu.value = 50.
    x = Variable('x')
    pdf = GradientModel(Gaussian(x, mu, sig))

    np.random.seed(10)
    # TODO: Do we really need 10k points?
    xdata = np.random.normal(51., 3.5, 10000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()

    assert fit_result.value(mu) == pytest.approx(mean, 1e-6)
    assert fit_result.stdev(mu) == pytest.approx(mean_stdev, 1e-3)
    assert fit_result.value(sig) == pytest.approx(np.std(xdata), 1e-6)
def test_gaussian_fitting():
    """
    Tests fitting to a gaussian function and fit_result.params unpacking.
    """
    xdata = 2 * np.random.rand(10000) - 1  # random between [-1, 1]
    ydata = 5.0 * scipy.stats.norm.pdf(xdata, loc=0.0, scale=1.0)

    x0 = Parameter('x0')
    sig = Parameter('sig')
    A = Parameter('A')
    x = Variable('x')
    g = GradientModel(A * Gaussian(x, x0, sig))

    fit = Fit(g, xdata, ydata)
    assert isinstance(fit.objective, LeastSquares)
    fit_result = fit.execute()

    assert fit_result.value(A) == pytest.approx(5.0)
    assert np.abs(fit_result.value(sig)) == pytest.approx(1.0)
    assert fit_result.value(x0) == pytest.approx(0.0)

    sexy = g(x=2.0, **fit_result.params)
    ugly = g(
        x=2.0,
        x0=fit_result.value(x0),
        A=fit_result.value(A),
        sig=fit_result.value(sig),
    )
    assert sexy == ugly
def test_fitting():
    """
    Tests fitting with NumericalLeastSquares. Makes sure that the resulting
    objects and values are of the right type, and that the fit_result does
    not have unexpected members.
    """
    xdata = np.linspace(1, 10, 10)
    ydata = 3 * xdata**2

    a = Parameter('a')  # 3.1, min=2.5, max=3.5
    b = Parameter('b')
    x = Variable('x')
    new = a * x**b

    fit = Fit(new, xdata, ydata, minimizer=MINPACK)
    fit_result = fit.execute()

    assert isinstance(fit_result, FitResults)
    assert fit_result.value(a) == pytest.approx(3.0)
    assert fit_result.value(b) == pytest.approx(2.0)

    assert isinstance(fit_result.stdev(a), float)
    assert isinstance(fit_result.stdev(b), float)

    assert isinstance(fit_result.r_squared, float)
    # by definition since there's no fuzziness
    assert fit_result.r_squared == 1.0
def gen_fit_objs(x, a, minimizer):
    """Generates linear fits with different a parameter values."""
    for a_i in a:
        a_par = Parameter('a', 4.0, min=0.0, max=20)
        b_par = Parameter('b', 1.2, min=0.0, max=2)
        x_var = Variable('x')
        y_var = Variable('y')

        con_map = {y_var: {x_var, a_par, b_par}}
        # `f` is the module-level callable f(x, a, b) wrapped by this model.
        model = CallableNumericalModel({y_var: f},
                                       connectivity_mapping=con_map)

        fit = Fit(
            model, x, a_i * x + 1, minimizer=minimizer,
            objective=SqrtLeastSquares if minimizer is not MINPACK
            else VectorLeastSquares
        )
        yield fit
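# A minimal usage sketch for gen_fit_objs above (illustration only): the x
# grid, slope values, and the BFGS minimizer are assumptions chosen for
# this example.
def _example_gen_fit_objs():
    x = np.linspace(0, 10, 50)
    a_values = [2.0, 5.0, 8.0]
    # One FitResults per slope value, fitted to noiseless a_i * x + 1 data.
    return [fit.execute() for fit in gen_fit_objs(x, a_values, BFGS)]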
def setUpClass(cls):
    np.random.seed(0)
    x = Variable()
    y = Variable()
    k = Parameter(900)
    x0 = Parameter(1.5)

    # You can NOT do this in one go. Blame Sympy. Not my fault.
    cls.k = k
    cls.x0 = x0

    model = {y: distr(x, k, x0)}
    x_data = np.linspace(0, 2.5, 50)
    y_data = model[y](x=x_data, k=1000, x0=1)
    cls.guess = interactive_guess.InteractiveGuess2D(model, x=x_data,
                                                     y=y_data)
def __init__(self, cell_obj, cell_function):
    self.cell_obj = cell_obj
    self.cell_function = cell_function

    r = Parameter('r', value=cell_obj.coords.r,
                  min=cell_obj.coords.r / 4, max=cell_obj.coords.r * 4)
    xl = Parameter('xl', value=cell_obj.coords.xl,
                   min=cell_obj.coords.xl - cfg.ENDCAP_RANGE / 2,
                   max=cell_obj.coords.xl + cfg.ENDCAP_RANGE / 2)
    xr = Parameter('xr', value=cell_obj.coords.xr,
                   min=cell_obj.coords.xr - cfg.ENDCAP_RANGE / 2,
                   max=cell_obj.coords.xr + cfg.ENDCAP_RANGE / 2)
    a0 = Parameter('a0', value=cell_obj.coords.coeff[0],
                   min=0, max=cell_obj.data.shape[0] * 1.5)
    a1 = Parameter('a1', value=cell_obj.coords.coeff[1], min=-15, max=15)
    a2 = Parameter('a2', value=cell_obj.coords.coeff[2], min=-0.05, max=0.05)
    y = Variable('y')

    parameters = [a0, a1, a2, r, xl, xr]
    super(NumericalCellModel, self).__init__({y: cell_function}, [],
                                             parameters)
def test_gaussian_fitting(self):
    """
    Tests fitting to a gaussian function and fit_result.params unpacking.
    """
    xdata = 2*np.random.rand(10000) - 1  # random between [-1, 1]
    ydata = 5.0 * scipy.stats.norm.pdf(xdata, loc=0.0, scale=1.0)

    x0 = Parameter('x0')
    sig = Parameter('sig')
    A = Parameter('A')
    x = Variable('x')
    g = A * Gaussian(x, x0, sig)

    fit = Fit(g, xdata, ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(A), 5.0)
    self.assertAlmostEqual(np.abs(fit_result.value(sig)), 1.0)
    self.assertAlmostEqual(fit_result.value(x0), 0.0)

    sexy = g(x=2.0, **fit_result.params)
    ugly = g(
        x=2.0,
        x0=fit_result.value(x0),
        A=fit_result.value(A),
        sig=fit_result.value(sig),
    )
    self.assertEqual(sexy, ugly)
def test_custom_objective(self):
    """
    Compare the result of a custom objective with the symbolic result.
    """
    # Create test data
    xdata = np.linspace(0, 100, 25)  # From 0 to 100 in 25 steps
    a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
    b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
    # Points scattered around the line 15 * x + 100
    ydata = a_vec * xdata + b_vec

    # Normal symbolic fit
    a = Parameter('a', value=0, min=0.0, max=1000)
    b = Parameter('b', value=0, min=0.0, max=1000)
    x = Variable()
    model = a * x + b

    fit = Fit(model, xdata, ydata, minimizer=BFGS)
    fit_result = fit.execute()

    def f(x, a, b):
        return a * x + b

    def chi_squared(a, b):
        return np.sum((ydata - f(xdata, a, b))**2)

    fit_custom = BFGS(chi_squared, [a, b])
    fit_custom_result = fit_custom.execute()

    self.assertIsInstance(fit_custom_result, FitResults)
    self.assertAlmostEqual(
        fit_custom_result.value(a) / fit_result.value(a), 1.0, 5)
    self.assertAlmostEqual(
        fit_custom_result.value(b) / fit_result.value(b), 1.0, 4)
def test_2D_fitting(self):
    np.random.seed(1)
    xdata = np.random.randint(-10, 11, size=(2, 100))
    zdata = 2.5 * xdata[0]**2 + 7.0 * xdata[1]**2

    a = Parameter()
    b = Parameter(10)
    x = Variable()
    y = Variable()
    z = Variable()
    new = {z: a * x**2 + b * y**2}

    fit = NonLinearLeastSquares(new, x=xdata[0], y=xdata[1], z=zdata)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a), 2.5)
    self.assertAlmostEqual(np.abs(fit_result.value(b)), 7.0)
def _get_poly(degree):
    x = Variable(name='x')
    params = [Parameter(name='a' + str(i)) for i in range(degree + 1)]
    p = params[0]
    for i in np.arange(1, degree + 1):
        p += params[i] * x**i
    return p
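# A minimal usage sketch for _get_poly above (illustration only; the degree
# and data values are assumptions): build a quadratic a0 + a1*x + a2*x**2
# and fit it to noiseless data, so the recovered coefficients should be
# (numerically) exact.
def _example_fit_poly():
    poly = _get_poly(2)
    xdata = np.linspace(-3, 3, 30)
    ydata = 1.0 + 2.0 * xdata - 0.5 * xdata**2
    fit = Fit(poly, xdata, ydata)
    return fit.execute()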
def setUpClass(cls):
    np.random.seed(0)
    x = Variable('x')
    y = Variable('y')
    k = Parameter('k', 900)
    x0 = Parameter('x0', 1.5)
    cls.k = k
    cls.x0 = x0

    model = {y: distr(x, k, x0)}
    x_data = np.linspace(0, 2.5, 50)
    y_data = model[y](x=x_data, k=1000, x0=1)
    cls.guess = interactive_guess.InteractiveGuess(model, x=x_data, y=y_data)
def test_gaussian_2d_fitting():
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables.
    """
    mean = (0.6, 0.4)  # x, y mean
    cov = [[0.2**2, 0], [0, 0.1**2]]

    # TODO: Since we bin this data later on in a 100 bins, just evaluate 100
    # points on a Gaussian, and add an appropriate amount of noise. This
    # burns CPU cycles without good reason.
    data = np.random.multivariate_normal(mean, cov, 1000000)

    # Insert them as y,x here because np.histogram2d follows matrix
    # (row, column) conventions rather than Cartesian ones.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
    A = Parameter(value=np.mean(ydata), min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = GradientModel(
        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    assert fit_result.value(x0) == pytest.approx(np.mean(data[:, 0]), 1e-3)
    assert fit_result.value(y0) == pytest.approx(np.mean(data[:, 1]), 1e-3)
    assert np.abs(fit_result.value(sig_x)) == pytest.approx(
        np.std(data[:, 0]), 1e-2)
    assert np.abs(fit_result.value(sig_y)) == pytest.approx(
        np.std(data[:, 1]), 1e-2)
    assert fit_result.r_squared >= 0.96
def test_error_advanced(self):
    """
    Compare the error propagation of ConstrainedNumericalLeastSquares
    against NumericalLeastSquares. Models an example from the Mathematica
    docs and tries to replicate it:
    http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html
    """
    data = [
        [0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6],
        [1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.],
        [7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4],
        [4.7, 8.4, 10.]
    ]
    xdata, ydata, zdata = [np.array(data) for data in zip(*data)]
    # errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])

    a = Parameter(3.0)
    b = Parameter(0.9)
    c = Parameter(5.0)
    x = Variable()
    y = Variable()
    z = Variable()
    model = {z: a * log(b * x + c * y)}

    const_fit = ConstrainedNumericalLeastSquares(model, xdata, ydata, zdata,
                                                 absolute_sigma=False)
    const_result = const_fit.execute()
    fit = NumericalLeastSquares(model, xdata, ydata, zdata,
                                absolute_sigma=False)
    std_result = fit.execute()

    self.assertEqual(const_fit.absolute_sigma, fit.absolute_sigma)

    self.assertAlmostEqual(const_result.value(a), std_result.value(a), 4)
    self.assertAlmostEqual(const_result.value(b), std_result.value(b), 4)
    self.assertAlmostEqual(const_result.value(c), std_result.value(c), 4)

    self.assertAlmostEqual(const_result.stdev(a), std_result.stdev(a), 4)
    self.assertAlmostEqual(const_result.stdev(b), std_result.stdev(b), 4)
    self.assertAlmostEqual(const_result.stdev(c), std_result.stdev(c), 4)
def test_likelihood_fitting_bivariate_gaussian():
    """
    Fit using the likelihood method.
    """
    # Make variables and parameters
    x = Variable('x')
    y = Variable('y')
    x0 = Parameter('x0', value=0.6, min=0.5, max=0.7)
    sig_x = Parameter('sig_x', value=0.1, max=1.0)
    y0 = Parameter('y0', value=0.7, min=0.6, max=0.9)
    sig_y = Parameter('sig_y', value=0.05, max=1.0)
    rho = Parameter('rho', value=0.001, min=-1, max=1)

    pdf = BivariateGaussian(x=x, mu_x=x0, sig_x=sig_x, y=y, mu_y=y0,
                            sig_y=sig_y, rho=rho)

    # Draw 100000 samples from a bivariate distribution
    mean = [0.59, 0.8]
    r = 0.6
    cov = np.array([[0.11**2, 0.11 * 0.23 * r],
                    [0.11 * 0.23 * r, 0.23**2]])
    np.random.seed(42)
    # TODO: Do we really need 100k points?
    xdata, ydata = np.random.multivariate_normal(mean, cov, 100000).T

    fit = Fit(pdf, x=xdata, y=ydata, objective=LogLikelihood)
    fit_result = fit.execute()

    assert fit_result.value(x0) == pytest.approx(mean[0], 1e-2)
    assert fit_result.value(y0) == pytest.approx(mean[1], 1e-2)
    assert fit_result.value(sig_x) == pytest.approx(np.sqrt(cov[0, 0]), 1e-2)
    assert fit_result.value(sig_y) == pytest.approx(np.sqrt(cov[1, 1]), 1e-2)
    assert fit_result.value(rho) == pytest.approx(r, 1e-2)

    marginal = integrate(pdf, (y, -oo, oo), conds='none')
    fit = Fit(marginal, x=xdata, objective=LogLikelihood)

    with pytest.raises(NameError):
        # Should raise a NameError, not a TypeError, see #219
        fit.execute()
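# Background for the marginalisation above (a sketch, not part of the test
# suite): integrating a bivariate Gaussian over y leaves the univariate
# marginal N(mu_x, sig_x**2), independent of rho. A quick numerical check
# of that identity with assumed values (rho = 0 here for simplicity):
def _check_bivariate_marginal():
    grid_y = np.linspace(-10, 10, 20001)
    mv = scipy.stats.multivariate_normal(mean=[0.6, 0.7],
                                         cov=[[0.1**2, 0], [0, 0.05**2]])
    # Evaluate the joint pdf along the line x = 0.65 and integrate over y.
    points = np.column_stack([np.full_like(grid_y, 0.65), grid_y])
    marginal_at_x = np.trapz(mv.pdf(points), grid_y)
    assert abs(marginal_at_x - scipy.stats.norm(0.6, 0.1).pdf(0.65)) < 1e-6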
def test_slots(self):
    """
    Make sure Parameters and Variables don't have a __dict__.
    """
    P = Parameter('P')

    # If you only have __slots__ you can't set arbitrary attributes, but
    # you *should* be able to set those that are in your __slots__
    try:
        P.min = 0
    except AttributeError:
        self.fail()

    with self.assertRaises(AttributeError):
        P.foo = None

    V = Variable('V')
    with self.assertRaises(AttributeError):
        V.bar = None
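# Plain-Python illustration of the __slots__ behaviour the test above relies
# on (a sketch; _SlottedPoint is a hypothetical class, not part of symfit):
class _SlottedPoint(object):
    __slots__ = ('x', 'y')  # no __dict__, so only these attributes exist

def _example_slots():
    p = _SlottedPoint()
    p.x = 1.0  # fine: 'x' is declared in __slots__
    try:
        p.z = 2.0  # AttributeError: arbitrary attributes are rejected
    except AttributeError:
        pass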