def test_goodness_of_fit():

    # Let's generate some data with y = Powerlaw(x)

    gen_function = Powerlaw()

    # Generate a dataset using the power law, and a
    # constant 30% error

    x = np.logspace(0, 2, 50)

    xyl_generator = XYLike.from_function(
        "sim_data", function=gen_function, x=x, yerr=0.3 * gen_function(x)
    )

    y = xyl_generator.y
    y_err = xyl_generator.yerr

    fit_function = Powerlaw()

    xyl = XYLike("data", x, y, y_err)

    parameters, like_values = xyl.fit(fit_function)

    gof, all_results, all_like_values = xyl.goodness_of_fit()

    # Compute the number of degrees of freedom
    n_dof = len(xyl.x) - len(fit_function.free_parameters)

    # Get the observed value for chi2
    obs_chi2 = 2 * like_values["-log(likelihood)"]["data"]

    theoretical_gof = scipy.stats.chi2(n_dof).sf(obs_chi2)

    assert np.isclose(theoretical_gof, gof["total"], rtol=0.1)

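# Background for the assertion above, as a standalone helper (a minimal
# sketch, not part of the test suite): for Gaussian errors, 2 * (-log L) at
# the best fit follows a chi2 distribution with n_dof degrees of freedom, so
# the chi2 survival function gives the p-value that goodness_of_fit()
# estimates by Monte Carlo simulation.
def _chi2_gof_sketch(obs_stat, n_dof):

    # Survival function = P(chi2 >= obs_stat); near 0.5 when the observed
    # statistic is close to n_dof, i.e. when the fit is good
    return scipy.stats.chi2(n_dof).sf(obs_stat)
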
def test_XYLike_chi2():

    # Get fake data with Gaussian noise

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # Fit

    xy = XYLike("test", x, y, yerr)

    fitfun = Line() + Gaussian()

    fitfun.F_2 = 60.0
    fitfun.mu_2 = 4.5

    res = xy.fit(fitfun)

    # Verify that the fit converged where it should have
    assert np.allclose(
        res[0]["value"].values,
        [0.82896119, 40.20269202, 62.80359114, 5.04080011, 0.27286713],
        rtol=0.05,
    )

    # Test not setting yerr: unit errors are assumed

    xy = XYLike("test", x, y)

    assert np.all(xy.yerr == np.ones_like(y))

    fitfun = Line() + Gaussian()

    fitfun.F_2 = 60.0
    fitfun.mu_2 = 4.5

    res = xy.fit(fitfun)

def test_XYLike_assign_to_source():

    # Get fake data with Gaussian noise

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # Fit

    xy = XYLike("test", x, y, yerr)

    xy.assign_to_source("pts1")

    fitfun = Line() + Gaussian()

    fitfun.F_2 = 60.0
    fitfun.mu_2 = 4.5

    fitfun2 = Line() + Gaussian()

    fitfun2.F_2 = 60.0
    fitfun2.mu_2 = 4.5

    pts1 = PointSource("pts1", ra=0.0, dec=0.0, spectral_shape=fitfun)
    pts2 = PointSource("pts2", ra=2.5, dec=3.2, spectral_shape=fitfun2)

    for parameter in list(fitfun2.parameters.values()):

        parameter.fix = True

    model = Model(pts1, pts2)

    data = DataList(xy)

    jl = JointLikelihood(model, data)

    _ = jl.fit()

    predicted_parameters = np.array(
        [0.82896119, 40.20269202, 62.80359114, 5.04080011, 0.27286713]
    )

    assert np.allclose(
        [
            fitfun.a_1.value,
            fitfun.b_1.value,
            fitfun.F_2.value,
            fitfun.mu_2.value,
            fitfun.sigma_2.value,
        ],
        predicted_parameters,
        rtol=0.05,
    )

    # Test that the likelihood does not change by changing the parameters of
    # the other source

    log_like_before = jl.minus_log_like_profile(*predicted_parameters)

    fitfun2.F_2 = 120.0

    log_like_after = jl.minus_log_like_profile(*predicted_parameters)

    assert log_like_before == log_like_after

    # Now test that if we do not assign a source, then the log likelihood
    # value will change

    xy.assign_to_source(None)

    # Test that the likelihood this time changes by changing the parameters
    # of the other source

    log_like_before = jl.minus_log_like_profile(*predicted_parameters)

    fitfun2.F_2 = 60.0

    log_like_after = jl.minus_log_like_profile(*predicted_parameters)

    assert log_like_before != log_like_after

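# The behavior exercised above, in isolation (an explanatory sketch; "obs"
# is an illustrative name): by default an XYLike plugin compares its data
# with the sum of all point sources in the model, while assign_to_source()
# restricts the comparison to a single named source.
#
#     xy = XYLike("obs", x, y, yerr)
#     xy.assign_to_source("pts1")   # only "pts1" predicts these data
#     xy.assign_to_source(None)     # back to the sum over all sources
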
def test_XYLike_dataframe():

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # chi2 version

    xy = XYLike("test", x, y, yerr)

    df = xy.to_dataframe()

    # Read it back in from the dataframe and check that the Poisson
    # flag survives the round trip

    new_xy = XYLike.from_dataframe("df", df)

    assert not new_xy.is_poisson

    # Poisson version

    xy = XYLike("test", x, y, poisson_data=True)

    df = xy.to_dataframe()

    # Read it back in from the dataframe

    new_xy = XYLike.from_dataframe("df", df, poisson=True)

    assert new_xy.is_poisson

def test_XYLike_poisson():

    # Now the Poisson case

    y = np.array(poiss_sig)

    xy = XYLike("test", x, y, poisson_data=True)

    fitfun = Line() + Gaussian()

    fitfun.F_2 = 60.0
    fitfun.F_2.bounds = (0, 200.0)
    fitfun.mu_2 = 5.0
    fitfun.a_1.bounds = (0.1, 5.0)
    fitfun.b_1.bounds = (0.1, 100.0)

    res = xy.fit(fitfun)

    # Verify that the fit converged where it should have
    assert np.allclose(
        res[0]["value"],
        [0.783748, 40.344599, 71.560055, 4.989727, 0.330570],
        rtol=0.05,
    )

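# Note on the Poisson test above: with poisson_data=True the plugin uses a
# Poisson likelihood for integer counts instead of the Gaussian chi2 used in
# test_XYLike_chi2, which is why no yerr is passed to the constructor.
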
def test_XYLike_txt():

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # chi2 version

    xy = XYLike("test", x, y, yerr)

    fname = "test_txt.txt"

    xy.to_txt(fname)

    # Read it back in from the txt file and check that the Poisson
    # flag survives the round trip

    new_xy = XYLike.from_text_file("txt", fname)

    assert not new_xy.is_poisson

    # Poisson version

    xy = XYLike("test", x, y, poisson_data=True)

    fname = "test_txt_poisson.txt"

    xy.to_txt(fname)

    # Read it back in from the txt file

    new_xy = XYLike.from_text_file("txt", fname)

    assert new_xy.is_poisson

    # Remove the files
    os.remove("test_txt.txt")
    os.remove("test_txt_poisson.txt")

def xy_model_and_datalist():

    y = np.array(poiss_sig)

    xy = XYLike("test", x, y, poisson_data=True)

    fitfun = Line() + Gaussian()

    fitfun.b_1.bounds = (-10, 10.0)
    fitfun.a_1.bounds = (-100, 100.0)
    fitfun.F_2 = 60.0
    fitfun.F_2.bounds = (1e-3, 200.0)
    fitfun.mu_2 = 5.0
    fitfun.mu_2.bounds = (0.0, 100.0)
    fitfun.sigma_2.bounds = (1e-3, 10.0)

    model = Model(PointSource("fake", 0.0, 0.0, fitfun))

    data = DataList(xy)

    return model, data

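# How the helper above is typically consumed (a minimal sketch; the real
# tests using this fixture may do more): build the model and data list, then
# run a maximum-likelihood fit with JointLikelihood.
def _fit_xy_model_sketch():

    model, data = xy_model_and_datalist()

    jl = JointLikelihood(model, data)
    jl.set_minimizer("minuit")

    # jl.fit() returns (best-fit parameter table, likelihood values)
    return jl.fit()
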
def test_xy_plot():

    # Get fake data with Gaussian noise

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # Plot the data alone, then fit and plot again with the model

    xy = XYLike("test", x, y, yerr)

    xy.plot()

    fitfun = Line() + Gaussian()

    fitfun.F_2 = 60.0
    fitfun.mu_2 = 4.5

    res = xy.fit(fitfun)

    xy.plot()

def test_energy_time_fit():

    # Let's generate our dataset of 4 spectra with a normalization that
    # follows a powerlaw in time

    def generate_one(K):

        # Let's generate some data with y = Powerlaw(x)

        gen_function = Powerlaw()
        gen_function.K = K

        # Generate a dataset using the power law, and a
        # constant 30% error

        x = np.logspace(0, 2, 50)

        xyl_generator = XYLike.from_function(
            "sim_data", function=gen_function, x=x, yerr=0.3 * gen_function(x)
        )

        y = xyl_generator.y
        y_err = xyl_generator.yerr

        # xyl = XYLike("data", x, y, y_err)
        # xyl.plot(x_scale='log', y_scale='log')

        return x, y, y_err

    time_tags = np.array([1.0, 2.0, 5.0, 10.0])

    # This is the power law that defines the normalization as a function of time
    normalizations = 0.23 * time_tags ** (-1.2)

    datasets = list(map(generate_one, normalizations))

    # Now set up the fit and fit it

    time = IndependentVariable("time", 1.0, u.s)

    plugins = []

    for i, dataset in enumerate(datasets):

        x, y, y_err = dataset

        xyl = XYLike("data%i" % i, x, y, y_err)

        xyl.tag = (time, time_tags[i])

        assert xyl.tag == (time, time_tags[i], None)

        plugins.append(xyl)

    data = DataList(*plugins)

    spectrum = Powerlaw()
    spectrum.K.bounds = (0.01, 1000.0)

    src = PointSource("test", 0.0, 0.0, spectrum)

    model = Model(src)

    model.add_independent_variable(time)

    time_po = Powerlaw()
    time_po.K.bounds = (0.01, 1000)
    time_po.K.value = 2.0
    time_po.index = -1.5

    model.link(spectrum.K, time, time_po)

    jl = JointLikelihood(model, data)

    jl.set_minimizer("minuit")

    best_fit_parameters, likelihood_values = jl.fit()

    # Make sure we are within 10% of the expected result
    assert np.allclose(
        best_fit_parameters["value"].values,
        [0.25496115, -1.2282951, -2.01508341],
        rtol=0.1,
    )

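# What the link in test_energy_time_fit does, spelled out (an explanatory
# note, not extra test code): model.link(spectrum.K, time, time_po) removes
# spectrum.K from the free parameters and instead computes it from the law
# time_po evaluated at each plugin's time tag, i.e. roughly
#
#     spectrum.K  ->  time_po(t_i)   for the plugin tagged with t_i
#
# so the fit recovers the simulated normalization history
# K(t) = 0.23 * t**-1.2 through time_po.K and time_po.index.
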
def polyfit(
    x: Iterable[float],
    y: Iterable[float],
    grade: int,
    exposure: Iterable[float],
    bayes: bool = False,
) -> Tuple[Polynomial, float]:
    """
    Fit a polynomial to data. Not a member function, to allow
    parallel computation.

    :param x: the x coordinates of the data
    :param y: the y coordinates of the data
    :param grade: the polynomial order or grade
    :param exposure: the exposure of the interval
    :param bayes: whether to do a Bayesian fit or not
    """

    # Check that we have enough counts to perform the fit, otherwise
    # return a "zero polynomial"

    log.debug(f"starting polyfit with grade {grade}")

    if threeML_config.time_series.default_fit_method is not None:

        bayes = threeML_config.time_series.default_fit_method

        log.debug("using a default poly fit method")

    # Remove NaN entries from the data

    nan_mask = np.isnan(y)

    y = y[~nan_mask]
    x = x[~nan_mask]
    exposure = exposure[~nan_mask]

    non_zero_mask = y > 0
    n_non_zero = non_zero_mask.sum()

    if n_non_zero == 0:

        log.debug("no counts, return 0")

        # No data, nothing to do!
        return Polynomial([0.0] * (grade + 1)), 0.0

    # Create 3ML plugins and fit them with 3ML!
    # Should eventually allow better config

    # Select the model based on the grade
    shape = _grade_model_lookup[grade]()

    ps = PointSource("_dummy", 0, 0, spectral_shape=shape)

    model = Model(ps)

    avg = np.mean(y / exposure)

    log.debug(f"starting polyfit with avg norm {avg}")

    with silence_console_log():

        xy = XYLike(
            "series", x=x, y=y, exposure=exposure, poisson_data=True, quiet=True
        )

        if not bayes:

            # Make sure the model is positive

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.value = avg

                else:

                    v.value = 0.0

            # We actually use a line here, because a constant
            # returns a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            jl: JointLikelihood = JointLikelihood(model, DataList(xy))

            jl.set_minimizer("minuit")

            # If the fit fails, retry once and then just accept the result

            try:

                jl.fit(quiet=True)

            except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                log.debug("1st fit failed")

                try:

                    jl.fit(quiet=True)

                except (
                    FitFailed,
                    BadCovariance,
                    AllFitFailed,
                    CannotComputeCovariance,
                ):

                    log.debug("all MLE fits failed")

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            try:

                final_polynomial.set_covariace_matrix(jl.results.covariance_matrix)

            except Exception:

                log.exception("fit failed in channel")
                raise FitFailed()

            min_log_likelihood = xy.get_log_like()

        else:

            # Set smart priors

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.prior = Log_normal(
                        mu=np.log(avg), sigma=np.max([np.log(avg / 2), 1])
                    )
                    v.value = 1

                else:

                    v.prior = Gaussian(mu=0, sigma=2)
                    v.value = 1e-2

            # We actually use a line here, because a constant
            # returns a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            ba: BayesianAnalysis = BayesianAnalysis(model, DataList(xy))

            ba.set_sampler("emcee")

            ba.sampler.setup(n_iterations=500, n_burn_in=200, n_walkers=20)

            ba.sample(quiet=True)

            ba.restore_median_fit()

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(
                ba.results.estimate_covariance_matrix()
            )

            min_log_likelihood = xy.get_log_like()

    log.debug(f"-min loglike: {-min_log_likelihood}")

    return final_polynomial, -min_log_likelihood

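# Example of how polyfit might be called (a hypothetical sketch: the bin
# centers, exposures and counts below are made up): fit a first-order
# polynomial background to binned counts with per-bin exposures.
def _polyfit_usage_sketch():

    x = np.linspace(0.0, 10.0, 20)            # bin centers
    exposure = np.full_like(x, 0.5)           # exposure (s) per bin
    y = np.random.poisson(5.0, size=x.size)   # fake counts

    background_poly, neg_log_like = polyfit(
        x, y, grade=1, exposure=exposure, bayes=False
    )

    return background_poly, neg_log_like
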