def test_goodness_of_fit():

    # Let's generate some data with y = Powerlaw(x)

    gen_function = Powerlaw()

    # Generate a dataset using the power law, and a
    # constant 30% error

    x = np.logspace(0, 2, 50)

    xyl_generator = XYLike.from_function(
        "sim_data", function=gen_function, x=x, yerr=0.3 * gen_function(x)
    )

    y = xyl_generator.y
    y_err = xyl_generator.yerr

    fit_function = Powerlaw()

    xyl = XYLike("data", x, y, y_err)

    parameters, like_values = xyl.fit(fit_function)

    gof, all_results, all_like_values = xyl.goodness_of_fit()

    # Compute the number of degrees of freedom
    n_dof = len(xyl.x) - len(fit_function.free_parameters)

    # Get the observed value for chi2
    obs_chi2 = 2 * like_values["-log(likelihood)"]["data"]

    theoretical_gof = scipy.stats.chi2(n_dof).sf(obs_chi2)

    assert np.isclose(theoretical_gof, gof["total"], rtol=0.1)
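Note: goodness_of_fit() estimates the fit's p-value by Monte Carlo, simulating synthetic datasets from the best-fit model and refitting each one; gof["total"] is the fraction of simulations whose likelihood is worse than the observed one. For Gaussian errors the log-likelihood is chi2/2, so this Monte Carlo p-value should agree with the analytic survival function,

    p = P(chi2 with n_dof degrees of freedom >= obs_chi2) = scipy.stats.chi2(n_dof).sf(obs_chi2),

which is what the final assert checks (to within Monte Carlo noise, hence rtol=0.1).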
Example #2
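The snippets from here on reference module-level fixtures (x, gauss_signal, gauss_sigma, poiss_sig) that are defined in the original test module but not shown. A minimal sketch of a preamble that would make them runnable; the imports follow the public threeML/astromodels API, while the fixture values are hypothetical, chosen only to roughly match the best-fit values asserted below:

    import os

    import numpy as np
    import scipy.stats
    import astropy.units as u

    from threeML import XYLike, JointLikelihood, DataList
    from astromodels import (Line, Gaussian, Powerlaw, Model,
                             PointSource, IndependentVariable)

    # Hypothetical fixture values (the original module's values may differ)
    x = np.linspace(0.0, 10.0, 50)

    _truth = Line() + Gaussian()
    _truth.a_1 = 0.83
    _truth.b_1 = 40.2
    _truth.F_2 = 62.8
    _truth.mu_2 = 5.0
    _truth.sigma_2 = 0.27

    gauss_sigma = np.sqrt(_truth(x))                 # 1-sigma Gaussian errors
    gauss_signal = np.random.normal(_truth(x), gauss_sigma)
    poiss_sig = np.random.poisson(_truth(x))         # Poisson counts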
def test_XYLike_chi2():

    # Get fake data with Gaussian noise

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # Fit

    xy = XYLike("test", x, y, yerr)

    fitfun = Line() + Gaussian()
    fitfun.F_2 = 60.0
    fitfun.mu_2 = 4.5

    res = xy.fit(fitfun)

    # Verify that the fit converged where it should have
    assert np.allclose(
        res[0]["value"].values,
        [0.82896119, 40.20269202, 62.80359114, 5.04080011, 0.27286713],
        rtol=0.05,
    )

    # Test not setting yerr: XYLike should default to unit errors

    xy = XYLike("test", x, y)

    assert np.all(xy.yerr == np.ones_like(y))

    fitfun = Line() + Gaussian()
    fitfun.F_2 = 60.0
    fitfun.mu_2 = 4.5

    res = xy.fit(fitfun)
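Note: when yerr is omitted, XYLike assigns unit errors to every point (the assert above relies on this), so the fit degenerates to unweighted least squares; the best-fit values are still usable, but the reported parameter uncertainties are only meaningful if the data truly have unit variance.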
Example #3
def test_XYLike_assign_to_source():

    # Get fake data with Gaussian noise

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # Fit

    xy = XYLike("test", x, y, yerr)

    xy.assign_to_source("pts1")

    fitfun = Line() + Gaussian()
    fitfun.F_2 = 60.0
    fitfun.mu_2 = 4.5

    fitfun2 = Line() + Gaussian()
    fitfun2.F_2 = 60.0
    fitfun2.mu_2 = 4.5

    pts1 = PointSource("pts1", ra=0.0, dec=0.0, spectral_shape=fitfun)
    pts2 = PointSource("pts2", ra=2.5, dec=3.2, spectral_shape=fitfun2)

    for parameter in fitfun2.parameters.values():
        parameter.fix = True

    model = Model(pts1, pts2)
    data = DataList(xy)

    jl = JointLikelihood(model, data)

    _ = jl.fit()

    predicted_parameters = np.array(
        [0.82896119, 40.20269202, 62.80359114, 5.04080011, 0.27286713]
    )

    assert np.allclose([fitfun.a_1.value, fitfun.b_1.value, fitfun.F_2.value, fitfun.mu_2.value, fitfun.sigma_2.value],
                       predicted_parameters, rtol=0.05)

    # Test that the likelihood does not change when the parameters of the other source change
    log_like_before = jl.minus_log_like_profile(*predicted_parameters)

    fitfun2.F_2 = 120.0

    log_like_after = jl.minus_log_like_profile(*predicted_parameters)

    assert log_like_before == log_like_after

    # Now test that if we do not assign a source, then the log likelihood value will change
    xy.assign_to_source(None)

    # Test that this time the likelihood does change when the parameters of the other source change
    log_like_before = jl.minus_log_like_profile(*predicted_parameters)

    fitfun2.F_2 = 60.0

    log_like_after = jl.minus_log_like_profile(*predicted_parameters)

    assert log_like_before != log_like_after
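Note: assign_to_source makes the plugin compute its model expectation from the named source alone, so other sources in the Model do not contribute to its likelihood; that is why changing fitfun2 has no effect until the assignment is cleared with assign_to_source(None).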
Example #4
def test_XYLike_dataframe():

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # chi2 version

    xy = XYLike("test", x, y, yerr)

    df = xy.to_dataframe()

    # read back in dataframe

    new_xy = XYLike.from_dataframe("df", df)

    assert not new_xy.is_poisson

    # poisson version

    xy = XYLike("test", x, y, poisson_data=True)

    df = xy.to_dataframe()

    # read back in dataframe

    new_xy = XYLike.from_dataframe("df", df, poisson=True)

    assert new_xy.is_poisson
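The round trip can also be verified on the data themselves; a minimal sketch, using the x and y properties that appear elsewhere in these examples:

    assert np.allclose(new_xy.x, xy.x)
    assert np.allclose(new_xy.y, xy.y)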
Example #5
def test_XYLike_poisson():

    # Now Poisson case
    y = np.array(poiss_sig)

    xy = XYLike("test", x, y, poisson_data=True)

    fitfun = Line() + Gaussian()

    fitfun.F_2 = 60.0
    fitfun.F_2.bounds = (0, 200.0)
    fitfun.mu_2 = 5.0
    fitfun.a_1.bounds = (0.1, 5.0)
    fitfun.b_1.bounds = (0.1, 100.0)

    res = xy.fit(fitfun)

    # Verify that the fit converged where it should have

    assert np.allclose(res[0]["value"],
                       [0.783748, 40.344599, 71.560055, 4.989727, 0.330570],
                       rtol=0.05)
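Note: with poisson_data=True the plugin uses a Poisson likelihood instead of chi-square, so no yerr is passed; the bounds set above also keep the model positive everywhere, which a Poisson likelihood requires (the log of a non-positive rate is undefined).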
Example #6
def test_XYLike_txt():

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # chi2 version

    xy = XYLike("test", x, y, yerr)

    fname = "test_txt.txt"

    xy.to_txt(fname)

    # read back in txt file

    new_xy = XYLike.from_text_file("txt", fname)

    assert not new_xy.is_poisson

    # poisson version

    xy = XYLike("test", x, y, poisson_data=True)

    fname = "test_txt_poisson.txt"

    xy.to_txt(fname)

    # read back in txt file

    new_xy = XYLike.from_text_file("txt", fname)

    assert new_xy.is_poisson

    # Remove files
    os.remove("test_txt.txt")
    os.remove("test_txt_poisson.txt")
Example #7
def xy_model_and_datalist():

    y = np.array(poiss_sig)

    xy = XYLike("test", x, y, poisson_data=True)

    fitfun = Line() + Gaussian()

    fitfun.b_1.bounds = (-10, 10.0)
    fitfun.a_1.bounds = (-100, 100.0)
    fitfun.F_2 = 60.0
    fitfun.F_2.bounds = (1e-3, 200.0)
    fitfun.mu_2 = 5.0
    fitfun.mu_2.bounds = (0.0, 100.0)
    fitfun.sigma_2.bounds = (1e-3, 10.0)

    model = Model(PointSource("fake", 0.0, 0.0, fitfun))

    data = DataList(xy)

    return model, data
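A short sketch of how this fixture would typically be consumed, mirroring the JointLikelihood pattern used in the other examples:

    model, data = xy_model_and_datalist()

    jl = JointLikelihood(model, data)

    best_fit_parameters, likelihood_values = jl.fit()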
Example #8
def test_xy_plot():
    # Get fake data with Gaussian noise

    yerr = np.array(gauss_sigma)
    y = np.array(gauss_signal)

    # Fit

    xy = XYLike("test", x, y, yerr)

    xy.plot()

    fitfun = Line() + Gaussian()
    fitfun.F_2 = 60.0
    fitfun.mu_2 = 4.5

    res = xy.fit(fitfun)

    xy.plot()
Example #9
def test_energy_time_fit():

    # Let's generate our dataset of 4 spectra with a normalization that follows
    # a powerlaw in time

    def generate_one(K):
        # Let's generate some data with y = Powerlaw(x)

        gen_function = Powerlaw()
        gen_function.K = K

        # Generate a dataset using the power law, and a
        # constant 30% error

        x = np.logspace(0, 2, 50)

        xyl_generator = XYLike.from_function("sim_data", function=gen_function,
                                             x=x,
                                             yerr=0.3 * gen_function(x))

        y = xyl_generator.y
        y_err = xyl_generator.yerr

        # xyl = XYLike("data", x, y, y_err)

        # xyl.plot(x_scale='log', y_scale='log')

        return x, y, y_err

    time_tags = np.array([1.0, 2.0, 5.0, 10.0])

    # This is the power law that defines the normalization as a function of time

    normalizations = 0.23 * time_tags ** (-1.2)

    datasets = list(map(generate_one, normalizations))

    # Now set up the fit and fit it

    time = IndependentVariable("time", 1.0, u.s)

    plugins = []

    for i, dataset in enumerate(datasets):
        x, y, y_err = dataset

        xyl = XYLike("data%i" % i, x, y, y_err)

        xyl.tag = (time, time_tags[i])

        assert xyl.tag == (time, time_tags[i], None)

        plugins.append(xyl)

    data = DataList(*plugins)

    spectrum = Powerlaw()
    spectrum.K.bounds = (0.01, 1000.0)

    src = PointSource("test", 0.0, 0.0, spectrum)

    model = Model(src)

    model.add_independent_variable(time)

    time_po = Powerlaw()
    time_po.K.bounds = (0.01, 1000)
    time_po.K.value = 2.0
    time_po.index = -1.5

    model.link(spectrum.K, time, time_po)
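    # My reading of the linking API: for a plugin tagged with time t, the value
    # of spectrum.K entering that plugin's likelihood is time_po(t), so K is no
    # longer a free parameter; the free parameters become those of time_po.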

    jl = JointLikelihood(model, data)

    jl.set_minimizer("minuit")

    best_fit_parameters, likelihood_values = jl.fit()

    # Make sure we are within 10% of the expected result

    assert np.allclose(
        best_fit_parameters["value"].values,
        [0.25496115, -1.2282951, -2.01508341],
        rtol=0.1,
    )
Example #10
def polyfit(x: Iterable[float], y: Iterable[float], grade: int,
            exposure: Iterable[float],
            bayes: bool = False) -> Tuple[Polynomial, float]:
    """
    Fit a polynomial to data.
    Not a member function, to allow parallel computation.

    :param x: the x coordinates of the data
    :param y: the y coordinates of the data
    :param grade: the polynomial order (grade)
    :param exposure: the exposure of each interval
    :param bayes: whether to do a Bayesian fit
    :returns: the fitted Polynomial and the minus log-likelihood at the best fit
    """

    # Check that we have enough counts to perform the fit, otherwise
    # return a "zero polynomial"

    log.debug(f"starting polyfit with grade {grade} ")

    if threeML_config.time_series.default_fit_method is not None:

        bayes = threeML_config.time_series.default_fit_method
        log.debug("using a default poly fit method")

    nan_mask = np.isnan(y)

    y = y[~nan_mask]
    x = x[~nan_mask]
    exposure = exposure[~nan_mask]

    non_zero_mask = y > 0
    n_non_zero = non_zero_mask.sum()
    if n_non_zero == 0:

        log.debug("no counts, return 0")

        # No data, nothing to do!
        return Polynomial([0.0]*(grade+1)), 0.0

    # create 3ML plugins and fit them with 3ML!
    # should eventually allow better config

    # select the model based on the grade

    shape = _grade_model_lookup[grade]()

    ps = PointSource("_dummy", 0, 0, spectral_shape=shape)

    model = Model(ps)

    avg = np.mean(y/exposure)

    log.debug(f"starting polyfit with avg norm {avg}")

    with silence_console_log():

        xy = XYLike("series", x=x, y=y, exposure=exposure,
                    poisson_data=True, quiet=True)

        if not bayes:

            # make sure the model is positive

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)

                    v.value = avg

                else:

                    v.value = 0.0

            # we actually use a line here
            # because a constant returns a
            # single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            jl: JointLikelihood = JointLikelihood(model, DataList(xy))

            jl.set_minimizer("minuit")

            # if the fit fails, retry and then just accept

            try:

                jl.fit(quiet=True)

            except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                log.debug("1st fit failed")

                try:

                    jl.fit(quiet=True)

                except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                    log.debug("all MLE fits failed")

                    pass

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            try:
                final_polynomial.set_covariace_matrix(
                    jl.results.covariance_matrix)

            except Exception:

                log.exception("Fit failed in channel")
                raise FitFailed()

            min_log_likelihood = xy.get_log_like()

        else:

            # set smart priors

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.prior = Log_normal(
                        mu=np.log(avg), sigma=np.max([np.log(avg/2), 1]))
                    v.value = 1

                else:

                    v.prior = Gaussian(mu=0, sigma=2)
                    v.value = 1e-2

            # we actually use a line here
            # because a constant returns a
            # single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            ba: BayesianAnalysis = BayesianAnalysis(model, DataList(xy))

            ba.set_sampler("emcee")

            ba.sampler.setup(n_iterations=500, n_burn_in=200, n_walkers=20)

            ba.sample(quiet=True)

            ba.restore_median_fit()

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(
                ba.results.estimate_covariance_matrix())

            min_log_likelihood = xy.get_log_like()

    log.debug(f"-min loglike: {-min_log_likelihood}")

    return final_polynomial, -min_log_likelihood
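A hedged usage sketch for polyfit. It assumes the preamble imports above plus, from the surrounding module, Polynomial, threeML_config, _grade_model_lookup, the logger, the fit-failure exceptions, and the typing names Iterable and Tuple; the light-curve values are hypothetical:

    bins = np.linspace(0.0, 100.0, 101)          # hypothetical time bins
    mids = 0.5 * (bins[:-1] + bins[1:])
    counts = np.random.poisson(10.0, size=mids.size).astype(float)
    exposure = np.diff(bins)                     # full exposure per bin

    poly, neg_log_like = polyfit(mids, counts, grade=1, exposure=exposure)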