Example #1
    def go(self,
           continue_on_failure=True,
           compute_covariance=False,
           verbose=False,
           **options_for_parallel_computation):

        # Generate the data frame which will contain all results

        self._continue_on_failure = continue_on_failure

        self._compute_covariance = compute_covariance

        # let's iterate, perform the fit and fill the data frame

        if threeML_config["parallel"]["use_parallel"]:

            # Parallel computation

            with silence_console_log(and_progress_bars=False):
                client = ParallelClient(**options_for_parallel_computation)

                results = client.execute_with_progress_bar(
                    self.worker, list(range(self._n_iterations)))

        else:

            # Serial computation

            results = []

            with silence_console_log(and_progress_bars=False):

                for i in trange(self._n_iterations,
                                desc="Goodness of fit computation"):

                    results.append(self.worker(i))

        assert len(results) == self._n_iterations, (
            "Something went wrong, I have %s results "
            "for %s intervals" % (len(results), self._n_iterations))

        # Store the results in the data frames

        parameter_frames = pd.concat([x[0] for x in results],
                                     keys=list(range(self._n_iterations)))
        like_frames = pd.concat([x[1] for x in results],
                                keys=list(range(self._n_iterations)))

        # Store a list with all results (this is a list of lists, each list contains the results for the different
        # iterations for the same model)
        self._all_results = []

        for i in range(self._n_models):

            this_model_results = [x[2][i] for x in results]

            self._all_results.append(AnalysisResultsSet(this_model_results))

        return parameter_frames, like_frames
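
A minimal usage sketch for the driver above, assuming it is a method of a likelihood-set-style object; the object `jl_set` and its construction are hypothetical, and only the signature of go() comes from the snippet.

# Hypothetical sketch: any object `jl_set` exposing the go() method above.
from threeML import threeML_config

threeML_config["parallel"]["use_parallel"] = False  # exercise the serial branch

parameter_frames, like_frames = jl_set.go(
    continue_on_failure=True,   # keep iterating even if a single fit fails
    compute_covariance=False,   # skip covariance estimation for speed
)
print(parameter_frames.head())  # one row per iteration and parameter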
    def _fit_polynomials(self, bayes=False):
        """
        fits a polynomial to all channels over the input time intervals

        :param bayes: if True, perform a Bayesian fit instead of a maximum-likelihood fit
        :return:
        """

        # mark that we have fit a poly now

        self._poly_fit_exists = True

        # we need to adjust the selection to the true intervals of the time-binned spectra

        tmp_poly_intervals = self._poly_intervals
        poly_intervals = self._adjust_to_true_intervals(tmp_poly_intervals)
        self._poly_intervals = poly_intervals

        # now lets get all the counts, exposure and midpoints for the
        # selection

        selected_counts = []
        selected_exposure = []
        selected_midpoints = []

        for selection in poly_intervals:

            # get the mask of these bins

            mask = self._select_bins(selection.start_time, selection.stop_time)

            # the counts will be (time, channel) here,
            # so the mask is selecting time.
            # a sum along axis=0 is a sum in time, while axis=1 is a sum in energy

            selected_counts.extend(
                self._binned_spectrum_set.counts_per_bin[mask])

            selected_exposure.extend(
                self._binned_spectrum_set.exposure_per_bin[mask])
            selected_midpoints.extend(
                self._binned_spectrum_set.time_intervals.mid_points[mask]
            )

        selected_counts = np.array(selected_counts)
        selected_midpoints = np.array(selected_midpoints)
        selected_exposure = np.array(selected_exposure)

        # Now we will find the best polynomial order, unless the user specified one.
        # The total counts (summed over channels) are binned for this.

        if self._user_poly_order == -1:

            self._optimal_polynomial_grade = (
                self._fit_global_and_determine_optimum_grade(
                    selected_counts.sum(axis=1),
                    selected_midpoints,
                    selected_exposure,
                    bayes=bayes,
                )
            )

            log.info(
                "Auto-determined polynomial order: %d"
                % self._optimal_polynomial_grade
            )

        else:

            self._optimal_polynomial_grade = self._user_poly_order

        if threeML_config["parallel"]["use_parallel"]:

            def worker(counts):

                with silence_console_log():
                    polynomial, _ = polyfit(
                        selected_midpoints,
                        counts,
                        self._optimal_polynomial_grade,
                        selected_exposure,
                        bayes=bayes,
                    )

                return polynomial

            client = ParallelClient()

            polynomials = client.execute_with_progress_bar(
                worker, selected_counts.T, name=f"Fitting {self._instrument} background")

        else:

            polynomials = []

            # now fit the light curve of each channel
            # and save the estimated polynomial

            for counts in tqdm(
                selected_counts.T, desc=f"Fitting {self._instrument} background"
            ):

                with silence_console_log():
                    polynomial, _ = polyfit(
                        selected_midpoints,
                        counts,
                        self._optimal_polynomial_grade,
                        selected_exposure,
                        bayes=bayes,
                    )

                    polynomials.append(polynomial)

        self._polynomials = polynomials
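
The axis comments above are easy to invert, so here is a standalone numpy sketch (toy numbers, no 3ML) of the (time, channel) layout that the time mask, the axis=1 sum, and the .T iteration rely on.

import numpy as np

counts = np.arange(12).reshape(4, 3)         # 4 time bins x 3 energy channels
mask = np.array([True, False, True, False])  # a time selection, as from _select_bins

selected = counts[mask]                      # shape (2, 3): two time bins kept
total_per_bin = selected.sum(axis=1)         # total light curve, summed over channels

for channel_counts in selected.T:            # one count time series per channel
    assert channel_counts.shape == (2,)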
Example #4
def compute_ppc(analysis: BayesianAnalysis,
                result: BayesianResults,
                n_sims: int,
                file_name: str,
                overwrite: bool = False,
                return_ppc: bool = False) -> Union["PPC", None]:
    """
    Compute a posterior predictive check from a 3ML DispersionLike
    Plugin. The resulting posterior data simulations are stored
    in an HDF5 file which can be read by the PPC class

    :param analysis: 3ML bayesian analysis object
    :param result: 3ML analysis result
    :param n_sims: the number of posterior simulations to create
    :param file_name: the filename to save to
    :param overwrite: whether to overwrite an existing file
    :param return_ppc: if True, the PPC object will be returned directly
    :returns: the PPC object if return_ppc is True, otherwise None
    :rtype: Union["PPC", None]

    """

    update_logging_level("WARNING")

    p = Path(file_name)

    if p.exists() and (not overwrite):

        raise RuntimeError(f"{file_name} already exists!")

    with h5py.File(file_name, 'w', libver='latest') as database:

        # first we collect the real data and save it so that we will not
        # have to look it up in the future

        data_names = []

        database.attrs['n_sims'] = n_sims

        for data in analysis.data_list.values():

            data_names.append(data.name)
            grp = database.create_group(data.name)
            grp.attrs['exposure'] = data.exposure
            grp.create_dataset('ebounds',
                               data=data.response.ebounds,
                               compression='lzf')
            grp.create_dataset('obs_counts',
                               data=data.observed_counts,
                               compression='lzf')
            grp.create_dataset('bkg_counts',
                               data=data.background_counts,
                               compression='lzf')
            grp.create_dataset('mask', data=data.mask, compression='lzf')

        # select random draws from the posterior

        n_samples = len(result.samples.T)

        if n_samples < n_sims:

            print(f"only {n_samples} posterior samples available; reducing n_sims to match")

            n_sims = n_samples

        choices = np.random.choice(n_samples,
                                   replace=False,
                                   size=n_sims)

        # for each posterior sample

        with silence_console_log(and_progress_bars=False):

            for j, choice in enumerate(tqdm(choices,
                                            desc="sampling posterior")):

                # get the parameters of the choice

                params = result.samples.T[choice]

                # set the analysis free parameters to the value of the posterior
                for i, (k, v) in enumerate(
                        analysis.likelihood_model.free_parameters.items()):
                    v.value = params[i]

                # create simulated data sets with these free parameters
                sim_dl = DataList(*[
                    data.get_simulated_dataset()
                    for data in analysis.data_list.values()
                ])

                # set the model of the simulated data to the model of the simulation
                for i, data in enumerate(sim_dl.values()):

                    # clone the model for safety's sake
                    # and set it. For now we do nothing further with this

                    data.set_model(clone_model(analysis.likelihood_model))

                    # store the PPC data in the file
                    grp = database[data_names[i]]
                    grp.create_dataset('ppc_counts_%d' % j,
                                       data=data.observed_counts,
                                       compression='lzf')
                    grp.create_dataset('ppc_background_counts_%d' % j,
                                       data=data.background_counts,
                                       compression='lzf')
                # sim_dls.append(sim_dl)
        if return_ppc:

            return PPC(file_name)
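
A hedged usage sketch: it assumes `analysis` is a BayesianAnalysis that has already been sampled, so that `analysis.results` holds the matching BayesianResults; neither object is constructed in the snippet above.

ppc = compute_ppc(
    analysis,
    analysis.results,       # the BayesianResults from the sampling run
    n_sims=500,             # number of posterior predictive simulations
    file_name="my_ppc.h5",
    overwrite=True,
    return_ppc=True,        # also return the PPC object, not just the file
)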
Example #5
def unbinned_polyfit(events: Iterable[float], grade: int, t_start: float, t_stop: float, exposure: float, bayes: bool) -> Tuple[Polynomial, float]:
    """
    function to fit a polynomial to unbinned event data. 
    not a member to allow parallel computation

    :param events: the events to fit
    :param grade: the polynomial order (grade)
    :param t_start: the start time to fit over
    :param t_stop: the end time to fit over
    :param exposure: the exposure of the interval
    :param bayes: if True, perform a Bayesian fit instead of MLE

    """

    log.debug(f"starting unbinned_polyfit with grade {grade}")
    log.debug(f"have {len(events)} events with {exposure} exposure")

    # create 3ML plugins and fit them with 3ML!
    # should eventually allow better configuration

    # select the model based on the grade

    if threeML_config.time_series.default_fit_method is not None:

        bayes = threeML_config.time_series.default_fit_method
        log.debug("using a default poly fit method")

    if len(events) == 0:

        log.debug("no events! returning zero")

        return Polynomial([0] * (grade + 1)), 0

    shape = _grade_model_lookup[grade]()

    with silence_console_log():

        ps = PointSource("dummy", 0, 0, spectral_shape=shape)

        model = Model(ps)

        observation = EventObservation(events, exposure, t_start, t_stop)

        xy = UnbinnedPoissonLike("series", observation=observation)

        if not bayes:

            # make sure the model is positive

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)

                    v.value = 10

                else:

                    v.value = 0.0

            # we actually use a line here
            # because a constant would return
            # a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            jl: JointLikelihood = JointLikelihood(model, DataList(xy))

            grid_minimizer = GlobalMinimization("grid")

            local_minimizer = LocalMinimization("minuit")

            my_grid = {
                model.dummy.spectrum.main.shape.a: np.logspace(0, 3, 10)}

            grid_minimizer.setup(
                second_minimization=local_minimizer, grid=my_grid)

            jl.set_minimizer(grid_minimizer)

            # if the fit fails, retry once and then just accept the result

            try:

                jl.fit(quiet=True)

            except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                try:

                    jl.fit(quiet=True)

                except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                    log.debug("all MLE fits failed, returning zero")

                    return Polynomial([0]*(grade + 1)), 0

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(jl.results.covariance_matrix)

            min_log_likelihood = xy.get_log_like()

        else:

            # set smart priors

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.prior = Log_normal(mu=np.log(5), sigma=np.log(5))
                    v.value = 1

                else:

                    v.prior = Gaussian(mu=0, sigma=.5)
                    v.value = 0.1

            # we actually use a line here
            # because a constant would return
            # a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            ba: BayesianAnalysis = BayesianAnalysis(model, DataList(xy))

            ba.set_sampler("emcee")

            ba.sampler.setup(n_iterations=500, n_burn_in=200, n_walkers=20)

            ba.sample(quiet=True)

            ba.restore_median_fit()

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(
                ba.results.estimate_covariance_matrix())

            min_log_likelihood = xy.get_log_like()

    log.debug(f"-min loglike: {-min_log_likelihood}")

    return final_polynomial, -min_log_likelihood
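
An illustrative call with synthetic arrival times; the event rate, interval, and exposure below are arbitrary and only show the expected shapes and units.

import numpy as np

events = np.sort(np.random.uniform(0.0, 10.0, size=200))  # event times in [0, 10] s

poly, neg_log_like = unbinned_polyfit(
    events, grade=1, t_start=0.0, t_stop=10.0, exposure=10.0, bayes=False
)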
Example #6
def polyfit(x: Iterable[float], y: Iterable[float], grade: int, exposure: Iterable[float], bayes: bool = False) -> Tuple[Polynomial, float]:
    """ 
    function to fit a polynomial to data. 
    not a member to allow parallel computation

    :param x: the x coord of the data
    :param y: the y coord of the data
    :param grade: the polynomial order (grade)
    :param exposure: the exposure of the interval
    :param bayes: if True, perform a Bayesian fit instead of MLE


    """

    # Check that we have enough counts to perform the fit, otherwise
    # return a "zero polynomial"

    log.debug(f"starting polyfit with grade {grade} ")

    if threeML_config.time_series.default_fit_method is not None:

        bayes = threeML_config.time_series.default_fit_method
        log.debug("using a default poly fit method")

    nan_mask = np.isnan(y)

    y = y[~nan_mask]
    x = x[~nan_mask]
    exposure = exposure[~nan_mask]

    non_zero_mask = y > 0
    n_non_zero = non_zero_mask.sum()
    if n_non_zero == 0:

        log.debug("no counts, return 0")

        # No data, nothing to do!
        return Polynomial([0.0]*(grade+1)), 0.0

    # create 3ML plugins and fit them with 3ML!
    # should eventually allow better configuration

    # select the model based on the grade

    shape = _grade_model_lookup[grade]()

    ps = PointSource("_dummy", 0, 0, spectral_shape=shape)

    model = Model(ps)

    avg = np.mean(y/exposure)

    log.debug(f"starting polyfit with avg norm {avg}")

    with silence_console_log():

        xy = XYLike("series", x=x, y=y, exposure=exposure,
                    poisson_data=True, quiet=True)

        if not bayes:

            # make sure the model is positive

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)

                    v.value = avg

                else:

                    v.value = 0.0

            # we actually use a line here
            # because a constant would return
            # a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            jl: JointLikelihood = JointLikelihood(model, DataList(xy))

            jl.set_minimizer("minuit")

            # if the fit fails, retry once and then just accept the result

            try:

                jl.fit(quiet=True)

            except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                log.debug("1st fit failed")

                try:

                    jl.fit(quiet=True)

                except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                    log.debug("all MLE fits failed")

                    pass

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            try:
                final_polynomial.set_covariace_matrix(
                    jl.results.covariance_matrix)

            except Exception:

                log.exception("Fit failed in channel")
                raise FitFailed()

            min_log_likelihood = xy.get_log_like()

        else:

            # set smart priors

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.prior = Log_normal(
                        mu=np.log(avg), sigma=np.max([np.log(avg/2), 1]))
                    v.value = 1

                else:

                    v.prior = Gaussian(mu=0, sigma=2)
                    v.value = 1e-2

            # we actually use a line here
            # because a constant would return
            # a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            ba: BayesianAnalysis = BayesianAnalysis(model, DataList(xy))

            ba.set_sampler("emcee")

            ba.sampler.setup(n_iterations=500, n_burn_in=200, n_walkers=20)

            ba.sample(quiet=True)

            ba.restore_median_fit()

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(
                ba.results.estimate_covariance_matrix())

            min_log_likelihood = xy.get_log_like()

    log.debug(f"-min loglike: {-min_log_likelihood}")

    return final_polynomial, -min_log_likelihood
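
An illustrative call on synthetic binned data; the bin midpoints, exposures, and Poisson rate are arbitrary.

import numpy as np

x = np.linspace(0.5, 9.5, 10)             # bin midpoints in seconds
exposure = np.ones_like(x)                # 1 s of exposure per bin
y = np.random.poisson(5.0, size=x.size)   # observed Poisson counts per bin

poly, neg_log_like = polyfit(x, y, grade=2, exposure=exposure, bayes=False)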