Exemplo n.º 1
0
    def _compute_data(self, cand):
        """Fit the candidate's profile with multiple gaussian
            components and return the fit's parameters.

            Components are added one at a time. After each fit an F-test
            compares the new fit against the previously accepted one, and
            fitting stops as soon as the extra component is not justified
            (or one of the other stop conditions below is met). The last
            accepted fit is returned; if even the first component is
            rejected this is built from the offset-only parameter list.

            Input:
                cand: A ratings2.0 Candidate object.

            Output:
                multigaussfit: The corresponding fit. A MultiGaussFit object.

            Raises:
                utils.RatingError: If scipy.optimize.leastsq reports a
                    failure status (only reachable when self.USE_MPFIT
                    is false).
        """
        prof = cand.get_from_cache('profile')
        pfd = cand.get_from_cache('pfd')
        data = utils.get_scaled_profile(prof, pfd.varprof)

        # Initialize some starting values
        nbins = len(data)
        ngaussians = 0
        # The parameter list is laid out as
        #   [offset, amp_1, fwhm_1, phase_1, amp_2, fwhm_2, phase_2, ...]
        # After normalization the first parameter (offset) should be close to zero
        prev_params = [0.0]
        # Nothing fit yet, so residuals are just the data values
        prev_residuals = data - np.zeros_like(data)
        # No need to normalize chi^2 by variance since we already did that to the
        # data
        prev_chi2 = sum(prev_residuals * prev_residuals)
        prev_dof = nbins
        fit = True

        # We will now start fitting Gaussian profile components until the
        # additional components are no longer statistically needed to improve the
        # fit.  The starting parameter guesses for each new component will come
        # from the highest remaining residual and from the previous best-fit values
        # for previous components
        while fit:
            ngaussians += 1
            # Start from the best-fit values of the previous pass
            trial_params = list(prev_params)

            # Guess the parameters for the next profile component: place it
            # on the largest remaining residual
            amplitude = max(prev_residuals)
            # FWHM of a unit-area Gaussian whose peak height is `amplitude`
            fwhm = 2 * np.sqrt(
                2 * np.log(2)) / (np.sqrt(2 * np.pi) * amplitude)
            phase = np.argmax(prev_residuals) / float(nbins)
            trial_params.extend([amplitude, fwhm, phase])

            if self.USE_MPFIT:
                # params_dict is used by mpfit to get initial values and constraints on
                # parameters
                params_dict = []
                for ii, param in enumerate(trial_params):
                    if ii == 0:
                        # The first parameter is the offset, which can be negative and
                        # should be allowed to vary more
                        params_dict.append({
                            "value": param,
                            "fixed": False,
                            "limited": [False, False],
                            "limits": [0.0, 0.0]
                        })
                    elif (ii - 1) % 3 == 1:
                        # This is the FWHM, and is allowed to vary between
                        # 1/nbins and 1.0
                        params_dict.append({
                            "value": param,
                            "fixed": False,
                            "limited": [True, True],
                            "limits": [1.0 / nbins, 1.0]
                        })
                    else:
                        # Amplitude or phase: allow 25%..175% of the guess.
                        # That window is inverted for a negative guess and
                        # has zero width for a guess of exactly 0 (e.g. a
                        # peak in bin 0 gives phase 0.0), so order the
                        # bounds and leave the parameter unbounded when the
                        # window collapses.
                        # NOTE(review): mpfit is expected to refuse
                        # lower >= upper limits -- confirm against the
                        # bundled mpfit version.
                        lower, upper = sorted((0.25 * param, 1.75 * param))
                        bounded = bool(lower < upper)
                        params_dict.append({
                            "value": param,
                            "fixed": False,
                            "limited": [bounded, bounded],
                            "limits": [lower, upper]
                        })

                # Define the fitting function for mpfit
                def func(params, fjac=None, errs=None):
                    fit = utils.multigaussfit_from_paramlist(params)
                    # Return values are [status, residuals]
                    return [0, fit.get_resids(data)]

                # Now fit
                # NOTE(review): unlike the leastsq branch below, the mpfit
                # result is not checked for failure -- confirm whether its
                # status should be inspected here.
                mpfit_out = mpfit.mpfit(func, parinfo=params_dict, quiet=True)
                # Store the new best-fit parameters
                new_params = mpfit_out.params
            else:
                import scipy.optimize

                def func(params):
                    fit = utils.multigaussfit_from_paramlist(params)
                    return fit.get_resids(data)

                new_params, status = scipy.optimize.leastsq(func, trial_params)
                # leastsq reports success with an integer flag of 1, 2, 3 or 4
                if status not in (1, 2, 3, 4):
                    raise utils.RatingError("Status returned by " \
                                        "scipy.optimize.leastsq (%d) " \
                                        "indicates the fit failed!" % status)

            # Calculate the new residuals and statistics
            new_fit = utils.multigaussfit_from_paramlist(new_params)
            new_residuals = new_fit.get_resids(data)
            new_chi2 = new_fit.get_chisqr(data)
            new_dof = new_fit.get_dof(len(data))  # Degrees-of-freedom
            # Calculate the F-statistic for the fit, i.e. the probability that the
            # additional profile component is /not/ required by the data
            F_stat = psr_utils.Ftest(prev_chi2, prev_dof,
                                     new_chi2, new_dof)

            # If the F-test probability is greater than some threshold, then the
            # additional Gaussian did not significantly improve the fit and we
            # should stop.  The nan test is needed because if the fit is /worse/
            # then Ftest doesn't return a valid number.  Also stop if we reach
            # the maximum number of Gaussian profile components, or if the
            # fwhm of the added component falls outside [1/nbins, 1.0]
            if (F_stat > self.F_stat_threshold or np.isnan(F_stat)
                    or ngaussians > self.max_gaussians
                    or new_fit.components[-1].fwhm > 1.0
                    or new_fit.components[-1].fwhm < 1.0 / nbins):
                fit = False
            # Otherwise, keep fitting and update the parameters for the next pass
            else:
                fit = True
                prev_params = new_params
                prev_residuals = new_residuals
                prev_chi2 = new_chi2
                prev_dof = new_dof

        # We stop when a fit is no longer needed, so we have to return the values
        # from the /previous/ run (otherwise we return the unneeded fit)
        finalfit = utils.multigaussfit_from_paramlist(prev_params)
        return finalfit
Exemplo n.º 2
0
def fit_vonmises(data, data_avg, data_std, max_vonmises, F_stat_threshold):
    """
    Fit one or more von Mises profiles to data using mpfit.

    Components are added one at a time.  After each fit an F-test compares
    the new fit against the previously accepted one, and fitting stops as
    soon as the extra component is not justified or the maximum number of
    components has been reached.  The values of the last accepted fit are
    returned.

    Parameters
    ----------
    data : ndarray
        The data to fit.  The input is not modified; normalization is
        applied to a new array.
    data_avg : float
        The average of the data (used for normalization)
    data_std : float
        The standard deviation of the data (used for normalization)
    max_vonmises : int
        The maximum number of von Mises profile components to fit
    F_stat_threshold : float
        The threshold probability that a profile component is not required for
        rejecting more components

    Returns
    -------
    params : sequence of float
        The best fit von Mises parameters.  The first entry is the offset from
        zero baseline and the remaining entries are amplitudes, concentrations,
        and phases, i.e.
        params = [offset, amp_1, conc_1, phase_1, amp_2, conc_2, phase_2, etc.]
        This is the list ``[0.0]`` when no component was accepted, otherwise
        the ``params`` attribute of the mpfit result.
    red_chi2 : float
        The reduced chi^2 of the fit
    nvonmises : int
        The number of von Mises profile components used to fit the data
    """
    # Normalize the data to have average = 0 and std_dev = 1 (It would be best
    # to do so based on values for the off-pulse region, but we don't know what
    # that is...).  Build a new array instead of using in-place operators so
    # the caller's array is left untouched and integer input is accepted.
    data = (N.asarray(data) - data_avg) / data_std

    # Initialize some starting values
    nbins = len(data)
    nvonmises = 0
    # After normalization the first parameter (offset) should be close to zero
    prev_params = [0.0]
    # Nothing fit yet, so residuals are just the data values
    prev_residuals = data - N.zeros_like(data)
    # No need to normalize chi^2 by variance since we already did that to the
    # data
    prev_chi2 = sum(prev_residuals * prev_residuals)
    prev_dof = nbins
    fit = True

    # We will now start fitting von Mises profile components until the
    # additional components are no longer statistically needed to improve the
    # fit.  The starting parameter guesses for each new component will come
    # from the highest remaining residual and from the previous best-fit values
    # for previous components
    while fit:
        nvonmises += 1
        # Start from the best-fit values of the previous pass
        trial_params = list(prev_params)

        # Guess the parameters for the next profile component: place it on
        # the largest remaining residual
        amplitude = max(prev_residuals)
        # Assume a concentration appropriate for a FWHM of 0.075 (in phase)
        concentration = 25.0
        location = N.argmax(prev_residuals) / float(nbins)
        trial_params.extend([amplitude, concentration, location])
        # params_dict is used by mpfit to get initial values and constraints on
        # parameters
        params_dict = []
        for ii, param in enumerate(trial_params):
            if ii == 0:
                # The first parameter is the offset, which can be negative and
                # should be allowed to vary more
                params_dict.append({
                    "value": param,
                    "fixed": False,
                    "limited": [False, False],
                    "limits": [0.0, 0.0]
                })
            elif (ii - 1) % 3 == 1:
                # This is the concentration, and is allowed to vary between
                # values appropriate for FWHMs of ~0.015 to ~0.5
                params_dict.append({
                    "value": param,
                    "fixed": False,
                    "limited": [True, True],
                    "limits": [0.1, 600.0]
                })
            else:
                # Amplitude or location: allow 25%..175% of the guess.  That
                # window is inverted for a negative guess and has zero width
                # for a guess of exactly 0 (e.g. a peak in bin 0 gives
                # location 0.0), so order the bounds and leave the parameter
                # unbounded when the window collapses.
                # NOTE(review): mpfit is expected to refuse lower >= upper
                # limits -- confirm against the bundled mpfit version.
                lower, upper = sorted((0.25 * param, 1.75 * param))
                bounded = bool(lower < upper)
                params_dict.append({
                    "value": param,
                    "fixed": False,
                    "limited": [bounded, bounded],
                    "limits": [lower, upper]
                })

        # Define the fitting function for mpfit
        def func(params, fjac=None, errs=None):
            # Return values are [status, residuals]
            return [0, data - make_vonmises(params, nbins)]

        # Now fit
        mpfit_out = mpfit(func, parinfo=params_dict, quiet=True)
        # Store the new best-fit parameters
        new_params = mpfit_out.params
        # Calculate the new residuals and statistics
        new_residuals = data - make_vonmises(new_params, nbins)
        new_chi2 = mpfit_out.fnorm
        new_dof = nbins - len(new_params)  # Degrees-of-freedom
        # Calculate the F-statistic for the fit, i.e. the probability that the
        # additional profile component is /not/ required by the data
        F_stat = PU.Ftest(prev_chi2, prev_dof, new_chi2, new_dof)

        # If the F-test probability is greater than some threshold, then the
        # additional von Mises component did not significantly improve the
        # fit and we should stop.  The nan test is needed because if the fit
        # is /worse/ then Ftest doesn't return a valid number.  Also stop if
        # we reach the maximum number of von Mises profile components
        if (F_stat > F_stat_threshold or N.isnan(F_stat)
                or nvonmises > max_vonmises):
            fit = False
        # Otherwise, keep fitting and update the parameters for the next pass
        else:
            fit = True
            prev_params = new_params
            prev_residuals = new_residuals
            prev_chi2 = new_chi2
            prev_dof = new_dof

    # We stop when a fit is no longer needed, so we have to return the values
    # from the /previous/ run (otherwise we return the unneeded fit).  The
    # last pass was rejected, hence nvonmises - 1 accepted components.
    return prev_params, prev_chi2 / prev_dof, nvonmises - 1