def _compute_data(self, cand):
    """Fit the candidate's profile with multiple gaussian
        components and return the fit's parameters.

        Gaussian components are added one at a time, each one seeded at
        the largest remaining residual. A new component is kept only if
        an F-test says it is needed and its FWHM lies in [1/nbins, 1.0].
        At most self.max_gaussians components are kept. The fit that is
        returned is the last one that was accepted (possibly one with no
        components at all).

        Input:
            cand: A ratings2.0 Candidate object.

        Output:
            multigaussfit: The corresponding fit. A MultiGaussFit object.

        Raises:
            utils.RatingError: If the optimizer reports that a fit failed
                (scipy.optimize.leastsq status not in 1-4, or mpfit
                returning no parameters).
    """
    prof = cand.get_from_cache('profile')
    pfd = cand.get_from_cache('pfd')
    data = utils.get_scaled_profile(prof, pfd.varprof)

    if not self.USE_MPFIT:
        # Imported once here rather than on every pass through the loop;
        # only the non-mpfit path needs it.
        import scipy.optimize

    def get_resids(params):
        # Residuals of the data with respect to the model described by params
        return utils.multigaussfit_from_paramlist(params).get_resids(data)

    def mpfit_func(params, fjac=None, errs=None):
        # mpfit expects [status, residuals] from the user function
        return [0, get_resids(params)]

    # Initialize some starting values
    nbins = len(data)
    ngaussians = 0  # Number of components accepted so far
    # After normalization the first parameter (offset) should be close to zero
    prev_params = [0.0]
    # Nothing fit yet, so residuals are just the data values
    prev_residuals = data - np.zeros_like(data)
    # No need to normalize chi^2 by variance since we already did that to the
    # data
    prev_chi2 = sum(prev_residuals * prev_residuals)
    prev_dof = nbins

    # Keep adding Gaussian components until the newest one is no longer
    # statistically needed. The component cap is checked *before* fitting:
    # a fit with max_gaussians + 1 components could never be accepted, so
    # running it would only waste time (or fail needlessly).
    while ngaussians < self.max_gaussians:
        # Start from the previous best-fit values
        trial_params = list(prev_params)

        # Guess the parameters for the next profile component
        amplitude = max(prev_residuals)
        # Base FWHM on stats.norm normalization
        fwhm = 2 * np.sqrt(2 * np.log(2)) / (np.sqrt(2 * np.pi) * amplitude)
        phase = np.argmax(prev_residuals) / float(nbins)
        trial_params.extend([amplitude, fwhm, phase])

        if self.USE_MPFIT:
            min_fwhm = 1.0 / nbins
            # params_dict is used by mpfit to get initial values and
            # constraints on parameters
            params_dict = []
            for ii, param in enumerate(trial_params):
                if ii == 0:
                    # The first parameter is the offset, which can be
                    # negative and should be allowed to vary more
                    value = param
                    limited = [False, False]
                    limits = [0.0, 0.0]
                elif (ii - 1) % 3 == 1:
                    # This is the FWHM, and is allowed to vary between
                    # 1/nbins and 1.0. The starting value is clipped into
                    # that range because mpfit refuses to run when an
                    # initial value lies outside its own limits (the
                    # amplitude-based guess can exceed 1.0, or be
                    # infinite/negative for non-positive amplitudes).
                    value = min(max(param, min_fwhm), 1.0)
                    limited = [True, True]
                    limits = [min_fwhm, 1.0]
                else:
                    # Amplitude or phase: limits are set assuming that
                    # our initial guesses were correct to within 75%.
                    # The bounds are ordered so a negative guess does
                    # not produce an inverted interval, and a zero guess
                    # (e.g. a peak in bin 0) is left unbounded since its
                    # interval would collapse to [0, 0], which mpfit
                    # rejects as inconsistent.
                    value = param
                    lower, upper = sorted((0.25 * param, 1.75 * param))
                    bounded = bool(lower < upper)
                    limited = [bounded, bounded]
                    limits = [lower, upper]
                params_dict.append({"value": value,
                                    "fixed": False,
                                    "limited": limited,
                                    "limits": limits})

            # Now fit
            mpfit_out = mpfit.mpfit(mpfit_func, parinfo=params_dict,
                                    quiet=True)
            # Store the new best-fit parameters
            new_params = mpfit_out.params
            if new_params is None:
                # mpfit rejected its inputs and produced no parameters;
                # report it the same way as the scipy branch.
                raise utils.RatingError("mpfit failed to produce "
                                        "parameters (status %d): %s" %
                                        (mpfit_out.status, mpfit_out.errmsg))
        else:
            new_params, status = scipy.optimize.leastsq(get_resids,
                                                        trial_params)
            if status not in (1, 2, 3, 4):
                raise utils.RatingError("Status returned by "
                                        "scipy.optimize.leastsq (%d) "
                                        "indicates the fit failed!" % status)

        # Calculate the new residuals and statistics
        new_fit = utils.multigaussfit_from_paramlist(new_params)
        new_residuals = new_fit.get_resids(data)
        new_chi2 = new_fit.get_chisqr(data)
        new_dof = new_fit.get_dof(len(data))  # Degrees-of-freedom

        # Calculate the F-statistic for the fit, i.e. the probability that
        # the additional profile component is /not/ required by the data
        F_stat = psr_utils.Ftest(prev_chi2, prev_dof, new_chi2, new_dof)

        # If the F-test probability is greater than some threshold, then
        # the additional Gaussian did not significantly improve the fit and
        # we should stop. The nan test is needed because if the fit is
        # /worse/ then Ftest doesn't return a valid number. Also stop if
        # the FWHM of the added component is outside [1/nbins, 1.0].
        if F_stat > self.F_stat_threshold or np.isnan(F_stat) \
                or new_fit.components[-1].fwhm > 1.0 \
                or new_fit.components[-1].fwhm < 1.0 / nbins:
            break

        # Otherwise, accept the component and update the parameters for
        # the next pass
        ngaussians += 1
        prev_params = new_params
        prev_residuals = new_residuals
        prev_chi2 = new_chi2
        prev_dof = new_dof

    # prev_params always holds the last /accepted/ fit, never the rejected
    # trial that ended the loop
    return utils.multigaussfit_from_paramlist(prev_params)
def fit_vonmises(data, data_avg, data_std, max_vonmises, F_stat_threshold):
    """
    Fit one or more von Mises profiles to data using mpfit.

    Components are added one at a time, each seeded at the largest
    remaining residual, until an F-test says the newest component is not
    required or ``max_vonmises`` components have been accepted.

    Parameters
    ----------
    data : ndarray
        The data to fit. NOTE: it is normalized *in place* (the average
        is subtracted and the result divided by the standard deviation),
        so the caller's array is modified.
    data_avg : float
        The average of the data (used for normalization)
    data_std : float
        The standard deviation of the data (used for normalization)
    max_vonmises : int
        The maximum number of von Mises profile components to fit
    F_stat_threshold : float
        The threshold probability that a profile component is not required
        for rejecting more components

    Returns
    -------
    params : list or ndarray
        The best fit von Mises parameters.  The first entry is the offset
        from zero baseline and the remaining entries are amplitudes,
        concentrations, and phases, i.e.
        params = [offset, amp_1, conc_1, phase_1, amp_2, conc_2, phase_2,
        etc.].  This is the plain list ``[0.0]`` when no component was
        accepted, otherwise whatever mpfit returned in ``.params``.
    red_chi2 : float
        The reduced chi^2 of the fit
    nvonmises : int
        The number of von Mises profile components used to fit the data

    Raises
    ------
    RuntimeError
        If mpfit rejects its inputs and returns no parameters.
    """
    # Normalize the data to have average = 0 and std_dev = 1 (It would be
    # best to do so based on values for the off-pulse region, but we don't
    # know what that is...).  Done in place; see the note on ``data`` above.
    data -= data_avg
    data /= data_std

    # Initialize some starting values
    nbins = len(data)
    nvonmises = 0  # Number of components accepted so far
    # After normalization the first parameter (offset) should be close to zero
    prev_params = [0.0]
    # Nothing fit yet, so residuals are just the data values
    prev_residuals = data - N.zeros_like(data)
    # No need to normalize chi^2 by variance since we already did that to the
    # data
    prev_chi2 = sum(prev_residuals * prev_residuals)
    prev_dof = nbins

    # Define the fitting function for mpfit
    def func(params, fjac=None, errs=None):
        # Return values are [status, residuals]
        return [0, data - make_vonmises(params, nbins)]

    # Keep adding von Mises components until the newest one is no longer
    # statistically needed.  The component cap is checked *before* fitting:
    # a fit with max_vonmises + 1 components could never be accepted, so
    # running it would only waste time.
    while nvonmises < max_vonmises:
        # Start from the previous best-fit values
        trial_params = list(prev_params)

        # Guess the parameters for the next profile component
        amplitude = max(prev_residuals)
        # Assume a concentration appropriate for a FWHM of 0.075 (in phase)
        concentration = 25.0
        location = N.argmax(prev_residuals) / float(nbins)
        trial_params.extend([amplitude, concentration, location])

        # params_dict is used by mpfit to get initial values and constraints
        # on parameters
        params_dict = []
        for ii, param in enumerate(trial_params):
            if ii == 0:
                # The first parameter is the offset, which can be negative
                # and should be allowed to vary more
                limited = [False, False]
                limits = [0.0, 0.0]
            elif (ii - 1) % 3 == 1:
                # This is the concentration, and is allowed to vary between
                # values appropriate for FWHMs of ~0.015 to ~0.5
                limited = [True, True]
                limits = [0.1, 600.0]
            else:
                # Amplitude or location: limits are set assuming that our
                # initial guesses were correct to within 75%.  The bounds
                # are ordered so a negative guess does not produce an
                # inverted interval, and a zero guess (e.g. a peak in bin
                # 0) is left unbounded since its interval would collapse
                # to [0, 0], which mpfit rejects as inconsistent.
                lower, upper = sorted((0.25 * param, 1.75 * param))
                bounded = bool(lower < upper)
                limited = [bounded, bounded]
                limits = [lower, upper]
            params_dict.append({"value": param,
                                "fixed": False,
                                "limited": limited,
                                "limits": limits})

        # Now fit
        mpfit_out = mpfit(func, parinfo=params_dict, quiet=True)
        # Store the new best-fit parameters
        new_params = mpfit_out.params
        if new_params is None:
            # mpfit refused its inputs; fail with its own message instead
            # of an obscure TypeError further down.
            raise RuntimeError("mpfit failed to produce parameters "
                               "(status %d): %s" %
                               (mpfit_out.status, mpfit_out.errmsg))

        # Calculate the new residuals and statistics
        new_residuals = data - make_vonmises(new_params, nbins)
        new_chi2 = mpfit_out.fnorm
        new_dof = nbins - len(new_params)  # Degrees-of-freedom

        # Calculate the F-statistic for the fit, i.e. the probability that
        # the additional profile component is /not/ required by the data
        F_stat = PU.Ftest(prev_chi2, prev_dof, new_chi2, new_dof)

        # If the F-test probability is greater than some threshold, then the
        # additional von Mises component did not significantly improve the
        # fit and we should stop.  The nan test is needed because if the fit
        # is /worse/ then Ftest doesn't return a valid number.
        if F_stat > F_stat_threshold or N.isnan(F_stat):
            break

        # Otherwise, accept the component and update the parameters for the
        # next pass
        nvonmises += 1
        prev_params = new_params
        prev_residuals = new_residuals
        prev_chi2 = new_chi2
        prev_dof = new_dof

    # prev_* always describe the last /accepted/ fit, never the rejected
    # trial that ended the loop
    return prev_params, prev_chi2 / prev_dof, nvonmises