def _compute_rating(self, cand):
    """Return a rating for the candidate.

    The rating value is the smearing width (DM smearing across one
    channel and the sampling time, added in quadrature and expressed
    as a fraction of the period) divided by the FWHM of the narrowest
    gaussian component of the candidate's multi-gaussian fit.

    Input:
        cand: A Candidate object to rate. Its 'pfd' and
            'multigaussfit' attributes are read.

    Output:
        value: The rating value.

    Raises:
        utils.RatingError: If the fit has no components, or if
            neither the barycentric nor the topocentric period is
            usable (missing or zero).
    """
    pfd = cand.pfd
    mgauss = cand.multigaussfit
    ncomp = len(mgauss.components)
    if not ncomp:
        raise utils.RatingError("Bad number of components for single "
                                "gaussian fit (%d)" % ncomp)

    # Prefer the barycentric period; fall back to the topocentric one
    period = pfd.bary_p1 or pfd.topo_p1
    if not period:
        # The period may be None here, so it must not be formatted with
        # '%f' (that raises TypeError and hides the real problem). A
        # zero period is rejected as well since we divide by it below.
        raise utils.RatingError("Bad period in PFD file (%r)" % (period,))

    f_ctr = (pfd.hifreq + pfd.lofreq) / 2.0
    dm_smear = psr_utils.dm_smear(pfd.bestdm, pfd.chan_wid, f_ctr)
    # NOTE(review): assumes dm_smear, pfd.dt and the period share the
    # same time units, and that component FWHMs are in rotational
    # phase -- confirm against psr_utils and the fitting code.
    width_phs = np.sqrt(dm_smear**2 + pfd.dt**2) / period
    minfwhm = min(comp.fwhm for comp in mgauss.components)
    return width_phs / minfwhm
def get_onpulse_region(self, nbins):
    """Return the combined on-pulse mask of all components in the fit.

    Each component is asked for its own on-pulse mask and the masks
    are OR-ed together.

    Inputs:
        nbins: Number of phase bins.

    Output:
        onpulse: A boolean array of length 'nbins'. An element is True
            if at least one component reports that bin as on-pulse.

    Raises:
        utils.RatingError: If the fit has no components.
    """
    components = self.components
    if not components:
        raise utils.RatingError("Multi-Gauss fit has no components "
                                "(i.e. no on-pulse region)")

    combined = np.zeros(nbins, dtype=bool)
    for component_mask in (c.get_onpulse_region(nbins) for c in components):
        combined |= component_mask
    return combined
def _compute_data(self, cand):
    """Fit the candidate's profile with a single gaussian component
    and return the fit's parameters.

    The starting guess is: zero offset, an amplitude equal to the
    largest profile value found between 10% and 40% of the way through
    the profile, a FWHM of 0.02 and a phase of 0.25.

    Input:
        cand: A ratings2.0 SPCandidate object.

    Output:
        multigaussfit: The corresponding fit. A MultiGaussFit object.

    Raises:
        utils.RatingError: If the profile is too short to estimate an
            amplitude, or if scipy.optimize.leastsq reports failure.
    """
    from scipy.optimize import leastsq

    data = utils.get_scaled_profile(cand.profile, cand.spd.varprof)

    # Initialize some starting values
    nbins = len(data)
    # Slice bounds must be integers: float indices are rejected by
    # lists and by current versions of numpy.
    search_start = int(0.1 * nbins)
    search_end = int(0.4 * nbins)
    if search_end <= search_start:
        raise utils.RatingError("Profile has too few bins (%d) to "
                                "estimate the pulse amplitude" % nbins)
    amplitude = max(data[search_start:search_end])
    fwhm = 0.02  # full window should be 50 times estimated pulse width
    phase = 0.25  # this is where the single pulse should be placed
    # Parameter order is [offset, amplitude, fwhm, phase]
    trial_params = [0.0, amplitude, fwhm, phase]

    def func(params):
        # Residuals of the single-gaussian model against the profile
        fit = utils.multigaussfit_from_paramlist(params)
        return fit.get_resids(data)

    new_params, status = leastsq(func, trial_params)
    # leastsq signals that a solution was found with codes 1 to 4
    if status not in (1, 2, 3, 4):
        raise utils.RatingError("Status returned by "
                                "scipy.optimize.leastsq (%d) "
                                "indicates the fit failed!" % status)

    return utils.multigaussfit_from_paramlist(new_params)
def get_onpulse_region(self, nbins):
    """Return a boolean mask marking the on-pulse bins of this component.

    The on-pulse window is centred on the component's phase and is
    the component's FWHM multiplied by a width-dependent fudge factor
    (4.0 for FWHM < 0.1, 2.0 for FWHM < 0.2, otherwise 1.5). The
    window wraps around phase 1.0.

    Inputs:
        nbins: Number of phase bins.

    Output:
        onpulse: A boolean array of length 'nbins' that is True for
            bins inside the on-pulse window.

    Raises:
        utils.RatingError: If the fudge-factored FWHM is larger than
            1.0 in phase.
    """
    # Determine fudge factor depending on width
    if self.fwhm < 0.1:
        fudge_factor = 4.0
    elif self.fwhm < 0.2:
        fudge_factor = 2.0
    else:
        fudge_factor = 1.5

    window = self.fwhm * fudge_factor
    if window > 1.0:
        raise utils.RatingError(
            "Fudge factored FWHM is larger than 1.0 in phase")

    start_phase = (self.phs - window / 2.0) % 1
    end_phase = (self.phs + window / 2.0) % 1

    start_bin = int(start_phase * nbins + 0.5)  # Round to nearest integer
    end_bin = int(end_phase * nbins + 0.5)  # Round to nearest integer

    onpulse_length = (end_bin - start_bin) % nbins
    if onpulse_length == 0 and window > 0.5:
        # A window that rounds to a whole turn wraps back to a length
        # of zero in the modulo above; it really covers every bin.
        onpulse_length = nbins

    onpulse_indices = np.arange(start_bin, start_bin + onpulse_length) % nbins
    onpulse_region = np.zeros(nbins, dtype=bool)
    onpulse_region[onpulse_indices] = True
    return onpulse_region
def main():
    """Compute and upload the missing ratings for every beam that needs them.

    For each loaded rater the header IDs of the beams still to be rated
    are looked up. For each header ID the candidates' missing ratings
    are queried, the pfd files are fetched into a temporary directory,
    each candidate is rated with the loaded raters whose ratings are
    missing, and the resulting values are inserted into the
    'pdm_rating' table. The temporary directory is always removed and
    the database connection is always closed.

    If no raters were requested, or if listing was requested, the list
    of raters is printed and the program exits with status 0.

    Raises:
        utils.RatingError: If a header ID selected for rating turns out
            to have no missing ratings.
    """
    # The stdlib module is 'warnings'; import it here so the warning
    # below does not depend on the file's import block.
    import warnings

    if args.num_procs > 1:
        warnings.warn("Multithreading not implemnted (%d threads requested)" %
                      args.num_procs)

    if not args.raters:
        print("No raters are loaded.")
        args.list_raters = True

    if args.list_raters:
        utils.print_raters_list(args.verbosity)
        sys.exit(0)

    rat_inst_id_cache = utils.RatingInstanceIDCache(DBNAME)
    loaded_raters = {}
    for rater_name in args.raters:
        rater_module = getattr(raters, rater_name)
        rater = rater_module.Rater()
        loaded_raters[(rater.long_name, rater.version)] = rater

    # For candidates with a given header_id find which current ratings
    # are not computed.
    #
    # NOTE: We use 'r.pdm_rating_instance_id' in the WHERE clause
    #       because it will be NULL if a rating does not exist in
    #       the 'pdm_rating' table. However, it _will_ be set if the
    #       rating exists, but has a value of NULL (i.e. the rating
    #       failed). If we used 'r.value' instead, we would try to
    #       re-compute failed ratings.
    missing_query = ("SELECT c.pdm_cand_id, "
                     "rt.name, "
                     "ri.version "
                     "FROM pdm_candidates AS c WITH(NOLOCK) "
                     "CROSS JOIN (SELECT rt.pdm_rating_type_id, "
                     "MAX(ri.pdm_rating_instance_id) "
                     "AS current_instance_id "
                     "FROM pdm_rating_instance AS ri WITH(NOLOCK) "
                     "LEFT JOIN pdm_rating_type AS rt WITH(NOLOCK) "
                     "ON ri.pdm_rating_type_id=rt.pdm_rating_type_id "
                     "GROUP BY rt.pdm_rating_type_id) AS ci "
                     "LEFT JOIN pdm_rating_instance AS ri WITH(NOLOCK) "
                     "ON ri.pdm_rating_instance_id=ci.current_instance_id "
                     "LEFT JOIN pdm_rating AS r WITH(NOLOCK) "
                     "ON r.pdm_cand_id=c.pdm_cand_id "
                     "AND ri.pdm_rating_instance_id=r.pdm_rating_instance_id "
                     "LEFT JOIN pdm_rating_type AS rt WITH(NOLOCK) "
                     "ON rt.pdm_rating_type_id=ri.pdm_rating_type_id "
                     "WHERE c.header_id=? AND r.pdm_rating_instance_id IS NULL")
    insert_prefix = ("INSERT INTO pdm_rating "
                     "(value, pdm_rating_instance_id, pdm_cand_id, date) ")

    db = database.Database(DBNAME)
    try:
        for rater in loaded_raters.values():
            rating_instance_id = rat_inst_id_cache.get_id(rater.long_name,
                                                          rater.version,
                                                          rater.description)
            header_ids = get_beams_to_rate(rating_instance_id)
            print("For rater %s have %d beams to rate." %
                  (rater.long_name, len(header_ids)))

            for header_id in header_ids:
                db.execute(missing_query, header_id)
                missing_ratings = db.fetchall()
                if not missing_ratings:
                    raise utils.RatingError(
                        "At least the current rating (%s) should "
                        "be missing for header_id=%d. (This is how the header "
                        "IDs were selected.)" % (rater.long_name, header_id))

                # Get pfds for this header_id
                if DBNAME == 'common2' or DBNAME == 'common3':
                    tmpdir, fn_mapping = get_pfds_from_ftp(header_id)
                else:
                    tmpdir, fn_mapping = get_pfds_from_db(header_id)

                try:
                    rated_cands = []
                    # Rate pfds for this header_id
                    for cand_id, pfd_fn in fn_mapping.items():
                        # Only use loaded raters whose rating is missing
                        # for this particular candidate
                        raters_to_use = [loaded_raters[(x[1], x[2])]
                                         for x in missing_ratings
                                         if x[0] == cand_id
                                         and (x[1], x[2]) in loaded_raters]
                        cand = rate_pfd(os.path.join(tmpdir, pfd_fn),
                                        raters_to_use)
                        # Add candidate ID number to facilitate uploading
                        cand.id = cand_id
                        rated_cands.append(cand)

                    # Upload rating values, one INSERT per candidate
                    for cand in rated_cands:
                        if not len(cand.rating_values):
                            continue
                        selects = []
                        for ratval in cand.rating_values:
                            # Values too small or too large to store are
                            # replaced by 0.0 and 9999.0 respectively
                            if ratval.value is not None and \
                                    np.abs(ratval.value) < 1e-307:
                                ratval.value = 0.0
                            if ratval.value is not None and \
                                    np.isinf(ratval.value):
                                ratval.value = 9999.0
                            instance_id = rat_inst_id_cache.get_id(
                                ratval.name, ratval.version,
                                ratval.description)
                            # Builtin float: the 'np.float' alias no longer
                            # exists in current numpy
                            if ratval.value is not None:
                                value = float(ratval.value)
                            else:
                                value = None
                            if value is None or np.isnan(value):
                                # Failed ratings are stored as NULL
                                selects.append(
                                    "SELECT NULL, %d, %d, GETDATE()" %
                                    (instance_id, cand.id))
                            else:
                                selects.append(
                                    "SELECT '%.12g', %d, %d, GETDATE()" %
                                    (value, instance_id, cand.id))
                        # Joining avoids having to trim a trailing
                        # 'UNION ALL' afterwards
                        db.execute(insert_prefix + " UNION ALL ".join(selects))
                finally:
                    # Remove the temporary directory containing pfd files
                    shutil.rmtree(tmpdir)
    finally:
        db.close()
def _compute_data(self, cand):
    """Fit the candidate's profile with multiple gaussian components
    and return the fit's parameters.

    Components are added one at a time. Each new component is seeded
    from the largest remaining residual, and the previously accepted
    parameters seed the earlier components. Fitting stops as soon as
    the newest component is rejected; the fit returned is the last one
    that was accepted.

    Input:
        cand: A ratings2.0 Candidate object.

    Output:
        multigaussfit: The corresponding fit. A MultiGaussFit object.

    Raises:
        utils.RatingError: If scipy.optimize.leastsq is used and
            reports that the fit failed.
    """
    profile = cand.get_from_cache('profile')
    pfd = cand.get_from_cache('pfd')
    data = utils.get_scaled_profile(profile, pfd.varprof)
    nbins = len(data)

    # State of the last accepted fit. Before anything is fit there is
    # only the offset (expected to be near zero after normalization)
    # and the residuals are simply the data. Chi^2 needs no variance
    # normalization because the data were already scaled.
    accepted_params = [0.0]
    accepted_resids = data - np.zeros_like(data)
    accepted_chi2 = sum(accepted_resids * accepted_resids)
    accepted_dof = nbins

    ngaussians = 0
    while True:
        ngaussians += 1

        # Seed the new component from the highest remaining residual.
        # The FWHM guess is based on the stats.norm normalization.
        peak = max(accepted_resids)
        fwhm_guess = 2 * np.sqrt(2 * np.log(2)) / (np.sqrt(2 * np.pi) * peak)
        phase_guess = np.argmax(accepted_resids) / float(nbins)
        trial_params = list(accepted_params)
        trial_params.extend([peak, fwhm_guess, phase_guess])

        if self.USE_MPFIT:
            # Build the per-parameter start values and constraints
            # that mpfit expects
            parinfo = []
            for index, guess in enumerate(trial_params):
                if index == 0:
                    # The offset can be negative and is left unbounded
                    limited = [False, False]
                    limits = [0.0, 0.0]
                elif (index - 1) % 3 == 1:
                    # A FWHM may vary between 1/nbins and 1.0
                    limited = [True, True]
                    limits = [1.0 / nbins, 1.0]
                else:
                    # Amplitudes and phases are bounded to between
                    # 0.25 and 1.75 times the initial guess
                    limited = [True, True]
                    limits = [0.25 * guess, 1.75 * guess]
                parinfo.append({"value": guess,
                                "fixed": False,
                                "limited": limited,
                                "limits": limits})

            def mpfit_resids(params, fjac=None, errs=None):
                # mpfit expects [status, residuals]
                model = utils.multigaussfit_from_paramlist(params)
                return [0, model.get_resids(data)]

            new_params = mpfit.mpfit(mpfit_resids, parinfo=parinfo,
                                     quiet=True).params
        else:
            import scipy.optimize

            def leastsq_resids(params):
                model = utils.multigaussfit_from_paramlist(params)
                return model.get_resids(data)

            new_params, status = scipy.optimize.leastsq(leastsq_resids,
                                                        trial_params)
            if status not in (1, 2, 3, 4):
                raise utils.RatingError("Status returned by "
                                        "scipy.optimize.leastsq (%d) "
                                        "indicates the fit failed!" % status)

        # Residuals and statistics of the candidate fit
        new_fit = utils.multigaussfit_from_paramlist(new_params)
        new_resids = new_fit.get_resids(data)
        new_chi2 = new_fit.get_chisqr(data)
        new_dof = new_fit.get_dof(len(data))  # Degrees-of-freedom

        # F-test probability that the additional component is /not/
        # required by the data
        F_stat = psr_utils.Ftest(accepted_chi2, accepted_dof,
                                 new_chi2, new_dof)

        # Reject the new component if it did not significantly improve
        # the fit. The nan test is needed because Ftest doesn't return
        # a valid number when the fit got /worse/.
        if F_stat > self.F_stat_threshold or np.isnan(F_stat):
            break
        # Also stop once the maximum number of components is exceeded
        if ngaussians > self.max_gaussians:
            break
        # ... or if the added component's width is out of range
        if new_fit.components[-1].fwhm > 1.0 \
                or new_fit.components[-1].fwhm < 1.0 / nbins:
            break

        # The component was needed: accept it and try adding another
        accepted_params = new_params
        accepted_resids = new_resids
        accepted_chi2 = new_chi2
        accepted_dof = new_dof

    # The loop ends on a rejected fit, so return the last accepted one
    return utils.multigaussfit_from_paramlist(accepted_params)