예제 #1
0
    def _compute_rating(self, cand):
        """Return a rating for the candidate. The rating value is the
            ratio of the effective smearing to the FWHM of the narrowest
            gaussian component. The effective smearing is the DM smearing
            (from psr_utils.dm_smear, evaluated at the centre frequency)
            added in quadrature with the sampling time and divided by the
            period.

            Input:
                cand: A Candidate object to rate. Must provide 'pfd' and
                    'multigaussfit' attributes.

            Output:
                value: The rating value.

            Raises:
                utils.RatingError: If the fit has no components, or if
                    the PFD file holds no usable (non-zero) period.
        """
        pfd = cand.pfd
        components = cand.multigaussfit.components
        ncomp = len(components)
        if not ncomp:
            raise utils.RatingError("Bad number of components for single "
                                    "gaussian fit (%d)" % ncomp)

        # Get the period (prefer barycentric, fall back on topocentric)
        period = pfd.bary_p1 or pfd.topo_p1
        # A missing (None) or zero period cannot be divided by. Use '%s'
        # in the message because '%f' raises TypeError when given None.
        if not period:
            raise utils.RatingError("Bad period in PFD file (%s)" % period)

        f_ctr = (pfd.hifreq + pfd.lofreq) / 2.0
        dm_smear = psr_utils.dm_smear(pfd.bestdm, pfd.chan_wid, f_ctr)
        # Smearing as a fraction of the period
        # NOTE(review): assumes dm_smear, pfd.dt and the period share the
        # same time units, and that comp.fwhm is in phase -- confirm.
        width_phs = np.sqrt(dm_smear**2 + pfd.dt**2) / period

        minfwhm = min(comp.fwhm for comp in components)
        return width_phs / minfwhm
예제 #2
0
    def get_onpulse_region(self, nbins):
        """Return a boolean mask flagging the on-pulse phase bins.

            The mask is the union of the on-pulse regions reported by
            every component of the fit.

            Inputs:
                nbins: Number of phase bins.

            Output:
                onpulse: A boolean array of length 'nbins'. A bin is
                    True if it is on-pulse for at least one component.

            Raises:
                utils.RatingError: If the fit has no components.
        """
        comps = self.components
        if comps:
            mask = np.zeros(nbins, dtype=bool)
            for gauss in comps:
                # In-place OR accumulates the union of component masks
                np.bitwise_or(mask, gauss.get_onpulse_region(nbins), out=mask)
            return mask
        raise utils.RatingError("Multi-Gauss fit has no components " \
                                "(i.e. no on-pulse region)")
예제 #3
0
    def _compute_data(self, cand):
        """Fit the candidate's profile with a single gaussian
            component and return the fit's parameters.

            Input:
                cand: A ratings2.0 SPCandidate object.

            Output:
                multigaussfit: The corresponding fit. A MultiGaussFit object.

            Raises:
                utils.RatingError: If the profile is too short to
                    estimate a starting amplitude, or if
                    scipy.optimize.leastsq reports that the fit failed.
        """
        data = utils.get_scaled_profile(cand.profile, cand.spd.varprof)

        # Initialize some starting values
        nbins = len(data)

        # Slice bounds must be integers: float indices raise TypeError
        # on current Python/NumPy (older versions silently truncated).
        lobin = int(0.1 * nbins)
        hibin = int(0.4 * nbins)
        if hibin <= lobin:
            raise utils.RatingError("Profile is too short (%d bins) to "
                                    "estimate the pulse amplitude" % nbins)

        amplitude = max(data[lobin:hibin])
        fwhm = 0.02  # full window should be 50 times estimated pulse width
        phase = 0.25  # this is where the single pulse should be placed
        # Parameter order: [offset, amplitude, fwhm, phase]
        trial_params = [0.0, amplitude, fwhm, phase]

        from scipy.optimize import leastsq

        def func(params):
            # Since this is a single gaussian, params is just
            # [offset, amp, fwhm, phs]
            fit = utils.multigaussfit_from_paramlist(params)
            return fit.get_resids(data)

        new_params, status = leastsq(func, trial_params)
        # leastsq documents the integer flags 1-4 as "solution found"
        if status not in (1, 2, 3, 4):
            raise utils.RatingError("Status returned by "
                                    "scipy.optimize.leastsq (%d) "
                                    "indicates the fit failed!" % status)

        return utils.multigaussfit_from_paramlist(new_params)
예제 #4
0
    def get_onpulse_region(self, nbins):
        """Return a boolean mask flagging the on-pulse phase bins.

            The on-pulse window is centred on the component's phase and
            is as wide as its FWHM multiplied by a width-dependent fudge
            factor. The window wraps around phase 1.0 when necessary.

            Inputs:
                nbins: Number of phase bins.

            Output:
                onpulse: A boolean array of length 'nbins' that is True
                    for the bins inside the on-pulse window.

            Raises:
                utils.RatingError: If the fudge-factored FWHM is larger
                    than 1.0 in phase.
        """
        # Determine fudge factor depending on width
        if self.fwhm < 0.1:
            fudge_factor = 4.0
        elif self.fwhm < 0.2:
            fudge_factor = 2.0
        else:
            fudge_factor = 1.5

        width = self.fwhm * fudge_factor
        if width > 1.0:
            raise utils.RatingError(
                "Fudge factored FWHM is larger than 1.0 in phase")

        start_phase = (self.phs - width / 2.0) % 1
        end_phase = (self.phs + width / 2.0) % 1

        start_bin = int(start_phase * nbins + 0.5)  # Round to nearest integer
        end_bin = int(end_phase * nbins + 0.5)  # Round to nearest integer
        onpulse_length = (end_bin - start_bin) % nbins
        # The modulo cannot tell "no bins" apart from "all bins". A zero
        # length for a window at least half a rotation wide means it
        # spans (nearly) every bin, so select them all.
        if onpulse_length == 0 and width >= 0.5:
            onpulse_length = nbins

        onpulse_indices = np.arange(start_bin,
                                    start_bin + onpulse_length) % nbins
        onpulse_region = np.zeros(nbins, dtype=bool)
        onpulse_region[onpulse_indices] = True
        return onpulse_region
예제 #5
0
def _build_rating_insert_query(cand, rat_inst_id_cache):
    """Build the INSERT statement uploading every rating value of 'cand'.

        Rating values are sanitized in place before being formatted:
        magnitudes below 1e-307 become 0.0 and infinite values become
        9999.0. Missing (None) and NaN values are uploaded as NULL.

        Inputs:
            cand: A rated candidate with 'id' and 'rating_values' set.
            rat_inst_id_cache: Cache used to look up the rating
                instance ID of each rating value.

        Output:
            query: The SQL INSERT statement (a string).
    """
    selects = []
    for ratval in cand.rating_values:
        if ratval.value is not None:
            if np.abs(ratval.value) < 1e-307:
                ratval.value = 0.0
            # NOTE(review): -inf is also mapped to +9999.0, as in the
            # original code -- confirm this is intended.
            if np.isinf(ratval.value):
                ratval.value = 9999.0

        instance_id = rat_inst_id_cache.get_id(ratval.name,
                                               ratval.version,
                                               ratval.description)

        # Built-in float replaces np.float, which was only an alias
        # for it and no longer exists in current NumPy releases.
        value = float(ratval.value) if ratval.value is not None else None

        if value is None or np.isnan(value):
            selects.append("SELECT NULL, %d, %d, GETDATE()" %
                           (instance_id, cand.id))
        else:
            selects.append("SELECT '%.12g', %d, %d, GETDATE()" %
                           (value, instance_id, cand.id))

    # Joining avoids having to trim a trailing 'UNION ALL'. The old
    # rstrip('UNION ALL') stripped a *set of characters*, not a suffix.
    return "INSERT INTO pdm_rating " \
           "(value, pdm_rating_instance_id, pdm_cand_id, date) " + \
           " UNION ALL ".join(selects)


def main():
    """Compute and upload the missing ratings for all beams needing them.

        Reads the module-level 'args' and 'DBNAME'. If no raters were
        requested, or a listing was asked for, the available raters are
        printed and the program exits. Otherwise, for every loaded rater
        the beams (header IDs) still to be rated are fetched, their pfd
        files are retrieved into a temporary directory, each candidate
        is rated with the loaded raters whose current rating is missing,
        and the values are inserted into the 'pdm_rating' table. The
        temporary directory is always removed and the database
        connection always closed.

        Raises:
            utils.RatingError: If a selected header ID turns out to
                have no missing ratings.
    """
    if args.num_procs > 1:
        # Imported locally so this block does not depend on the file's
        # import list; the original called a non-existent 'warning' name.
        import warnings
        warnings.warn("Multithreading not implemnted (%d threads requested)" %
                      args.num_procs)

    if not args.raters:
        print("No raters are loaded.")
        args.list_raters = True

    if args.list_raters:
        utils.print_raters_list(args.verbosity)
        sys.exit(0)

    rat_inst_id_cache = utils.RatingInstanceIDCache(DBNAME)
    loaded_raters = {}
    for rater_name in args.raters:
        rater_module = getattr(raters, rater_name)
        rater = rater_module.Rater()
        loaded_raters[(rater.long_name, rater.version)] = rater

    # For candidates with a given header_id find which current ratings
    # are not computed.
    #
    # NOTE: We use 'r.pdm_rating_instance_id' in the WHERE clause
    # because it will be NULL if a rating does not exist in
    # the 'pdm_rating' table. However, it _will_ be set if the rating
    # exists, but has a value of NULL (i.e. the rating failed).
    # If we used 'r.value' instead, we would try to re-compute
    # failed ratings.
    #
    # The statement does not change between iterations, so build it once.
    missing_query = "SELECT c.pdm_cand_id, " \
                        "rt.name, " \
                        "ri.version " \
                    "FROM pdm_candidates AS c WITH(NOLOCK) " \
                    "CROSS JOIN (SELECT rt.pdm_rating_type_id, " \
                                    "MAX(ri.pdm_rating_instance_id) " \
                                        "AS current_instance_id " \
                                "FROM pdm_rating_instance AS ri WITH(NOLOCK) " \
                                "LEFT JOIN pdm_rating_type AS rt WITH(NOLOCK) " \
                                    "ON ri.pdm_rating_type_id=rt.pdm_rating_type_id " \
                                "GROUP BY rt.pdm_rating_type_id) AS ci " \
                    "LEFT JOIN pdm_rating_instance AS ri WITH(NOLOCK) " \
                        "ON ri.pdm_rating_instance_id=ci.current_instance_id " \
                    "LEFT JOIN pdm_rating AS r WITH(NOLOCK) " \
                        "ON r.pdm_cand_id=c.pdm_cand_id " \
                            "AND ri.pdm_rating_instance_id=r.pdm_rating_instance_id " \
                    "LEFT JOIN pdm_rating_type AS rt WITH(NOLOCK) " \
                        "ON rt.pdm_rating_type_id=ri.pdm_rating_type_id " \
                    "WHERE c.header_id=? AND r.pdm_rating_instance_id IS NULL"

    db = database.Database(DBNAME)
    try:
        for rater in loaded_raters.values():
            rating_instance_id = rat_inst_id_cache.get_id(rater.long_name,
                                                          rater.version,
                                                          rater.description)
            header_ids = get_beams_to_rate(rating_instance_id)
            print("For rater %s have %d beams to rate." %
                  (rater.long_name, len(header_ids)))

            for header_id in header_ids:
                db.execute(missing_query, header_id)
                missing_ratings = db.fetchall()

                if not missing_ratings:
                    raise utils.RatingError("At least the current rating (%s) should " \
                                        "be missing for header_id=%d. (This is how the header "
                                        "IDs were selected.)" % (rater.long_name, header_id))

                # Group the loaded raters that are still missing by
                # candidate ID (row layout: cand_id, rater name, version)
                # so each candidate is a single dictionary lookup.
                raters_by_cand = {}
                for row in missing_ratings:
                    key = (row[1], row[2])
                    if key in loaded_raters:
                        raters_by_cand.setdefault(row[0], []).append(
                            loaded_raters[key])

                # Get pfds for this header_id
                if DBNAME == 'common2' or DBNAME == 'common3':
                    tmpdir, fn_mapping = get_pfds_from_ftp(header_id)
                else:
                    tmpdir, fn_mapping = get_pfds_from_db(header_id)

                try:
                    rated_cands = []
                    # Rate pfds for this header_id
                    for cand_id, pfd_fn in fn_mapping.items():
                        raters_to_use = raters_by_cand.get(cand_id, [])
                        cand = rate_pfd(os.path.join(tmpdir, pfd_fn),
                                        raters_to_use)

                        # Add candidate ID number to facilitate uploading
                        cand.id = cand_id
                        rated_cands.append(cand)

                    # Upload rating values
                    for cand in rated_cands:
                        if len(cand.rating_values):
                            db.execute(_build_rating_insert_query(
                                cand, rat_inst_id_cache))
                finally:
                    # Remove the temporary directory containing pfd files
                    shutil.rmtree(tmpdir)
    finally:
        db.close()
예제 #6
0
    def _compute_data(self, cand):
        """Fit the candidate's profile with as many gaussian components
            as are statistically justified and return the resulting fit.

            Components are added one at a time. Each new component is
            seeded from the largest residual left by the previous fit,
            and is kept only if an F-test says it significantly
            improves the fit.

            Input:
                cand: A ratings2.0 Candidate object.

            Output:
                multigaussfit: The corresponding fit. A MultiGaussFit object.

            Raises:
                utils.RatingError: If scipy.optimize.leastsq reports
                    that a fit failed (only when mpfit is not in use).
        """
        profile = cand.get_from_cache('profile')
        pfd = cand.get_from_cache('pfd')
        data = utils.get_scaled_profile(profile, pfd.varprof)
        nbins = len(data)

        # State of the best fit accepted so far. With nothing fit yet the
        # only parameter is the offset (close to zero after scaling) and
        # the residuals are simply the data values.
        best_params = [0.0]
        residuals = data - np.zeros_like(data)
        # The data are already scaled by the variance, so chi^2 is just
        # the sum of the squared residuals.
        chi2 = sum(residuals * residuals)
        dof = nbins

        ngaussians = 0
        while True:
            ngaussians += 1

            # Seed the new component from the largest remaining residual;
            # the FWHM guess is based on the stats.norm normalization.
            peak = max(residuals)
            guess_fwhm = 2 * np.sqrt(
                2 * np.log(2)) / (np.sqrt(2 * np.pi) * peak)
            guess_phase = np.argmax(residuals) / float(nbins)
            trial_params = list(best_params) + [peak, guess_fwhm, guess_phase]

            if self.USE_MPFIT:
                # mpfit takes the initial values and the constraints as
                # one dictionary per parameter.
                parinfo = []
                for idx, guess in enumerate(trial_params):
                    if idx == 0:
                        # Offset: may be negative, so leave it unbounded
                        limited, limits = [False, False], [0.0, 0.0]
                    elif idx % 3 == 2:
                        # FWHM: restricted to between 1/nbins and 1.0
                        limited, limits = [True, True], [1.0 / nbins, 1.0]
                    else:
                        # Amplitude or phase: restricted to between 0.25
                        # and 1.75 times the initial guess
                        limited = [True, True]
                        limits = [0.25 * guess, 1.75 * guess]
                    parinfo.append({
                        "value": guess,
                        "fixed": False,
                        "limited": limited,
                        "limits": limits
                    })

                def mpfit_resids(params, fjac=None, errs=None):
                    model = utils.multigaussfit_from_paramlist(params)
                    # mpfit expects [status, residuals]
                    return [0, model.get_resids(data)]

                new_params = mpfit.mpfit(mpfit_resids, parinfo=parinfo,
                                         quiet=True).params
            else:
                from scipy.optimize import leastsq

                def lsq_resids(params):
                    model = utils.multigaussfit_from_paramlist(params)
                    return model.get_resids(data)

                new_params, status = leastsq(lsq_resids, trial_params)
                if status not in (1, 2, 3, 4):
                    raise utils.RatingError(
                        "Status returned by scipy.optimize.leastsq (%d) "
                        "indicates the fit failed!" % status)

            # Statistics of the fit that includes the new component
            new_fit = utils.multigaussfit_from_paramlist(new_params)
            new_residuals = new_fit.get_resids(data)
            new_chi2 = new_fit.get_chisqr(data)
            new_dof = new_fit.get_dof(nbins)  # Degrees-of-freedom
            # F-statistic: the probability that the additional profile
            # component is /not/ required by the data
            F_stat = psr_utils.Ftest(chi2, dof, new_chi2, new_dof)

            # Reject the new component, and stop, when it did not improve
            # the fit significantly, when the F-test is not a valid
            # number (the fit got /worse/), when the maximum number of
            # components is exceeded, or when the added component's FWHM
            # is outside the range 1/nbins to 1.0.
            if (F_stat > self.F_stat_threshold or np.isnan(F_stat)
                    or ngaussians > self.max_gaussians
                    or new_fit.components[-1].fwhm > 1.0
                    or new_fit.components[-1].fwhm < 1.0 / nbins):
                break

            # Accept the new component and try adding another one
            best_params = new_params
            residuals = new_residuals
            chi2 = new_chi2
            dof = new_dof

        # The last trial was rejected, so return the fit built from the
        # last /accepted/ set of parameters.
        return utils.multigaussfit_from_paramlist(best_params)