Example #1
def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/
        Note: 'base_results_directory' is defined in the config file.
                'mjd', 'obs_name', and 'beam_num' are from parsing
                the job's datafiles. 'proc_date' is the current date
                in YYMMDD format.
    """
    # Check that files exist
    missingfiles = [fn for fn in fns if not os.path.exists(fn)]
    if missingfiles:
        errormsg = "The following files cannot be found:\n"
        for missing in missingfiles:
            errormsg += "\t%s\n" % missing
        raise pipeline_utils.PipelineError(errormsg)

    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg  = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)
        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date=datetime.datetime.now().strftime('%y%m%d')
    outdir = os.path.join(config.jobpooler.base_results_directory, \
                                    str(mjd), str(obs_name), \
                                    str(beam_num), proc_date)
    return outdir
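The directory layout described in the docstring comes down to one os.path.join call plus a datetime.strftime stamp. Below is a minimal stand-alone sketch of that step, assuming a hypothetical base directory and made-up header values (the real function takes them from the config module and the PSRFITS headers):

import os
import datetime

# Hypothetical stand-ins for config.jobpooler.base_results_directory and the
# mjd/obs_name/beam_num values parsed from the PSRFITS headers.
base_results_directory = "/results"
mjd = 55555
obs_name = "p2030.20101010.B0531+21"
beam_num = 3

proc_date = datetime.datetime.now().strftime('%y%m%d')   # YYMMDD
outdir = os.path.join(base_results_directory, str(mjd), str(obs_name),
                      str(beam_num), proc_date)
print outdir   # e.g. /results/55555/p2030.20101010.B0531+21/3/240101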
Example #2
    def __init__(self, filenms, resultsdir):
        # Where to dump all the results
        self.outputdir = resultsdir
        
        self.filenms = filenms
        self.filenmstr = ' '.join(self.filenms)
        self.basefilenm = os.path.split(filenms[0])[1].rstrip(".fits")

        # Read info from PSRFITS file
        data = datafile.autogen_dataobj(self.filenms)
        # Correct positions in data file headers for WappPsrfitsData
        if isinstance(data, datafile.WappPsrfitsData):
            data.update_positions()
        
        spec_info = data.specinfo
        self.backend = spec_info.backend
        self.MJD = spec_info.start_MJD[0]
        self.ra_string = spec_info.ra_str
        self.dec_string = spec_info.dec_str
        self.orig_N = spec_info.N
        self.dt = spec_info.dt # in sec
        self.BW = spec_info.BW
        self.orig_T = spec_info.T
        # Downsampling is catered to the number of samples per row.
        # self.N = psr_utils.choose_N(self.orig_N)
        self.N = self.orig_N
        self.T = self.N * self.dt
        self.nchan = spec_info.num_channels
        self.samp_per_row = spec_info.spectra_per_subint
        self.fctr = spec_info.fctr
        self.numrows = np.sum(spec_info.num_subint) 
       
        # Determine the average barycentric velocity of the observation
        self.baryv = get_baryv(self.ra_string, self.dec_string,
                               self.MJD, self.T, obs="AO")
        # Figure out which host we are processing on
        self.hostname = socket.gethostname()
        # The fraction of the data recommended to be masked by rfifind
        self.masked_fraction = 0.0
        # The number of candidates folded
        self.num_cands_folded = 0
        # Initialize our timers
        self.rfifind_time = 0.0
        self.downsample_time = 0.0
        self.subbanding_time = 0.0
        self.dedispersing_time = 0.0
        self.FFT_time = 0.0
        self.lo_accelsearch_time = 0.0
        self.hi_accelsearch_time = 0.0
        self.singlepulse_time = 0.0
        self.sifting_time = 0.0
        self.folding_time = 0.0
        self.total_time = 0.0
        # Initialize some candidate counters
        self.num_sifted_cands = 0
        self.num_folded_cands = 0
        self.num_single_cands = 0
        # Set dedispersion plan
        self.set_DDplan()
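One detail worth noting about the basefilenm line above: str.rstrip(".fits") strips any trailing run of the characters '.', 'f', 'i', 't' and 's', not the literal extension. That is harmless for typical PSRFITS names but can remove extra characters; a two-line illustration (filenames here are made up):

print "beam0_part1.fits".rstrip(".fits")    # "beam0_part1"  -- as intended
print "p2030_s1_fft.fits".rstrip(".fits")   # "p2030_s1_"    -- trailing "fft" is stripped too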
Example #3
def upload_results(dir):
  
    try:
        db = database.Database('default', autocommit=False)

        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty' % dir
            raise upload.UploadNonFatalError(errormsg)

        fitsfiles = glob.glob(os.path.join(dir, "*.fits"))
        data = datafile.autogen_dataobj(fitsfiles)
        version_number = JobUploader.get_version_number(dir)

        hdr = header.get_header(fitsfiles)
        print "\tHeader parsed."

        cands, tempdir = candidates.get_candidates(version_number, dir)
        print "\tPeriodicity candidates parsed."
        sp_cands = sp_candidates.get_spcandidates(version_number, dir)
        print "\tSingle pulse candidates parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)
        diags = diagnostics.get_diagnostics(data.obs_name, \
                                            data.beam_id, \
                                            data.obstype, \
                                            version_number, \
                                            dir)
        print "\tDiagnostics parsed."

        header_id = hdr.upload(db)
        for d in diags:
            d.upload(db)
        print "\tEverything uploaded and checked successfully. header_id=%d" % \
                    header_id

    except (upload.UploadNonFatalError):
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg  = "Error while checking results!\n"
        errormsg += "\tResults Dir: %s\n\n" % dir
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        # Rolling back changes. 
        db.rollback()

    except (database.DatabaseConnectionError, CornellFTP.CornellFTPTimeout,\
               upload.UploadDeadlockError, database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write("\tRolling back DB transaction and will re-try later.\n")
        
        # Rolling back changes. 
        db.rollback()
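The error handling above leans on traceback.format_exception(*sys.exc_info()) to turn the active exception into a list of formatted lines, with the one-line summary as the last element. A minimal stand-alone sketch of that idiom (the ValueError is just a stand-in for an upload failure):

import sys
import traceback

try:
    raise ValueError("bad results directory")
except ValueError:
    exceptionmsgs = traceback.format_exception(*sys.exc_info())
    sys.stderr.write("".join(exceptionmsgs))        # full formatted traceback
    sys.stderr.write("\t%s" % exceptionmsgs[-1])    # "ValueError: bad results directory"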
Example #4
    def __init__(self, datafns, *args, **kwargs):
        if isinstance(datafns, datafile.Data):
            self.data = datafns
        else:
            self.data = datafile.autogen_dataobj(datafns, *args, **kwargs)

        # List of dependents (ie other uploadables that require
        # the header_id from this header)
        self.dependents = []
Example #5
def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/
        Note: 'base_results_directory' is defined in the processing config file.
                'mjd', 'obs_name', and 'beam_num' are from parsing
                the job's datafiles. 'proc_date' is the current date
                in yymmddThhmmss format.
    """

    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)

        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%dT%H%M%S')
    baseoutdir = os.path.join(config.processing.base_results_directory, \
                                    str(mjd), str(obs_name), \
                                    str(beam_num), proc_date)
    outdir = baseoutdir

    # Make sure our output directory doesn't already exist
    counter = 0
    while os.path.exists(outdir):
        counter += 1
        outdir = "%s_%d" % (baseoutdir, counter)

    # Make the directory immediately so the pipeline knows it's taken
    try:
        os.makedirs(outdir)
        os.chmod(outdir, 0777)
        created = True
        return outdir, created
    except:
        current_usr = getpass.getuser()
        errormsg = "Permission denied to %s when attempting to create output directory %s" % (
            current_usr, outdir)
        f = open(
            '/scratch/eparent/eparent/PALFA4/results/missing_permission.out',
            'a')
        f.write(errormsg + '\n')
        f.close()
        print errormsg
        created = False
        return outdir, created
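The "append a counter until the name is free" loop used above is a generic pattern; here is a small self-contained sketch that runs against a throwaway temporary directory (all names below are made up). Creating the directory right away is what actually claims the name; the os.path.exists() check alone would leave it unreserved.

import os
import tempfile

basedir = tempfile.mkdtemp()                  # stand-in for the results tree
baseoutdir = os.path.join(basedir, "55555", "myobs", "3", "240101T120000")
outdir = baseoutdir

counter = 0
while os.path.exists(outdir):
    counter += 1
    outdir = "%s_%d" % (baseoutdir, counter)

os.makedirs(outdir)                           # claim the name immediately
print outdir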
Example #6
def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/
        Note: 'base_results_directory' is defined in the processing config file.
                'mjd', 'obs_name', and 'beam_num' are from parsing
                the job's datafiles. 'proc_date' is the current date
                in yymmddThhmmss format.
    """

    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg  = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)
        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%dT%H%M%S')
    baseoutdir = os.path.join(config.processing.base_results_directory, \
                                    str(mjd), str(obs_name), \
                                    str(beam_num), proc_date)
    outdir = baseoutdir
    
    # Make sure our output directory doesn't already exist
    counter = 0
    while os.path.exists(outdir):
        counter += 1
        outdir = "%s_%d" % (baseoutdir, counter)
    
    # Make the directory immediately so the pipeline knows it's taken
    os.makedirs(outdir)

    # Send an email if our first choice for outdir wasn't available
    if counter:
        errormsg = "The first-choice output directory '%s' " \
                    "already existed. Had to settle for '%s' " \
                    "after %d tries. \n\n " \
                    "Data files:\n " \
                    "\t%s" % (baseoutdir, outdir, counter, "\n\t".join(fns))
        notification = mailer.ErrorMailer(errormsg, \
                        subject="Job outdir existance warning")
        notification.send()
    return outdir
Example #7
def copy_zaplist(fns, workdir):
    # Copy zaplist to working directory
    data = datafile.autogen_dataobj(fns)

    # First, try to find custom zaplist for this MJD
    customzapfn = os.path.join(config.processing.zaplistdir, \
                                "autozap_mjd%d.zaplist" % int(data.timestamp_mjd))
    if os.path.exists(customzapfn):
        # Copy custom zaplist to workdir and rename to the expected zaplist fn
        shutil.copy(customzapfn, workdir)
        print "Copied custom zaplist: %s" % customzapfn
    else:
        # Copy default zaplist
        shutil.copy(config.processing.default_zaplist, workdir)
        print "No custom zaplist found. Copied default zaplist: %s" % \
                config.processing.default_zaplist
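The custom-versus-default selection above is an os.path.exists() branch followed by shutil.copy. The sketch below reproduces it in a form that runs anywhere, using throwaway temporary files in place of config.processing.zaplistdir and config.processing.default_zaplist (all names are made up):

import os
import shutil
import tempfile

workdir = tempfile.mkdtemp()
zaplistdir = tempfile.mkdtemp()               # stand-in for config.processing.zaplistdir
default_zaplist = os.path.join(zaplistdir, "default.zaplist")
open(default_zaplist, "w").close()

mjd = 55555
customzapfn = os.path.join(zaplistdir, "autozap_mjd%d.zaplist" % mjd)
if os.path.exists(customzapfn):
    shutil.copy(customzapfn, workdir)
    print "Copied custom zaplist: %s" % customzapfn
else:
    shutil.copy(default_zaplist, workdir)
    print "No custom zaplist found. Copied default zaplist: %s" % default_zaplist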
Example #8
def presubmission_check(fns):
    """Check to see if datafiles meet the critera for submission.
    """
    # Check that files exist
    missingfiles = [fn for fn in fns if not os.path.exists(fn)]
    if missingfiles:
        errormsg = "The following files cannot be found:\n"
        for missing in missingfiles:
            errormsg += "\t%s\n" % missing
        raise pipeline_utils.PipelineError(errormsg) # if files missing want to crash
    try:
        #for WAPP, check if coords are in table
        data = datafile.autogen_dataobj([fns[0]])
        if not isinstance(data, datafile.PsrfitsData):
            errormsg  = "Data must be of PSRFITS format.\n"
            errormsg += "\tData type: %s\n" % type(data)
            raise FailedPreCheckError(errormsg)
    except datafile.DataFileError, e:
        raise FailedPreCheckError(e)
Example #9
def presubmission_check(fns):
    """Check to see if datafiles meet the critera for submission.
    """
    # Check that files exist
    missingfiles = [fn for fn in fns if not os.path.exists(fn)]
    if missingfiles:
        errormsg = "The following files cannot be found:\n"
        with open(os.path.expanduser('~/missing_files.txt'), 'w') as f:
            for missing in missingfiles:
                f.write(missing + '\n')
                errormsg += "\t%s\n" % missing
        raise MissingFilesError(errormsg)  # if files missing want to crash

    try:
        #for WAPP, check if coords are in table
        data = datafile.autogen_dataobj([fns[0]])
        print isinstance(data, datafile.PsrfitsData)
        if not isinstance(data, datafile.PsrfitsData):
            errormsg = "Data must be of PSRFITS format.\n"
            errormsg += "\tData type: %s\n" % type(data)
            raise FailedPreCheckError(errormsg)
    except (datafile.DataFileError, ValueError), e:
        raise FailedPreCheckError(e)
Example #10
def upload_results(job_submit):
    """
    Uploads Results for a given submit.

        Input:
            job_submit: A row from the job_submits table.
                Results from this job submission will be
                uploaded.

        Output:
            None
    """
    print "Attempting to upload results"
    print "\tJob ID: %d, Job submission ID: %d" % \
            (job_submit['job_id'], job_submit['id'])
    try:
        db = database.Database('common-copy', autocommit=False)
        # Prepare for upload
        dir = job_submit['output_dir']
        fitsfiles = get_fitsfiles(job_submit)

        # Upload results
        header_id = header.upload_header(fitsfiles, dbname=db)
        if not header_id:
            raise AssertionError("Header values in common DB " \
                                 "do not match values uploaded.")
        else:
            print "\tHeader uploaded and checked. Header ID: %d" % header_id

        version_number = get_version_number(dir)
        candidate_uploader.upload_candidates(header_id, \
                                             version_number, \
                                             dir, dbname=db)
        if not candidate_uploader.check_candidates(header_id, \
                                             version_number, \
                                             dir, dbname=db):
            raise AssertionError("Candidate values in common DB " \
                                 "do not match values uploaded.")
        else:
            print "\tCandidates uploaded and checked."

        data = datafile.autogen_dataobj(fitsfiles)
        diagnostic_uploader.upload_diagnostics(data.obs_name,
                                             data.beam_id, \
                                             data.obstype, \
                                             version_number, \
                                             dir, dbname=db)
        if not diagnostic_uploader.check_diagnostics(data.obs_name,
                                             data.beam_id, \
                                             data.obstype, \
                                             version_number, \
                                             dir, dbname=db):
            raise AssertionError("Diagnostic values in common DB " \
                                 "do not match values uploaded.")
        else:
            print "\tDiagnostics uploaded and checked."
    except (header.HeaderError, \
            candidate_uploader.PeriodicityCandidateError, \
            diagnostic_uploader.DiagnosticError):
        # Parsing error caught. Job attempt has failed!
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tJob ID: %d, Job submit ID: %d\n\n" % \
                        (job_submit['job_id'], job_submit['id'])
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        queries = []
        arglists = []
        queries.append("UPDATE job_submits " \
                       "SET status='upload_failed', " \
                            "details=?, " \
                            "updated_at=? " \
                       "WHERE id=?")
        arglists.append((errormsg, jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                            "details='Error while uploading results', " \
                            "updated_at=? " \
                       "WHERE id=?")
        arglists.append((jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.execute(queries, arglists)

        # Rolling back changes.
        db.rollback()
    except database.DatabaseConnectionError, e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write(
            "\tRolling back DB transaction and will re-try later.\n")

        # Rolling back changes.
        db.rollback()
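The failure branch above queues parameterized UPDATE statements ('?' placeholders plus matching argument tuples) instead of interpolating values into the SQL string. A minimal stand-alone sqlite3 sketch of the same pattern, with a hypothetical job_submits table (the real jobtracker module is not shown in these examples):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE job_submits (id INTEGER, status TEXT, "
             "details TEXT, updated_at TEXT)")
conn.execute("INSERT INTO job_submits VALUES (1, 'processed', '', '')")

query = "UPDATE job_submits " \
        "SET status='upload_failed', details=?, updated_at=? " \
        "WHERE id=?"
arglist = ("Error while checking results!", "2024-01-01 00:00:00", 1)
conn.execute(query, arglist)
conn.commit()
print conn.execute("SELECT status FROM job_submits WHERE id=1").fetchone()[0]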
Example #11
def upload_results(job_submit):
    """
    Uploads Results for a given submit.

        Input:
            job_submit: A row from the job_submits table.
                Results from this job submission will be
                uploaded.

        Output:
            None
    """
    print "Attempting to upload results"
    print "\tJob ID: %d, Job submission ID: %d" % \
            (job_submit['job_id'], job_submit['id'])
    if debug.UPLOAD:
        upload.upload_timing_summary = {}
        starttime = time.time()
    try:
        # Connect to the DB
        db = database.Database('SPAN512', autocommit=False)
        # Prepare for upload
        dir = job_submit['output_dir']
        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty for job_id=%d' %\
                       (dir, job_submit['job_id'])
            raise upload.UploadNonFatalError(errormsg)

        fitsfiles = get_fitsfiles(job_submit)
        data = datafile.autogen_dataobj(fitsfiles)
        version_number = get_version_number(dir)

        if debug.UPLOAD: 
            parsetime = time.time()
        # Upload results
        hdr = header.get_header(fitsfiles)
        
        print "\tHeader parsed."

        rat_inst_id_cache = ratings2.utils.RatingInstanceIDCache(dbname='nancay')
        cands, tempdir = candidates.get_candidates(version_number, dir, \
                                                   timestamp_mjd=data.timestamp_mjd, \
                                                   inst_cache=rat_inst_id_cache)
        print "\tPeriodicity candidates parsed."
        sp_cands = sp_candidates.get_spcandidates(version_number, dir, \
                                                  timestamp_mjd=data.timestamp_mjd)
        print "\tSingle pulse candidates parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)
        diags = diagnostics.get_diagnostics(data.obs_name, 
                                             data.beam_id, \
                                             data.obstype, \
                                             version_number, \
                                             dir)
        print "\tDiagnostics parsed."
        
        if debug.UPLOAD: 
            upload.upload_timing_summary['Parsing'] = \
                upload.upload_timing_summary.setdefault('Parsing', 0) + \
                (time.time()-parsetime)

        # Perform the upload
        header_id = hdr.upload(db)
        for d in diags:
            d.upload(db)
        print "\tDB upload completed and checked successfully. header_id=%d" % \
                    header_id


    except (upload.UploadNonFatalError):
        # Parsing error caught. Job attempt has failed!
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg  = "Error while checking results!\n"
        errormsg += "\tJob ID: %d, Job submit ID: %d\n\n" % \
                        (job_submit['job_id'], job_submit['id'])
        errormsg += "".join(exceptionmsgs)
        
        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        queries = []
        arglists = []
        queries.append("UPDATE job_submits " \
                       "SET status='upload_failed', " \
                            "details=\"%s\", " \
                            "updated_at='%s' " \
                       "WHERE id=%d"%(errormsg.replace("\"","\'"), jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                            "details='Error while uploading results', " \
                            "updated_at='%s' " \
                       "WHERE id=%d"%(jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.query(queries)
        
        # Rolling back changes. 
        db.rollback()
    except (database.DatabaseConnectionError, \
               upload.UploadDeadlockError, database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write("\tRolling back DB transaction and will re-try later.\n")
        
        # Rolling back changes. 
        db.rollback()
Example #12
def upload_results(job_submit):
    """
    Uploads Results for a given submit.

        Input:
            job_submit: A row from the job_submits table.
                Results from this job submission will be
                uploaded.

        Output:
            None
    """
    print "Attempting to upload results"
    print "\tJob ID: %d, Job submission ID: %d" % \
            (job_submit['job_id'], job_submit['id'])
    if debug.UPLOAD:
        upload.upload_timing_summary = {}
        starttime = time.time()
    try:
        # Connect to the DB
        db = database.Database('default', autocommit=False)
        # Prepare for upload
        dir = job_submit['output_dir']
        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty for job_id=%d' %\
                       (dir, job_submit['job_id'])
            raise upload.UploadNonFatalError(errormsg)

        fitsfiles = get_fitsfiles(job_submit)
        data = datafile.autogen_dataobj(fitsfiles)
        version_number = get_version_number(dir)

        if debug.UPLOAD:
            parsetime = time.time()
        # Upload results
        hdr = header.get_header(fitsfiles)

        print "\tHeader parsed."

        cands = candidates.get_candidates(version_number, dir)
        print "\tPeriodicity candidates parsed."
        sp_cands = sp_candidates.get_spcandidates(version_number, dir)
        print "\tSingle pulse candidates parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)
        diags = diagnostics.get_diagnostics(data.obs_name,
                                             data.beam_id, \
                                             data.obstype, \
                                             version_number, \
                                             dir)
        print "\tDiagnostics parsed."

        if debug.UPLOAD:
            upload.upload_timing_summary['Parsing'] = \
                upload.upload_timing_summary.setdefault('Parsing', 0) + \
                (time.time()-parsetime)

        # Perform the upload
        header_id = hdr.upload(db)
        for d in diags:
            d.upload(db)
        print "\tEverything uploaded and checked successfully. header_id=%d" % \
                    header_id
    except (upload.UploadNonFatalError):
        # Parsing error caught. Job attempt has failed!
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tJob ID: %d, Job submit ID: %d\n\n" % \
                        (job_submit['job_id'], job_submit['id'])
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        queries = []
        arglists = []
        queries.append("UPDATE job_submits " \
                       "SET status='upload_failed', " \
                            "details=?, " \
                            "updated_at=? " \
                       "WHERE id=?")
        arglists.append((errormsg, jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                            "details='Error while uploading results', " \
                            "updated_at=? " \
                       "WHERE id=?")
        arglists.append((jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.execute(queries, arglists)

        # Rolling back changes.
        db.rollback()
    except (database.DatabaseConnectionError, CornellFTP.CornellFTPTimeout,\
               upload.UploadDeadlockError, database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write(
            "\tRolling back DB transaction and will re-try later.\n")

        # Rolling back changes.
        db.rollback()
Example #13
def upload_results(dir):

    try:
        db = database.Database('default', autocommit=False)

        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty' % dir
            raise upload.UploadNonFatalError(errormsg)

        pdm_dir = os.path.join(
            dir, "zerodm") if config.upload.upload_zerodm_periodicity else dir
        sp_dir = os.path.join(
            dir, "zerodm") if config.upload.upload_zerodm_singlepulse else dir
        fitsfiles = glob.glob(os.path.join(dir, "*.fits"))
        data = datafile.autogen_dataobj(fitsfiles)
        version_number = JobUploader.get_version_number(dir)

        hdr = header.get_header(fitsfiles)
        print "\tHeader parsed."

        rat_inst_id_cache = ratings2.utils.RatingInstanceIDCache(
            dbname='common3')  #!!!!
        #rat_inst_id_cache = ratings2.utils.RatingInstanceIDCache(dbname='MichellePalfaCands')
        #cands, tempdir = candidates.get_candidates(version_number, dir)
        cands, tempdir = candidates.get_candidates(version_number, pdm_dir, \
                                                   timestamp_mjd=data.timestamp_mjd, \
                                                   inst_cache=rat_inst_id_cache)
        print "\tPeriodicity candidates parsed."
        #sp_cands = sp_candidates.get_spcandidates(version_number, dir)
        sp_cands, tempdir_sp = sp_candidates.get_spcandidates(version_number, sp_dir, \
                                                              timestamp_mjd=data.timestamp_mjd, \
                                                              inst_cache=rat_inst_id_cache)
        print "\tSingle pulse candidates parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)
        diags = diagnostics.get_diagnostics(data.obs_name,
                 data.beam_id, \
                 data.obstype, \
                 version_number, \
                 pdm_dir, sp_dir)
        print "\tDiagnostics parsed."

        header_id = hdr.upload(db)
        for d in diags:
            d.upload(db)
        print "\tEverything uploaded and checked successfully. header_id=%d" % \
             header_id

    except (upload.UploadNonFatalError):
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tResults Dir: %s\n\n" % dir
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        # Rolling back changes.
        db.rollback()

    except (database.DatabaseConnectionError, CornellFTP.CornellFTPTimeout,\
               upload.UploadDeadlockError, database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write(
            "\tRolling back DB transaction and will re-try later.\n")

        # Rolling back changes.
        db.rollback()
Example #14
def upload_results(job_submit):
    """
    Uploads Results for a given submit.

        Input:
            job_submit: A row from the job_submits table.
                Results from this job submission will be
                uploaded.

        Output:
            None
    """
    print "Attempting to upload results"

    print "\tJob ID: %d, Job submission ID: %d\n\tOutput Dir: %s" % \
            (job_submit['job_id'], job_submit['id'], job_submit['output_dir'])

    if debug.UPLOAD:
        upload.upload_timing_summary = {}
        starttime = time.time()
    try:
        # Connect to the DB
        db = database.Database('default', autocommit=False)

        # Prepare for upload
        dir = job_submit['output_dir']

        # NEW Beluga - Untar the tarball
        import tarfile
        to_keep = os.listdir(job_submit['output_dir'])
        tarball = glob.glob(job_submit['output_dir'] + '/*00.tgz')[0]
        tar = tarfile.open(tarball, 'r:gz')
        tar.extractall(path=job_submit['output_dir'])
        tar.close()

        all_files = os.listdir(job_submit['output_dir'])
        to_del = set(all_files) - set(to_keep)

        if config.upload.upload_zerodm_periodicity or config.upload.upload_zerodm_singlepulse:
            to_keep_zerodm = os.listdir(job_submit['output_dir'] + '/zerodm')
            tarball = glob.glob(job_submit['output_dir'] +
                                '/zerodm/*zerodm.tgz')[0]
            tar = tarfile.open(tarball, 'r:gz')
            tar.extractall(path=job_submit['output_dir'] + '/zerodm')
            tar.close()
            all_files_zerodm = os.listdir(job_submit['output_dir'] + '/zerodm')
            to_del_zerodm = set(all_files_zerodm) - set(to_keep_zerodm)

        pdm_dir = os.path.join(
            dir, "zerodm") if config.upload.upload_zerodm_periodicity else dir
        sp_dir = os.path.join(
            dir, "zerodm") if config.upload.upload_zerodm_singlepulse else dir

        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty for job_id=%d' %\
                       (dir, job_submit['job_id'])
            raise upload.UploadNonFatalError(errormsg)
        elif len(os.listdir(dir)) == 1 and os.listdir(dir)[0] == 'zerodm' \
                                       and not os.listdir(os.path.join(dir,os.listdir(dir)[0])):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty for job_id=%d' %\
                       (dir, job_submit['job_id'])
            raise upload.UploadNonFatalError(errormsg)

        fitsfiles = get_fitsfiles(job_submit)
        try:
            data = datafile.autogen_dataobj(fitsfiles)
        except ValueError:
            raise upload.UploadNonFatalError
        version_number = get_version_number(dir)

        if debug.UPLOAD:
            parsetime = time.time()
        # Upload results
        hdr = header.get_header(fitsfiles)

        print "\tHeader parsed."

        rat_inst_id_cache = ratings2.utils.RatingInstanceIDCache(
            dbname='common3')

        cands, tempdir = candidates.get_candidates(version_number, pdm_dir, \
                                                   timestamp_mjd=data.timestamp_mjd, \
                                                   inst_cache=rat_inst_id_cache)
        print "\tPeriodicity candidates parsed. (%d cands)" % len(cands)
        sp_cands, tempdir_sp = sp_candidates.get_spcandidates(version_number, sp_dir, \
                                                              timestamp_mjd=data.timestamp_mjd, \
                                                              inst_cache=rat_inst_id_cache)
        print "\tSingle pulse candidates parsed. (%d cands)" % len(sp_cands)

        diags = diagnostics.get_diagnostics(data.obs_name,
                                             data.beam_id, \
                                             data.obstype, \
                                             version_number, \
                                             pdm_dir, sp_dir)
        print "\tDiagnostics parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)

        if debug.UPLOAD:
            upload.upload_timing_summary['Parsing'] = \
                upload.upload_timing_summary.setdefault('Parsing', 0) + \
                (time.time()-parsetime)

        # Perform the upload
        header_id = hdr.upload(db)
        print "Header ID: ", header_id
        for d in diags:
            d.upload(db)
        print "\tDB upload completed and checked successfully. header_id=%d" % \
                    header_id

    except (upload.UploadNonFatalError):
        # Parsing error caught. Job attempt has failed!
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tJob ID: %d, Job submit ID: %d\n\n" % \
                        (job_submit['job_id'], job_submit['id'])
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        queries = []
        arglists = []
        queries.append("UPDATE job_submits " \
                       "SET status='upload_failed', " \
                            "details=?, " \
                            "updated_at=? " \
                       "WHERE id=?")
        arglists.append((errormsg, jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                            "details='Error while uploading results', " \
                            "updated_at=? " \
                       "WHERE id=?")
        arglists.append((jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.execute(queries, arglists)

        # Rolling back changes.
        db.rollback()
    except (database.DatabaseConnectionError, ratings2.database.DatabaseConnectionError,\
               CornellFTP.CornellFTPTimeout, upload.UploadDeadlockError,\
               database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write(
            "\tRolling back DB transaction and will re-try later.\n")

        # Rolling back changes.
        db.rollback()
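The "snapshot the directory, untar in place, and diff to find what was extracted" step at the top of the try block can be sketched stand-alone with the tarfile module. Everything below is built in a throwaway temporary directory so it actually runs; the file names are made up:

import os
import tarfile
import tempfile

outdir = tempfile.mkdtemp()                       # stand-in for job_submit['output_dir']
open(os.path.join(outdir, "search_params.txt"), "w").close()
tar = tarfile.open(os.path.join(outdir, "results_00.tgz"), "w:gz")
tar.add(os.path.join(outdir, "search_params.txt"), arcname="cands.txt")
tar.close()

to_keep = os.listdir(outdir)                      # snapshot before extraction
tar = tarfile.open(os.path.join(outdir, "results_00.tgz"), "r:gz")
tar.extractall(path=outdir)
tar.close()

all_files = os.listdir(outdir)
to_del = set(all_files) - set(to_keep)            # only the freshly extracted names
print sorted(to_del)                              # ['cands.txt']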
Example #15
 def __init__(self, datafns, *args, **kwargs):
     if isinstance(datafns, datafile.Data):
         self.data = datafns
     else:
         self.data = datafile.autogen_dataobj(datafns, *args, **kwargs)
Example #16
    def __init__(self, filenms, resultsdir, zerodm):

        # whether or not to zerodm timeseries
        self.zerodm = zerodm

        # which searches to perform
        self.search_pdm = True
        self.search_sp = True

        self.filenms = filenms
        self.filenmstr = ' '.join(self.filenms)
        self.basefilenm = os.path.split(filenms[0])[1].rstrip(".fits")

        # Where to dump all the results.
        # Put zerodm results in a separate folder so they don't overwrite
        # the non-zerodm results
        if self.zerodm:
            self.outputdir = os.path.join(resultsdir, 'zerodm')
            self.basefilenm = self.basefilenm + '_zerodm'
        else:
            self.outputdir = resultsdir

        # Read info from PSRFITS file
        data = datafile.autogen_dataobj(self.filenms)
        # Correct positions in data file headers

        spec_info = data.specinfo
        self.backend = spec_info.backend
        self.MJD = spec_info.start_MJD[0]
        self.ra_string = spec_info.ra_str
        self.dec_string = spec_info.dec_str
        self.orig_N = spec_info.N
        self.dt = spec_info.dt  # in sec
        self.BW = spec_info.BW
        self.orig_T = spec_info.T
        # Downsampling is catered to the number of samples per row.
        # self.N = psr_utils.choose_N(self.orig_N)
        self.N = self.orig_N
        self.T = self.N * self.dt
        self.nchan = spec_info.num_channels
        self.samp_per_row = spec_info.spectra_per_subint
        self.fctr = spec_info.fctr
        self.numrows = np.sum(spec_info.num_subint)
        print "JGM: RA:"
        print self.ra_string
        print "DEC: "
        print self.dec_string
        print "MJD:"
        print self.MJD
        print "T: "
        print self.T

        # Determine the average barycentric velocity of the observation
        self.baryv = get_baryv(self.ra_string,
                               self.dec_string,
                               self.MJD,
                               self.T,
                               obs="NC")
        # Figure out which host we are processing on
        self.hostname = socket.gethostname()
        # The fraction of the data recommended to be masked by rfifind
        self.masked_fraction = 0.0
        # The number of candidates folded
        self.num_cands_folded = 0
        # Initialize our timers
        self.rfifind_time = 0.0
        self.downsample_time = 0.0
        self.subbanding_time = 0.0
        self.dedispersing_time = 0.0
        self.FFT_time = 0.0
        self.lo_accelsearch_time = 0.0
        self.hi_accelsearch_time = 0.0
        self.singlepulse_time = 0.0
        self.sp_grouping_time = 0.0
        self.sifting_time = 0.0
        self.folding_time = 0.0
        self.zerodm_time = 0.0
        self.total_time = 0.0
        # Initialize some candidate counters
        self.num_sifted_cands = 0
        self.num_folded_cands = 0
        self.num_single_cands = 0
        # Set dedispersion plan
        self.set_DDplan()