def load_rawfile(fn, existdb=None):
    """Archive a raw data file and register it in the database.

    Inputs:
        fn: The name of the raw data file to load.
        existdb: An existing database connection object.
            (Default: establish a new DB connection)

    Output:
        rawfile_id: The ID of the newly registered raw file.
    """
    # Re-use the caller's connection when one is provided
    db = existdb or database.Database()
    db.connect()
    try:
        notify.print_info("Working on %s (%s)" % (fn, utils.give_utc_now()), 1)
        # Validate the file and extract its header parameters
        file_params = datafile.prep_file(fn)
        # Relocate the file into the archive
        archive_dir = datafile.get_archive_dir(fn, params=file_params)
        archived_fn = datafile.archive_file(fn, archive_dir)
        notify.print_info("%s moved to %s (%s)" %
                          (fn, archived_fn, utils.give_utc_now()), 1)
        # Record the archived file in the rawfiles table
        rawfile_id = populate_rawfiles_table(db, archived_fn, file_params)
        notify.print_info("Successfully loaded %s - rawfile_id=%d (%s)" %
                          (fn, rawfile_id, utils.give_utc_now()), 1)
    finally:
        if not existdb:
            # We opened this connection ourselves, so close it
            db.close()
    return rawfile_id
def main(args):
    """Entry point: create (or dry-run) a timfile from TOAs in the DB.

    Inputs:
        args: Parsed command-line arguments. Relevant attributes:
            dry_run, comments, from_file, on_conflict.

    Outputs:
        None

    Raises:
        errors.BadInputError: If no comment was supplied (and not a dry run).
        errors.FileError: If the argument-list file does not exist.
    """
    # Check to make sure user provided a comment
    if not args.dry_run and args.comments is None:
        raise errors.BadInputError("A comment describing the timfile is "
                                   "required!")
    if args.from_file is not None:
        # Re-create parser, so we can read arguments from file
        parser = utils.DefaultArguments()
        add_arguments(parser)
        if args.from_file == '-':
            argfile = sys.stdin
        else:
            if not os.path.exists(args.from_file):
                raise errors.FileError("The list of cmd line args (%s) "
                                       "does not exist." % args.from_file)
            argfile = open(args.from_file, 'r')
        try:
            for line in argfile:
                # Strip comments
                line = line.partition('#')[0].strip()
                if not line:
                    # Skip empty line
                    continue
                arglist = shlex.split(line.strip())
                args = parser.parse_args(arglist, namespace=args)
        finally:
            # BUGFIX: the file handle was previously leaked; close it here.
            # Never close sys.stdin, though.
            if argfile is not sys.stdin:
                argfile.close()
    # Establish a database connection
    db = database.Database()
    db.connect()
    trans = db.begin()
    try:
        cmdline = " ".join(sys.argv)
        toas = get_toas(args, db)
        if debug.is_on('TIMFILE'):
            # Check for / handle conflicts
            conflict_handler = CONFLICT_HANDLERS[args.on_conflict]
            toas = conflict_handler(toas)
            wt.write_timfile(toas, {'comments': args.comments,
                                    'user_id': cache.get_userid(),
                                    'add_time': "Not in DB!",
                                    'timfile_id': -1})
        elif args.dry_run:
            print_summary(toas, args.comments)
        else:
            conflict_handler = CONFLICT_HANDLERS[args.on_conflict]
            timfile_id = add_timfile_entry(toas, cmdline, args.comments,
                                           conflict_handler)
            notify.print_info("Created new timfile entry - timfile_id=%d (%s)" %
                              (timfile_id, utils.give_utc_now()), 1)
    except:
        # Any failure rolls back the DB transaction before re-raising
        db.rollback()
        db.close()
        raise
    else:
        db.commit()
        db.close()
def load_parfile(fn, is_master=False, existdb=None):
    """Archive a parfile and register it in the database.

    Inputs:
        fn: The name of the parfile to load.
        is_master: If True, set this parfile as the pulsar's master.
            (Default: False; forced True when the pulsar has no master yet)
        existdb: An existing database connection object.
            (Default: establish a new DB connection)

    Output:
        parfile_id: The ID of the newly registered parfile.
    """
    # Re-use the caller's connection when one is provided
    db = existdb or database.Database()
    db.connect()
    try:
        notify.print_info("Working on %s (%s)" % (fn, utils.give_utc_now()), 1)
        # Validate and parse the parfile
        par_params = general.prep_parfile(fn)
        # Archive it under <data_archive_location>/parfiles/<pulsar name>
        archive_dir = os.path.join(config.cfg.data_archive_location,
                                   'parfiles', par_params['name'])
        archived_fn = datafile.archive_file(fn, archive_dir)
        # Record the archived parfile in the parfiles table
        parfile_id = populate_parfiles_table(db, archived_fn, par_params)
        masterpar_id, parfn = general.get_master_parfile(par_params['pulsar_id'])
        if masterpar_id is None:
            # If this is the only parfile for this pulsar
            # make sure it will be set as the master
            is_master = True
        if is_master:
            notify.print_info("Setting %s as master parfile (%s)" %
                              (archived_fn, utils.give_utc_now()), 1)
            general.set_as_master_parfile(parfile_id, db)
        notify.print_info("Finished with %s - parfile_id=%d (%s)" %
                          (fn, parfile_id, utils.give_utc_now()), 1)
    finally:
        if not existdb:
            # We opened this connection ourselves, so close it
            db.close()
    return parfile_id
def pipeline_core(manip, rawfile_id, parfile_id, template_id, existdb=None): """Run a prepared manipulator function on the raw file with ID 'rawfile_id'. Then generate TOAs and load them into the DB. Inputs: manip: A manipulator instance. rawfile_id: The ID number of the raw data file to generate TOAs from. parfile_id: The ID number of the parfile to install into the raw file. If this is None, then no new parfile will be installed. template_id: The ID number of the template to use. existdb: An existing database connection object. (Default: establish a new DB connection) Outputs: None """ # Initialise these so the 'finally' clause doesn't throw an exception of # it's own if an error is caught before these filenames are determined manipfn = '' adjustfn = '' #Start pipeline print "###################################################" print "Starting to toast data" print "Start time: %s" % utils.give_utc_now() print "###################################################" db = existdb or database.Database() db.connect() try: trans = db.begin() # Open a transaction # Get version ID version_id = version.get_version_id(db) # Get raw data from rawfile_id and verify MD5SUM rawfile = rawfiles_general.get_rawfile_from_id(rawfile_id, db, verify_md5=True) # Manipulate the raw file notify.print_info("Manipulating file", 1) # Create a temporary file for the adjusted results tmpfile, adjustfn = tempfile.mkstemp(prefix='toaster_tmp', suffix='_newephem.ar', dir=config.cfg.base_tmp_dir) os.close(tmpfile) shutil.copy(rawfile, adjustfn) if parfile_id is not None: # Re-install ephemeris # Get ephemeris from parfile_id and verify MD5SUM parfile = parfiles_general.get_parfile_from_id(parfile_id, db, verify_md5=True) cmd = ["pam", "-m", "-E", parfile, "--update_dm", adjustfn] utils.execute(cmd) # Create a temporary file for the manipulated results tmpfile, manipfn = tempfile.mkstemp(prefix='toaster_tmp', suffix='_manip.ar', dir=config.cfg.base_tmp_dir) os.close(tmpfile) # Run the manipulator 
manip.run([adjustfn], manipfn, tmpdir=config.cfg.base_tmp_dir) # Get template from template_id and verify MD5SUM template = templates_general.get_template_from_id(template_id, db, verify_md5=True) # Create a temporary file for the toa diagnostic plots tmpfile, toadiagfn = tempfile.mkstemp(prefix='toaster_tmp', suffix='_TOAdiag.png', dir=config.cfg.base_tmp_dir) os.close(tmpfile) # Generate TOAs with pat notify.print_info("Computing TOAs", 0) cmd = ["pat", "-f", "tempo2", "-A", config.cfg.toa_fitting_method, "-s", template, "-C", "gof length bw nbin nchan nsubint", "-t", "-K", "%s/PNG" % toadiagfn, manipfn] patout, paterr = utils.execute(cmd) # Check version ID is still the same. Just in case. new_version_id = version.get_version_id(db) if version_id != new_version_id: raise errors.ToasterError("Weird... Version ID at the start " "of processing (%s) is different " "from at the end (%d)!" % (version_id, new_version_id)) # Read some header values from the manipulated archive hdr = datafile.get_header_vals(manipfn, ['nchan', 'nsub', 'name', 'intmjd', 'fracmjd']) hdr['secs'] = int(hdr['fracmjd']*24*3600+0.5) # Add 0.5 so result is # rounded to nearest int # Fill pipeline table cmdline = " ".join(sys.argv) process_id = fill_process_table(version_id, rawfile_id, parfile_id, template_id, manip, hdr['nchan'], hdr['nsub'], db) # Parse pat output toainfo = toas_general.parse_pat_output(patout) rawfile_info = rawfiles_general.get_rawfile_info(rawfile_id) # Insert TOAs into DB for ti in toainfo: ti['process_id'] = process_id ti['template_id'] = template_id ti['rawfile_id'] = rawfile_id ti['pulsar_id'] = rawfile_info['pulsar_id'] ti['obssystem_id'] = rawfile_info['obssystem_id'] toa_ids = load_toa.load_toas(toainfo, db) # Create processing diagnostics notify.print_info("Generating processing diagnostics", 1) diagdir = make_proc_diagnostics_dir(manipfn, process_id) suffix = "_procid%d.%s" % (process_id, manip.name) diags = [] for diagname in 
config.cfg.default_rawfile_diagnostics: diagcls = diagnostics.get_diagnostic_class(diagname) try: diags.append(diagcls(manipfn)) except errors.DiagnosticNotApplicable, e: notify.print_info("Diagnostic isn't applicable: %s. " "Skipping..." % str(e), 1) if diags: # Load processing diagnostics diagnose_processing.insert_processing_diagnostics(process_id, diags, diagdir, suffix, existdb=db) # Copy TOA diagnostic plots and register them into DB basefn = "%(name)s_%(intmjd)05d_%(secs)05d" % hdr values = [] for ii, toa_id in enumerate(toa_ids): outfn = basefn+"_procid%d.TOA%d.png" % (process_id, ii+1) if ii == 0: fn = toadiagfn else: fn = "%s_%d" % (toadiagfn, ii+1) shutil.move(fn, os.path.join(diagdir, outfn)) ins = db.toa_diagnostic_plots.insert() values.append({'toa_id': toa_id, 'filename': outfn, 'filepath': diagdir, 'plot_type': 'Prof-Temp Resids'}) result = db.execute(ins, values) result.close() notify.print_info("Inserted %d TOA diagnostic plots." % len(toa_ids), 2)
"Rolling back DB transaction!\n", 'error')) raise else: # No exceptions encountered # Commit database transaction db.commit() finally: # Clean up for fn in [adjustfn, manipfn]: if os.path.isfile(fn): os.remove(fn) # End pipeline print "###################################################" print random.choice(SUCCESSMSGS) print "End time: %s" % utils.give_utc_now() print "###################################################" # Close DB connection if not existdb: db.close() def reduce_rawfile(args, leftover_args=[], existdb=None): if args.rawfile is not None: notify.print_info("Loading rawfile %s" % args.rawfile, 1) args.rawfile_id = load_rawfile.load_rawfile(args.rawfile, existdb) elif args.rawfile_id is None: # Neither a rawfile, nor a rawfile_id was provided raise errors.BadInputError("Either a rawfile, or a rawfile_id " "_must_ be provided!")
def plot_rawfiles(rawfiles):
    """Produce a multi-panel summary figure of the given raw files.

    Panels: freq-vs-MJD scatter, cumulative archive-date curve, pie charts
    by telescope and observing band, and per-pulsar bar charts of file
    counts and total observation hours.

    Inputs:
        rawfiles: An iterable of raw-file records; each must support
            dict-style access to the keys read below ('filesize', 'length',
            'mjd', 'bw', 'freq', 'obssystem_id', 'add_time',
            'telescope_name', 'band_descriptor', 'pulsar_name').

    Outputs:
        None (draws onto a new matplotlib figure).
    """
    import matplotlib.pyplot as plt
    import matplotlib
    # Set default parameters
    plt.rc('xtick', labelsize='x-small')
    plt.rc('ytick', labelsize='x-small')
    plt.rc('axes', labelsize='small')
    plt.rc('font', family='sans-serif')
    fig = plt.figure(figsize=(10,8))
    titletext = plt.figtext(0.025, 0.975, "Raw file Summary",
                            size='xx-large', ha='left', va='top')
    db = database.Database()  # Get database info, but don't connect
    dbtext = plt.figtext(0.025, 0.025, "Database (%s): %s" %
                         (db.engine.name, db.engine.url.database),
                         size='x-small', ha='left', va='bottom')
    timetext = plt.figtext(0.0275, 0.9425, utils.give_utc_now(),
                           size='xx-small', ha='left', va='top')
    # Compute data for plotting
    numfiles = 0
    size = 0      # total bytes on disk
    length = 0    # total integration time, in seconds
    mjds = np.empty(len(rawfiles))
    lengths = np.empty(len(rawfiles))
    bws = np.empty(len(rawfiles))
    freqs = np.empty(len(rawfiles))
    obsids = np.empty(len(rawfiles))
    add_times = []
    telescopes = {}         # telescope name -> file count
    band_descriptors = {}   # band name -> file count
    pulsars = {}            # pulsar name -> (file count, hours observed)
    for ii, rawfile in enumerate(rawfiles):
        numfiles += 1
        size += rawfile['filesize']
        secs = rawfile['length']
        length += secs
        length_day = secs/86400.0
        # Centre each observation's MJD on the middle of the integration
        mjds[ii] = rawfile['mjd']+length_day/2.0
        lengths[ii] = length_day
        bws[ii] = rawfile['bw']
        freqs[ii] = rawfile['freq']
        obsids[ii] = rawfile['obssystem_id']
        add_times.append(rawfile['add_time'])
        tname = rawfile['telescope_name']
        telescopes[tname] = telescopes.get(tname, 0) + 1
        band = rawfile['band_descriptor']
        band_descriptors[band] = band_descriptors.get(band, 0) + 1
        psr = rawfile['pulsar_name']
        psrcnt, psrhr = pulsars.get(psr, (0, 0))
        pulsars[psr] = (psrcnt+1, psrhr+secs/3600.0)
    # Append "now" so the cumulative curve extends to the present
    add_times = np.asarray(sorted(add_times+[datetime.datetime.utcnow()]))
    plt.figtext(0.05, 0.9, "Total number of files archived: %d" % numfiles,
                ha='left', size='medium')
    # Scale total integration time into a human-readable unit
    # (thresholds are popped in lockstep with the unit names)
    unit = 's'
    thresh = 60.0
    other_thresh = [365.0, 24.0, 60.0]
    other_units = ['years', 'days', 'hr', 'min']
    while length >= thresh and len(other_units) > 1:
        length /= thresh
        thresh = other_thresh.pop()
        unit = other_units.pop()
    plt.figtext(0.05, 0.875, "Total integration time: %.2g %s" %
                (length, unit),
                ha='left', size='medium')
    # Scale total disk usage into a human-readable unit
    unit = 'bytes'
    other_units = ['TB', 'GB', 'MB', 'KB']
    while size >= 1024.0 and len(other_units) > 1:
        size /= 1024.0
        unit = other_units.pop()
    plt.figtext(0.05, 0.85, "Total disk space used: %.2f %s" %
                (size, unit),
                ha='left', size='medium')
    #cnorm = matplotlib.colors.Normalize(obsids.min(), obsids.max())
    #cmap = plt.get_cmap('gist_rainbow')
    # Frequency-vs-MJD scatter, coloured by observing system
    ax = plt.axes((0.1, 0.375, 0.45, 0.15))
    #plt.errorbar(mjds, freqs, xerr=lengths/2.0, yerr=bws/2.0, \
    #             ls='None', ecolor='k')
    #for ii in xrange(len(rows)):
    #    ellipse = matplotlib.patches.Ellipse((mjds[ii], freqs[ii]), \
    #                                         width=lengths[ii], height=bws[ii], \
    #                                         ec='none', fc=cmap(cnorm(obsids[ii])), \
    #                                         alpha=0.9)
    #    ax.add_patch(ellipse)
    plt.scatter(mjds, freqs, marker='o', alpha=0.7, c=obsids)
    mjd_range = mjds.ptp()
    plt.xlim(mjds.min()-0.1*mjd_range, mjds.max()+0.1*mjd_range)
    freq_range = freqs.ptp()
    plt.ylim(freqs.min()-0.1*freq_range, freqs.max()+0.1*freq_range)
    plt.xlabel("MJD")
    plt.ylabel("Freq (MHz)")
    # Plain (non-scientific, no-offset) tick labels on both axes
    fmt = matplotlib.ticker.ScalarFormatter(useOffset=False)
    fmt.set_scientific(False)
    ax.xaxis.set_major_formatter(fmt)
    ax.yaxis.set_major_formatter(fmt)
    # Cumulative count of files vs. date archived
    ax = plt.axes((0.1, 0.15, 0.45, 0.15))
    plt.plot(add_times, np.arange(len(add_times)), 'k-', drawstyle='steps')
    plt.xlabel("Add date")
    plt.ylabel("Num. files\narchived")
    ax.fmt_xdata = matplotlib.dates.DateFormatter("%Y-%m-%d %H:%M")
    fig.autofmt_xdate()
    loc = matplotlib.dates.AutoDateLocator()
    fmt = matplotlib.dates.AutoDateFormatter(loc)
    #fmt.scaled[1./24.] = '%a, %I:%M%p'
    ax.xaxis.set_major_locator(loc)
    ax.xaxis.set_major_formatter(fmt)
    plt.ylim(0, len(add_times)*1.1)
    plt.title("Total number of raw files archived", size='small')
    plt.xticks(rotation=30, ha='right')
    # Make pie charts
    # Break down by telescope
    tnames = []
    labels = []
    counts = []
    for t, cnt in telescopes.iteritems():
        labels.append("%s: %d" % (t, cnt))
        tnames.append(t)
        counts.append(cnt)
    ax = plt.axes((0.35, 0.55, 0.25, 0.25))
    plt.axis('equal')
    #tel_pie = plt.pie(counts, labels=labels, colors=colours, autopct='%.1f %%')
    tel_pie = plt.pie(counts, labels=labels, autopct='%.1f %%')
    plt.setp(tel_pie[1]+tel_pie[2], size='xx-small')
    plt.title("Num. raw files by telescope", size='small')
    # Break down by observing band
    bands = []
    counts = []
    labels = []
    for b, cnt in band_descriptors.iteritems():
        bands.append(b)
        counts.append(cnt)
        labels.append("%s: %d" % (b, cnt))
    ax = plt.axes((0.05, 0.55, 0.25, 0.25))
    plt.axis('equal')
    #tel_pie = plt.pie(counts, labels=labels, colors=colours, autopct='%.1f %%')
    band_pie = plt.pie(counts, labels=labels, autopct='%.1f %%')
    plt.setp(band_pie[1]+band_pie[2], size='xx-small')
    plt.title("Num. raw files by observing band", size='small')
    # Per-pulsar horizontal bar charts (counts and hours share a y-axis)
    psrs = []
    counts = []
    hours = []
    for pp in sorted(pulsars.keys(), reverse=True):
        cnt, hr = pulsars[pp]
        psrs.append(pp)
        counts.append(cnt)
        hours.append(hr)
    ipsr = np.arange(len(psrs))
    psrtime_ax = plt.axes((0.83, 0.05, 0.12, 0.9))
    psrtime_bar = plt.barh(ipsr, hours,
                           align='center', lw=0, fc='#B22222',
                           alpha=0.7, ec='k')
    plt.xlim(0, np.max(hours)*1.1)
    plt.xlabel("Hours")
    # Hide tick labels here; pulsar names are drawn on the counts panel
    plt.setp(psrtime_ax.yaxis.get_ticklabels(), visible=False)
    plt.title("Obs. time", size='small')
    psrcnt_ax = plt.axes((0.7, 0.05, 0.12, 0.9), sharey=psrtime_ax)
    psrcnt_bar = plt.barh(ipsr, counts,
                          align='center', lw=0, fc='#008080',
                          alpha=0.7, ec='k')
    plt.xlim(0, np.max(counts)*1.1)
    plt.ylim(-0.5,len(psrs)-0.5)
    plt.yticks(ipsr, psrs, rotation=0,
               va='center', ha='right')
    plt.title("# of archives", size='small')
def archive_file(toarchive, destdir): if not config.cfg.archive: # Configured to not archive files warnings.warn("Configurations are set to _not_ archive files. " "Doing nothing...", errors.ToasterWarning) return toarchive srcdir, fn = os.path.split(toarchive) dest = os.path.join(destdir, fn) # Check if the directory exists # If not, create it if not os.path.isdir(destdir): # Set permissions (in octal) to read/write/execute for user and group notify.print_info("Making directory: %s" % destdir, 2) os.makedirs(destdir, 0770) # Check that our file doesn't already exist in 'dest' # If it does exist do nothing but print a warning if not os.path.isfile(dest): # Copy file to 'dest' notify.print_info("Moving %s to %s" % (toarchive, dest), 2) shutil.copy2(toarchive, dest) # Check that file copied successfully srcmd5 = get_md5sum(toarchive) srcsize = os.path.getsize(toarchive) destmd5 = get_md5sum(dest) destsize = os.path.getsize(dest) if (srcmd5 == destmd5) and (srcsize == destsize): if config.cfg.move_on_archive: os.remove(toarchive) notify.print_info("File (%s) successfully moved to %s." % (toarchive, dest), 2) else: notify.print_info("File (%s) successfully copied to %s." % (toarchive, dest), 2) else: raise errors.ArchivingError("File copy failed! (Source MD5: %s, " "Dest MD5: %s; Source size: %d, Dest size: %d)" % (srcmd5, destmd5, srcsize, destmd5)) elif os.path.abspath(destdir) == os.path.abspath(srcdir): # File is already located in its destination # Do nothing warnings.warn("Source file %s is already in the archive (and in " "the correct place). Doing nothing..." 
% toarchive, errors.ToasterWarning) pass else: # Another file with the same name is the destination directory # Compare the files srcmd5 = get_md5sum(toarchive) srcsize = os.path.getsize(toarchive) destmd5 = get_md5sum(dest) destsize = os.path.getsize(dest) if (srcmd5 == destmd5) and (srcsize == destsize): # Files are the same, so remove src as if we moved it # (taking credit for work that was already done...) warnings.warn("Another version of this file (%s), with " "the same size (%d bytes) and the same " "MD5 (%s) is already in the archive. " "Doing nothing..." % (toarchive, destsize, destmd5), errors.ToasterWarning) else: # The files are not the same! This is not good. # Raise an exception. raise errors.ArchivingError("File (%s) cannot be archived. " "There is already a file archived by that name " "in the appropriate archive location (%s), but " "the two files are _not_ identical. " "(source: MD5=%s, size=%d bytes; dest: MD5=%s, " "size=%d bytes)" % (toarchive, dest, srcmd5, srcsize, destmd5, destsize)) # Change permissions so the file can no longer be written to notify.print_info("Changing permissions of archived file to 440", 2) os.chmod(dest, 0440) # "0440" is an integer in base 8. It works # the same way 440 does for chmod on cmdline notify.print_info("%s archived to %s (%s)" % (toarchive, dest, utils.give_utc_now()), 1) return dest