def extract_zeroth_group(self, filename): """Extracts the 0th group of a fits image and outputs it into a new fits file. Parameters ---------- filename : str The fits file from which the 0th group will be extracted. Returns ------- output_filename : str The full path to the output file. """ output_filename = os.path.join( self.data_dir, os.path.basename(filename).replace('.fits', '_0thgroup.fits')) # Write a new fits file containing the primary and science # headers from the input file, as well as the 0th group # data of the first integration hdu = fits.open(filename) new_hdu = fits.HDUList([hdu['PRIMARY'], hdu['SCI']]) new_hdu['SCI'].data = hdu['SCI'].data[0:1, 0:1, :, :] new_hdu.writeto(output_filename, overwrite=True) hdu.close() new_hdu.close() set_permissions(output_filename) logging.info('\t{} created'.format(output_filename)) return output_filename
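# Illustrative sketch (not part of the monitor): the [0:1, 0:1, :, :] slice used
# above keeps the 4-D (integration, group, y, x) layout expected downstream while
# retaining only the 0th group of the first integration. A minimal numpy
# demonstration with a synthetic ramp:
import numpy as np

ramp = np.zeros((2, 5, 32, 32))        # 2 integrations, 5 groups, 32x32 pixels
zeroth_group = ramp[0:1, 0:1, :, :]    # slice, rather than index, to keep 4 dimensions
assert zeroth_group.shape == (1, 1, 32, 32)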
def configure_logging(module, production_mode=True, path='./'):
    """Configure the log file with a standard logging format.

    Parameters
    ----------
    module : str
        The name of the module being logged.
    production_mode : bool
        Whether or not the output should be written to the production
        environment.
    path : str
        Where to write the log if a user-supplied path is given; defaults
        to the working directory.
    """

    # Determine log file location
    if production_mode:
        log_file = make_log_file(module)
    else:
        log_file = make_log_file(module, production_mode=False, path=path)

    global LOG_FILE_LOC
    global PRODUCTION_BOOL
    LOG_FILE_LOC = log_file
    PRODUCTION_BOOL = production_mode

    # Create the log file and set the permissions
    logging.basicConfig(filename=log_file,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S %p',
                        level=logging.INFO)
    set_permissions(log_file)
def configure_logging(module):
    """Configure the log file with a standard logging format.

    Parameters
    ----------
    module : str
        The name of the module being logged.

    Returns
    -------
    log_file : str
        The path to the file where the log is written.
    """

    # Determine log file location
    log_file = make_log_file(module)

    # Make sure no other root handlers exist before configuring the logger
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)

    # Create the log file and set the permissions
    logging.basicConfig(filename=log_file,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S %p',
                        level=logging.INFO)
    print('Log file initialized to {}'.format(log_file))
    set_permissions(log_file)

    return log_file
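# Illustrative sketch (standard library only) of why the handler-removal loop
# above matters: logging.basicConfig() silently does nothing if the root logger
# already has handlers, so a second configuration is ignored unless the
# existing handlers are removed first.
import logging
import tempfile

first_log = tempfile.mkstemp(suffix='.log')[1]
second_log = tempfile.mkstemp(suffix='.log')[1]

logging.basicConfig(filename=first_log, level=logging.INFO)

# Without clearing the existing handlers, this second call would be ignored
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
logging.basicConfig(filename=second_log, level=logging.INFO)

logging.info('this record lands in the second log file')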
def save_image(self, fname, thumbnail=False):
    """Save an image in the requested output format and set the
    appropriate permissions.

    Parameters
    ----------
    fname : str
        Output filename
    thumbnail : bool
        True if saving a thumbnail image, False for the full
        preview image.
    """

    plt.savefig(fname, bbox_inches='tight', pad_inches=0)
    permissions.set_permissions(fname)

    # If the image is a thumbnail, rename to '.thumb'
    if thumbnail:
        thumb_fname = fname.replace('.jpg', '.thumb')
        os.rename(fname, thumb_fname)
        logging.info('\tSaved image to {}'.format(thumb_fname))
    else:
        logging.info('\tSaved image to {}'.format(fname))
def test_file_group(test_file): """Create a file with the standard permissions ``('-rw-r--r--')`` and default group. Modify the group and set the default permissions defined in ``permissions.py``. Assert that both group and permissions were set correctly. Parameters ---------- test_file : str Path of file used for testing """ # Get owner and group on the current system. owner = get_owner_string(test_file) group = get_group_string(test_file) # attempt to retrieve a group name different from default group_index = 0 test_group = grp.getgrgid(os.getgroups()[group_index]).gr_name set_permissions(test_file, group=test_group, owner=owner) assert has_permissions(test_file, group=test_group, owner=owner) # return to default group set_permissions(test_file, owner=owner, group=group) assert has_permissions(test_file, owner=owner, group=group)
def run_early_pipeline(self, filename, odd_even_rows=False,
                       odd_even_columns=True, use_side_ref_pixels=True,
                       group_scale=False):
    """Runs the early steps of the jwst pipeline (dq_init, saturation,
    superbias, refpix) on uncalibrated files and outputs the result.

    Parameters
    ----------
    filename : str
        File on which to run the pipeline steps
    odd_even_rows : bool
        Option to treat odd and even rows separately during refpix step
    odd_even_columns : bool
        Option to treat odd and even columns separately during refpix step
    use_side_ref_pixels : bool
        Option to perform the side refpix correction during refpix step
    group_scale : bool
        Option to rescale pixel values to correct for instances where
        on-board frame averaging did not result in the proper values

    Returns
    -------
    output_filename : str
        The full path to the calibrated file
    """

    output_filename = filename.replace('_uncal', '').replace(
        '.fits', '_superbias_refpix.fits')

    if not os.path.isfile(output_filename):
        # Run the group_scale and dq_init steps on the input file
        if group_scale:
            model = GroupScaleStep.call(filename)
            model = DQInitStep.call(model)
        else:
            model = DQInitStep.call(filename)

        # Run the saturation and superbias steps
        model = SaturationStep.call(model)
        model = SuperBiasStep.call(model)

        # Run the refpix step and save the output
        model = RefPixStep.call(model,
                                odd_even_rows=odd_even_rows,
                                odd_even_columns=odd_even_columns,
                                use_side_ref_pixels=use_side_ref_pixels)
        model.save(output_filename)
        set_permissions(output_filename)
    else:
        logging.info('\t{} already exists'.format(output_filename))

    return output_filename
def save_mean_slope_image(self, slope_img, stdev_img, files): """Save the mean slope image and associated stdev image to a file Parameters ---------- slope_img : numpy.ndarray 2D array containing the mean slope image stdev_img : numpy.ndarray 2D array containing the stdev image associated with the mean slope image. files : list List of input files used to construct the mean slope image Returns ------- output_filename : str Name of fits file to save mean and stdev images within """ output_filename = '{}_{}_{}_to_{}_mean_slope_image.fits'.format( self.instrument.lower(), self.aperture.lower(), self.query_start, self.query_end) mean_slope_dir = os.path.join(get_config()['outputs'], 'dark_monitor', 'mean_slope_images') ensure_dir_exists(mean_slope_dir) output_filename = os.path.join(mean_slope_dir, output_filename) logging.info("Name of mean slope image: {}".format(output_filename)) primary_hdu = fits.PrimaryHDU() primary_hdu.header['INSTRUME'] = (self.instrument, 'JWST instrument') primary_hdu.header['APERTURE'] = (self.aperture, 'Aperture name') primary_hdu.header['QRY_STRT'] = (self.query_start, 'MAST Query start time (MJD)') primary_hdu.header['QRY_END'] = (self.query_end, 'MAST Query end time (MJD)') files_string = 'FILES USED: ' for filename in files: files_string += '{}, '.format(filename) primary_hdu.header.add_history(files_string) mean_img_hdu = fits.ImageHDU(slope_img, name='MEAN') stdev_img_hdu = fits.ImageHDU(stdev_img, name='STDEV') hdu_list = fits.HDUList([primary_hdu, mean_img_hdu, stdev_img_hdu]) hdu_list.writeto(output_filename, overwrite=True) set_permissions(output_filename) return output_filename
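# Illustrative sketch of the FITS layout written above (MEAN and STDEV image
# extensions behind a primary header), using synthetic arrays; the output
# filename and header values here are hypothetical.
import numpy as np
from astropy.io import fits

mean_image = np.ones((10, 10))
stdev_image = np.full((10, 10), 0.1)

primary_hdu = fits.PrimaryHDU()
primary_hdu.header['INSTRUME'] = ('NIRCAM', 'JWST instrument')
primary_hdu.header.add_history('FILES USED: example_dark_1.fits, example_dark_2.fits')

hdu_list = fits.HDUList([primary_hdu,
                         fits.ImageHDU(mean_image, name='MEAN'),
                         fits.ImageHDU(stdev_image, name='STDEV')])
hdu_list.writeto('example_mean_slope_image.fits', overwrite=True)

# The extensions can then be retrieved by name
with fits.open('example_mean_slope_image.fits') as hdulist:
    recovered_mean = hdulist['MEAN'].data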
def image_to_png(self, image, outname):
    """Outputs an image array into a png file.

    Parameters
    ----------
    image : numpy.ndarray
        2D image array
    outname : str
        The name given to the output png file

    Returns
    -------
    output_filename : str
        The full path to the output png file
    """

    output_filename = os.path.join(self.data_dir, '{}.png'.format(outname))

    if not os.path.isfile(output_filename):
        # Get image scale limits
        z = ZScaleInterval()
        vmin, vmax = z.get_limits(image)

        # Plot the image
        plt.figure(figsize=(12, 12))
        ax = plt.gca()
        im = ax.imshow(image, cmap='gray', origin='lower', vmin=vmin, vmax=vmax)
        ax.set_title('{}'.format(outname))

        # Make the colorbar
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.4)
        cbar = plt.colorbar(im, cax=cax)
        cbar.set_label('Signal [DN]')

        plt.savefig(output_filename, bbox_inches='tight', dpi=200)
        set_permissions(output_filename)
        logging.info('\t{} created'.format(output_filename))
    else:
        logging.info('\t{} already exists'.format(output_filename))

    return output_filename
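# Illustrative sketch (astropy + numpy only): ZScaleInterval derives display
# cut levels from the data itself, so the png scaling above adapts to each
# image and is not stretched by a few extreme pixels.
import numpy as np
from astropy.visualization import ZScaleInterval

rng = np.random.default_rng(seed=0)
image = rng.normal(loc=1000., scale=5., size=(256, 256))
image[100, 100] = 1.e6                 # a hot pixel should not stretch the scale

vmin, vmax = ZScaleInterval().get_limits(image)
print('Display limits: {:.1f} to {:.1f}'.format(vmin, vmax))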
def test_file_permissions(test_file): """Create a file with the standard permissions ``('-rw-r--r--')``. Set the default permissions defined in ``permissions.py``. Assert that these were set correctly. Parameters ---------- test_file : str Path of file used for testing """ # Get owner and group on the current system. owner = get_owner_string(test_file) group = get_group_string(test_file) set_permissions(test_file, owner=owner, group=group) assert has_permissions(test_file, owner=owner, group=group)
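# Illustrative sketch (standard library only): the '-rw-r--r--' string used in
# these tests corresponds to mode 0o644, which can be verified directly with
# the stat module.
import os
import stat
import tempfile

_, path = tempfile.mkstemp()
os.chmod(path, 0o644)

mode = os.stat(path).st_mode
assert stat.filemode(mode) == '-rw-r--r--'
os.remove(path)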
def image_to_png(self, image, outname):
    """Outputs an image array into a png file.

    Parameters
    ----------
    image : numpy.ndarray
        2D image array.
    outname : str
        The name given to the output png file.

    Returns
    -------
    output_filename : str
        The full path to the output png file.
    """

    output_filename = os.path.join(self.data_dir, '{}.png'.format(outname))

    # Get image scale limits
    zscale = ZScaleInterval()
    vmin, vmax = zscale.get_limits(image)

    # Plot the image
    plt.figure(figsize=(12, 12))
    im = plt.imshow(image, cmap='gray', origin='lower', vmin=vmin, vmax=vmax)
    plt.colorbar(im, label='Readnoise Difference (most recent dark - reffile) [DN]')
    plt.title('{}'.format(outname))

    # Save the figure; savefig overwrites existing files by default, so no
    # overwrite flag is needed
    plt.savefig(output_filename, bbox_inches='tight', dpi=200)
    set_permissions(output_filename)
    logging.info('\t{} created'.format(output_filename))

    return output_filename
def test_directory_permissions(test_directory):
    """Create a directory with the standard permissions ``('-rw-r--r--')``.

    Set the default permissions defined in ``permissions.py``. Assert
    that these were set correctly.

    Parameters
    ----------
    test_directory : str
        Path of directory used for testing
    """

    # Get owner and group on the current system. This allows the tests to
    # run independently of the user.
    owner = get_owner_string(test_directory)
    group = get_group_string(test_directory)

    set_permissions(test_directory, owner=owner, group=group)
    assert has_permissions(test_directory, owner=owner, group=group)
def copy_files(files, out_dir):
    """Copy given files to a given directory. Only try to copy a file
    if it is not already present in the output directory.

    Parameters
    ----------
    files : list
        List of files to be copied
    out_dir : str
        Destination directory

    Returns
    -------
    success : list
        Files successfully copied (or that already existed in out_dir)
    failed : list
        Files that were not copied
    """

    # Copy files if they do not already exist
    success = []
    failed = []
    for input_file in files:
        input_new_path = os.path.join(out_dir, os.path.basename(input_file))
        if os.path.isfile(input_new_path):
            success.append(input_new_path)
        else:
            try:
                shutil.copy2(input_file, out_dir)
                success.append(input_new_path)
                permissions.set_permissions(input_new_path)
            except Exception:
                failed.append(input_file)

    return success, failed
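# Illustrative, standalone sketch of the copy-if-missing idiom used above,
# using only the standard library and temporary directories so nothing in the
# real filesystem is touched.
import os
import shutil
import tempfile

src_dir = tempfile.mkdtemp()
dest_dir = tempfile.mkdtemp()
source_file = os.path.join(src_dir, 'example.txt')
with open(source_file, 'w') as f:
    f.write('placeholder contents')

destination = os.path.join(dest_dir, os.path.basename(source_file))
if not os.path.isfile(destination):
    shutil.copy2(source_file, dest_dir)    # copy2 preserves file metadata
print(os.path.isfile(destination))         # True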
def plot_filesystem_stats(): """ Plot various filesystem statistics using ``bokeh`` and save them to the output directory. """ p1 = plot_total_file_counts() p2 = plot_filesystem_size() p3 = plot_by_filetype('count', 'all') p4 = plot_by_filetype('size', 'all') plot_list = [p1, p2, p3, p4] for instrument in JWST_INSTRUMENT_NAMES: plot_list.append(plot_by_filetype('count', instrument)) plot_list.append(plot_by_filetype('size', instrument)) # Create a layout with a grid pattern grid_chunks = [plot_list[i:i + 2] for i in range(0, len(plot_list), 2)] grid = gridplot(grid_chunks) # Save all of the plots in one file outputs_dir = os.path.join(get_config()['outputs'], 'monitor_filesystem') outfile = os.path.join(outputs_dir, 'filesystem_monitor.html') output_file(outfile) save(grid) set_permissions(outfile) logging.info('Saved plot of all statistics to {}'.format(outfile)) # Save each plot's components for plot in plot_list: plot_name = plot.title.text.lower().replace(' ', '_') plot.sizing_mode = 'stretch_both' script, div = components(plot) div_outfile = os.path.join(outputs_dir, "{}_component.html".format(plot_name)) with open(div_outfile, 'w') as f: f.write(div) f.close() set_permissions(div_outfile) script_outfile = os.path.join(outputs_dir, "{}_component.js".format(plot_name)) with open(script_outfile, 'w') as f: f.write(script) f.close() set_permissions(script_outfile) logging.info( 'Saved components files: {}_component.html and {}_component.js'. format(plot_name, plot_name)) logging.info('Filesystem statistics plotting complete.')
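# Illustrative sketch (assuming bokeh is installed) of the components()
# pattern used above: a plot is split into an embeddable <div> and a <script>
# snippet, each written to its own file. The output filenames here are
# hypothetical.
from bokeh.embed import components
from bokeh.plotting import figure

plot = figure(title='example plot')
plot.line([1, 2, 3], [4, 6, 5])
plot.sizing_mode = 'stretch_both'

script, div = components(plot)
with open('example_component.html', 'w') as f:
    f.write(div)
with open('example_component.js', 'w') as f:
    f.write(script)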
def process(self, file_list): """The main method for processing darks. See module docstrings for further details. Parameters ---------- file_list : list List of filenames (including full paths) to the dark current files. """ for filename in file_list: logging.info('\tWorking on file: {}'.format(filename)) # Get relevant header information for this file self.get_metadata(filename) # Run the file through the necessary pipeline steps pipeline_steps = self.determine_pipeline_steps() logging.info('\tRunning pipeline on {}'.format(filename)) try: processed_file = pipeline_tools.run_calwebb_detector1_steps( filename, pipeline_steps) logging.info( '\tPipeline complete. Output: {}'.format(processed_file)) set_permissions(processed_file) except: logging.info( '\tPipeline processing failed for {}'.format(filename)) continue # Find amplifier boundaries so per-amp statistics can be calculated _, amp_bounds = instrument_properties.amplifier_info( processed_file, omit_reference_pixels=True) logging.info('\tAmplifier boundaries: {}'.format(amp_bounds)) # Get the ramp data; remove first 5 groups and last group for MIRI to avoid reset/rscd effects cal_data = fits.getdata(processed_file, 'SCI', uint=False) if self.instrument == 'MIRI': cal_data = cal_data[:, 5:-1, :, :] # Make the readnoise image readnoise_outfile = os.path.join( self.data_dir, os.path.basename( processed_file.replace('.fits', '_readnoise.fits'))) readnoise = self.make_readnoise_image(cal_data) fits.writeto(readnoise_outfile, readnoise, overwrite=True) logging.info( '\tReadnoise image saved to {}'.format(readnoise_outfile)) # Calculate the full image readnoise stats clipped = sigma_clip(readnoise, sigma=3.0, maxiters=5) full_image_mean, full_image_stddev = np.nanmean( clipped), np.nanstd(clipped) full_image_n, full_image_bin_centers = self.make_histogram( readnoise) logging.info('\tReadnoise image stats: {:.5f} +/- {:.5f}'.format( full_image_mean, full_image_stddev)) # Calculate readnoise stats in each amp separately amp_stats = self.get_amp_stats(readnoise, amp_bounds) logging.info( '\tReadnoise image stats by amp: {}'.format(amp_stats)) # Get the current JWST Readnoise Reference File data parameters = self.make_crds_parameter_dict() reffile_mapping = crds.getreferences(parameters, reftypes=['readnoise']) readnoise_file = reffile_mapping['readnoise'] if 'NOT FOUND' in readnoise_file: logging.warning( '\tNo pipeline readnoise reffile match for this file - assuming all zeros.' ) pipeline_readnoise = np.zeros(readnoise.shape) else: logging.info('\tPipeline readnoise reffile is {}'.format( readnoise_file)) pipeline_readnoise = fits.getdata(readnoise_file) # Find the difference between the current readnoise image and the pipeline readnoise reffile, and record image stats. # Sometimes, the pipeline readnoise reffile needs to be cutout to match the subarray. 
pipeline_readnoise = pipeline_readnoise[self.substrt2 - 1:self.substrt2 + self.subsize2 - 1, self.substrt1 - 1:self.substrt1 + self.subsize1 - 1] readnoise_diff = readnoise - pipeline_readnoise clipped = sigma_clip(readnoise_diff, sigma=3.0, maxiters=5) diff_image_mean, diff_image_stddev = np.nanmean( clipped), np.nanstd(clipped) diff_image_n, diff_image_bin_centers = self.make_histogram( readnoise_diff) logging.info( '\tReadnoise difference image stats: {:.5f} +/- {:.5f}'.format( diff_image_mean, diff_image_stddev)) # Save a png of the readnoise difference image for visual inspection logging.info('\tCreating png of readnoise difference image') readnoise_diff_png = self.image_to_png( readnoise_diff, outname=os.path.basename(readnoise_outfile).replace( '.fits', '_diff')) # Construct new entry for this file for the readnoise database table. # Can't insert values with numpy.float32 datatypes into database # so need to change the datatypes of these values. readnoise_db_entry = { 'uncal_filename': filename, 'aperture': self.aperture, 'detector': self.detector, 'subarray': self.subarray, 'read_pattern': self.read_pattern, 'nints': self.nints, 'ngroups': self.ngroups, 'expstart': self.expstart, 'readnoise_filename': readnoise_outfile, 'full_image_mean': float(full_image_mean), 'full_image_stddev': float(full_image_stddev), 'full_image_n': full_image_n.astype(float), 'full_image_bin_centers': full_image_bin_centers.astype(float), 'readnoise_diff_image': readnoise_diff_png, 'diff_image_mean': float(diff_image_mean), 'diff_image_stddev': float(diff_image_stddev), 'diff_image_n': diff_image_n.astype(float), 'diff_image_bin_centers': diff_image_bin_centers.astype(float), 'entry_date': datetime.datetime.now() } for key in amp_stats.keys(): if isinstance(amp_stats[key], (int, float)): readnoise_db_entry[key] = float(amp_stats[key]) else: readnoise_db_entry[key] = amp_stats[key].astype(float) # Add this new entry to the readnoise database table self.stats_table.__table__.insert().execute(readnoise_db_entry) logging.info('\tNew entry added to readnoise database table') # Remove the raw and calibrated files to save memory space os.remove(filename) os.remove(processed_file)
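# Illustrative sketch of the subarray cutout indexing used above: the FITS
# SUBSTRT1/SUBSTRT2 keywords are 1-indexed, so 1 is subtracted to convert to
# numpy's 0-indexed convention, and the resulting cutout has shape
# (SUBSIZE2, SUBSIZE1). The values below are made up.
import numpy as np

full_frame = np.arange(2048 * 2048).reshape(2048, 2048)
substrt1, subsize1 = 1537, 512      # column start (1-indexed) and width
substrt2, subsize2 = 1, 512         # row start (1-indexed) and height

cutout = full_frame[substrt2 - 1:substrt2 + subsize2 - 1,
                    substrt1 - 1:substrt1 + subsize1 - 1]
assert cutout.shape == (subsize2, subsize1)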
def jwst_inventory(instruments=JWST_INSTRUMENT_NAMES, dataproducts=['image', 'spectrum', 'cube'], caom=False, plot=False): """Gather a full inventory of all JWST data in each instrument service by instrument/dtype Parameters ---------- instruments: sequence The list of instruments to count dataproducts: sequence The types of dataproducts to count caom: bool Query CAOM service plot: bool Return a pie chart of the data Returns ------- astropy.table.table.Table The table of record counts for each instrument and mode """ logging.info('Searching database...') # Iterate through instruments inventory, keywords = [], {} for instrument in instruments: ins = [instrument] for dp in dataproducts: count = instrument_inventory(instrument, dataproduct=dp, caom=caom) ins.append(count) # Get the total ins.append(sum(ins[-3:])) # Add it to the list inventory.append(ins) # Add the keywords to the dict keywords[instrument] = instrument_keywords(instrument, caom=caom) logging.info( 'Completed database search for {} instruments and {} data products.'. format(instruments, dataproducts)) # Make the table all_cols = ['instrument'] + dataproducts + ['total'] table = pd.DataFrame(inventory, columns=all_cols) # Plot it if plot: # Determine plot location and names output_dir = get_config()['outputs'] if caom: output_filename = 'database_monitor_caom' else: output_filename = 'database_monitor_jwst' # Make the plot plt = bar_chart(table, 'instrument', dataproducts, title="JWST Inventory") # Save the plot as full html html_filename = output_filename + '.html' outfile = os.path.join(output_dir, 'monitor_mast', html_filename) output_file(outfile) save(plt) set_permissions(outfile) logging.info( 'Saved Bokeh plots as HTML file: {}'.format(html_filename)) # Save the plot as components plt.sizing_mode = 'stretch_both' script, div = components(plt) div_outfile = os.path.join(output_dir, 'monitor_mast', output_filename + "_component.html") with open(div_outfile, 'w') as f: f.write(div) f.close() set_permissions(div_outfile) script_outfile = os.path.join(output_dir, 'monitor_mast', output_filename + "_component.js") with open(script_outfile, 'w') as f: f.write(script) f.close() set_permissions(script_outfile) logging.info( 'Saved Bokeh components files: {}_component.html and {}_component.js' .format(output_filename, output_filename)) # Melt the table table = pd.melt(table, id_vars=['instrument'], value_vars=dataproducts, value_name='files', var_name='dataproduct') return table, keywords
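# Illustrative sketch (pandas only) of the melt step above, which reshapes the
# wide per-instrument count table into one row per (instrument, dataproduct)
# pair; the counts are made up.
import pandas as pd

wide = pd.DataFrame({'instrument': ['nircam', 'miri'],
                     'image': [10, 7],
                     'spectrum': [4, 12],
                     'cube': [2, 3]})

long_form = pd.melt(wide, id_vars=['instrument'],
                    value_vars=['image', 'spectrum', 'cube'],
                    value_name='files', var_name='dataproduct')
print(long_form)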
def ensure_dir_exists(fullpath): """Creates dirs from ``fullpath`` if they do not already exist.""" if not os.path.exists(fullpath): os.makedirs(fullpath) permissions.set_permissions(fullpath)
def create_table(status_dict): """Create interactive ``bokeh`` table containing the logfile status results. Parameters ---------- status_dict : dict Nested dictionary with status results from all logfiles """ # Rearrange the nested dictionary into a non-nested dict for the table filenames = [] dates = [] missings = [] results = [] for key in status_dict: filenames.append(status_dict[key]['logname']) dates.append(datetime.fromtimestamp(status_dict[key]['latest_time'])) missings.append(str(status_dict[key]['missing_file'])) results.append(status_dict[key]['status']) # div to color the boxes in the status column success_template = """ <div style="background:<%= (function colorfromstr(){ if(value == "success"){ return("green")} else{return("red")} }()) %>; color: white"> <%= value %></div> """ # div to color the boxes in the column for possibly late logfiles missing_template = """ <div style="background:<%= (function colorfrombool(){ if(value == "True"){ return("orange")} else{return("green")} }()) %>; color: white"> <%= value %></div> """ success_formatter = HTMLTemplateFormatter(template=success_template) missing_formatter = HTMLTemplateFormatter(template=missing_template) data = dict(name=list(status_dict.keys()), filename=filenames, date=dates, missing=missings, result=results) source = ColumnDataSource(data) datefmt = DateFormatter(format="RFC-2822") columns = [ TableColumn(field="name", title="Monitor Name", width=200), TableColumn(field="filename", title="Most Recent File", width=350), TableColumn(field="date", title="Most Recent Time", width=200, formatter=datefmt), TableColumn(field="missing", title="Possible Missing File", width=200, formatter=missing_formatter), TableColumn(field="result", title="Status", width=100, formatter=success_formatter), ] data_table = DataTable(source=source, columns=columns, width=800, height=280, index_position=None) # Get output directory for saving the table files output_dir = get_config()['outputs'] output_filename = 'cron_status_table' # Save full html html_outfile = os.path.join(output_dir, 'monitor_cron_jobs', '{}.html'.format(output_filename)) output_file(html_outfile) save(data_table) try: set_permissions(html_outfile) except PermissionError: logging.warning( 'Unable to set permissions for {}'.format(html_outfile)) logging.info('Saved Bokeh full HTML file: {}'.format(html_outfile))
def monitor_template_main(): """ The main function of the ``monitor_template`` module.""" # Example of logging my_variable = 'foo' logging.info('Some useful information: {}'.format(my_variable)) # Example of querying for a dataset via MAST API service = "Mast.Jwst.Filtered.Niriss" params = { "columns": "filename", "filters": [{ "paramName": "filter", "values": ['F430M'] }] } response = Mast.service_request_async(service, params) result = response[0].json()['data'] filename_of_interest = result[0][ 'filename'] # jw00304002001_02102_00001_nis_uncal.fits # Example of parsing a filename filename_dict = filename_parser(filename_of_interest) # Contents of filename_dict: # {'program_id': '00304', # 'observation': '002', # 'visit': '001', # 'visit_group': '02', # 'parallel_seq_id': '1', # 'activity': '02', # 'exposure_id': '00001', # 'detector': 'nis', # 'suffix': 'uncal'} # Example of locating a dataset in the filesystem filesystem = SETTINGS['filesystem'] dataset = os.path.join( filesystem, 'public', 'jw{}'.format(filename_dict['program_id']), 'jw{}{}{}'.format(filename_dict['program_id'], filename_dict['observation'], filename_dict['visit']), filename_of_interest) # Example of reading in dataset using jwst.datamodels im = datamodels.open(dataset) # Now have access to: # im.data # Data array # im.err # ERR array # im.meta # Metadata such as header keywords # Example of saving a file and setting permissions im.save('some_filename.fits') set_permissions('some_filename.fits') # Example of creating and exporting a Bokeh plot ylen, xlen = im.data.shape plt = figure(x_range=(0, xlen), y_range=(0, ylen)) plt.image(image=[im.data], x=0, y=0, dw=2, dh=2, palette="Spectral11") plt.sizing_mode = 'stretch_both' # Necessary for responsive sizing on web app script, div = components(plt) plot_output_dir = SETTINGS['outputs'] div_outfile = os.path.join(plot_output_dir, 'monitor_name', filename_of_interest + "_component.html") script_outfile = os.path.join(plot_output_dir, 'monitor_name', filename_of_interest + "_component.js") for outfile, component in zip([div_outfile, script_outfile], [div, script]): with open(outfile, 'w') as f: f.write(component) f.close() set_permissions(outfile) # Perform any other necessary code well_named_variable = "Function does something." result_of_second_function = second_function(well_named_variable)
def plot_system_stats(stats_file, filebytype, sizebytype): """Read in the file of saved stats over time and plot them. Parameters ----------- stats_file : str file containing information of stats over time filebytype : str file containing information of file counts by type over time sizebytype : str file containing information on file sizes by type over time """ # get path for files settings = get_config() outputs_dir = os.path.join(settings['outputs'], 'monitor_filesystem') # read in file of statistics date, f_count, sysize, frsize, used, percent = np.loadtxt(stats_file, dtype=str, unpack=True) fits_files, uncalfiles, calfiles, ratefiles, rateintsfiles, i2dfiles, nrcfiles, nrsfiles, nisfiles, mirfiles, fgsfiles = np.loadtxt( filebytype, dtype=str, unpack=True) fits_sz, uncal_sz, cal_sz, rate_sz, rateints_sz, i2d_sz, nrc_sz, nrs_sz, nis_sz, mir_sz, fgs_sz = np.loadtxt( sizebytype, dtype=str, unpack=True) logging.info('Read in file statistics from {}, {}, {}'.format( stats_file, filebytype, sizebytype)) # put in proper np array types and convert to GB sizes dates = np.array(date, dtype='datetime64') file_count = f_count.astype(float) systemsize = sysize.astype(float) / (1024.**3) freesize = frsize.astype(float) / (1024.**3) usedsize = used.astype(float) / (1024.**3) fits = fits_files.astype(int) uncal = uncalfiles.astype(int) cal = calfiles.astype(int) rate = ratefiles.astype(int) rateints = rateintsfiles.astype(int) i2d = i2dfiles.astype(int) nircam = nrcfiles.astype(int) nirspec = nrsfiles.astype(int) niriss = nisfiles.astype(int) miri = mirfiles.astype(int) fgs = fgsfiles.astype(int) fits_size = fits_sz.astype(float) / (1024.**3) uncal_size = uncal_sz.astype(float) / (1024.**3) cal_size = cal_sz.astype(float) / (1024.**3) rate_size = rate_sz.astype(float) / (1024.**3) rateints_size = rateints_sz.astype(float) / (1024.**3) i2d_size = i2d_sz.astype(float) / (1024.**3) nircam_size = nrc_sz.astype(float) / (1024.**3) nirspec_size = nrs_sz.astype(float) / (1024.**3) niriss_size = nis_sz.astype(float) / (1024.**3) miri_size = mir_sz.astype(float) / (1024.**3) fgs_size = fgs_sz.astype(float) / (1024.**3) # plot the data # Plot filecount vs. date p1 = figure(tools='pan,box_zoom,reset,wheel_zoom,save', x_axis_type='datetime', title="Total File Counts", x_axis_label='Date', y_axis_label='Count') p1.line(dates, file_count, line_width=2, line_color='blue') p1.circle(dates, file_count, color='blue') # Plot system stats vs. date p2 = figure(tools='pan,box_zoom,wheel_zoom,reset,save', x_axis_type='datetime', title='System stats', x_axis_label='Date', y_axis_label='GB') p2.line(dates, systemsize, legend='Total size', line_color='red') p2.circle(dates, systemsize, color='red') p2.line(dates, freesize, legend='Free bytes', line_color='blue') p2.circle(dates, freesize, color='blue') p2.line(dates, usedsize, legend='Used bytes', line_color='green') p2.circle(dates, usedsize, color='green') # Plot fits files by type vs. 
date p3 = figure(tools='pan,box_zoom,wheel_zoom,reset,save', x_axis_type='datetime', title="Total File Counts by Type", x_axis_label='Date', y_axis_label='Count') p3.line(dates, fits, legend='Total fits files', line_color='black') p3.circle(dates, fits, color='black') p3.line(dates, uncal, legend='uncalibrated fits files', line_color='red') p3.diamond(dates, uncal, color='red') p3.line(dates, cal, legend='calibrated fits files', line_color='blue') p3.square(dates, cal, color='blue') p3.line(dates, rate, legend='rate fits files', line_color='green') p3.triangle(dates, rate, color='green') p3.line(dates, rateints, legend='rateints fits files', line_color='orange') p3.asterisk(dates, rateints, color='orange') p3.line(dates, i2d, legend='i2d fits files', line_color='purple') p3.x(dates, i2d, color='purple') p3.line(dates, nircam, legend='nircam fits files', line_color='midnightblue') p3.x(dates, nircam, color='midnightblue') p3.line(dates, nirspec, legend='nirspec fits files', line_color='springgreen') p3.x(dates, nirspec, color='springgreen') p3.line(dates, niriss, legend='niriss fits files', line_color='darkcyan') p3.x(dates, niriss, color='darkcyan') p3.line(dates, miri, legend='miri fits files', line_color='dodgerblue') p3.x(dates, miri, color='dodgerblue') p3.line(dates, fgs, legend='fgs fits files', line_color='darkred') p3.x(dates, fgs, color='darkred') # plot size of total fits files by type p4 = figure(tools='pan,box_zoom,wheel_zoom,reset,save', x_axis_type='datetime', title="Total File Sizes by Type", x_axis_label='Date', y_axis_label='GB') p4.line(dates, fits_size, legend='Total fits files', line_color='black') p4.circle(dates, fits_size, color='black') p4.line(dates, uncal_size, legend='uncalibrated fits files', line_color='red') p4.diamond(dates, uncal_size, color='red') p4.line(dates, cal_size, legend='calibrated fits files', line_color='blue') p4.square(dates, cal_size, color='blue') p4.line(dates, rate_size, legend='rate fits files', line_color='green') p4.triangle(dates, rate_size, color='green') p4.line(dates, rateints_size, legend='rateints fits files', line_color='orange') p4.asterisk(dates, rateints_size, color='orange') p4.line(dates, i2d_size, legend='i2d fits files', line_color='purple') p4.x(dates, i2d_size, color='purple') p4.line(dates, nircam_size, legend='nircam fits files', line_color='midnightblue') p4.x(dates, nircam_size, color='midnightblue') p4.line(dates, nirspec_size, legend='nirspec fits files', line_color='springgreen') p4.x(dates, nirspec_size, color='springgreen') p4.line(dates, niriss_size, legend='niriss fits files', line_color='darkcyan') p4.x(dates, niriss_size, color='darkcyan') p4.line(dates, miri_size, legend='miri fits files', line_color='dodgerblue') p4.x(dates, miri_size, color='dodgerblue') p4.line(dates, fgs_size, legend='fgs fits files', line_color='darkred') p4.x(dates, fgs_size, color='darkred') # create a layout with a grid pattern to save all plots grid = gridplot([[p1, p2], [p3, p4]]) outfile = os.path.join(outputs_dir, "filesystem_monitor.html") output_file(outfile) save(grid) set_permissions(outfile) logging.info('Saved plot of all statistics to {}'.format(outfile)) # Save each plot's components plots = [p1, p2, p3, p4] plot_names = ['filecount', 'system_stats', 'filecount_type', 'size_type'] for plot, name in zip(plots, plot_names): plot.sizing_mode = 'stretch_both' script, div = components(plot) div_outfile = os.path.join(outputs_dir, "{}_component.html".format(name)) with open(div_outfile, 'w') as f: f.write(div) f.close()
set_permissions(div_outfile) script_outfile = os.path.join(outputs_dir, "{}_component.js".format(name)) with open(script_outfile, 'w') as f: f.write(script) f.close() set_permissions(script_outfile) logging.info( 'Saved components files: {}_component.html and {}_component.js'. format(name, name)) logging.info('Filesystem statistics plotting complete.') # Begin logging: logging.info("Completed.")
def monitor_filesystem(): """Tabulates the inventory of the JWST filesystem, saving statistics to files, and generates plots. """ # Begin logging logging.info('Beginning filesystem monitoring.') # Get path, directories and files in system and count files in all directories settings = get_config() filesystem = settings['filesystem'] outputs_dir = os.path.join(settings['outputs'], 'monitor_filesystem') # set up dictionaries for output results_dict = defaultdict(int) size_dict = defaultdict(float) # Walk through all directories recursively and count files logging.info('Searching filesystem...') for dirpath, dirs, files in os.walk(filesystem): results_dict['file_count'] += len(files) # find number of all files for filename in files: file_path = os.path.join(dirpath, filename) if filename.endswith(".fits"): # find total number of fits files results_dict['fits_files'] += 1 size_dict['size_fits'] += os.path.getsize(file_path) suffix = filename_parser(filename)['suffix'] results_dict[suffix] += 1 size_dict[suffix] += os.path.getsize(file_path) detector = filename_parser(filename)['detector'] instrument = detector[ 0: 3] # first three characters of detector specify instrument results_dict[instrument] += 1 size_dict[instrument] += os.path.getsize(file_path) logging.info('{} files found in filesystem'.format( results_dict['fits_files'])) # Get df style stats on file system out = subprocess.check_output('df {}'.format(filesystem), shell=True) outstring = out.decode( "utf-8") # put into string for parsing from byte format parsed = outstring.split(sep=None) # Select desired elements from parsed string total = int(parsed[8]) # in blocks of 512 bytes used = int(parsed[9]) available = int(parsed[10]) percent_used = parsed[11] # Save stats for plotting over time now = datetime.datetime.now().isoformat( sep='T', timespec='auto') # get date of stats # set up output file and write stats statsfile = os.path.join(outputs_dir, 'statsfile.txt') with open(statsfile, "a+") as f: f.write("{0} {1:15d} {2:15d} {3:15d} {4:15d} {5}\n".format( now, results_dict['file_count'], total, available, used, percent_used)) set_permissions(statsfile) logging.info('Saved file statistics to: {}'.format(statsfile)) # set up and read out stats on files by type filesbytype = os.path.join(outputs_dir, 'filesbytype.txt') with open(filesbytype, "a+") as f2: f2.write("{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}\n".format( results_dict['fits_files'], results_dict['uncal'], results_dict['cal'], results_dict['rate'], results_dict['rateints'], results_dict['i2d'], results_dict['nrc'], results_dict['nrs'], results_dict['nis'], results_dict['mir'], results_dict['gui'])) set_permissions(filesbytype, verbose=False) logging.info('Saved file statistics by type to {}'.format(filesbytype)) # set up file size by type file sizebytype = os.path.join(outputs_dir, 'sizebytype.txt') with open(sizebytype, "a+") as f3: f3.write("{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}\n".format( size_dict['size_fits'], size_dict['uncal'], size_dict['cal'], size_dict['rate'], size_dict['rateints'], size_dict['i2d'], size_dict['nrc'], size_dict['nrs'], size_dict['nis'], size_dict['mir'], size_dict['gui'])) set_permissions(sizebytype, verbose=False) logging.info('Saved file sizes by type to {}'.format(sizebytype)) logging.info('Filesystem statistics calculation complete.') # Create the plots plot_system_stats(statsfile, filesbytype, sizebytype)
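# Side note, shown only as an illustrative alternative (not what the monitor
# uses): the standard library's shutil.disk_usage returns total/used/free in
# bytes directly, without shelling out to `df` and parsing its columns.
import shutil

usage = shutil.disk_usage('/')
percent_used = 100. * usage.used / usage.total
print('{:d} bytes total, {:d} free ({:.1f}% used)'.format(
    usage.total, usage.free, percent_used))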
def process(self, illuminated_raw_files, illuminated_slope_files, dark_raw_files, dark_slope_files): """The main method for processing darks. See module docstrings for further details. Parameters ---------- illuminated_raw_files : list List of filenames (including full paths) of raw (uncal) flat field files. These should all be for the same detector and aperture. illuminated_slope_files : list List of filenames (including full paths) of flat field slope files. These should all be for the same detector and aperture and correspond one-to-one with ``illuminated_raw_files``. For cases where a raw file exists but no slope file, the slope file should be None dark_raw_files : list List of filenames (including full paths) of raw (uncal) dark files. These should all be for the same detector and aperture. dark_slope_files : list List of filenames (including full paths) of dark current slope files. These should all be for the same detector and aperture and correspond one-to-one with ``dark_raw_files``. For cases where a raw file exists but no slope file, the slope file should be ``None`` """ # Illuminated files - run entirety of calwebb_detector1 for uncal # files where corresponding rate file is 'None' all_files = [] badpix_types = [] badpix_types_from_flats = ['DEAD', 'LOW_QE', 'OPEN', 'ADJ_OPEN'] badpix_types_from_darks = ['HOT', 'RC', 'OTHER_BAD_PIXEL', 'TELEGRAPH'] illuminated_obstimes = [] if illuminated_raw_files: index = 0 badpix_types.extend(badpix_types_from_flats) for uncal_file, rate_file in zip(illuminated_raw_files, illuminated_slope_files): self.get_metadata(uncal_file) if rate_file == 'None': jump_output, rate_output, _ = pipeline_tools.calwebb_detector1_save_jump(uncal_file, self.data_dir, ramp_fit=True, save_fitopt=False) if self.nints > 1: illuminated_slope_files[index] = rate_output.replace('0_ramp_fit', '1_ramp_fit') else: illuminated_slope_files[index] = deepcopy(rate_output) index += 1 # Get observation time for all files illuminated_obstimes.append(instrument_properties.get_obstime(uncal_file)) all_files = deepcopy(illuminated_slope_files) min_illum_time = min(illuminated_obstimes) max_illum_time = max(illuminated_obstimes) mid_illum_time = instrument_properties.mean_time(illuminated_obstimes) # Dark files - Run calwebb_detector1 on all uncal files, saving the # Jump step output. 
If corresponding rate file is 'None', then also # run the ramp-fit step and save the output dark_jump_files = [] dark_fitopt_files = [] dark_obstimes = [] if dark_raw_files: index = 0 badpix_types.extend(badpix_types_from_darks) # In this case we need to run the pipeline on all input files, # even if the rate file is present, because we also need the jump # and fitops files, which are not saved by default for uncal_file, rate_file in zip(dark_raw_files, dark_slope_files): jump_output, rate_output, fitopt_output = pipeline_tools.calwebb_detector1_save_jump(uncal_file, self.data_dir, ramp_fit=True, save_fitopt=True) self.get_metadata(uncal_file) dark_jump_files.append(jump_output) dark_fitopt_files.append(fitopt_output) if self.nints > 1: #dark_slope_files[index] = rate_output.replace('rate', 'rateints') dark_slope_files[index] = rate_output.replace('0_ramp_fit', '1_ramp_fit') else: dark_slope_files[index] = deepcopy(rate_output) dark_obstimes.append(instrument_properties.get_obstime(uncal_file)) index += 1 if len(all_files) == 0: all_files = deepcopy(dark_slope_files) else: all_files = all_files + dark_slope_files min_dark_time = min(dark_obstimes) max_dark_time = max(dark_obstimes) mid_dark_time = instrument_properties.mean_time(dark_obstimes) # For the dead flux check, filter out any files that have less than # 4 groups dead_flux_files = [] if illuminated_raw_files: for illum_file in illuminated_raw_files: ngroup = fits.getheader(illum_file)['NGROUPS'] if ngroup >= 4: dead_flux_files.append(illum_file) if len(dead_flux_files) == 0: dead_flux_files = None # Instrument-specific preferences from jwst_reffiles meetings if self.instrument in ['nircam', 'niriss', 'fgs']: dead_search_type = 'sigma_rate' elif self.instrument in ['miri', 'nirspec']: dead_search_type = 'absolute_rate' flat_mean_normalization_method = 'smoothed' # Call the bad pixel search module from jwst_reffiles. Lots of # other possible parameters. Only specify the non-default params # in order to make things easier to read. query_string = 'darks_{}_flats_{}_to_{}'.format(self.dark_query_start, self.flat_query_start, self.query_end) output_file = '{}_{}_{}_bpm.fits'.format(self.instrument, self.aperture, query_string) output_file = os.path.join(self.output_dir, output_file) bad_pixel_mask.bad_pixels(flat_slope_files=illuminated_slope_files, dead_search_type=dead_search_type, flat_mean_normalization_method=flat_mean_normalization_method, run_dead_flux_check=True, dead_flux_check_files=dead_flux_files, flux_check=35000, dark_slope_files=dark_slope_files, dark_uncal_files=dark_raw_files, dark_jump_files=dark_jump_files, dark_fitopt_files=dark_fitopt_files, plot=False, output_file=output_file, author='jwst_reffiles', description='A bad pix mask', pedigree='GROUND', useafter='2222-04-01 00:00:00', history='This file was created by JWQL', quality_check=False) # Read in the newly-created bad pixel file set_permissions(output_file) badpix_map = fits.getdata(output_file) # Locate and read in the current bad pixel mask parameters = self.make_crds_parameter_dict() mask_dictionary = crds_tools.get_reffiles(parameters, ['mask'], download=True) baseline_file = mask_dictionary['mask'] if 'NOT FOUND' in baseline_file: logging.warning(('\tNo baseline bad pixel file for {} {}. 
Any bad ' 'pixels found as part of this search will be considered new'.format(self.instrument, self.aperture))) baseline_file = output_file yd, xd = badpix_map.shape baseline_badpix_mask = np.zeros((yd, xd), dtype=int) else: logging.info('\tBaseline bad pixel file is {}'.format(baseline_file)) baseline_badpix_mask = fits.getdata(baseline_file) # Exclude hot and dead pixels in the current bad pixel mask #new_hot_pix = self.exclude_existing_badpix(new_hot_pix, 'hot') new_since_reffile = exclude_crds_mask_pix(badpix_map, baseline_badpix_mask) # Create a list of the new instances of each type of bad pixel for bad_type in badpix_types: bad_location_list = bad_map_to_list(new_since_reffile, bad_type) # Add new hot and dead pixels to the database logging.info('\tFound {} new {} pixels'.format(len(bad_location_list[0]), bad_type)) if bad_type in badpix_types_from_flats: self.add_bad_pix(bad_location_list, bad_type, illuminated_slope_files, min_illum_time, mid_illum_time, max_illum_time, baseline_file) elif bad_type in badpix_types_from_darks: self.add_bad_pix(bad_location_list, bad_type, dark_slope_files, min_dark_time, mid_dark_time, max_dark_time, baseline_file) else: raise ValueError("Unrecognized type of bad pixel: {}. Cannot update database table.".format(bad_type))
def process(self, file_list): """The main method for processing darks. See module docstrings for further details. Parameters ---------- file_list : list List of filenames (including full paths) to the dark current files. """ for filename in file_list: logging.info('\tWorking on file: {}'.format(filename)) # Get relevant header info for this file self.read_pattern = fits.getheader(filename, 0)['READPATT'] self.expstart = '{}T{}'.format( fits.getheader(filename, 0)['DATE-OBS'], fits.getheader(filename, 0)['TIME-OBS']) # Run the file through the necessary pipeline steps pipeline_steps = self.determine_pipeline_steps() logging.info('\tRunning pipeline on {}'.format(filename)) try: processed_file = pipeline_tools.run_calwebb_detector1_steps( filename, pipeline_steps) logging.info( '\tPipeline complete. Output: {}'.format(processed_file)) set_permissions(processed_file) except: logging.info( '\tPipeline processing failed for {}'.format(filename)) os.remove(filename) continue # Find amplifier boundaries so per-amp statistics can be calculated _, amp_bounds = instrument_properties.amplifier_info( processed_file, omit_reference_pixels=True) logging.info('\tAmplifier boundaries: {}'.format(amp_bounds)) # Get the uncalibrated 0th group data for this file uncal_data = fits.getdata(filename, 'SCI')[0, 0, :, :].astype(float) # Calculate the uncal median values of each amplifier for odd/even columns amp_medians = self.get_amp_medians(uncal_data, amp_bounds) logging.info('\tCalculated uncalibrated image stats: {}'.format( amp_medians)) # Calculate image statistics on the calibrated image cal_data = fits.getdata(processed_file, 'SCI')[0, 0, :, :] mean, median, stddev = sigma_clipped_stats(cal_data, sigma=3.0, maxiters=5) collapsed_rows, collapsed_columns = self.collapse_image(cal_data) counts, bin_centers = self.make_histogram(cal_data) logging.info( '\tCalculated calibrated image stats: {:.3f} +/- {:.3f}'. format(mean, stddev)) # Save a png of the calibrated image for visual inspection logging.info('\tCreating png of calibrated image') output_png = self.image_to_png( cal_data, outname=os.path.basename(processed_file).replace('.fits', '')) # Construct new entry for this file for the bias database table. # Can't insert values with numpy.float32 datatypes into database # so need to change the datatypes of these values. bias_db_entry = { 'aperture': self.aperture, 'uncal_filename': filename, 'cal_filename': processed_file, 'cal_image': output_png, 'expstart': self.expstart, 'mean': float(mean), 'median': float(median), 'stddev': float(stddev), 'collapsed_rows': collapsed_rows.astype(float), 'collapsed_columns': collapsed_columns.astype(float), 'counts': counts.astype(float), 'bin_centers': bin_centers.astype(float), 'entry_date': datetime.datetime.now() } for key in amp_medians.keys(): bias_db_entry[key] = float(amp_medians[key]) # Add this new entry to the bias database table self.stats_table.__table__.insert().execute(bias_db_entry) logging.info('\tNew entry added to bias database table: {}'.format( bias_db_entry)) # Remove the raw and calibrated files to save memory space os.remove(filename) os.remove(processed_file)
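# Illustrative sketch (astropy + numpy) of the sigma-clipped statistics used
# above: outliers beyond 3 sigma are iteratively excluded before the mean,
# median, and standard deviation are computed.
import numpy as np
from astropy.stats import sigma_clipped_stats

rng = np.random.default_rng(seed=1)
cal_data = rng.normal(loc=0., scale=3., size=(100, 100))
cal_data[0, 0] = 1.e5                  # cosmic-ray-like outlier

mean, median, stddev = sigma_clipped_stats(cal_data, sigma=3.0, maxiters=5)
print('{:.3f} +/- {:.3f}'.format(mean, stddev))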
def generate_preview_images(): """The main function of the ``generate_preview_image`` module.""" # Begin logging logging.info("Beginning the script run") filesystem = get_config()['filesystem'] preview_image_filesystem = get_config()['preview_image_filesystem'] thumbnail_filesystem = get_config()['thumbnail_filesystem'] filenames = glob(os.path.join(filesystem, '*/*.fits')) grouped_filenames = group_filenames(filenames) logging.info(f"Found {len(filenames)} filenames") for file_list in grouped_filenames: filename = file_list[0] # Determine the save location try: identifier = 'jw{}'.format(filename_parser(filename)['program_id']) except ValueError as error: identifier = os.path.basename(filename).split('.fits')[0] preview_output_directory = os.path.join(preview_image_filesystem, identifier) thumbnail_output_directory = os.path.join(thumbnail_filesystem, identifier) # Check to see if the preview images already exist and skip # if they do file_exists = check_existence(file_list, preview_output_directory) if file_exists: logging.info( "JPG already exists for {}, skipping.".format(filename)) continue # Create the output directories if necessary if not os.path.exists(preview_output_directory): os.makedirs(preview_output_directory) permissions.set_permissions(preview_output_directory) logging.info(f'Created directory {preview_output_directory}') if not os.path.exists(thumbnail_output_directory): os.makedirs(thumbnail_output_directory) permissions.set_permissions(thumbnail_output_directory) logging.info(f'Created directory {thumbnail_output_directory}') # If the exposure contains more than one file (because more # than one detector was used), then create a mosaic max_size = 8 numfiles = len(file_list) if numfiles != 1: try: mosaic_image, mosaic_dq = create_mosaic(file_list) logging.info('Created mosiac for:') for item in file_list: logging.info(f'\t{item}') except (ValueError, FileNotFoundError) as error: logging.error(error) dummy_file = create_dummy_filename(file_list) if numfiles in [2, 4]: max_size = 16 elif numfiles in [8]: max_size = 32 # Create the nominal preview image and thumbnail try: im = PreviewImage(filename, "SCI") im.clip_percent = 0.01 im.scaling = 'log' im.cmap = 'viridis' im.output_format = 'jpg' im.preview_output_directory = preview_output_directory im.thumbnail_output_directory = thumbnail_output_directory # If a mosaic was made from more than one file # insert it and it's associated DQ array into the # instance of PreviewImage. Also set the input # filename to indicate that we have mosaicked data if numfiles != 1: im.data = mosaic_image im.dq = mosaic_dq im.file = dummy_file im.make_image(max_img_size=max_size) except ValueError as error: logging.warning(error) # Complete logging: logging.info("Completed.")
def process_program(program): """Generate preview images and thumbnails for the given program. Parameters ---------- program : str The program identifier (e.g. ``88600``) """ # Group together common exposures filenames = glob.glob( os.path.join(get_config()['filesystem'], program, '*.fits')) grouped_filenames = group_filenames(filenames) logging.info('Found {} filenames'.format(len(filenames))) for file_list in grouped_filenames: filename = file_list[0] # Determine the save location try: identifier = 'jw{}'.format(filename_parser(filename)['program_id']) except ValueError: identifier = os.path.basename(filename).split('.fits')[0] preview_output_directory = os.path.join( get_config()['preview_image_filesystem'], identifier) thumbnail_output_directory = os.path.join( get_config()['thumbnail_filesystem'], identifier) # Check to see if the preview images already exist and skip if they do file_exists = check_existence(file_list, preview_output_directory) if file_exists: logging.info( "JPG already exists for {}, skipping.".format(filename)) continue # Create the output directories if necessary if not os.path.exists(preview_output_directory): os.makedirs(preview_output_directory) permissions.set_permissions(preview_output_directory) logging.info( 'Created directory {}'.format(preview_output_directory)) if not os.path.exists(thumbnail_output_directory): os.makedirs(thumbnail_output_directory) permissions.set_permissions(thumbnail_output_directory) logging.info( 'Created directory {}'.format(thumbnail_output_directory)) # If the exposure contains more than one file (because more # than one detector was used), then create a mosaic max_size = 8 numfiles = len(file_list) if numfiles > 1: try: mosaic_image, mosaic_dq = create_mosaic(file_list) logging.info('Created mosiac for:') for item in file_list: logging.info('\t{}'.format(item)) except (ValueError, FileNotFoundError) as error: logging.error(error) dummy_file = create_dummy_filename(file_list) if numfiles in [2, 4]: max_size = 16 elif numfiles in [8]: max_size = 32 # Create the nominal preview image and thumbnail try: im = PreviewImage(filename, "SCI") im.clip_percent = 0.01 im.scaling = 'log' im.cmap = 'viridis' im.output_format = 'jpg' im.preview_output_directory = preview_output_directory im.thumbnail_output_directory = thumbnail_output_directory # If a mosaic was made from more than one file # insert it and it's associated DQ array into the # instance of PreviewImage. Also set the input # filename to indicate that we have mosaicked data if numfiles != 1: im.data = mosaic_image im.dq = mosaic_dq im.file = dummy_file im.make_image(max_img_size=max_size) logging.info( 'Created preview image and thumbnail for: {}'.format(filename)) except ValueError as error: logging.warning(error)
def run(self): """The main method. See module docstrings for further details. """ logging.info('Begin logging for readnoise_monitor\n') # Get the output directory and setup a directory to store the data self.output_dir = os.path.join(get_config()['outputs'], 'readnoise_monitor') ensure_dir_exists(os.path.join(self.output_dir, 'data')) # Use the current time as the end time for MAST query self.query_end = Time.now().mjd # Loop over all instruments for instrument in ['nircam', 'niriss']: self.instrument = instrument # Identify which database tables to use self.identify_tables() # Get a list of all possible apertures for this instrument siaf = Siaf(self.instrument) possible_apertures = list(siaf.apertures) for aperture in possible_apertures: logging.info('\nWorking on aperture {} in {}'.format( aperture, instrument)) self.aperture = aperture # Locate the record of the most recent MAST search; use this time # (plus a 30 day buffer to catch any missing files from the previous # run) as the start time in the new MAST search. most_recent_search = self.most_recent_search() self.query_start = most_recent_search - 30 # Query MAST for new dark files for this instrument/aperture logging.info('\tQuery times: {} {}'.format( self.query_start, self.query_end)) new_entries = mast_query_darks(instrument, aperture, self.query_start, self.query_end) logging.info('\tAperture: {}, new entries: {}'.format( self.aperture, len(new_entries))) # Set up a directory to store the data for this aperture self.data_dir = os.path.join( self.output_dir, 'data/{}_{}'.format(self.instrument.lower(), self.aperture.lower())) if len(new_entries) > 0: ensure_dir_exists(self.data_dir) # Get any new files to process new_files = [] checked_files = [] for file_entry in new_entries: output_filename = os.path.join( self.data_dir, file_entry['filename'].replace('_dark', '_uncal')) # Sometimes both the dark and uncal name of a file is picked up in new_entries if output_filename in checked_files: logging.info( '\t{} already checked in this run.'.format( output_filename)) continue checked_files.append(output_filename) # Dont process files that already exist in the readnoise stats database file_exists = self.file_exists_in_database(output_filename) if file_exists: logging.info( '\t{} already exists in the readnoise database table.' .format(output_filename)) continue # Save any new uncal files with enough groups in the output directory; some dont exist in JWQL filesystem try: filename = filesystem_path(file_entry['filename']) uncal_filename = filename.replace('_dark', '_uncal') if not os.path.isfile(uncal_filename): logging.info( '\t{} does not exist in JWQL filesystem, even though {} does' .format(uncal_filename, filename)) else: num_groups = fits.getheader( uncal_filename)['NGROUPS'] if num_groups > 1: # skip processing if the file doesnt have enough groups to calculate the readnoise; TODO change to 10 before incorporating MIRI shutil.copy(uncal_filename, self.data_dir) logging.info('\tCopied {} to {}'.format( uncal_filename, output_filename)) set_permissions(output_filename) new_files.append(output_filename) else: logging.info( '\tNot enough groups to calculate readnoise in {}' .format(uncal_filename)) except FileNotFoundError: logging.info( '\t{} does not exist in JWQL filesystem'.format( file_entry['filename'])) # Run the readnoise monitor on any new files if len(new_files) > 0: self.process(new_files) monitor_run = True else: logging.info( '\tReadnoise monitor skipped. {} new dark files for {}, {}.' 
.format(len(new_files), instrument, aperture)) monitor_run = False # Update the query history new_entry = { 'instrument': instrument, 'aperture': aperture, 'start_time_mjd': self.query_start, 'end_time_mjd': self.query_end, 'entries_found': len(new_entries), 'files_found': len(new_files), 'run_monitor': monitor_run, 'entry_date': datetime.datetime.now() } self.query_table.__table__.insert().execute(new_entry) logging.info('\tUpdated the query history table') logging.info('Readnoise Monitor completed successfully.')
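# Illustrative sketch (astropy only) of the query-window arithmetic used in
# the monitor: times are handled as Modified Julian Dates, so the 30-day
# buffer is a simple subtraction; the previous search time below is made up.
from astropy.time import Time

query_end = Time.now().mjd             # current time in MJD
most_recent_search = query_end - 45.   # hypothetical previous search time
query_start = most_recent_search - 30  # 30-day buffer, in days

print('Query times: {} {}'.format(query_start, query_end))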