def get(self, tag, dest_dir=None):
    """
    Fetch the file registered under *tag* and copy it out of the data package.

    :param tag: key of the entry in the package index
    :param dest_dir: destination directory for the copy; when None, the
                     current working directory is used
    :return: a File instance for the copied file
    """
    self._load_status()

    assert tag in self._status[
        'index'], "Tag %s does not exists in data package: \n%s" % (tag, self)

    entry = self._status['index'][tag]

    # Wrap the stored file so we can reuse File's copy machinery
    packaged_file = File(self._get_abs_path(tag), entry['description'])

    # Resolve the destination directory
    target = sanitize_filename(dest_dir) if dest_dir is not None else os.getcwd()

    return packaged_file.copy_to(target)
def _check_directory(directory):
    """
    Sanitize *directory* and verify that it exists and is a directory.

    :param directory: path to check
    :return: the sanitized path
    :raises AssertionError: if the path is missing or not a directory
    """
    path = sanitize_filename(directory)

    # Fail loudly if the path is missing or is not a directory
    assert os.path.exists(path), "Directory %s does not exists" % path
    assert os.path.isdir(path), "The file %s is not a directory" % path

    return path
def __init__(self, directory, create=False):
    """
    Open an existing data package, or optionally create a new empty one.

    :param directory: path of the package directory
    :param create: when True and the directory does not exist, build an
                   empty read-write package there instead of failing
    :raises IOError: if the directory is absent and create is False
    """
    self._directory = sanitize_filename(directory)

    if os.path.exists(self._directory) and os.path.isdir(self._directory):

        # Existing package: load its index and validate it
        logger.debug("Accessing data in %s" % self._directory)

        with work_within_directory(self._directory):

            # A valid package must contain the index file
            assert os.path.exists(
                _index_file), "Cannot find index file in %s" % self._directory

            self._load_status()

            self._check_consistency()

        return

    # Directory is missing (or not a directory)
    if not create:

        raise IOError(
            "Directory %s does not exist or is not a directory" %
            self._directory)

    # Build a brand new, empty package

    # Create directory
    os.makedirs(self._directory)

    # Create an empty index file
    with work_within_directory(self._directory):

        # By default the package is read-write
        self._status = {'read_only': False, 'index': {}}

        self._save_status()

    logger.info("Datapackage in %s has been created" % self._directory)
def get_configuration(filename):
    """
    Read a YAML configuration file into a read-only container.

    :param filename: path to the YAML configuration file
    :return: a ReadOnlyContainer holding the parsed configuration
    :raises AssertionError: if the file does not exist
    :raises IOError: if the file cannot be read or parsed as YAML
    """
    filename = sanitize_filename(filename)

    assert os.path.exists(
        filename), "Configuration file %s does not exist!" % filename

    try:

        with open(filename, "r") as f:

            configuration_dict = yaml.safe_load(f)

    except Exception as e:

        # Was a bare "except:", which would also swallow KeyboardInterrupt /
        # SystemExit and hide the original error; narrow it and chain the cause
        raise IOError(
            "Couldn't read configuration file %s. File is not readable, or wrong format."
            % (filename)) from e

    configuration = ReadOnlyContainer(configuration_dict)

    return configuration
def work_within_directory(directory, create=False, remove=False):
    """
    Generator body of a context manager: chdir into *directory*, yield,
    then always chdir back (optionally removing the directory tree).

    NOTE(review): this generator is presumably decorated with
    @contextlib.contextmanager at its definition site — confirm.

    :param directory: directory to work in
    :param create: create the directory (and parents) if it does not exist
    :param remove: delete the whole directory tree on exit
    :raises IOError: if the directory is missing and create is False, or
                     if it cannot be created
    """
    directory = sanitize_filename(directory)

    original_directory = os.getcwd()

    if not os.path.exists(directory):

        if create:

            try:

                os.makedirs(directory)

            except OSError as e:

                # Was a bare "except:"; narrow it to the error makedirs can
                # actually raise and keep the original cause in the chain
                raise IOError("Cannot create directory %s" % directory) from e

        else:

            raise IOError("Directory %s does not exist" % directory)

    os.chdir(directory)

    # The original "except: raise" around the yield was a no-op and has been
    # removed; exceptions from the managed block propagate unchanged
    try:

        yield

    finally:

        # Always restore the caller's working directory, even on error
        os.chdir(original_directory)

        if remove:

            shutil.rmtree(directory)
                    required=True, type=str)

# parser.add_argument("--evtfile", help="Main event file for observation, used to get total exposure time",
#                     required=True, type=str)

# Get logger for this command
logger = logging_system.get_logger(os.path.basename(sys.argv[0]))

# Instance the command runner
runner = CommandRunner(logger)

args = parser.parse_args()

# Open the data package named on the command line
data_package = DataPackage(sanitize_filename(args.package))

# Process every Bayesian-Blocks "check variability" product in the package
# (one per CCD, matched by the tag pattern below)
for bbfile_tag in data_package.find_all("ccd_?_check_var"):

    logger.info("Processing %s..." % bbfile_tag)

    bbfile = data_package.get(bbfile_tag).filename

    logger.info("(reading from file %s)" % bbfile)

    # get directory path and file name from input file arguments
    bb_file_path = sanitize_filename(bbfile)
    masterfile = sanitize_filename(args.masterfile)
def worker(this_obsid):
    """
    Download the CSC products for one ObsID and pack them into DataPackages.

    Skips the obsid entirely unless a region directory exists for it. For
    every downloaded event file (one per observation segment), the matching
    exposure map, FOV file, tsv file, background map and aspect solution are
    located, stored (moved) into a new data package, and the package is then
    made read-only.

    NOTE(review): relies on module-level globals (region_repository, runner,
    logger, config, find_files, DataPackage) defined elsewhere in this file.

    :param this_obsid: the observation ID to process
    """
    regdir_this_obsid = os.path.join(region_repository, str(this_obsid))

    if os.path.exists(regdir_this_obsid):

        # This could download more than one observation segment for this obsid
        cmd_line = "xtc_download_by_obsid.py --obsid %d" % this_obsid

        runner.run(cmd_line)

        # Get the downloaded files
        evtfiles = find_files.find_files(
            os.getcwd(), 'acisf%05i*evt3.fits' % int(this_obsid))

        logger.info("Found %s event files" % len(evtfiles))

        for evtfile in evtfiles:

            # Get the root of the evt3 file name
            # The evt3 file name is like acisf01578_001N001_evt3.fits.gz,
            # where the 001 is the observation segment
            name_root = "_".join(os.path.basename(evtfile).split("_")
                                 [:-1])  # this is "acisf01578_001N001"

            obsid_identifier = name_root.replace(
                "acisf", "").split("N")[0]  # this is 01578_001

            logger.info("Processing %s" % obsid_identifier)

            # Find exposure map and fov file
            expmaps = find_files.find_files(os.getcwd(),
                                            "%s*exp3.fits*" % name_root)

            fovs = find_files.find_files(os.getcwd(),
                                         "%s*fov3.fits*" % name_root)

            tsvfiles = find_files.find_files(os.getcwd(),
                                             "%s.tsv" % obsid_identifier)

            bkgmaps = find_files.find_files(os.getcwd(),
                                            "*%s*bkgimg3.fits*" % this_obsid)

            asol_files = find_files.find_files(os.getcwd(), '*asol*.fits.gz')

            # Exactly one of each ancillary product must have matched
            assert len(
                expmaps
            ) == 1, "Wrong number of exposure maps for event file %s" % evtfile

            assert len(
                fovs
            ) == 1, "Wrong number of fov files for event file %s" % evtfile

            assert len(
                tsvfiles
            ) == 1, "Wrong number of tsv files for obsid %s" % this_obsid

            assert len(
                bkgmaps
            ) == 1, "Wrong number of bkg files for obsid %s" % this_obsid

            assert len(
                asol_files
            ) == 1, "Wrong number of asol files for obsid %s" % this_obsid

            tsvfile = tsvfiles[0]
            expmap = expmaps[0]
            fov = fovs[0]
            bkgmap = bkgmaps[0]
            asol = asol_files[0]

            logger.info("Found tsv file: %s" % tsvfile)
            logger.info("Found expmap: %s" % expmap)
            logger.info("Found fov file: %s" % fov)
            logger.info("Found bkg map file: %s" % bkgmap)
            logger.info("Found asol file: %s" % asol)

            logger.info("Creating data package %s" % obsid_identifier)

            # One package per obsid/segment, under the configured repository
            data_package_dir = os.path.join(
                sanitize_filename(config['data repository']), obsid_identifier)

            data_package = DataPackage(data_package_dir, create=True)

            # move=True: the downloaded files are moved (not copied) into the
            # package, leaving the temporary download area clean
            data_package.store("evt3", evtfile,
                               "Event file (Level 3) from the CSC", move=True)
            data_package.store("tsv", tsvfile,
                               "TSV file from the CSC", move=True)
            data_package.store("exp3", expmap,
                               "Exposure map (Level 3) from the CSC", move=True)
            data_package.store("fov3", fov,
                               "FOV file (Level 3) from the CSC", move=True)
            data_package.store("bkgmap", bkgmap,
                               "Background map (Level 3) from the CSC", move=True)
            data_package.store("asol", asol,
                               "Aspect solution file from the CSC", move=True)

            logger.info("done")

            # Make the data package read-only so we cannot change files by accident
            logger.info("Making it read-only")

            data_package.read_only = True

    else:

        logger.error("Region files do not exist for ObsID %s" % this_obsid)
                    required=False, default=1)

args = parser.parse_args()

# Get the logger
logger = logging_system.get_logger(os.path.basename(sys.argv[0]))

# Get the command runner
runner = CommandRunner(logger)

# Get the configuration
config = get_configuration(args.config_file)

# Sanitize the workdir
data_repository = sanitize_filename(config['data repository'])
region_repository = sanitize_filename(config['region repository'])

# Downloads happen in a temporary subdirectory of the data repository
data_repository_temp = os.path.join(data_repository, '__temp')

with work_within_directory(data_repository_temp, create=True):

    # Download files
    # Fan out over obsids with a process pool when more than one worker
    # was requested on the command line
    if args.n_processes > 1:

        pool = multiprocessing.Pool(args.n_processes)

        try:

            for i, _ in enumerate(pool.imap(worker, args.obsid)):
logger = logging_system.get_logger(os.path.basename(sys.argv[0])) # Instance the command runner runner = CommandRunner(logger) args = parser.parse_args() # Setup the FTOOLS so they can be run non-interactively setup_ftools.setup_ftools_non_interactive() # creates text file with name of all level 3 region files for given Obs ID region_dir = sanitize_filename.sanitize_filename(args.region_dir) obsid = os.path.split(region_dir)[-1] # region_dir is specific to one obsid. Get general region repository where db is located db_dir = os.path.split(region_dir)[0] # Get the region files from this observation region_files = find_files.find_files(region_dir, "*reg3.fits.gz") # Open the data package data_package = DataPackage(args.in_package) # Get the pointing from the event file
# parser.add_argument("-v", "--verbosity", help="Info or debug", type=str, required=False, default='info', # choices=['info', 'debug']) # Get the logger logger = logging_system.get_logger(os.path.basename(sys.argv[0])) # Get the command runner runner = CommandRunner(logger) args = parser.parse_args() # Get the configuration config = get_configuration(args.config_file) # Get work directory and sanitize it work_directory = sanitize_filename(config['work directory']) # Now remove [ and ] (which might be there if the user is running jobs array on PBS). They would confuse # CIAO executables work_directory = work_directory.replace("[","").replace("]","") # Check whether we need to remove the workdir or not remove_work_dir = bool(config['remove work directory']) # Now move in the work directory and do the processing # Encapsulate all in a try/except clause so that even in case of errors we have the opportunity to clean up # the workdir try: for this_obsid in args.obsid:
    # Required option: no usable default value was supplied in the template
    parser.add_argument("--%s" % name, help=comment, required=True)

else:

    # Optional: fall back to the template's default value
    parser.add_argument("--%s" % name, help=comment, default=value)

# Get the logger
logger = logging_system.get_logger(os.path.basename(sys.argv[0]))

args = parser.parse_args()

# Now make sure that all 3 repositories are different from each other
# NOTE: this is a set, hence its entries are kept unique
directories = {
    sanitize_filename(args.data_repository),
    sanitize_filename(args.region_repository),
    sanitize_filename(args.output_repository)
}

assert len(
    directories
) == 3, "The data, region and output repositories must point at different directories"

# Load configuration file from the code repository
with open(conf_file_template) as f:

    template = yaml.safe_load(f)

with open(args.output, "w+") as f:
parser.add_argument("--outfile", help="Output file containing the simulated events", required=True, type=str) # Get logger for this command logger = logging_system.get_logger(os.path.basename(sys.argv[0])) # Parse arguments args = parser.parse_args() # Sanitize file names and make sure they exist bkgfile = sanitize_filename(args.bkgmap) asolfile = sanitize_filename(args.asolfile) evtfile = sanitize_filename(args.evtfile) outfile = sanitize_filename(args.outfile) expomap = sanitize_filename(args.expomap) for filename in [bkgfile, asolfile, evtfile]: assert os.path.exists(filename), "File %s does not exist" % filename logger.info("Reading background image %s..." % bkgfile) # Read background image rate_data, header = pyfits.getdata(bkgfile, 0, header=True)
def __init__(self, filename, description):
    """
    Wrap an existing file on disk together with a human-readable description.

    :param filename: path to the file (sanitized before being stored)
    :param description: free-form description of the file
    """
    self._filename = sanitize_filename(filename)
    self._description = description

    # Creating a File for a path that is not on disk is a programming error
    assert os.path.exists(self._filename), (
        "Something went wrong when creating File instance. "
        "File %s does not exists!" % self._filename)
"--config_file", help="Path to the configuration file", type=str, required=True) # assumption = all level 3 region files and event file are already downloaded into same directory, the region_dir args = parser.parse_args() # Get logger logger = get_logger(os.path.basename(sys.argv[0])) # Get the configuration config = get_configuration(args.config_file) region_dir = sanitize_filename(config['region repository']) with work_within_directory.work_within_directory(region_dir): # Find all region files region_files = find_files.find_files('.', '*_reg3.fits.gz') logger.info("Found %s region files" % len(region_files)) db = collections.OrderedDict() logger.info("Starting processing...") pool = multiprocessing.Pool(multiprocessing.cpu_count()) try:
parser.add_argument("--debug", help="Debug mode? (yes or no)", required=False, default='no') # Get logger for this command logger = logging_system.get_logger(os.path.basename(sys.argv[0])) # Instance the command runner runner = CommandRunner(logger) args = parser.parse_args() eventfile = sanitize_filename(args.evtfile) # Open event file tot_hot_pixels = 0 with pyfits.open(eventfile, mode='update', memmap=False) as fits_file: # Get the data extension data = fits_file['EVENTS'].data # If this is a simulated data, there is no need for this processing if fits_file['EVENTS'].header.get("XTDACSIM"): logger.info(
                    required=False, type=str, default='.')

parser.add_argument("--verbose-debug", action='store_true')

parser.add_argument("--cleanup", action='store_true')

# Get the logger
logger = logging_system.get_logger(os.path.basename(sys.argv[0]))

# Get the command runner
runner = CommandRunner(logger)

args = parser.parse_args()

data_path = sanitize_filename(args.data_path)
masterfile = sanitize_filename(args.masterfile)

# Read the master list of transient candidates; ndmin=1 guarantees an array
# even when the file contains a single row
# NOTE(review): np.recfromtxt is deprecated in recent NumPy — consider
# np.genfromtxt with names=True when this file is next touched
transient_data = np.array(np.recfromtxt(masterfile, names=True), ndmin=1)

for transient in transient_data:

    # Unpack the columns of this candidate's row
    obsid = transient['Obsid']
    ccd = transient['CCD']
    candidate = transient['Candidate']
    tstart = transient['Tstart']
    tstop = transient['Tstop']

    duration = tstop - tstart

    # All filtered event files for this CCD (any obsid segment)
    event_files = find_files(data_path, "ccd_%s_*_filtered_*.fits" % (ccd))