def get_files_metadata(rootnames):
    """Retrieve metadata for a rootname from QL.

    Parameters
    ----------
    rootnames : list
        A list of the new rootnames to be processed.

    Returns
    -------
    metadata : tuple
        The complimentary metadata - ql directory, mid exposure times,
        filter, aperture, exposure time, sun angle, and FGS lock.
    """
    logging.info('Getting metadata from QL database.')
    midexps, filterss, apertures, ql_dirs, sun_angs, exptimes, fgs_locks = [], [], [], [], [], [], []
    for root in rootnames:
        # ql_root is matched on the first 8 characters of the rootname.
        # Only the first returned row is used for each root.
        results = ql_session.query(IR_flt_0.expstart, IR_flt_0.expend,
                                   IR_flt_0.filter, IR_flt_0.aperture,
                                   Master.dir, IR_flt_0.sunangle,
                                   IR_flt_0.exptime, IR_flt_0.fgslock)\
            .join(Master)\
            .filter(IR_flt_0.ql_root == root[0:8]).all()
        # Mid-exposure time is the mean of exposure start and end.
        midexps.append(np.mean([results[0][0], results[0][1]]))
        filterss.append(results[0][2])
        apertures.append(results[0][3])
        ql_dirs.append(results[0][4])
        sun_angs.append(results[0][5])
        exptimes.append(results[0][6])
        fgs_locks.append(results[0][7])
    metadata = (ql_dirs, midexps, filterss, apertures, exptimes, sun_angs, fgs_locks)
    # BUG FIX: the original built `metadata` but never returned it, so every
    # caller received None. (A later duplicate definition of this function in
    # this file does return the tuple; this one now matches that behavior.)
    return metadata
def _retrieve_scan_data_astroquery(prop_id, fits_file_type, data_dir):
    """Copy spatial-scan files from quicklook directories to `data_dir`/new.

    Only files that have not already been sorted into a subdirectory of
    `data_dir` are downloaded.
    """
    print('Retrieving data from proposal {}'.format(str(prop_id)))

    # All rootnames for this proposal whose scan type is not 'N' (i.e. scans).
    rows = session.query(Master.rootname).join(UVIS_flt_0).join(UVIS_spt_0).\
        filter(UVIS_flt_0.proposid == prop_id).filter(UVIS_spt_0.scan_typ != 'N').all()
    scan_rootnames = {row[0] for row in rows}

    # Rootnames already present on disk (first 9 chars of each basename).
    on_disk = {os.path.basename(path)[0:9]
               for path in _get_existing_filenames(data_dir, fits_file_type)}

    # sometimes files have a 'j' or 's'. replace this with a q. i don't know why this is - failed obs?
    new_file_rootnames = [name[0:8] + 'q' for name in scan_rootnames - on_disk]
    print(f'Found {len(new_file_rootnames)} un-ingested files in QL database.')

    # query astroquery
    query_results = query_by_data_id(new_file_rootnames, file_type=fits_file_type)
    print(f'Found {len(query_results)} results in Astroquery. Downloading.')

    # NOTE(review): only the first 5 results are downloaded — looks like a
    # debugging leftover; confirm whether the slice should be removed.
    download_products(query_results[0:5], output_dir=os.path.join(data_dir, 'new'))
def get_ql_records(filt):
    """Return records (filter, rootname, directory) for all matching QL images.

    Parameters
    ----------
    filt : str
        The filter to process. Can be 'all' to process all filters.

    Returns
    -------
    ql_records : list
        A list of query-result rows, each containing the image's filter,
        ql rootname, and quicklook directory path.
        (The original docstring claimed a dict keyed by rootname; the code
        has always returned a list of rows — docs corrected to match.)
    """
    # Build query
    ql_query = ql_session.query(IR_flt_0.filter, Master.ql_root, Master.dir)\
        .join(Master, Master.id == IR_flt_0.master_id)\
        .join(IR_flt_1, IR_flt_1.id == IR_flt_0.id)

    # Filter out subarrays
    ql_query = ql_query.filter((IR_flt_0.aperture == 'IR') | (IR_flt_0.aperture == 'IR-FIX'))

    # Filter out grisms & blank
    ql_query = ql_query.filter(
        (IR_flt_0.filter != 'G102') &
        (IR_flt_0.filter != 'Blank') &
        (IR_flt_0.filter != 'G141'))

    # Filter out DARKS/FLATS; last two can be commented out if needed
    ql_query = ql_query.filter(
        (IR_flt_0.targname != 'DARK') &
        (IR_flt_0.targname != 'DARK-NM') &
        (IR_flt_0.targname != 'TUNGSTEN') &
        (IR_flt_0.imagetyp != 'FLAT'))

    # Filter out guide-star failures
    ql_query = ql_query.filter(
        (IR_flt_0.quality != 'GSFAIL') &
        (IR_flt_0.quality != 'LOCKLOST') &
        (IR_flt_0.quality != 'ACQ2FAIL'))

    # If a specific filter is specified, select for that only.
    if filt != 'all':
        ql_query = ql_query.filter(IR_flt_0.filter == filt.upper())

    # .all() already returns a list; the original's element-by-element copy
    # loop was redundant.
    return ql_query.all()
def query_ql(proposal_id, object_names, file_type='flt'):
    """Query the quicklook database by proposal ID for full paths to files.

    Default file type is _flt.fits but you can specify _raw.fits if you
    wish to calibrate the data.

    Parameters
    ----------
    proposal_id : int
        The HST proposal ID to query on.
    object_names : list of str
        Target names (UVIS_flt_0.targname values) to query for.
    file_type : str
        FITS file suffix to glob for ('flt' by default).

    Returns
    -------
    paths : list
        Full paths to matching on-disk FITS files whose rootname was
        returned by the database query.

    Notes
    -----
    The original docstring documented parameters (`list_new_files`,
    `paths`) that never existed — corrected here. The original also
    accumulated target names and filters into lists that were never
    used or returned; that dead code has been removed.
    """
    query_dirs = []
    roots = set()  # set: O(1) membership test per file below
    for target in object_names:  # renamed from `object` — shadowed the builtin
        results = session.query(Master.dir, UVIS_flt_0.targname,
                                UVIS_flt_0.filter, Master.rootname)\
            .join(UVIS_flt_0)\
            .filter(UVIS_flt_0.detector == 'UVIS')\
            .filter(UVIS_flt_0.proposid == proposal_id)\
            .filter(UVIS_flt_0.targname == target)\
            .filter(UVIS_flt_0.filter != 'G280').all()
        roots.update(obs.rootname for obs in results)
        # because the query returns a tuple; element 0 is the directory
        query_dirs.extend(row[0] for row in results)

    # all unique directories
    paths = []
    for directory in set(query_dirs):
        for fname in glob.glob(directory + '/*{}.fits'.format(file_type)):
            # characters [-18:-9] of e.g. '.../idqz01aaq_flt.fits' are the
            # 9-char rootname — assumes standard HST filename length; verify.
            if fname[-18:-9] in roots:
                paths.append(fname)
    return paths
def get_new_files_to_ingest(filt):
    """For a given filter, checks files in filesystem against files already
    in database. Returns a list of rootnames of files in filesystem but NOT
    in database, i.e. new files, to process. Next, checks if files are out
    of the proprietary period.

    Parameters
    ----------
    filt : str
        The filter being processed.

    Returns
    -------
    new_rootnames_public : list
        The new rootnames to be processed.
    """
    logging.info('Getting list of new files to ingest for {}'.format(filt))

    # Determine which rootnames are in the filesystem
    files_in_psf_filesystem = glob.glob(SETTINGS['output_dir'] + '/{}/*ras'.format(filt))
    rootnames_in_psf_filesystem = set([os.path.basename(x)[0:9] for x in files_in_psf_filesystem])

    # #Determine which rootnames are already in the database
    # psf_session, psf_base, psf_engine = loadConnection(SETTINGS['psf_connection_string'])
    # rootnames_in_database = psf_session.query(distinct(PSFTableMAST.rootname)).all()
    # rootnames_in_database = [item[0] for item in rootnames_in_database]
    # new_rootnames = set(rootnames_in_psf_filesystem) - rootnames_in_database

    new_rootnames = list(rootnames_in_psf_filesystem)  # delete once block above is uncommented
    logging.info('{} total new files for {}'.format(len(new_rootnames), filt))

    # Remove any new rootnames that are proprietary
    today = datetime.datetime.today()
    try:
        one_year_ago = today.replace(year=today.year - 1)
    except ValueError:
        # BUG FIX: replace(year=...) raises ValueError when today is Feb 29
        # and the previous year is not a leap year — fall back to Feb 28.
        one_year_ago = today.replace(year=today.year - 1, day=28)
    new_rootnames_public = []
    for rootname in new_rootnames:
        # NOTE(review): .one() raises if the QL database has zero or multiple
        # rows for this ql_root — presumably exactly one is guaranteed; verify.
        results = ql_session.query(IR_flt_0.ql_root, IR_flt_0.date_obs)\
            .filter(IR_flt_0.ql_root == rootname[:-1]).one()
        date_obs = datetime.datetime.combine(results[1], datetime.time.min)
        if date_obs < one_year_ago:
            new_rootnames_public.append(rootname)
    logging.info('{} new non-proprietary files to ingest for {}'.format(len(new_rootnames_public), filt))

    return new_rootnames_public
def get_files_metadata(rootnames):
    """Retrieve QL-database metadata for each rootname.

    Returns a tuple of parallel lists: (ql directories, mid-exposure times,
    filters, apertures, exposure times, sun angles, FGS locks), one entry
    per input rootname.

    NOTE(review): this is a duplicate definition that shadows an earlier
    `get_files_metadata` in this file.
    """
    logging.info('Getting metadata from QL database.')

    # Collect the first matching row for each rootname (matched on the
    # 8-character ql_root prefix).
    rows = []
    for root in rootnames:
        matches = ql_session.query(IR_flt_0.expstart, IR_flt_0.expend,
                                   IR_flt_0.filter, IR_flt_0.aperture,
                                   Master.dir, IR_flt_0.sunangle,
                                   IR_flt_0.exptime, IR_flt_0.fgslock)\
            .join(Master)\
            .filter(IR_flt_0.ql_root == root[0:8]).all()
        rows.append(matches[0])

    # Transpose the per-row tuples into per-column lists.
    midexps = [np.mean([row[0], row[1]]) for row in rows]  # mean of expstart/expend
    filterss = [row[2] for row in rows]
    apertures = [row[3] for row in rows]
    ql_dirs = [row[4] for row in rows]
    sun_angs = [row[5] for row in rows]
    exptimes = [row[6] for row in rows]
    fgs_locks = [row[7] for row in rows]

    return (ql_dirs, midexps, filterss, apertures, exptimes, sun_angs, fgs_locks)