def get_kepler_lcs(filenames):
    """
    Purpose:
        Download Kepler lightcurves from MAST.

    Args:
        filenames - list of full lightcurve filenames; the first 13
            characters of each are used as the MAST target name.

    Returns:
        paths_to_files - list of full local paths to the downloaded
            lightcurves, in the same order as `filenames`.
    """
    target_names = [name[:13] for name in filenames]
    observations = Observations.query_criteria(target_name=target_names,
                                               obs_collection='Kepler')
    all_products = Observations.get_product_list(observations)
    wanted_products = Observations.filter_products(all_products,
                                                   extension=filenames)
    downloads = Observations.download_products(wanted_products)

    # The download manifest does not come back in the requested order, so it
    # is indexed by local path and each requested file is looked up one at a
    # time to force the output order to match `filenames`.
    downloads = downloads.to_pandas(index='Local Path')

    # make_sampler([name]) builds the selector that picks that file's row out
    # of the manifest; the row's index is the full local file path.
    return [make_sampler([name])(downloads).index[0] for name in filenames]
def astroquery_sci(data_start, data_end, proposal_id, paths, download):
    """Retrieve raw science data for a given proposal ID using astroquery.

    The observation query is built from:
        - intentType='science'
        - instrument_name="WFC3/UVIS"
        - proposal_id -- user-defined program/proposal ID
        - t_min=[data_start, data_end rounded down] -- only when
          `data_start` is truthy

    When `download` equals True the working directory is changed to
    paths['PID_DIR'] and the RAW products are downloaded, unless
    paths['DLD_DIR'] already exists.
    """
    criteria = {
        'intentType': 'science',
        'instrument_name': "WFC3/UVIS",
        'proposal_id': proposal_id,
    }
    if data_start:
        # Round the end date down so that the file from the new date
        # isn't included.
        criteria['t_min'] = [data_start, data_end // 0.001 * 0.001]

    observations = Observations.query_criteria(**criteria)
    raw_products = Observations.filter_products(
        Observations.get_product_list(observations),
        productSubGroupDescription="RAW")

    # Kept as an equality test against True, exactly like the original flag
    # check, so only True-equal values trigger the download.
    if not (download == True):
        return

    os.chdir(paths['PID_DIR'])
    if os.path.exists(paths['DLD_DIR']):
        print('Download directory exists!! Not downloading files')
        return

    Observations.download_products(raw_products, mrp_only=False)
    print('Download to {} complete'.format(paths['DLD_DIR']))
def filter_products(obsids, table):
    """Download the DRZ data products matching `obsids`.

    Product lists are requested for every 'obsid' in `table`, narrowed to
    products whose obs_id is in `obsids` and whose sub-group is "DRZ", and
    then downloaded to /Volumes/galaxies/mastDownload/HST.
    """
    # TODO (carried over from the original): move indexing of the obsids to
    # another function.
    product_list = Observations.get_product_list(table['obsid'])
    drz_products = Observations.filter_products(
        product_list,
        obs_id=obsids,
        productSubGroupDescription="DRZ",
    )
    # NOTE: the download location is hard-coded; specify the filepath here.
    Observations.download_products(drz_products,
                                   "/Volumes/galaxies/mastDownload/HST")
def search_and_list():
    """Interactively query MAST and print the matching observations/products.

    Prompts for a search mode (1 = coordinates, 2 = target name), the search
    position/target, a radius and an optional mission, then prints the number
    of results, the first 15 observations, the data products of the first two
    observations and those products filtered by a user-supplied product type.

    Raises:
        SystemExit: if the user declines to re-enter an invalid search mode.
    """

    def _ask_choice(prompt):
        # Non-numeric input is reported as an invalid choice (None) instead
        # of crashing with ValueError.
        try:
            return int(input(prompt))
        except ValueError:
            return None

    print(" How do you want to search for object : ")
    print(" 1. RA and DEC ")
    print(" 2. Target Name Resolver ")
    query_choice = _ask_choice(" Enter choice 1 or 2 : ")

    # Keep asking until a valid mode is chosen. The original test
    # (`!= 1 or != 2`) was always true, never actually read the retry
    # answer, and built a SystemExit without raising it.
    while query_choice not in (1, 2):
        retry = input(
            " Invalid choice!!!! \n Do you want to continue entering correct option [y]/n :"
        )
        # "[y]" marks yes as the default, so an empty answer means yes.
        if retry.strip().lower() not in ('', 'y'):
            raise SystemExit()
        query_choice = _ask_choice(" Enter correct choice 1/2 : ")

    # Input through RA and DEC, or through a resolvable target name.
    if query_choice == 1:
        search_region = input(" Enter search region : ")
    else:
        search_target = input(" Enter search target : ")
    search_radius = input(" Enter search radius : ")
    mission_ID = input(" Enter mission specific constraint\n" +
                       str(Observations.list_missions()) +
                       "\n otherwise leave empty : ").strip()

    if mission_ID:
        # Positional criteria for query_criteria are `coordinates` and
        # `objectname`; `object_region` is not a recognised keyword.
        if query_choice == 1:
            ObsByCriteria = Observations.query_criteria(
                obs_collection=mission_ID,
                coordinates=search_region,
                radius=search_radius)
        else:
            ObsByCriteria = Observations.query_criteria(
                obs_collection=mission_ID,
                objectname=search_target,
                radius=search_radius)
    elif query_choice == 1:
        # No mission constraint: query_criteria needs at least one
        # non-positional criterion, so use the pure positional queries.
        ObsByCriteria = Observations.query_region(search_region,
                                                  radius=search_radius)
    else:
        ObsByCriteria = Observations.query_object(search_target,
                                                  radius=search_radius)

    # Query output
    print("Number of results:", len(ObsByCriteria))
    print(ObsByCriteria[:15])

    # Data product query (first two observations only)
    print("\n~~~~ Viewing dataproducts query ~~~~")
    obsids = ObsByCriteria[0:2]['obsid']
    data_products_by_id = Observations.get_product_list(obsids)
    print(data_products_by_id)

    # Filtering data products
    productType = input(
        " Enter valid product type \n [SCIENCE, CALIBRATION, BIAS, DARK, FLAT, WAVECAL, NOISE, WEIGHT, AUXILIARY, INFO, CATALOG, LIGHTCURVE, TARGETPIXELS, PREVIEW, PREVIEW_FULL, PREVIEW_1D, PREVIEW_2D, THUMBNAIL, PREVIEW_THUMB, MINIMUM_SET, RECOMMENDED_SET, COMPLETE_SET, WEBSERVICE]\n : "
    )
    Filtered_Products = Observations.filter_products(data_products_by_id,
                                                     productType=productType,
                                                     mrp_only=False)
    print(Filtered_Products)
def find_and_process(obs_collection='HST',
                     dataproduct_type='image',
                     instrument_name='ACS/WFC',
                     filters='F814W',
                     N=100,
                     profile_name='ndmiles_admin'):
    """Query MAST and fan the matching FLT files out to the compute_sky Lambda.

    Parameters
    ----------
    obs_collection, dataproduct_type, instrument_name, filters
        Criteria passed straight to `Observations.query_criteria`.
    N : int
        Number of observations (from the start of the query result) whose
        product lists are requested.
    profile_name : str
        AWS profile used both for the MAST cloud dataset and for the boto3
        session. Defaults to the profile that used to be hard-coded.

    One asynchronous ('Event') Lambda invocation is issued per S3 URL, and
    the total submission time is printed at the end.
    """
    # Use AWS S3 URLs for the MAST records (rather than the ones at
    # http://mast.stsci.edu)
    Observations.enable_cloud_dataset(profile=profile_name)

    # Query MAST for the requested data
    query_parameters = {
        'obs_collection': obs_collection,
        'dataproduct_type': dataproduct_type,
        'instrument_name': instrument_name,
        'filters': filters
    }
    obsTable = Observations.query_criteria(**query_parameters)

    # Grab the products of the first N observations:
    # http://astroquery.readthedocs.io/en/latest/mast/mast.html#getting-product-lists
    products = Observations.get_product_list(obsTable['obsid'][:N])

    # Keep just the FLT products
    filtered_products = Observations.filter_products(
        products, mrp_only=False, productSubGroupDescription=['FLT'])

    # We want URLs like this:
    # s3://stpubdata/hst/public/ibg7/ibg705080/ibg705081_drz.fits
    s3_urls = Observations.get_cloud_uris(filtered_products)

    # Auth to invoke the Lambda function
    session = boto3.Session(profile_name=profile_name)
    client = session.client('lambda', region_name='us-east-1')

    st = time.time()
    for url in s3_urls:
        fits_s3_key = url.replace("s3://stpubdata/", "")
        print(fits_s3_key)
        event = {
            'fits_s3_key': fits_s3_key,
            'fits_s3_bucket': 'stpubdata',
            's3_output_bucket': 'compute-sky-lambda'
        }
        # 'Event' invocations are fire-and-forget, so the response is not
        # inspected.
        client.invoke(FunctionName='compute_sky',
                      InvocationType='Event',
                      LogType='Tail',
                      Payload=json.dumps(event))
    et = time.time()
    print(f"Duration: {et - st:0.2f}")
def plot_lc(file, filepath='./fitsFiles/', local=True, c='blue', ax=False):
    """
    Plot a single Kepler lightcurve.

    Args:
        file (str) - full filename starting with kplr and ending in .fits
            (the id including the time information)
        filepath (str) - directory holding the fits file, used when `local`
        local (bool) - if True read `filepath + file`; otherwise download
            the file from MAST first
        c - matplotlib colour used for both the markers and the line
        ax - axes to draw on; a new 16x4 figure is created when falsy

    Returns:
        None
    """
    if local:
        f = filepath + file
    else:
        # The target name is the first 13 characters of the filename. The
        # original derived it from undefined names (`tmp`, `n`), which made
        # this branch fail with NameError.
        obj_ids = [file[:13]]
        keplerObs = Observations.query_criteria(target_name=obj_ids,
                                                obs_collection='Kepler')
        keplerProds = Observations.get_product_list(keplerObs)
        yourProd = Observations.filter_products(keplerProds, extension=file)
        manifest = Observations.download_products(yourProd)
        manifest = manifest.to_pandas(index='Local Path')
        f = manifest.index[0]  # full local path of the downloaded file

    if not ax:
        fig = plt.figure(figsize=(16, 4))
        ax = fig.add_subplot(111)

    x, y, _err = read_kepler_curve(f)

    # Pad the flux range by 10% overall, but never show less than the
    # [0.95, 1.05] band.
    y_min, y_max = min(y), max(y)
    axrange = 0.55 * (y_max - y_min)
    mid = (y_max + y_min) / 2
    yaxmin = mid - axrange
    yaxmax = mid + axrange
    ax.set_ylim(yaxmin if yaxmin < .95 else .95,
                yaxmax if yaxmax > 1.05 else 1.05)
    ax.set_xlim(min(x), max(x))

    ax.plot(x, y, 'o', markeredgecolor='none', c=c, alpha=0.2)
    ax.plot(x, y, '-', markeredgecolor='none', c=c, alpha=0.7)
    ax.set_ylabel(r'$\frac{\Delta F}{F}$', fontsize=25)
    ax.tick_params(labelsize=20)
def drawData(index):
    """Plot the lightcurve of the chosen point on the shared `ax2` axes.

    The file named by df.index[index] is downloaded from MAST, read, and the
    download directory is removed again before plotting.
    """
    ax2.cla()

    # Full file name (****-****_llc.fits); its first 13 characters are the
    # observation id (kplr********).
    fits_name = df.index[index]
    kepler_id = fits_name[:13]

    observations = Observations.query_criteria(target_name=kepler_id,
                                               obs_collection='Kepler')
    products = Observations.get_product_list(observations)
    wanted = Observations.filter_products(products, extension=fits_name)
    manifest = Observations.download_products(wanted)
    local_path = manifest[0][0]

    t, nf, err = qt.read_kepler_curve(local_path)
    shutil.rmtree('./mastDownload')  # removing the downloaded data

    # Pad the flux range, but always show at least the 0.95-1.05 band.
    flux_min, flux_max = min(nf), max(nf)
    axrange = 0.55 * (flux_max - flux_min)
    mid = (flux_max + flux_min) / 2
    yaxmin = mid - axrange
    yaxmax = mid + axrange
    lower = yaxmin if yaxmin < .95 else .95
    upper = yaxmax if yaxmax > 1.05 else 1.05
    ax2.set_ylim(lower, upper)

    point_colour = colorVal[index]
    for marker, opacity in (('o', 0.2), ('-', 0.7)):
        ax2.plot(t, nf, marker, markeredgecolor='none', c=point_colour,
                 alpha=opacity)

    ax2.set_xlabel('Time (Days)', fontsize=22)
    ax2.set_ylabel(r'$\frac{\Delta F}{F}$', fontsize=30)
    fig.suptitle(files[index][:13], fontsize=30)
    canvas.draw()
def id_wcs_file(secid):
    """
    Use astroquery to locate the FFI (FFIC) products for observation `secid`.

    Nothing is downloaded here; the filtered data product table is returned.
    """
    observations = Observations.query_criteria(obs_id=secid)
    return Observations.filter_products(
        Observations.get_product_list(observations),
        productSubGroupDescription="FFIC",
        mrp_only=False,
    )
def query(self, date_range, aws=False):
    """ Query MAST for observations taken within `date_range`.

    Parameters
    ----------
    date_range : tuple
        (start, stop) pair of `astropy.time.Time` objects bounding the
        interval to search.

    aws : bool
        If True, the S3-hosted HST dataset is enabled before querying.

    On success the filtered product table is stored in ``self.products``
    under the ISO date ('YYYY-MM-DD') of `start`. If the observation query
    raises (warnings are promoted to errors), the error is logged and
    nothing is stored.
    """
    LOG.info('Submitting query to MAST')
    if aws:
        Observations.enable_s3_hst_dataset()

    start, stop = date_range
    # there shouldn't be any data taken after the most recent file
    criteria = dict(
        project=self.project,
        dataproduct_type=self.product_type,
        intentType=self.obstype,
        target_name=self.target_name,
        instrument_name=self.instr,
        t_min=[start.mjd, stop.mjd],
        t_exptime=self.t_exptime,
    )

    with warnings.catch_warnings():
        warnings.simplefilter('error')
        try:
            observations = Observations.query_criteria(**criteria)
        except Exception as e:
            LOG.error('{}\n Date range [{}, {}]\n {}'.format(
                e, start.iso, stop.iso, self._msg_div))
            return

        LOG.info('Filtering observations...')
        product_list = Observations.get_product_list(observations)
        selected = Observations.filter_products(
            product_list,
            mrp_only=False,
            productSubGroupDescription=self.SubGroupDescription,
        )
        # Keyed by the start date, formatted as 'YYYY-MM-DD'
        self.products[start.datetime.date().isoformat()] = selected
def plot_top_n(Q_coo, n=10, sortby='sAverage', top_n_df=False):
    """
    Purpose:
        Plot the top n outlier lightcurves for a given quarter. Assumes the
        standard coo layout: feature data in the `data` attribute, outlier
        scores in `scores`, both indexed by the full file names.

    Args:
        Q_coo (cluster outlier object) - object holding the feature data and
            outlier scores
        n (int) - number of most-outlying points to plot
        sortby (optional, str) - score column to sort by (descending)
        top_n_df (optional, boolean) - whether to return a dataframe with
            the features of the top outliers

    Returns:
        top_n_feats (optional, dataframe) - features of the top n outliers;
            None when `top_n_df` is falsy
    """
    ranked = Q_coo.scores.sort_values(by=sortby, ascending=False)
    filenames = list(ranked.index[:n])
    obj_ids = [name[:13] for name in filenames]

    observations = Observations.query_criteria(target_name=obj_ids,
                                               obs_collection='Kepler')
    products = Observations.get_product_list(observations)
    wanted = Observations.filter_products(products, extension=filenames)
    manifest = Observations.download_products(wanted)

    # Downloading re-sorts the files, which loses the most-to-least outlying
    # order, so the manifest is indexed by local path and each file is looked
    # up individually in the original order.
    manifest = manifest.to_pandas(index='Local Path')

    for fits_name, obj_id in zip(filenames, obj_ids):
        # make_sampler([name]) picks that file's row; its index is the full
        # local filepath.
        local_path = make_sampler([fits_name])(manifest).index[0]
        fig = plt.figure(figsize=(15, 1))
        ax = fig.add_subplot(111)
        t, nf, err = read_kepler_curve(local_path)
        ax.errorbar(t, nf, err)
        plt.title('KIC {}'.format(int(obj_id[4:])))

    shutil.rmtree('./mastDownload')  # removing the downloaded data

    if not top_n_df:
        return None
    return make_sampler(ranked[:n].index)(Q_coo.data)
def aws_fullframe_fits():
    """
    Queue the S3 location of each full frame (FFIC) file of one observation.

    For every cloud URI of the filtered products, a record holding the bucket,
    the path inside the bucket and the intended local file path (under the
    local 'data' directory) is put on the done queue. When constants.DEBUG is
    set, the loop stops once the index exceeds 2.
    """
    import boto3
    import os
    import typing
    import numpy as np
    from astropy.io import fits
    from astroquery.mast import Observations
    from tess_bert import shortcuts as tess_shortcuts
    from urllib.parse import urlparse, ParseResult

    work_queue, done_queue, ologger = utils.comm_binders(aws_fullframe_fits)

    obs_id: str = 'tess-s0001-1-1'
    # NOTE(review): `shortcuts` is not the alias imported above
    # (`tess_shortcuts`); presumably it is a module-level name - confirm.
    data_dir: str = os.path.join(shortcuts.getcwd(), 'data')
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    ffic_products = Observations.filter_products(
        Observations.get_product_list(
            Observations.query_criteria(obs_id=obs_id)),
        productSubGroupDescription="FFIC",
        mrp_only=False)

    Observations.enable_cloud_dataset()
    cloud_uris = Observations.get_cloud_uris(ffic_products, includeBucket=True)

    for position, uri in enumerate(cloud_uris):
        parsed: ParseResult = urlparse(uri)
        local_path: str = os.path.join(data_dir,
                                       os.path.basename(parsed.path))
        record = {
            'bucket_path': parsed.path.strip('/'),
            'filepath': local_path,
            'bucket': parsed.netloc
        }
        done_queue.put(record)
        if position > 2 and constants.DEBUG:
            break
def query_by_data_id(dataset_ids, file_type):
    """ Astroquery query by file rootname(s) or ASN ID.

    Parameters
    ----------
    dataset_ids : str or iterable of str
        A single ID or a collection of IDs (exposure rootnames or ASN IDs).
    file_type : str or list of str
        Product sub-group(s) to keep, e.g. 'FLT' or ['FLT', 'DRZ'] (case
        insensitive). 'any' keeps every product.

    Returns
    -------
    query_products_total : `astropy.table.Table` or None
        Products whose ``obs_id`` is exactly one of `dataset_ids`. None is
        returned when `dataset_ids` is empty.
    """
    # Normalise to a list so a single ID is matched exactly below; testing
    # `obs_id in "some_string"` would be a substring match. This also accepts
    # tuples/sets, which previously left `visit_ids` undefined.
    if isinstance(dataset_ids, str):
        dataset_ids = [dataset_ids]
    else:
        dataset_ids = list(dataset_ids)
    wanted_ids = set(dataset_ids)

    # Initial query is for all files in each visit, since you can only query
    # by ASN ID and `dataset_ids` might contain single exposure rootnames.
    visit_ids = sorted({x[0:6] + '*' for x in dataset_ids})

    if file_type != 'any':
        if isinstance(file_type, str):
            file_type = file_type.upper()
        elif isinstance(file_type, list):
            file_type = [x.upper() for x in file_type]

    visit_tables = []
    for idd in visit_ids:
        obsTable = Observations.query_criteria(obstype='all',
                                               obs_collection='HST',
                                               obs_id=idd)
        query_products = Observations.get_product_list(obsTable)
        if file_type != 'any':
            query_products = Observations.filter_products(
                query_products, productSubGroupDescription=file_type)
        visit_tables.append(query_products)

    if not visit_tables:
        print('0 records found')
        return None

    if len(visit_tables) == 1:
        query_products_total = visit_tables[0]
    else:
        query_products_total = vstack(visit_tables)

    # Initially all visit files were returned. Now select only specified IDs.
    remove_rows = [
        i for i, obs_id in enumerate(query_products_total['obs_id'])
        if obs_id not in wanted_ids
    ]
    query_products_total.remove_rows(remove_rows)

    print('{} records found'.format(len(query_products_total)))
    return query_products_total
def astroquery_darks(anneal_start, anneal_end, paths, download):
    """Retrieve the darks taken within one anneal cycle using astroquery.

    The observation query uses:
        - intentType='calibration' -- for the darks
        - instrument_name="WFC3/UVIS" -- or whichever instrument is needed
        - t_min -- the anneal start and (rounded-down) end dates
        - target_name="DARK*" -- all files identified as a type of dark

    No exposure time is given (too stringent a cut). The query returns more
    files than would be collected from MAST; the darks CTE correction code
    later selects only the files it needs, so this is not a problem.

    The end date is rounded down slightly so that the file at the start of
    the next anneal is not included, e.g.
    [58170.7838, 58201.3608] --> [58170.7838, 58201.360].

    When `download` equals True the working directory is changed to
    paths['ANN_DIR'] and the RAW products are downloaded, unless
    paths['DLD_DIR'] already exists.
    """
    # Round the next anneal start date down so that the file from the new
    # date isn't included.
    window = [anneal_start, anneal_end // 0.001 * 0.001]

    # Collect the raw dark files for one anneal cycle.
    dark_observations = Observations.query_criteria(
        intentType='calibration',
        instrument_name="WFC3/UVIS",
        t_min=window,
        target_name="DARK*")
    raw_darks = Observations.filter_products(
        Observations.get_product_list(dark_observations),
        productSubGroupDescription="RAW")

    # Kept as an equality test against True, exactly like the original flag
    # check, so only True-equal values trigger the download.
    if not (download == True):
        return

    os.chdir(paths['ANN_DIR'])
    if os.path.exists(paths['DLD_DIR']):
        print('Download directory exists!! Not downloading files')
        return

    Observations.download_products(raw_darks, mrp_only=False)
    print('Download to {} complete'.format(paths['DLD_DIR']))
def download_tess_data(target_name):
    r"""Query MAST for the TESS data of an object and download the FITS files.

    Data is saved in the directories:
    `./mastDownload/TESS/<target_name>_*/*.fits`

    Args:
        target_name (str): name of the target: e.g., '231663901'

    Returns:
        bool: `True` if data are retrieved and `False` otherwise.
    """
    observations = Observations.query_criteria(target_name=target_name,
                                               obs_collection='TESS')
    if len(observations) == 0:
        msgs.warning('No TESS data for target {}'.format(target_name))
        return False

    # Products are listed, filtered and downloaded one observation at a time.
    for observation in observations:
        fits_products = Observations.filter_products(
            Observations.get_product_list(observation),
            extension='fits',
            mrp_only=False)
        Observations.download_products(fits_products,
                                       mrp_only=False,
                                       cache=False)
    return True
def download_kepler_data(target_name):
    r"""Query MAST for the Kepler data of an object and download the FITS files.

    The data is saved in the directories:
    `./mastDownload/Kepler/<target_name>_*/*.fits`

    Args:
        target_name (str): name of the target: e.g., 'kplr011446443'

    Returns:
        bool: `True` if data are retrieved and `False` otherwise.
    """
    observations = Observations.query_criteria(target_name=target_name,
                                               obs_collection='Kepler')
    if len(observations) == 0:
        msgs.warning('No Kepler data for target {}'.format(target_name))
        return False

    # Products are listed, filtered and downloaded one observation at a time.
    for observation in observations:
        fits_products = Observations.filter_products(
            Observations.get_product_list(observation),
            extension='fits',
            mrp_only=False)
        Observations.download_products(fits_products,
                                       mrp_only=False,
                                       cache=False)
    return True
def query_by_propid_targ_filter(prop_ids,
                                target_names='any',
                                filters='any',
                                file_types='any'):
    """ Astroquery query for HST data by target name, filter, and proposal ID.

    Returns a table of data products of the file type(s) in `file_types`.

    Parameters
    ----------
    prop_ids : str or list of str
        Proposal ID(s)
    target_names : str or list of str
        Exact target names that should be returned in the query; others will
        be excluded. All spelling/name variations that might appear in MAST
        should be provided. If 'any', all available targets are returned.
    filters : str or list of str
        Filters that should be returned in the query; others will be
        excluded. If 'any', all available filters are returned.
    file_types : str or list of str
        File extension type(s) desired (i.e. flt, flc, drz...), as a string
        for a single type or a list for many. If 'any', all available file
        types are returned.

    Returns
    -------
    query_products_total : `astropy.table.Table` or None
        Products found across all proposals (most recently queried proposal
        first), or None when no proposal returned any records.
    """
    if not isinstance(prop_ids, list):
        prop_ids = [prop_ids]
    if target_names == 'any':
        target_names = '*'
    if filters == 'any':
        filters = '*'

    if file_types != 'any':
        if isinstance(file_types, str):
            file_types = file_types.upper()
        elif isinstance(file_types, list):
            file_types = [x.upper() for x in file_types]

    # Collect the non-empty per-proposal tables and combine them once at the
    # end. The previous incremental stacking referenced the running total
    # before it existed whenever the first proposal (or every proposal)
    # returned nothing, raising NameError.
    found = []
    for prop_id in prop_ids:  # iterate to avoid timeout
        print('Querying for data from {}.'.format(prop_id))
        obsTable = Observations.query_criteria(obs_collection='HST',
                                               proposal_id=prop_id,
                                               target_name=target_names,
                                               filters=filters)
        query_products = Observations.get_product_list(obsTable)
        if file_types != 'any':
            query_products = Observations.filter_products(
                query_products, productSubGroupDescription=file_types)

        if len(query_products) == 0:
            print('No records found in query.')
            continue

        print('{} records found'.format(len(query_products)))
        found.append(query_products)

    if not found:
        return None
    if len(found) == 1:
        return found[0]
    # Newest query first, matching the order the original stacking produced.
    return vstack(found[::-1])
def retrieve_observation(obsid, suffix=['FLC'], archive=False, clobber=False):
    """Simple interface for retrieving an observation from the MAST archive

    If the input obsid is for an association, it will request all members
    with the specified suffixes.

    Parameters
    -----------
    obsid : string
        ID for observation to be retrieved from the MAST archive. Only the
        IPPSSOOT (rootname) of exposure or ASN needs to be provided; eg.,
        ib6v06060.

    suffix : list, optional
        List containing suffixes of files which should be requested from
        MAST. Default value "['FLC']". The default list is never mutated
        here, only rebound.

    archive : Boolean, optional
        Retain copies of the downloaded files in the astroquery created
        sub-directories? Default is "False".

    clobber : Boolean, optional
        Download and Overwrite existing files? Default is "False".

    Returns
    -------
    local_files : list
        List of filenames
    """
    local_files = []

    if Observations is None:
        log.warning("The astroquery package was not found. No files retrieved!")
        return local_files

    # Query MAST for the data with an observation type of either "science"
    # or "calibration"
    obs_table = Observations.query_criteria(obs_id=obsid)
    # Catch the case where no files are found for download
    if not obs_table:
        log.info("WARNING: Query for {} returned NO RESULTS!".format(obsid))
        return local_files

    dpobs = Observations.get_product_list(obs_table)
    data_products_by_id = Observations.filter_products(
        dpobs,
        productSubGroupDescription=suffix,
        extension='fits',
        mrp_only=False)

    # After the filtering has been done, ensure there is still data in the
    # table for download. If the table is empty, look for FLT images in lieu
    # of FLC images. Only want one or the other (not both!), so just do the
    # filtering again.
    if not data_products_by_id:
        log.info("WARNING: No FLC files found for {} - will look for FLT "
                 "files instead.".format(obsid))
        suffix = ['FLT']
        data_products_by_id = Observations.filter_products(
            dpobs,
            productSubGroupDescription=suffix,
            extension='fits',
            mrp_only=False)

        # If still no data, then return. An exception will eventually be
        # thrown in the higher level code.
        if not data_products_by_id:
            log.info(
                "WARNING: No FLC or FLT files found for {}.".format(obsid))
            return local_files

    all_images = data_products_by_id['productFilename'].tolist()
    log.info(all_images)

    # Rows whose file already exists in the current directory are dropped
    # from the download request unless clobber is set.
    rows_to_remove = []
    if not clobber:
        for row_idx, row in enumerate(data_products_by_id):
            fname = row['productFilename']
            if os.path.isfile(fname):
                log.info(fname + " already exists. File download skipped.")
                rows_to_remove.append(row_idx)
        data_products_by_id.remove_rows(rows_to_remove)

    manifest = Observations.download_products(data_products_by_id,
                                              mrp_only=False)

    if rows_to_remove:
        # Nothing was downloaded: every requested file is already local.
        if not manifest:
            return all_images
        # Re-insert a "LOCAL" placeholder for each skipped file. Indices are
        # processed in ASCENDING order so every insertion index is within
        # the table's current length; going from the highest index down can
        # ask insert_row for a position past the end (IndexError).
        for rownum in rows_to_remove:
            manifest.insert_row(
                rownum, vals=[all_images[rownum], "LOCAL", "None", "None"])

    for path, file_status in zip(manifest['Local Path'], manifest['Status']):
        if file_status == "LOCAL":
            local_files.append(path)
            continue

        # Move or copy downloaded file to current directory
        local_file = os.path.abspath(os.path.basename(path))
        if archive:
            shutil.copy(path, local_file)
        else:
            shutil.move(path, local_file)
        # Record what files were downloaded and their current location
        local_files.append(os.path.basename(local_file))

    if not archive:
        # Remove astroquery created sub-directories
        shutil.rmtree('mastDownload')
    return local_files
from astroquery.mast import Observations

# Read in the list of TIC IDs: the first comma-separated field of every line
# of the target sheet.
with open(r"all_targets_S001_v1.csv") as csv_file:
    sheet = csv_file.readlines()
print(sheet)

# Strip only the line terminator. Blindly dropping the last character
# (`line[:-1]`) cuts a real character off a final line that has no trailing
# newline.
tids = [line.rstrip("\n").split(",")[0] for line in sheet]
# Skip the first six lines of the sheet (presumably header rows - confirm
# against the file).
tids = tids[6:]
print(tids)

n = 0
# Only the first 50 targets are processed.
for n, tid in enumerate(tids[:50], start=1):
    # This query will get all the TESS mission data for this TIC ID.
    obsTable = Observations.query_criteria(obs_collection="TESS",
                                           target_name=tid)

    # This gets all the products for the returned set of Sectors, e.g.,
    # light curves, target pixel files, DV reports, etc..
    data_products = Observations.get_product_list(obsTable)

    # This will filter so that only the light curve files are left.
    light_curves = Observations.filter_products(
        data_products, productSubGroupDescription="LC")

    # Let's download these files.
    Observations.download_products(light_curves)

    # Progress counter: number of targets handled so far.
    print(n)
from astropy.io import fits from astropy.visualization import make_lupton_rgb, ImageNormalize import matplotlib.pyplot as plt import reproject from astroquery.mast import Observations result = Observations.query_object('M83') selected_bands = result[(result['obs_collection'] == 'HST') & (result['instrument_name'] == 'WFC3/UVIS') & ((result['filters'] == 'F657N') | (result['filters'] == 'F487N') | (result['filters'] == 'F336W')) & (result['target_name'] == 'MESSIER-083')] prodlist = Observations.get_product_list(selected_bands) filtered_prodlist = Observations.filter_products(prodlist) downloaded = Observations.download_products(filtered_prodlist) blue = fits.open(downloaded['Local Path'][2]) red = fits.open(downloaded['Local Path'][5]) green = fits.open(downloaded['Local Path'][8]) target_header = red['SCI'].header green_repr, _ = reproject.reproject_interp(green['SCI'], target_header) blue_repr, _ = reproject.reproject_interp(blue['SCI'], target_header) rgb_img = make_lupton_rgb( ImageNormalize(vmin=0, vmax=1)(red['SCI'].data), ImageNormalize(vmin=0, vmax=0.3)(green_repr), ImageNormalize(vmin=0, vmax=1)(blue_repr),
def get_lc_file_and_data(yourpath, target):
    """ Download the TESS light curve for `target` and return its data.

    Grabs the data for the target and returns the time index, intensity and
    TIC. Any data points flagged in the QUALITY column are masked (set to
    NaN). If there is a connection error with MAST the target is skipped.

    parameters:
        * yourpath, where you want the files saved to. must end in /
        * target, the TIC ID of the target (anything `int()` accepts)

    returns:
        * time1, i1, ticid - the TIME column, the PDCSAP_FLUX column and the
          TICID header value of the first lc.fits file found under the
          download directory; all three are 0 when nothing was found or the
          query failed

    modified [lcg 07082020] - fixed handling no results, fixed deleting
    download folder"""
    fitspath = yourpath + 'mastDownload/TESS/'  # >> download directory
    targ = "TIC " + str(int(target))
    print(targ)

    # Values returned when no light curve could be read.
    time1 = 0
    i1 = 0
    ticid = 0

    try:
        # find and download data products for your target
        obs_table = Observations.query_criteria(obs_collection='TESS',
                                                dataproduct_type='timeseries',
                                                target_name=str(int(target)),
                                                objectname=targ)
        data_products_by_obs = Observations.get_product_list(obs_table[0:8])
        filter_products = Observations.filter_products(
            data_products_by_obs, description='Light curves')
        if len(filter_products) != 0:
            Observations.download_products(filter_products,
                                           download_dir=yourpath,
                                           extension='fits')
        else:
            print("Query yielded no matching data produts for ", targ)

        # get all the paths to lc.fits files
        filepaths = []
        for root, dirs, files in os.walk(fitspath):
            for name in files:
                if name.endswith("lc.fits"):
                    filepaths.append(root + "/" + name)

        if len(filepaths) == 0:
            # if no lc.fits were downloaded, move on
            print("No lc.fits files available for TIC ", targ)
        else:
            # Open the first light curve, copy out what is needed and close
            # it again; the context manager closes the file even if one of
            # the columns is missing.
            with fits.open(filepaths[0], memmap=False) as f:
                time1 = f[1].data['TIME']
                i1 = f[1].data['PDCSAP_FLUX']
                ticid = f[1].header["TICID"]
                quality = f[1].data['QUALITY']

            # >> mask out any nonzero points
            flagged_inds = np.nonzero(quality)
            i1[flagged_inds] = np.nan  # >> will be interpolated later

        # Then delete all downloads in the folder, no matter what type, so
        # that files from this target are not picked up by the directory
        # walk of a later call.
        if os.path.isdir(yourpath + "mastDownload"):
            shutil.rmtree(yourpath + "mastDownload")
            print("Download folder deleted.")

    # corrects for connection errors
    except (ConnectionError, OSError, TimeoutError, RemoteServiceError):
        print(targ, " could not be accessed due to an error.")
        i1 = 0
        time1 = 0
        ticid = 0

    return time1, i1, ticid
if not os.path.exists('DATA'): os.makedirs('DATA') # This downloads all the F160W DRZ images from CANDELS in the GOODS-South field print('\nQuerying the MAST archive.\n') obsTable = Observations.query_criteria(obs_collection='HST', filters='F160W', instrument_name='WFC3/IR', proposal_id=['12062', '12061', '12062']) # Grab the list of available data products for these observations products = Observations.get_product_list(obsTable) # Select only drizzled (DRZ) files filtered = Observations.filter_products(products, mrp_only=False, productSubGroupDescription='DRZ') # Enable 'S3 mode' for module which will return S3-like URLs for FITs files # e.g. s3://stpubdata/hst/public/icde/icde43l0q/icde43l0q_drz.fits Observations.enable_s3_hst_dataset() # Grab the S3 URLs for each of the observations s3_urls = Observations.get_hst_s3_uris(filtered) print('Query returned {} entries.\n'.format(len(filtered))) print('Downloading data from S3.\n') s3 = boto3.resource('s3')
def hst_button(
        galaxies,
        skymethod='globalmin+match',
        instruments="ACS/WFC",
        prop_ids=None,
        filters=None,
        radius=None,
        filepath=None,
        download_data=True,
        correct_astrometry=True,
        create_mosaic=True,
        jy_conversion=True,
        verbose=False,
        log_filename='hst.log',
):
    """Create a HST mosaic, given a galaxy name.

    Using a galaxy name and radius, queries around that object,
    downloads available HST data and mosaics into a final product. It
    will create separate mosaics for each proposal ID, and the file
    structure will look like
    ``/galaxy/HST/proposal_id/galaxy_instrument_filter_proposal_id.fits``.

    N.B. I must confess to not being well-versed with HST data, so if
    anyone can help improve this please let me know.

    This data button uses a number of tools included in the drizzlepac
    Python package. This includes alignimages/tweakreg and astrodrizzle,
    which correct astrometry and are specifically tailored for the setup
    of HST data. This means that 1) creating mosaics with this will likely
    take a long time and 2) you will need a beefy computer (especially with
    regards to hard drive space).

    Args:
        galaxies (str or list): Names of galaxies to create mosaics for.
            Resolved by NED.
        skymethod (str, optional): Method used for AstroDrizzle's
            background matching step. In general, this can be left
            untouched but for mosaics with little overlap, it may be worth
            playing around with this. For instance, I've had some luck
            when there isn't much overlap between exposures using
            'globalmin'. Options are 'localmin', 'globalmin+match',
            'globalmin', and 'match'. Defaults to 'globalmin+match'.
        instruments (str or list, optional): Instrument to download data
            for. Can be any combination of 'ACS/WFC', 'WFC3/IR',
            'WFC3/UVIS', 'WFPC2/PC', or 'WFPC2/WFC'. If you want all
            available data for all these instruments, select 'all', but
            this is not recommended! Defaults to 'ACS/WFC'.
        prop_ids (str or list, optional): Proposal IDs to download data
            for. Defaults to None, which will pull out all proposal IDs
            for each instrument.
        filters (str or list, optional): Filters to download data for.
            The script will look for each filter, for each instrument.
            Defaults to None, which will pull out all applicable filters
            for each instrument, for each proposal ID.
        radius (astropy.units.Quantity, optional): Radius around the
            galaxy to search for observations. Defaults to None, where
            it will query Ned to get size.
        filepath (str, optional): Path to save the working and output
            files to. If not specified, saves to current working
            directory.
        download_data (bool, optional): If True, will download data from
            MAST. Defaults to True.
        correct_astrometry (bool, optional): If True, will perform
            astrometric corrections to the downloaded data using
            alignimages. Defaults to True.
        create_mosaic (bool, optional): Switching this to True will
            mosaic the data using astrodrizzle as appropriate. Defaults
            to True.
        jy_conversion (bool, optional): Convert the mosaicked file from
            raw units to Jy/pix. Defaults to True.
        verbose (bool, optional): Can be used to suppress most of the
            output messages produced during the process. Mainly useful
            for debugging. Defaults to False.
        log_filename (str, optional): Will produce a stripped down log
            of what data the code is reducing. By default, will save to
            galaxy/hst.log.

    Notes:
        This function has process-wide side effects: it changes the
        working directory (to ``filepath`` if given, and temporarily into
        the per-filter folders), sets the CRDS/iref/jref/uref environment
        variables, raises the soft open-file limit to the hard limit, and
        points ``TMPDIR`` at a local ``tmp`` folder, which is removed (and
        ``TMPDIR`` restored) when all galaxies have been processed.

    """

    if isinstance(galaxies, str):
        galaxies = [galaxies]

    if isinstance(instruments, str):
        instruments = [instruments]

    # The 'all' shortcut has to be tested after the string has been wrapped
    # in a list; comparing the list itself to 'all' is never true.
    if 'all' in instruments:
        instruments = ['ACS/WFC',
                       'WFC3/IR', 'WFC3/UVIS',
                       'WFPC2/PC', 'WFPC2/WFC']

    if isinstance(filters, str):
        filters = [filters]
    if isinstance(prop_ids, str):
        prop_ids = [prop_ids]

    if filepath is not None:
        os.chdir(filepath)

    orig_dir = os.getcwd()

    # Remember a user-supplied radius so it can be restored per galaxy.
    if radius is not None:
        original_radius = radius.copy()
    else:
        original_radius = None

    steps = []

    if download_data:
        steps.append(1)
    if correct_astrometry:
        steps.append(2)
    if create_mosaic:
        steps.append(3)
    if jy_conversion:
        steps.append(4)

    # Set up folders for various corrections

    os.environ['CRDS_SERVER_URL'] = 'https://hst-crds.stsci.edu'
    os.environ['CRDS_PATH'] = orig_dir + '/reference_files'
    os.environ['iref'] = orig_dir + '/reference_files/references/hst/wfc3/'
    os.environ['jref'] = orig_dir + '/reference_files/references/hst/acs/'
    os.environ['uref'] = orig_dir + '/reference_files/references/hst/wfpc2/'

    # For large proposals, astrodrizzle can run into file open
    # issues so raise the max file open amount.

    _, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))

    # Change the temp directory -- if this gets filled up it can cause
    # problems. TMPDIR may legitimately be unset, so don't index os.environ.

    orig_tmpdir = os.environ.get('TMPDIR')

    if not os.path.exists('tmp'):
        os.mkdir('tmp')

    os.environ['TMPDIR'] = orig_dir + '/tmp'

    for galaxy in galaxies:

        if not os.path.exists(galaxy):
            os.mkdir(galaxy)

        if not os.path.exists(galaxy + '/HST'):
            os.mkdir(galaxy + '/HST')

        if not verbose:

            # Various packages used here put out a lot of messages. Silence
            # info messages.

            loggers = [logging.getLogger(name)
                       for name in logging.root.manager.loggerDict]
            for logger in loggers:
                logger.setLevel(logging.ERROR)

        # Even if verbose is not True, still print out some useful messages
        # to the console.

        hst_logger = logging.getLogger('data_buttons')
        file_handler = logging.FileHandler(galaxy + '/' + log_filename,
                                           mode='w')
        stream_handler = logging.StreamHandler()
        hst_logger.addHandler(file_handler)
        hst_logger.addHandler(stream_handler)
        hst_logger.setLevel(logging.INFO)
        hst_logger.info('Beginning ' + galaxy)
        hst_logger.info(' ')
        hst_logger.info(' ')

        if radius is None:

            try:

                size_query = Ned.get_table(galaxy, table='diameters')
                radius = np.max(size_query['NED Major Axis']) / 2 * u.arcsec
                radius = radius.to(u.deg)

            except Exception:
                # Deliberate best-effort: any failure of the NED lookup
                # falls back to a fixed radius (but Ctrl-C still works).

                hst_logger.warning(
                    galaxy + ' not resolved by Ned, using 0.2deg radius.')
                radius = 0.2 * u.degree

        obs_table = Observations.query_criteria(objectname=galaxy,
                                                radius=radius,
                                                obs_type='all',
                                                obs_collection='HST')

        # Ignore any calibration observations.
        obs_table = obs_table[obs_table['intentType'] == 'science']

        for instrument in instruments:

            # Pixel sizes for final mosaics selected to match the HLA.

            pix_size = {'ACS/HRC': 0.025,
                        'ACS/SBC': 0.03,
                        'ACS/WFC': 0.05,
                        'NICMOS/NIC1': 0.025,
                        'NICMOS/NIC2': 0.05,
                        'NICMOS/NIC3': 0.1,
                        'WFC3/IR': 0.09,
                        'WFC3/UVIS': 0.04,
                        'WFPC2/PC': 0.05,
                        'WFPC2/WFC': 0.1}[instrument]

            # Bits to consider good for drizzling.

            bits = {'ACS/HRC': 256,
                    'ACS/SBC': 256,
                    'ACS/WFC': 256,
                    'NICMOS/NIC1': 0,
                    'NICMOS/NIC2': 0,
                    'NICMOS/NIC3': 0,
                    'WFC3/IR': 768,
                    'WFC3/UVIS': 256,
                    'WFPC2/PC': '8,1024',
                    'WFPC2/WFC': '8,1024'}[instrument]

            # Filename extension, in order of preference. A nested list
            # means several extensions are needed together.

            suffixes = {'ACS/WFC': ['FLC', 'FLT'],
                        'WFC3/IR': ['FLT'],
                        'WFC3/UVIS': ['FLC', 'FLT'],
                        'WFPC2/PC': [['C0M', 'C1M']],
                        'WFPC2/WFC': [['C0M', 'C1M']],
                        }[instrument]

            # The instruments often have / in the name, so account for
            # this in making folders and files.

            instrument_dir = galaxy + '/HST/' + instrument.replace('/', '_')

            hst_logger.info('Beginning ' + instrument)

            if not os.path.exists(instrument_dir):
                os.mkdir(instrument_dir)

            reset_filters = False

            instrument_table = obs_table[
                obs_table['instrument_name'] == instrument]

            reset_prop_ids = False

            if not prop_ids:
                prop_ids = list(np.unique(instrument_table['proposal_id']))
                reset_prop_ids = True

            hst_logger.info('Available proposal IDs: ' + ','.join(prop_ids))
            hst_logger.info(' ')

            for prop_id in prop_ids:

                hst_logger.info('Proposal ID: ' + str(prop_id))

                prop_table = instrument_table[
                    instrument_table['proposal_id'] == prop_id]

                if not filters:
                    filters = list(np.unique(prop_table['filters']))
                    reset_filters = True

                hst_logger.info('Available filters: ' + ','.join(filters))

                for hst_filter in filters:

                    # If we have a highly illegal filter, just skip.
                    # TODO: This needs to be sorted for some fringe
                    # cases, probably.

                    if not hst_filter[0] == 'F':
                        continue

                    hst_logger.info('Filter: ' + str(hst_filter))

                    # Pull out available data and download.

                    filter_table = prop_table[
                        prop_table['filters'] == hst_filter]

                    if len(filter_table) == 0:
                        hst_logger.warning(
                            'No available data to download. Skipping...')
                        continue

                    data_products_id = Observations.get_product_list(
                        filter_table)

                    # Take the first preferred suffix that has any products.
                    for suffix in suffixes:

                        download_table = Observations.filter_products(
                            data_products_id,
                            productSubGroupDescription=suffix,
                            mrp_only=False)

                        if len(download_table) > 0:
                            break

                    if isinstance(suffix, list):
                        filename_exts = [ext.lower() for ext in suffix]
                    else:
                        filename_exts = [suffix.lower()]

                    hst_logger.info(
                        instrument + '/' + prop_id + '/' + hst_filter)

                    if not os.path.exists(instrument_dir + '/' + hst_filter):
                        os.mkdir(instrument_dir + '/' + hst_filter)

                    if not os.path.exists(galaxy + '/HST/' + prop_id):
                        os.mkdir(galaxy + '/HST/' + prop_id)

                    full_filepath = (instrument_dir +
                                     '/' +
                                     hst_filter +
                                     '/' +
                                     prop_id)

                    if not os.path.exists(full_filepath):
                        os.mkdir(full_filepath)

                    if 1 in steps:

                        # Download files

                        download_mast(download_table,
                                      download_dir="hst_temp/" + galaxy)

                        if not os.path.exists(full_filepath + '/raw'):
                            os.mkdir(full_filepath + '/raw')
                        if not os.path.exists(full_filepath + '/outputs'):
                            os.mkdir(full_filepath + '/outputs')

                        # Pull out the relevant files, and move to base
                        # folder.

                        for filename_ext in filename_exts:

                            matches = []
                            for root, _, filenames in os.walk(
                                    "hst_temp/" + galaxy):
                                for filename in fnmatch.filter(
                                        filenames,
                                        "*_" + filename_ext + ".fits"
                                ):
                                    matches.append(
                                        os.path.join(root, filename))

                            for match in matches:
                                filename = match.split('/')
                                os.rename(
                                    match,
                                    full_filepath + '/raw/' + filename[-1])

                        # Clean up any temporary files.

                        shutil.rmtree("hst_temp/" + galaxy,
                                      ignore_errors=True)

                    filename_ext = filename_exts[0]

                    hst_files = glob.glob(
                        full_filepath + '/raw/*_' + filename_ext + '.fits')

                    if 2 in steps:

                        # First, update the WCS information in case it's
                        # required.

                        for filename_ext in filename_exts:

                            hst_files = glob.glob(
                                full_filepath + '/raw/*_' + filename_ext +
                                '.fits')

                            crds.assign_bestrefs(hst_files,
                                                 sync_references=True)

                            # For WFPC2, the CRDS doesn't download
                            # everything needed. Download the GEIS data
                            # files and rerun the bestrefs assignment.

                            if 'WFPC2' in instrument:

                                geis_hdrs = glob.glob(
                                    os.environ['uref'] + '/*h')

                                for geis_hdr in geis_hdrs:

                                    geis_data = geis_hdr[:-1] + 'd'

                                    if not os.path.exists(geis_data):

                                        geis_data = geis_data.split('/')[-1]

                                        print(geis_data)
                                        print(os.environ['uref'])

                                        wget.download(
                                            os.environ['CRDS_SERVER_URL'] +
                                            '/unchecked_get/references/hst/' +
                                            geis_data,
                                            out=os.environ['uref'])

                                crds.assign_bestrefs(hst_files,
                                                     sync_references=True)

                            for hst_file in hst_files:
                                stwcs.updatewcs.updatewcs(hst_file,
                                                          use_db=False)

                        os.chdir(full_filepath + '/raw')

                        filename_ext = filename_exts[0]

                        hst_files = glob.glob('*_' + filename_ext + '.fits')

                        # Normalize all files.

                        photeq.photeq(', '.join(hst_files), readonly=False)
                        os.rename('photeq.log', '../outputs/photeq.log')

                        if 'WFPC' in instrument:

                            # Using tweakreg, align each frame to GAIA.

                            gaia_table = Gaia.query_object_async(
                                coordinate=galaxy,
                                radius=2 * radius)
                            ras = gaia_table['ra']
                            decs = gaia_table['dec']

                            source_table = Table([ras, decs])
                            source_table.write(
                                'gaia.cat',
                                format='ascii.fast_commented_header')

                            tweakreg.TweakReg(
                                hst_files,
                                imagefindcfg={'threshold': 5,
                                              'conv_width': 3},
                                refcat='gaia.cat',
                                # expand_refcat=True,
                                enforce_user_order=False,
                                shiftfile=True,
                                outshifts='shifts.txt',
                                searchrad=10,
                                minobj=5,
                                separation=0,
                                updatehdr=True,
                                reusename=True,
                                wcsname='TWEAK',
                                interactive=False,
                                fitgeometry='general',
                                clean=True,
                                see2dplot=False
                            )

                            # Update the c1m files to use the TWEAK wcs

                            for hst_file in hst_files:

                                dq_file = hst_file.replace('c0', 'c1')

                                tweakback.tweakback(hst_file,
                                                    dq_file,
                                                    newname='TWEAK')

                            plot_files = glob.glob('*.png')
                            for plot_file in plot_files:
                                os.remove(plot_file)

                            cat_files = glob.glob('*.coo')
                            for cat_file in cat_files:
                                os.remove(cat_file)

                            os.rename('shifts_wcs.fits',
                                      '../outputs/shifts_wcs.fits')
                            os.rename('tweakreg.log',
                                      '../outputs/tweakreg.log')
                            os.rename('shifts.txt',
                                      '../outputs/shifts.txt')

                        elif 'ACS' in instrument or 'WFC3' in instrument:

                            # Correct astrometry using alignimages. First,
                            # correct each frame separately.

                            pool = mp.Pool(mp.cpu_count())

                            suitable_hst_files = pool.map(
                                astrometric_correction, hst_files)

                            pool.close()
                            # Wait for the workers to exit so they are not
                            # left behind for every filter processed.
                            pool.join()

                            suitable_hst_files = [
                                x for x in suitable_hst_files
                                if x is not None]

                            if len(suitable_hst_files) == 0:
                                hst_logger.warning(
                                    'Failure with astrometry corrections. '
                                    'Skipping')
                                os.chdir(orig_dir)
                                continue

                            # Now, align every suitable frame
                            # simultaneously.

                            output_table = astrometric_correction(
                                suitable_hst_files)

                            with open('../outputs/astrometry.pkl',
                                      'wb') as table_file:
                                pickle.dump(output_table, table_file)

                        else:

                            raise Exception('Unknown instrument!')

                        os.chdir(orig_dir)

                    os.chdir(full_filepath)

                    if 3 in steps:

                        os.chdir('raw')

                        if 'WFPC2' in instrument:

                            hst_files = glob.glob('*_c0m.fits')
                            wcskey = 'TWEAK'

                        elif 'ACS' in instrument or 'WFC3' in instrument:

                            # NOTE: this pickle is written by the astrometry
                            # step above; do not point it at untrusted
                            # files.
                            with open('../outputs/astrometry.pkl',
                                      'rb') as table_file:
                                output_table = pickle.load(table_file)

                            # We only want fits where an acceptable
                            # astrometric solution has been found.

                            suitable_fits = np.where(
                                output_table['fit_qual'] < 5)
                            # (output_table['fit_qual'] >= 1)

                            hst_files = list(
                                output_table[suitable_fits]['imageName'])

                            if len(output_table[suitable_fits]) == 0:
                                hst_logger.warning(
                                    'Failure with astrometry corrections. '
                                    'Skipping')
                                os.chdir(orig_dir)
                                continue

                            wcskey = ' '

                        else:

                            raise Exception('Unknown instrument!')

                        # Following Dalcanton+ (2012), group exposures into
                        # long (>50s) and short (<=50s), and process for
                        # cosmic rays separately

                        exp_times = []

                        for hst_file in hst_files:
                            # Context manager so the file handle is released
                            # straight away (large proposals open many).
                            with fits.open(hst_file) as hdul:
                                exp_times.append(hdul[0].header['EXPTIME'])

                        for exp_group in ['short', 'long']:

                            hst_files_group = []

                            for hst_file, exp_time in zip(hst_files,
                                                          exp_times):

                                if exp_time > 50 and exp_group == 'long':
                                    hst_files_group.append(hst_file)
                                elif exp_time <= 50 and exp_group == 'short':
                                    hst_files_group.append(hst_file)

                            # If every exposure is in one group, drop the
                            # group label from the output names.
                            if len(hst_files_group) == len(hst_files):
                                exp_group = ''

                            if len(hst_files_group) == 0:
                                continue

                            if len(exp_group) > 0:
                                output_name = ('../outputs/' + galaxy + '_' +
                                               exp_group)
                                drizzle_log_name = (
                                    '../outputs/astrodrizzle_' + exp_group +
                                    '.log')
                            else:
                                output_name = '../outputs/' + galaxy
                                drizzle_log_name = (
                                    '../outputs/astrodrizzle.log')

                            # Perform the mosaicking. Generally, use
                            # iminmed. However, sometimes iminmed will fail
                            # so for the other instruments we'll use imedian
                            # as a fallback.

                            combine_types = ['iminmed', 'imedian']

                            if 'WFPC2' in instrument:
                                combine_nhigh = 1
                            else:
                                combine_nhigh = 0

                            for combine_type in combine_types:

                                try:

                                    astrodrizzle.AstroDrizzle(
                                        input=hst_files_group,
                                        output=output_name,
                                        preserve=False,
                                        clean=True,
                                        combine_type=combine_type,
                                        combine_nhigh=combine_nhigh,
                                        skymethod=skymethod,
                                        sky_bits=bits,
                                        driz_sep_bits=bits,
                                        driz_sep_fillval=99999,
                                        combine_hthresh=90000,
                                        final_scale=pix_size,
                                        final_bits=bits,
                                        final_fillval=0,
                                        wcskey=wcskey,
                                        final_rot=0,
                                    )

                                    break

                                except ValueError:
                                    # Try the next combine type.
                                    pass

                            # Move the AstroDrizzle log.

                            os.rename('astrodrizzle.log',
                                      drizzle_log_name)

                    # Move back to the original directory.

                    os.chdir(orig_dir)

                    if 4 in steps:

                        mosaic_outputs = glob.glob(
                            full_filepath + '/outputs/*_sci.fits')

                        for mosaic_output in mosaic_outputs:

                            # Replace any fillvals with NaNs.

                            with fits.open(mosaic_output) as hdul:
                                hdu = hdul[0]
                                hdu.data[hdu.data == 0] = np.nan

                                fits.writeto(mosaic_output,
                                             hdu.data, hdu.header,
                                             overwrite=True)

                            # The long/short exposure group is carried over
                            # from the drizzled filename.
                            mosaic_name = mosaic_output.split('/')[-1]

                            if '_long_' in mosaic_name:
                                group_suffix = '_long'
                            elif '_short_' in mosaic_name:
                                group_suffix = '_short'
                            else:
                                group_suffix = ''

                            new_filename = (galaxy +
                                            '/HST/' +
                                            prop_id +
                                            '/' +
                                            galaxy +
                                            '_' +
                                            instrument.replace('/', '_') +
                                            '_' +
                                            hst_filter +
                                            '_' +
                                            prop_id +
                                            group_suffix +
                                            '.fits')

                            convert_to_jy(mosaic_output,
                                          new_filename)

                if reset_filters:
                    filters = None

                hst_logger.info(' ')

            if reset_prop_ids:
                prop_ids = None

            hst_logger.info(' ')

        if original_radius is None:
            radius = None
        else:
            radius = original_radius.copy()

        # Detach this galaxy's handlers; otherwise every later galaxy would
        # also log into this file and print each message once more.
        for galaxy_handler in (file_handler, stream_handler):
            hst_logger.removeHandler(galaxy_handler)
            galaxy_handler.close()

    # Clear out the tmp folder and reset to the original.

    shutil.rmtree('tmp/', ignore_errors=True)

    if orig_tmpdir is None:
        os.environ.pop('TMPDIR', None)
    else:
        os.environ['TMPDIR'] = orig_tmpdir
def retrieve_observation(obsid, suffix=['FLC'], archive=False, clobber=False):
    """Simple interface for retrieving an observation from the MAST archive

    If the input obsid is for an association, it will request all members
    with the specified suffixes.

    Downloaded files are moved (or copied, when ``archive`` is True) into
    the current working directory.

    Parameters
    -----------
    obsid : string
        ID for observation to be retrieved from the MAST archive. Only the
        IPPSSOOT (rootname) of exposure or ASN needs to be provided; eg.,
        ib6v06060.

    suffix : list
        List containing suffixes of files which should be requested from
        MAST. The list is never modified. If nothing matches, the function
        falls back to ``['FLT']``.

    archive : Boolean
        Retain copies of the downloaded files in the astroquery created
        sub-directories? Default is 'False'.

    clobber : Boolean
        Download and Overwrite existing files? Default is 'False'.

    Returns
    -------
    local_files : list
        List of filenames. Empty when the query or the product filtering
        returned nothing.
    """
    local_files = []

    # Query MAST for the data with an observation type of either "science"
    # or "calibration"
    obsTable = Observations.query_criteria(obs_id=obsid, obstype='all')

    # Catch the case where no files are found for download
    if len(obsTable) == 0:
        print("WARNING: Query for {} returned NO RESULTS!".format(obsid))
        return local_files

    dpobs = Observations.get_product_list(obsTable)
    dataProductsByID = Observations.filter_products(
        dpobs,
        productSubGroupDescription=suffix,
        extension='fits',
        mrp_only=False)

    # After the filtering has been done, ensure there is still data in the
    # table for download. If the table is empty, look for FLT images in lieu
    # of FLC images. Only want one or the other (not both!), so just do the
    # filtering again.
    if len(dataProductsByID) == 0:
        print("WARNING: No FLC files found for {} - will look for FLT files "
              "instead.".format(obsid))
        suffix = ['FLT']
        dataProductsByID = Observations.filter_products(
            dpobs,
            productSubGroupDescription=suffix,
            extension='fits',
            mrp_only=False)

        # If still no data, then return. An exception will eventually be
        # thrown in the higher level code.
        if len(dataProductsByID) == 0:
            print("WARNING: No FLC or FLT files found for {}.".format(obsid))
            return local_files

    allImages = [tableLine['productFilename']
                 for tableLine in dataProductsByID]
    print(allImages)

    # Row indices (ascending) of products that already exist locally and
    # therefore are not downloaded again.
    existingRows = []
    if not clobber:
        for rowCtr, tableLine in enumerate(dataProductsByID):
            if os.path.exists(tableLine['productFilename']):
                print("{} already exists. File download skipped.".format(
                    tableLine['productFilename']))
                existingRows.append(rowCtr)

        # Remove from the end so the remaining indices stay valid.
        for rowNum in reversed(existingRows):
            dataProductsByID.remove_row(rowNum)

    manifest = Observations.download_products(dataProductsByID,
                                              mrp_only=False)

    if not manifest:
        # Nothing was downloaded. If that is because every file was already
        # present, report them all; otherwise return the empty list rather
        # than failing on the missing manifest below.
        if existingRows:
            local_files = allImages
        return local_files

    # Re-insert the skipped files at their original positions, flagged as
    # "LOCAL", so the manifest lines up with allImages again.
    for rownum in existingRows:
        manifest.insert_row(rownum,
                            vals=[allImages[rownum], "LOCAL", "None", "None"])

    download_dir = None
    for file, fileStatus in zip(manifest['Local Path'], manifest['Status']):
        if fileStatus != "LOCAL":
            # Identify what sub-directory was created by astroquery for the
            # download
            if download_dir is None:
                file_path = file.split(os.sep)
                # Drop a leading './' component if there is one.
                if '.' in file_path:
                    file_path.remove('.')
                download_dir = file_path[0]
            # Move or copy downloaded file to current directory
            local_file = os.path.abspath(os.path.basename(file))
            if archive:
                shutil.copy(file, local_file)
            else:
                shutil.move(file, local_file)
            # Record what files were downloaded and their current location
            local_files.append(os.path.basename(local_file))
        else:
            local_files.append(file)

    if not archive and download_dir is not None:
        # Remove astroquery created sub-directories
        shutil.rmtree(download_dir)

    return local_files
def bert_tess_fullframe_main_2():
    """Continuation of main function to run it across different sectors.

    For every event on the work queue this resolves the list of full frame
    image (FFIC) URIs for the sector/camera/CCD, using a cached URI list in
    the cache bucket when one exists and querying MAST otherwise. It then
    converts the target's RA/Dec to pixel coordinates using the WCS of the
    first frame, and emits one message per frame on the done queue. Events
    whose first frame has an unusable WCS, or whose target is too close to
    the detector edge, are logged and skipped.

    The bucket names are read from the ``AWSBUCKETNAME`` and
    ``CACHEBUCKETNAME`` environment variables; temporary files are written
    under ``HOME`` and removed again.
    """
    import os
    import time

    import boto3
    from astropy.io import fits
    from astropy.wcs import WCS
    from astroquery.mast import Observations

    s3 = boto3.resource('s3')
    bucket = s3.Bucket(name=os.environ.get('AWSBUCKETNAME'))
    outbucket = s3.Bucket(name=os.environ.get('CACHEBUCKETNAME'))
    homedir = os.environ.get('HOME')

    work_queue, done_queue, ologger = utils.comm_binders(
        bert_tess_fullframe_main_2)

    # Example event:
    # {
    #   "tic_id": "25155310",
    #   "sec_id": "tess-s0001-4-1",
    #   "ra": 63.3739396231274,
    #   "dec": -69.226822697583,
    #   "radius": 2.5,
    #   "cutout_width": 30,
    #   "use_cache": "true"
    # }
    #
    # work_queue populated by calling Lambda
    for event in work_queue:
        tic_id = event['tic_id']
        sec_id = event['sec_id']

        basename = f'{sec_id}_s3_uris.txt'  # noqa
        filename = os.path.join(homedir, basename)

        try:
            # Check if URI list already cached.
            # According to MAST, there is no need to invalidate cache here.
            ologger.info(f'Attempting to download {basename} from S3')
            outbucket.download_file(
                basename, filename, ExtraArgs={"RequestPayer": "requester"})
        except Exception:
            # Find full frame dataset for the observation ID.
            ologger.info('Started quering Observations...')
            obs_table = Observations.query_criteria(obs_id=sec_id)
            products = Observations.get_product_list(obs_table)
            filtered = Observations.filter_products(
                products, productSubGroupDescription="FFIC", mrp_only=False)

            # Use AWS S3 bucket to pull data from.
            Observations.enable_cloud_dataset(verbose=False)
            ologger.info('Started obtaining cloud URIs...')
            t_start = time.time()
            s3_urls = Observations.get_cloud_uris(
                filtered, include_bucket=False)
            t_end = time.time()
            ologger.info(f'Got {len(s3_urls)} URIs in {t_end - t_start} s')

            # Upload URI list to cache.
            with open(filename, 'w') as fout:
                for url in s3_urls:
                    fout.write(url + os.linesep)
            try:
                outbucket.upload_file(
                    filename, basename,
                    ExtraArgs={"RequestPayer": "requester"})
            except Exception as exc:
                ologger.error(str(exc))
            else:
                ologger.info(f'Uploaded {basename} to S3')
        else:
            # Use cache if it exists. Blank lines are dropped so that an
            # empty string is never treated as an S3 key.
            with open(filename, 'r') as fin:
                s3_urls = [url.strip() for url in fin if url.strip()]
            ologger.info(f'Read {len(s3_urls)} URIs from {basename}')
        finally:
            # Clean up
            if os.path.exists(filename):
                os.remove(filename)

        if not s3_urls:
            # Nothing to resolve the WCS from; indexing [0] would raise.
            ologger.error(f'No full frame URIs found for {sec_id}')
            continue

        ra = float(event['ra'])
        dec = float(event['dec'])

        # TODO: Cache good WCS for a given sector/camera/ccd combo and use
        #       known good cache if available.
        # Find pixel coordinates from sky from first frame header.
        key = s3_urls[0]
        frame_basename = key.split('/')[-1]
        frame_filename = os.path.join(homedir, frame_basename)
        ologger.info(f'Resolving WCS from {key}')
        try:
            bucket.download_file(
                key, frame_filename,
                ExtraArgs={"RequestPayer": "requester"})
            hdr = fits.getheader(frame_filename, ext=1)
        finally:
            # Only the header is needed from here on, so remove the frame
            # on every path (including the invalid-WCS skip below).
            if os.path.exists(frame_filename):
                os.remove(frame_filename)

        if hdr.get('WCSAXES', 0) != 2:  # Good WCS according to MIT
            ologger.error(f'{key} has invalid WCS')
            continue
        w = WCS(hdr)
        pix = w.all_world2pix(ra, dec, 0)
        xpos = round(float(pix[0]))  # float needed to get rid of 0-D array
        ypos = round(float(pix[1]))

        # The star needs to be at least 2*radii pixels away in both X and Y.
        radius = float(event['radius'])
        edge_r = 2 * radius
        naxis1, naxis2 = w.pixel_shape  # X Y
        if (xpos < edge_r or xpos >= (naxis1 - edge_r) or
                ypos < edge_r or ypos >= (naxis2 - edge_r)):
            ologger.error(
                f'TIC {tic_id} in {sec_id}: X={xpos},Y={ypos} not at least '
                f'{edge_r} pixels away from the edge, skipping...')
            continue

        # Pass data into the next AWS Lambda function.
        ologger.info(f'TIC {tic_id} in {sec_id}: Started processing '
                     'full frame URIs...')
        for url in s3_urls:
            done_queue.put({
                'key': url,
                'tic_id': tic_id,
                'ra': ra,
                'dec': dec,
                'xpos': xpos,
                'ypos': ypos,
                'radius': radius,
                'cutout_width': event['cutout_width'],
                'use_cache': event['use_cache']})
@author: smullally """ from astroquery.mast import Observations Observations.enable_cloud_dataset(provider='AWS') target = "Kepler-10" #Do a cone search and find the Kepler long cadence data for your target obs = Observations.query_object(target, radius="0s") want = (obs['obs_collection'] == "Kepler") & (obs['t_exptime'] == 1800.0) #Pick which data you want to retrieve data_prod = Observations.get_product_list(obs[want]) filt_prod = Observations.filter_products( data_prod, description="Lightcurve Long Cadence (CLC) - Q4") #Move data from the S3 bucket to the default astroquery location. #cloud_only=True means that data will only be retrieved if available on AWS S3 manifest = Observations.download_products(filt_prod) #%% import pdb from lightkurve import search_targetpixelfile def afunction(): pdb.set_trace()
def lambda_handler(event, context):
    """Extract light curve data from one TESS full frame image.

    Looks up the FFIC products for the observation ID in ``event['id']``,
    starts one worker process per frame URI (currently only the first two,
    see the TODO below), and collects the ``midtime``, ``signal`` and
    ``background`` values each worker reports. Rows containing non-finite
    values, and workers that report no ``body``, are dropped.

    Parameters
    ----------
    event : dict
        API Gateway Lambda Proxy Input Format. Must provide ``id``,
        ``xpos``, ``ypos``, ``radius`` and ``bright_pixel_threshold``.

        Event doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html#api-gateway-simple-proxy-for-lambda-input-format

    context : object
        Lambda Context runtime methods and attributes. Not used here.

        Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    ------
    result : dict
        API Gateway Lambda Proxy Output Format, with a JSON body holding
        ``n_rows`` (number of rows collected) and a placeholder
        ``data_url``.

        Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html

    """  # noqa
    obs_id = event['id']  # TESS observation ID; Example: 'tess-s0001-1-1'

    # TODO: Calculate some of these from the 10th frame?
    # For now, also takes these and pass them onto worker:
    payload = {
        'xpos': event['xpos'],
        'ypos': event['ypos'],
        'radius': event['radius'],
        'bright_pixel_threshold': event['bright_pixel_threshold']
    }

    # Find full frame dataset for the observation ID.
    obs_table = Observations.query_criteria(obs_id=obs_id)
    products = Observations.get_product_list(obs_table)
    filtered = Observations.filter_products(products,
                                            productSubGroupDescription="FFIC",
                                            mrp_only=False)

    # Use AWS S3 bucket to pull data from.
    Observations.enable_cloud_dataset()  # TODO: verbose=False ?
    s3_urls = Observations.get_cloud_uris(filtered, include_bucket=False)

    # TODO: Timed out! Try https://docs.python.org/3/library/asyncio.html ?
    # TODO: Handle same Lambda call invoked multiple times by AWS?
    # Call tess_fullframe_worker AWS Lambda function in parallel
    # https://aws.amazon.com/blogs/compute/parallel-processing-in-python-with-aws-lambda/
    parent_connections = []
    child_connections = []
    processes = []
    data = []

    for url in s3_urls[:2]:  # TODO: Remove [:2] when done testing
        payload['key'] = url
        parent_conn, child_conn = Pipe()
        parent_connections.append(parent_conn)
        child_connections.append(child_conn)
        # Serialise now: payload is reused (and mutated) on the next pass.
        arg = json.dumps(payload)
        process = Process(target=_pipe_worker, args=(arg, child_conn))
        processes.append(process)

    for process, child_conn in zip(processes, child_connections):
        process.start()
        # The worker owns its end now. Closing our copy lets recv() raise
        # EOFError if the worker exits without sending anything.
        child_conn.close()

    # Drain every pipe BEFORE joining: a worker blocked in send() on a full
    # pipe buffer never exits, so joining first can deadlock.
    for parent_connection in parent_connections:
        try:
            response = parent_connection.recv()[0]
        except EOFError:
            response = {}
        finally:
            parent_connection.close()

        if 'body' not in response:  # Worker Lambda threw exception
            continue

        body = json.loads(response['body'])
        row = (body['midtime'], body['signal'], body['background'])
        if np.all(list(map(np.isfinite, row))):
            data.append(row)

    for process in processes:
        process.join()

    # TODO: Save data as table.
    # filename = f'/tmp/{obs_id}_lightcurve.csv'
    # with open(filename) as fout:
    #     for row in data:
    #         fout.write(f'{row[0]},{row[1]},{row[2]}{os.linesep}')

    # TODO: Upload table to S3 and then delete the table locally.
    # TODO: Return table S3 URL below.
    # TODO: Do we want to plot it and upload the plot too?
    #       If so, need to add matplotlib as dependency.

    return {
        "statusCode": 200,
        "body": json.dumps({
            'n_rows': len(data),
            'data_url': 'TODO'
        })
    }
'products_dict_full_listed_WFC3IR_Filiters.joblib.save') # Select only FLT files # mrp = minimum recommended products # filtered_dict = {ir_filt:Observations.filter_products(products_dict[ir_filt], # mrp_only=False, productSubGroupDescription='FLT', # dataproduct_type='image') # for ir_filt in tqdm(WFC3IR_Filters)} print('[INFO]') filtered_dict = {} for ir_filt in tqdm(WFC3IR_Filters): filtered_dict[ir_filt] = [] for product_tbl in product_dict_full[ir_filt]: filtered_dict[ir_filt].append( Observations.filter_products(product_tbl, mrp_only=False, productSubGroupDescription='FLT', dataproduct_type='image')) print('[INFO]') joblib.dump(filtered_dict, 'filtered_dict_list_WFC3IR_Filiters.joblib.save') # Grab the S3 URLs for each of the observations # s3_urls_dict = {ir_filt:Observations.get_hst_s3_uris(filtered_dict[ir_filt]) for ir_filt in tqdm(WFC3IR_Filters)} print('[INFO]') s3_urls_dict = {} for ir_filt in tqdm(WFC3IR_Filters): s3_urls_dict[ir_filt] = [] for kf, filtered_tbl in tqdm(enumerate(filtered_dict[ir_filt]), total=len(filtered_dict[ir_filt])): for kt, tbl_now in tqdm(enumerate(filtered_tbl),
Retrieve Hubble archival data of M83 and make a figure """ from astroquery.mast import Mast, Observations from astropy.visualization import make_lupton_rgb, ImageNormalize import matplotlib.pyplot as plt import reproject result = Observations.query_object('M83') selected_bands = result[(result['obs_collection'] == 'HST') & (result['instrument_name'] == 'WFC3/UVIS') & ((result['filters'] == 'F657N') | (result['filters'] == 'F487N') | (result['filters'] == 'F336W')) & (result['target_name'] == 'MESSIER-083')] prodlist = Observations.get_product_list(selected_bands) filtered_prodlist = Observations.filter_products(prodlist) downloaded = Observations.download_products(filtered_prodlist) blue = fits.open(downloaded['Local Path'][2]) red = fits.open(downloaded['Local Path'][5]) green = fits.open(downloaded['Local Path'][8]) target_header = red['SCI'].header green_repr, _ = reproject.reproject_interp(green['SCI'], target_header) blue_repr, _ = reproject.reproject_interp(blue['SCI'], target_header) rgb_img = make_lupton_rgb(ImageNormalize(vmin=0, vmax=1)(red['SCI'].data), ImageNormalize(vmin=0, vmax=0.3)(green_repr), ImageNormalize(vmin=0, vmax=1)(blue_repr),
shape_tally = {} for prop in proposal_ids: proposal_obs = Observations.query_criteria( project='HST', instrument_name='ACS/WFC', proposal_id=[prop], ) products = Observations.get_product_list(proposal_obs) print("Products for proposal %s: %d" % (prop, len(products)), file=sys.stderr) filtered_products = Observations.filter_products( products, mrp_only=False, productSubGroupDescription='RAW', extension='fits', ) print("RAW products for proposal %s: %d" % (prop, len(filtered_products)), file=sys.stderr) if len(filtered_products) > 0: s3_uris = Observations.get_hst_s3_uris(filtered_products) for uri in s3_uris: key = uri.replace("s3://stpubdata/", "") print("Getting %s" % uri, file=sys.stderr) stpubdata.download_file(key, 'temp.fits', ExtraArgs={"RequestPayer": "requester"}) fits = astropy.io.fits.getdata('temp.fits') print("{0},{1},{2},{3}".format(prop,
import numpy as np from astropy.io import fits from astroquery.mast import Observations # NOTE: Use your own key values here. os.environ['AWS_ACCESS_KEY_ID'] = 'somekey' os.environ['AWS_SECRET_ACCESS_KEY'] = 'somesecret' # NOTE: Change TESS observation ID as needed. obs_id = 'tess-s0001-1-1' # Find full frame dataset for the observation ID. obs_table = Observations.query_criteria(obs_id=obs_id) products = Observations.get_product_list(obs_table) filtered = Observations.filter_products(products, productSubGroupDescription="FFIC", mrp_only=False) # Set up AWS S3 bucket to pull data from. Observations.enable_cloud_dataset() s3_urls = Observations.get_cloud_uris(filtered, include_bucket=False) s3 = boto3.resource('s3') bucket = s3.Bucket('stpubdata') def time_mean(): """Loop through full frame files, extract a subarray, and calculate mean. This must be done in a way that the file is deleted as soon as it is no longer necessary to keep, so we do not use up all the disk space. .. note:: Algorithm can also be modified to construct subarrays
def retrieve_observation(obsid, suffix=['FLC'], archive=False, clobber=False,
                         product_type=None):
    """Simple interface for retrieving an observation from the MAST archive

    If the input obsid is for an association, it will request all members
    with the specified suffixes.

    Parameters
    -----------
    obsid : string or list of strings
        ID or list of IDs for observations to be retrieved from the MAST
        archive.  Only the IPPSSOOT (rootname) of exposure or ASN needs to be
        provided; eg., ib6v06060.  Additionally, a wild-carded `obsid` can be
        provided to retrieve all products for a visit; e.g., "ib6v06*".
        Data from multiple ASNs, exposures or visits can be retrieved by
        simply providing them as a list.

    suffix : list, optional
        List containing suffixes of files which should be requested from
        MAST.  Default value "['FLC']".  If nothing matches the requested
        suffixes, the products are filtered again for 'FLT' files instead.
        The default list is never modified in place (the name is only
        rebound), so sharing it between calls is harmless.

    archive : Boolean, optional
        Retain copies of the downloaded files in the astroquery created
        sub-directories? Default is "False".  When False, downloaded files
        are moved into the current directory and the 'mastDownload'
        directory is removed afterwards.

    clobber : Boolean, optional
        Download and Overwrite existing files? Default is "False".  When
        False, products whose filename already exists in the current
        directory are not downloaded again.

    product_type : str, optional
        Specify what type of product you want from the archive.  If `None`
        (default), no filtering on processing type is applied and all
        versions of the products processed for the requested datasets are
        returned.  Otherwise the value is used as a key into the
        module-level `product_type_dict` and only products whose
        description contains the mapped text are kept.  The product
        flavors are:

          - pipeline : files processed by `runastrodriz` to include the
            latest distortion calibrations and the best possible alignment
            to GAIA with `ipppssoot_fl[tc].fits` filenames for FLT/FLC
            files.
          - HAP : files processed as a single visit and aligned
            (as possible) to GAIA with
            `hst_<propid>_<visit>_<instr>_<det>_<filter>_<ipppssoo>_fl[tc].fits`
            filenames.

    Returns
    -------
    local_files : list
        List of filenames.  Empty when astroquery is unavailable, the query
        returns nothing, or no FLC/FLT products are found.
    """
    local_files = []

    if Observations is None:
        log.warning(
            "The astroquery package was not found. No files retrieved!")
        return local_files

    # Query MAST for every observation matching the requested ID(s)
    obs_table = Observations.query_criteria(obs_id=obsid)

    # Catch the case where no files are found for download
    if not obs_table:
        log.info("WARNING: Query for {} returned NO RESULTS!".format(obsid))
        return local_files

    dpobs = Observations.get_product_list(obs_table)
    if product_type:
        # Keep only the products whose description mentions the requested
        # processing type
        wanted_text = product_type_dict[product_type]
        ptypes = [wanted_text in descr for descr in dpobs['description']]
        dpobs = dpobs[ptypes]

    data_products_by_id = Observations.filter_products(
        dpobs,
        productSubGroupDescription=suffix,
        extension='fits',
        mrp_only=False)

    # After the filtering has been done, ensure there is still data in the
    # table for download. If the table is empty, look for FLT images in lieu
    # of FLC images. Only want one or the other (not both!), so just do the
    # filtering again.
    if not data_products_by_id:
        log.info("WARNING: No FLC files found for {} - will look for FLT "
                 "files instead.".format(obsid))
        suffix = ['FLT']
        data_products_by_id = Observations.filter_products(
            dpobs,
            productSubGroupDescription=suffix,
            extension='fits',
            mrp_only=False)

        # If still no data, then return. An exception will eventually be
        # thrown in the higher level code.
        if not data_products_by_id:
            log.info(
                "WARNING: No FLC or FLT files found for {}.".format(obsid))
            return local_files

    # Remember the full product list before rows for existing files are
    # dropped; the indices in `rows_to_remove` refer to this original order.
    all_images = data_products_by_id['productFilename'].tolist()
    log.info(all_images)

    rows_to_remove = []
    if not clobber:
        for row_idx, row in enumerate(data_products_by_id):
            fname = row['productFilename']
            if os.path.isfile(fname):
                log.info(fname + " already exists. File download skipped.")
                rows_to_remove.append(row_idx)
        data_products_by_id.remove_rows(rows_to_remove)

    manifest = Observations.download_products(data_products_by_id,
                                              mrp_only=False)

    if rows_to_remove:
        if not manifest:
            # Nothing was downloaded: every requested file already exists
            return all_images
        # Re-insert a placeholder row for every skipped file.  The indices
        # are positions in the ORIGINAL table, so they must be restored in
        # ASCENDING order: each insert then lands at its final position and
        # never exceeds the current table length.  Descending order either
        # raises IndexError (index beyond the shortened table) or scrambles
        # the row order.
        # NOTE(review): assumes the manifest rows follow the order of the
        # products passed to download_products - confirm.
        for rownum in rows_to_remove:
            manifest.insert_row(
                rownum, vals=[all_images[rownum], "LOCAL", "None", "None"])

    for file, file_status in zip(manifest['Local Path'], manifest['Status']):
        if file_status != "LOCAL":
            # Move or copy downloaded file to current directory
            local_file = os.path.abspath(os.path.basename(file))
            if archive:
                shutil.copy(file, local_file)
            else:
                shutil.move(file, local_file)
            # Record what files were downloaded and their current location
            local_files.append(os.path.basename(local_file))
        else:
            # Placeholder row: 'Local Path' already holds the bare filename
            local_files.append(file)

    if not archive:
        # Remove astroquery created sub-directories
        shutil.rmtree('mastDownload')
    return local_files
def retrieve_observation(obsid, suffix=['FLC'], archive=False, clobber=False):
    """Simple interface for retrieving an observation from the MAST archive

    If the input obsid is for an association, it will request all members
    with the specified suffixes.

    Parameters
    -----------
    obsid : string
        ID for observation to be retrieved from the MAST archive.  Only the
        IPPSSOOT (rootname) of exposure or ASN needs to be provided; eg.,
        ib6v06060.

    suffix : list
        List containing suffixes of files which should be requested from
        MAST.  If nothing matches the requested suffixes, the products are
        filtered again for 'FLT' files instead.  The default list is never
        modified in place (the name is only rebound), so sharing it between
        calls is harmless.

    archive : Boolean
        Retain copies of the downloaded files in the astroquery created
        sub-directories? Default is 'False'.

    clobber : Boolean
        Download and Overwrite existing files? Default is 'False'.

    Returns
    -------
    local_files : list
        List of filenames.  Downloaded files are always written to the
        current working directory.  The list is empty when the query
        returns nothing or no FLC/FLT products are found.
    """
    local_files = []

    # Query MAST for the data with an observation type of either "science"
    # or "calibration"
    obs_table = Observations.query_criteria(obs_id=obsid, obstype='all')
    # Catch the case where no files are found for download
    if len(obs_table) == 0:
        log.info("WARNING: Query for {} returned NO RESULTS!".format(obsid))
        return local_files

    dpobs = Observations.get_product_list(obs_table)
    data_products = Observations.filter_products(
        dpobs,
        productSubGroupDescription=suffix,
        extension='fits',
        mrp_only=False)

    # After the filtering has been done, ensure there is still data in the
    # table for download.  If the table is empty, look for FLT images in
    # lieu of FLC images. Only want one or the other (not both!), so just do
    # the filtering again.
    if len(data_products) == 0:
        log.info("WARNING: No FLC files found for {} - will look for FLT files instead.".format(obsid))
        suffix = ['FLT']
        data_products = Observations.filter_products(
            dpobs,
            productSubGroupDescription=suffix,
            extension='fits',
            mrp_only=False)

        # If still no data, then return.  An exception will eventually be
        # thrown in the higher level code.
        if len(data_products) == 0:
            log.info("WARNING: No FLC or FLT files found for {}.".format(obsid))
            return local_files

    # Remember the full product list before rows for existing files are
    # dropped; the indices in `rows_to_remove` refer to this original order.
    all_images = [row['productFilename'] for row in data_products]
    log.info(all_images)

    rows_to_remove = []
    if not clobber:
        for row_idx, row in enumerate(data_products):
            fname = row['productFilename']
            if os.path.exists(fname):
                log.info("{} already exists. File download skipped.".format(fname))
                rows_to_remove.append(row_idx)
        # Delete from the end so the remaining indices stay valid
        for row_idx in reversed(rows_to_remove):
            data_products.remove_row(row_idx)

    manifest = Observations.download_products(data_products, mrp_only=False)

    if rows_to_remove:
        if not manifest:
            # Nothing was downloaded: every requested file already exists
            return all_images
        # Restore a placeholder row for each skipped file, in ascending
        # order so every insert lands at its original position.
        for row_idx in rows_to_remove:
            manifest.insert_row(
                row_idx, vals=[all_images[row_idx], "LOCAL", "None", "None"])

    download_dir = None
    for file, file_status in zip(manifest['Local Path'], manifest['Status']):
        if file_status != "LOCAL":
            # Identify what sub-directory was created by astroquery for the
            # download: the first component of the path relative to the
            # current directory.  Normalizing first means a path with or
            # without a leading "./" gives the same answer (the previous
            # `list.remove('.')` raised ValueError when it was absent).
            if download_dir is None:
                rel_path = os.path.normpath(file)
                if os.path.isabs(rel_path):
                    rel_path = os.path.relpath(rel_path)
                top_dir = rel_path.split(os.sep)[0]
                # Never schedule anything at or above the current directory
                # for removal
                if top_dir not in ('', os.curdir, os.pardir):
                    download_dir = top_dir
            # Move or copy downloaded file to current directory
            local_file = os.path.abspath(os.path.basename(file))
            if archive:
                shutil.copy(file, local_file)
            else:
                shutil.move(file, local_file)
            # Record what files were downloaded and their current location
            local_files.append(os.path.basename(local_file))
        else:
            # Placeholder row: 'Local Path' already holds the bare filename
            local_files.append(file)

    if not archive and download_dir is not None:
        # Remove astroquery created sub-directories
        shutil.rmtree(download_dir)
    return local_files