import json
import time

import boto3
from astroquery.mast import Observations


def find_and_process(obs_collection='HST', dataproduct_type='image',
                     instrument_name='ACS/WFC', filters='F814W', N=100):
    # Use AWS S3 URLs for the MAST records (rather than the ones at http://mast.stsci.edu)
    Observations.enable_cloud_dataset(profile='ndmiles_admin')

    # Query MAST for some ACS/WFC data
    query_parameters = {
        'obs_collection': obs_collection,
        'dataproduct_type': dataproduct_type,
        'instrument_name': instrument_name,
        'filters': filters
    }
    obsTable = Observations.query_criteria(**query_parameters)

    # Grab the first N products:
    # http://astroquery.readthedocs.io/en/latest/mast/mast.html#getting-product-lists
    products = Observations.get_product_list(obsTable['obsid'][:N])

    # Keep only the calibrated (FLT) FITS files
    filtered_products = Observations.filter_products(
        products, mrp_only=False, productSubGroupDescription=['FLT'])

    # We want URLs like this: s3://stpubdata/hst/public/ibg7/ibg705080/ibg705081_drz.fits
    s3_urls = Observations.get_cloud_uris(filtered_products)

    # Auth to invoke a Lambda function
    session = boto3.Session(profile_name='ndmiles_admin')
    client = session.client('lambda', region_name='us-east-1')

    st = time.time()
    for url in s3_urls:
        fits_s3_key = url.replace("s3://stpubdata/", "")
        print(fits_s3_key)
        event = {
            'fits_s3_key': fits_s3_key,
            'fits_s3_bucket': 'stpubdata',
            's3_output_bucket': 'compute-sky-lambda'
        }
        Payload = json.dumps(event)
        lambda_inputs = {
            'FunctionName': 'compute_sky',
            'InvocationType': 'Event',
            'LogType': 'Tail',
            'Payload': Payload
        }
        response = client.invoke(**lambda_inputs)
    et = time.time()
    print(f"Duration: {et - st:0.2f}")
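The function above fires an asynchronous invocation of a Lambda named compute_sky for each FLT file, but the handler itself is not shown in this excerpt. A minimal sketch of what such a handler could look like is given below, assuming the event keys built in find_and_process(); the sky statistic (a plain median of the first science extension), the output key naming, and the Requester Pays setting are illustrative assumptions, not the project's actual implementation.

# Hypothetical 'compute_sky' Lambda handler matching the payload above.
# The sky statistic and output layout are assumptions for illustration.
import json
import os

import boto3
import numpy as np
from astropy.io import fits


def handler(event, context):
    s3 = boto3.client('s3')
    key = event['fits_s3_key']
    local_path = os.path.join('/tmp', os.path.basename(key))

    # stpubdata may be a Requester Pays bucket; drop ExtraArgs if not.
    s3.download_file(event['fits_s3_bucket'], key, local_path,
                     ExtraArgs={'RequestPayer': 'requester'})

    # Estimate the sky as the median of the first science extension,
    # then delete the file so /tmp does not fill up.
    with fits.open(local_path) as hdul:
        sky = float(np.median(hdul[1].data))
    os.remove(local_path)

    # Write the result next to the input key in the output bucket.
    result_key = key.replace('.fits', '_sky.json')
    s3.put_object(Bucket=event['s3_output_bucket'], Key=result_key,
                  Body=json.dumps({'fits_s3_key': key, 'sky': sky}))
    return {'fits_s3_key': key, 'sky': sky}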
def aws_fullframe_fits():
    """
    Loop through full frame files, extract a subarray, and calculate mean.

    This must be done in a way that the file is deleted as soon as it is
    no longer necessary to keep, so we do not use up all the disk space.
    """
    import boto3
    import os
    import typing
    import numpy as np
    from astropy.io import fits
    from astroquery.mast import Observations
    from bert import utils, constants  # assumed: bert-etl framework provides comm_binders and DEBUG
    from tess_bert import shortcuts
    from urllib.parse import urlparse, ParseResult

    work_queue, done_queue, ologger = utils.comm_binders(aws_fullframe_fits)

    OBS_ID: str = 'tess-s0001-1-1'
    DATA_DIR: str = os.path.join(shortcuts.getcwd(), 'data')
    if not os.path.exists(DATA_DIR):
        os.makedirs(DATA_DIR)

    obs_table = Observations.query_criteria(obs_id=OBS_ID)
    products = Observations.get_product_list(obs_table)
    filtered = Observations.filter_products(
        products, productSubGroupDescription="FFIC", mrp_only=False)
    Observations.enable_cloud_dataset()

    for idx, s3_url in enumerate(
            Observations.get_cloud_uris(filtered, includeBucket=True)):
        url_parts: ParseResult = urlparse(s3_url)
        filepath: str = os.path.join(DATA_DIR, os.path.basename(url_parts.path))
        done_queue.put({
            'bucket_path': url_parts.path.strip('/'),
            'filepath': filepath,
            'bucket': url_parts.netloc
        })
        if idx > 2 and constants.DEBUG:
            break
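This producer only enqueues work items; the job that consumes them is not shown here. A minimal sketch of a downstream bert-etl job is given below, assuming the same {bucket, bucket_path, filepath} payload, a Requester Pays download, and an arbitrary 100x100 subarray; the actual subarray location and statistic in the project may differ.

# Hypothetical consumer of the payloads queued by aws_fullframe_fits().
# The subarray slice and Requester Pays setting are assumptions.
def mean_of_subarrays():
    import os

    import boto3
    import numpy as np
    from astropy.io import fits
    from bert import utils  # assumed: bert-etl framework

    work_queue, done_queue, ologger = utils.comm_binders(mean_of_subarrays)
    s3 = boto3.resource('s3')

    for item in work_queue:
        bucket = s3.Bucket(item['bucket'])
        bucket.download_file(item['bucket_path'], item['filepath'],
                             ExtraArgs={'RequestPayer': 'requester'})

        # Extract a small subarray from the calibrated FFI extension,
        # keep only its mean, then delete the file immediately so local
        # disk does not fill up.
        with fits.open(item['filepath']) as hdul:
            subarr = hdul[1].data[100:200, 100:200]
            mean = float(np.mean(subarr))
        os.remove(item['filepath'])

        done_queue.put({'bucket_path': item['bucket_path'], 'mean': mean})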
os.environ['AWS_ACCESS_KEY_ID'] = 'somekey'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'somesecret'

# NOTE: Change TESS observation ID as needed.
obs_id = 'tess-s0001-1-1'

# Find full frame dataset for the observation ID.
obs_table = Observations.query_criteria(obs_id=obs_id)
products = Observations.get_product_list(obs_table)
filtered = Observations.filter_products(
    products, productSubGroupDescription="FFIC", mrp_only=False)

# Set up AWS S3 bucket to pull data from.
Observations.enable_cloud_dataset()
s3_urls = Observations.get_cloud_uris(filtered, include_bucket=False)
s3 = boto3.resource('s3')
bucket = s3.Bucket('stpubdata')


def time_mean():
    """Loop through full frame files, extract a subarray, and calculate mean.

    This must be done in a way that the file is deleted as soon as it is
    no longer necessary to keep, so we do not use up all the disk space.

    .. note:: Algorithm can also be modified to construct subarrays into a subcube.

    Returns
    -------
    final_mean : float
def bert_tess_fullframe_main_2():
    """Continuation of main function to run it across different sectors."""
    import os
    import time

    import boto3
    from astropy.io import fits
    from astropy.wcs import WCS
    from astroquery.mast import Observations
    from bert import utils  # assumed: bert-etl framework provides comm_binders

    s3 = boto3.resource('s3')
    bucket = s3.Bucket(name=os.environ.get('AWSBUCKETNAME'))
    outbucket = s3.Bucket(name=os.environ.get('CACHEBUCKETNAME'))
    homedir = os.environ.get('HOME')
    work_queue, done_queue, ologger = utils.comm_binders(
        bert_tess_fullframe_main_2)

    # Example event:
    # {
    #     "tic_id": "25155310",
    #     "sec_id": "tess-s0001-4-1",
    #     "ra": 63.3739396231274,
    #     "dec": -69.226822697583,
    #     "radius": 2.5,
    #     "cutout_width": 30,
    #     "use_cache": "true"
    # }
    #
    # work_queue populated by calling Lambda
    for event in work_queue:
        tic_id = event['tic_id']
        sec_id = event['sec_id']
        basename = f'{sec_id}_s3_uris.txt'  # noqa
        filename = os.path.join(homedir, basename)

        try:
            # Check if URI list already cached.
            # According to MAST, there is no need to invalidate cache here.
            ologger.info(f'Attempting to download {basename} from S3')
            outbucket.download_file(
                basename, filename, ExtraArgs={"RequestPayer": "requester"})
        except Exception:
            # Find full frame dataset for the observation ID.
            ologger.info('Started querying Observations...')
            obs_table = Observations.query_criteria(obs_id=sec_id)
            products = Observations.get_product_list(obs_table)
            filtered = Observations.filter_products(
                products, productSubGroupDescription="FFIC", mrp_only=False)

            # Use AWS S3 bucket to pull data from.
            Observations.enable_cloud_dataset(verbose=False)
            ologger.info('Started obtaining cloud URIs...')
            t_start = time.time()
            s3_urls = Observations.get_cloud_uris(
                filtered, include_bucket=False)
            t_end = time.time()
            ologger.info(f'Got {len(s3_urls)} URIs in {t_end - t_start} s')

            # Upload URI list to cache.
            with open(filename, 'w') as fout:
                for url in s3_urls:
                    fout.write(url + os.linesep)
            try:
                outbucket.upload_file(
                    filename, basename,
                    ExtraArgs={"RequestPayer": "requester"})
            except Exception as exc:
                ologger.error(str(exc))
            else:
                ologger.info(f'Uploaded {basename} to S3')
        else:
            # Use cache if it exists.
            with open(filename, 'r') as fin:
                s3_urls = [url.strip() for url in fin.readlines()]
            ologger.info(f'Read {len(s3_urls)} URIs from {basename}')
        finally:
            # Clean up
            if os.path.exists(filename):
                os.remove(filename)

        ra = float(event['ra'])
        dec = float(event['dec'])

        # TODO: Cache good WCS for a given sector/camera/ccd combo and use
        #       known good cache if available.
        # Find pixel coordinates from sky from first frame header.
        key = s3_urls[0]
        basename = key.split('/')[-1]
        filename = os.path.join(homedir, basename)
        ologger.info(f'Resolving WCS from {key}')
        bucket.download_file(
            key, filename, ExtraArgs={"RequestPayer": "requester"})
        hdr = fits.getheader(filename, ext=1)

        if hdr.get('WCSAXES', 0) != 2:  # WCSAXES == 2 means good WCS according to MIT
            ologger.error(f'{key} has invalid WCS')
            continue

        w = WCS(hdr)
        pix = w.all_world2pix(ra, dec, 0)
        xpos = round(float(pix[0]))  # float needed to get rid of 0-D array
        ypos = round(float(pix[1]))

        # Clean up
        os.remove(filename)

        # The star needs to be at least 2*radius pixels away in both X and Y.
        radius = float(event['radius'])
        edge_r = 2 * radius
        naxis1, naxis2 = w.pixel_shape  # X Y
        if (xpos < edge_r or xpos >= (naxis1 - edge_r) or
                ypos < edge_r or ypos >= (naxis2 - edge_r)):
            ologger.error(
                f'TIC {tic_id} in {sec_id}: X={xpos},Y={ypos} not at least '
                f'{edge_r} pixels away from the edge, skipping...')
            continue

        # Pass data into the next AWS Lambda function.
        ologger.info(f'TIC {tic_id} in {sec_id}: Started processing '
                     'full frame URIs...')
        for url in s3_urls:
            done_queue.put({
                'key': url,
                'tic_id': tic_id,
                'ra': ra,
                'dec': dec,
                'xpos': xpos,
                'ypos': ypos,
                'radius': radius,
                'cutout_width': event['cutout_width'],
                'use_cache': event['use_cache']})
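Each payload queued above is picked up by the next stage of the pipeline, which is not shown in this excerpt. A minimal sketch of what such a consumer could look like follows, assuming the same payload keys and a simple square cutout around (xpos, ypos); the photometry and timing details in the project's actual worker may differ.

# Hypothetical consumer of the payloads queued by bert_tess_fullframe_main_2().
# The cutout, the statistics, and the TSTART/TSTOP keywords are assumptions.
def bert_tess_cutout_worker():
    import os

    import boto3
    import numpy as np
    from astropy.io import fits
    from bert import utils  # assumed: bert-etl framework

    work_queue, done_queue, ologger = utils.comm_binders(bert_tess_cutout_worker)
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(name=os.environ.get('AWSBUCKETNAME'))
    homedir = os.environ.get('HOME')

    for event in work_queue:
        key = event['key']
        filename = os.path.join(homedir, key.split('/')[-1])
        bucket.download_file(
            key, filename, ExtraArgs={"RequestPayer": "requester"})

        # Cut a square of cutout_width pixels centered on the target,
        # then delete the file so disk space is freed immediately.
        half = int(event['cutout_width']) // 2
        x, y = int(event['xpos']), int(event['ypos'])
        with fits.open(filename) as hdul:
            cutout = hdul[1].data[y - half:y + half, x - half:x + half]
            # TSTART/TSTOP are assumed present in the image header.
            midtime = 0.5 * (hdul[1].header['TSTART'] + hdul[1].header['TSTOP'])
        os.remove(filename)

        done_queue.put({
            'tic_id': event['tic_id'],
            'midtime': midtime,
            'signal': float(np.sum(cutout)),
            'background': float(np.median(cutout))})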
import json
from multiprocessing import Pipe, Process

import numpy as np
from astroquery.mast import Observations


def lambda_handler(event, context):
    """Extract light curve data from one TESS full frame image.

    Parameters
    ----------
    event : dict
        API Gateway Lambda Proxy Input Format.
        Event doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html#api-gateway-simple-proxy-for-lambda-input-format

    context : object
        Lambda Context runtime methods and attributes.
        Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    result : dict
        API Gateway Lambda Proxy Output Format.
        Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
    """  # noqa
    obs_id = event['id']  # TESS observation ID; Example: 'tess-s0001-1-1'

    # TODO: Calculate some of these from the 10th frame?
    # For now, also take these and pass them on to the worker:
    payload = {
        'xpos': event['xpos'],
        'ypos': event['ypos'],
        'radius': event['radius'],
        'bright_pixel_threshold': event['bright_pixel_threshold']
    }

    # Find full frame dataset for the observation ID.
    obs_table = Observations.query_criteria(obs_id=obs_id)
    products = Observations.get_product_list(obs_table)
    filtered = Observations.filter_products(
        products, productSubGroupDescription="FFIC", mrp_only=False)

    # Use AWS S3 bucket to pull data from.
    Observations.enable_cloud_dataset()  # TODO: verbose=False ?
    s3_urls = Observations.get_cloud_uris(filtered, include_bucket=False)

    # TODO: Timed out! Try https://docs.python.org/3/library/asyncio.html ?
    # TODO: Handle same Lambda call invoked multiple times by AWS?

    # Call tess_fullframe_worker AWS Lambda function in parallel
    # https://aws.amazon.com/blogs/compute/parallel-processing-in-python-with-aws-lambda/
    parent_connections = []
    processes = []
    data = []

    for url in s3_urls[:2]:  # TODO: Remove [:2] when done testing
        payload['key'] = url
        parent_conn, child_conn = Pipe()
        parent_connections.append(parent_conn)
        arg = json.dumps(payload)
        process = Process(target=_pipe_worker, args=(arg, child_conn))
        processes.append(process)

    for process in processes:
        process.start()

    for process in processes:
        process.join()

    for parent_connection in parent_connections:
        try:
            response = parent_connection.recv()[0]
        except EOFError:
            response = {}

        if 'body' not in response:  # Worker Lambda threw exception
            continue

        body = json.loads(response['body'])
        row = (body['midtime'], body['signal'], body['background'])
        if np.all(list(map(np.isfinite, row))):
            data.append(row)

    # TODO: Save data as table.
    # filename = f'/tmp/{obs_id}_lightcurve.csv'
    # with open(filename, 'w') as fout:
    #     for row in data:
    #         fout.write(f'{row[0]},{row[1]},{row[2]}{os.linesep}')

    # TODO: Upload table to S3 and then delete the table locally.
    # TODO: Return table S3 URL below.
    # TODO: Do we want to plot it and upload the plot too?
    #       If so, need to add matplotlib as dependency.

    return {
        "statusCode": 200,
        "body": json.dumps({
            'n_rows': len(data),
            'data_url': 'TODO'
        })
    }
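The handler above relies on a module-level helper _pipe_worker that is not included in this excerpt. Following the AWS parallel-processing blog post linked in the code, a minimal sketch could look like the following; the worker Lambda's name (tess_fullframe_worker, per the comment above) and the synchronous invocation are assumptions.

# Hypothetical _pipe_worker helper following the AWS parallel-processing
# pattern referenced in lambda_handler(); the worker function name and
# invocation type are assumptions.
import json

import boto3


def _pipe_worker(payload, conn):
    """Invoke the worker Lambda synchronously and pipe back its response."""
    client = boto3.client('lambda')
    try:
        response = client.invoke(
            FunctionName='tess_fullframe_worker',
            InvocationType='RequestResponse',
            Payload=payload)
        # lambda_handler() expects a one-element list whose item may contain
        # a 'body' key (API Gateway proxy output format).
        conn.send([json.loads(response['Payload'].read())])
    except Exception:
        conn.send([{}])  # 'body' key absent signals a failed worker call
    finally:
        conn.close()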