def enqueue_msgs(fp):
    """Parse given messages and send to SQS queue.

    The first line of *fp* must be a JSON header containing the keys
    'upload_queue_url', 'ingest_queue_url', and 'job_id'.  Each subsequent
    line is converted to a queue message via parse_line() and sent to the
    upload queue in batches of MAX_BATCH_MSGS.

    Args:
        fp (file-like-object): File-like-object containing a header and messages.

    Raises:
        KeyError: If the header is missing a required key.
    """
    read_header = False
    msgs = []
    upload_queue = None
    line_num = 0

    for line in fp:
        line_num += 1

        if not read_header:
            header = json.loads(line)
            # Validate all required header keys up front.
            for key in ('upload_queue_url', 'ingest_queue_url', 'job_id'):
                if key not in header:
                    raise KeyError('Expected {} in header'.format(key))
            read_header = True
            continue

        try:
            msgs.append(parse_line(header, line))
        except Exception:
            # Best-effort: report and skip unparseable lines.  Was a bare
            # `except:` which also swallowed SystemExit/KeyboardInterrupt.
            print('Error parsing line {}: {}'.format(line_num, line))

        if len(msgs) == 1 and upload_queue is None:
            # Instantiate the upload queue object lazily, from the first
            # successfully parsed message (messages are JSON strings).
            as_dict = json.loads(msgs[0])
            boss_ingest_proj = BossIngestProj.fromTileKey(as_dict['tile_key'])
            boss_ingest_proj.job_id = header['job_id']
            upload_queue = UploadQueue(boss_ingest_proj)

        if len(msgs) >= MAX_BATCH_MSGS:
            # Enqueue a full batch and start accumulating the next one.
            upload_queue.sendBatchMessages(msgs)
            msgs = []

    if msgs:
        # Final enqueue of any remaining messages.
        upload_queue.sendBatchMessages(msgs)
SETTINGS = BossSettings.load() # Parse input args passed as a JSON string from the lambda loader json_event = sys.argv[1] event = json.loads(json_event) print(event) # extract bucket name and tile key from the event bucket = event['Records'][0]['s3']['bucket']['name'] tile_key = urllib.parse.unquote_plus( event['Records'][0]['s3']['object']['key']) print("Bucket: {}".format(bucket)) print("Tile key: {}".format(tile_key)) # fetch metadata from the s3 object proj_info = BossIngestProj.fromTileKey(tile_key) tile_bucket = TileBucket(proj_info.project_name) message_id, receipt_handle, metadata = tile_bucket.getMetadata(tile_key) print("Metadata: {}".format(metadata)) # Currently this is what is sent from the client for the "metadata" # metadata = {'chunk_key': 'chunk_key', # 'ingest_job': self.ingest_job_id, # 'parameters': {"upload_queue": XX # "ingest_queue": XX, # "ingest_lambda":XX, # "KVIO_SETTINGS": XX, # "STATEIO_CONFIG": XX, # "OBJECTIO_CONFIG": XX # }, # 'tile_size_x': "{}".format(self.config.config_data["ingest_job"]["tile_size"]["x"]),
def process(msg, context, region):
    """ Process a single message.

    Marks the message's tile as uploaded in the tile index (DynamoDB) and,
    once every tile of the chunk is present, enqueues the chunk on the
    ingest queue and fires the tile-ingest lambda.

    Args:
        msg (dict): Contents described at the top of the file.
        context (Context): Lambda context object.
        region (str): Lambda execution region.
    """
    job_id = int(msg['ingest_job'])
    chunk_key = msg['chunk_key']
    tile_key = msg['tile_key']
    print("Tile key: {}".format(tile_key))

    proj_info = BossIngestProj.fromTileKey(tile_key)

    # Set the job id
    proj_info.job_id = msg['ingest_job']

    print("Data: {}".format(msg))

    # update value in the dynamo table
    tile_index_db = BossTileIndexDB(proj_info.project_name)
    chunk = tile_index_db.getCuboid(chunk_key, job_id)
    if chunk:
        if tile_index_db.cuboidReady(chunk_key, chunk["tile_uploaded_map"]):
            print("Chunk already has all its tiles: {}".format(chunk_key))
            # Go ahead and setup to fire another ingest lambda so this tile
            # entry will be deleted on successful execution of the ingest lambda.
            chunk_ready = True
        else:
            print("Updating tile index for chunk_key: {}".format(chunk_key))
            chunk_ready = tile_index_db.markTileAsUploaded(chunk_key, tile_key, job_id)
    else:
        # First tile in the chunk
        print("Creating first entry for chunk_key: {}".format(chunk_key))
        try:
            tile_index_db.createCuboidEntry(chunk_key, job_id)
        except ClientError as err:
            # Under _exceptional_ circumstances, it's possible for another lambda
            # to beat the current instance to creating the initial cuboid entry
            # in the index.
            error_code = err.response['Error'].get('Code', 'Unknown')
            if error_code == 'ConditionalCheckFailedException':
                # Lost the race; the entry exists, which is all we need.
                print('Chunk key entry already created - proceeding.')
            else:
                raise
        chunk_ready = tile_index_db.markTileAsUploaded(chunk_key, tile_key, job_id)

    # ingest the chunk if we have all the tiles
    if chunk_ready:
        print("CHUNK READY SENDING MESSAGE: {}".format(chunk_key))
        # insert a new job in the insert queue if we have all the tiles
        ingest_queue = IngestQueue(proj_info)
        ingest_queue.sendMessage(json.dumps(msg))

        # Invoke Ingest lambda function asynchronously ('Event' type).
        # NOTE(review): another copy of this function in the file uses
        # AWSNames.from_lambda / names.tile_ingest.lambda_ instead —
        # confirm which AWSNames API version this deployment targets.
        names = AWSNames.create_from_lambda_name(context.function_name)
        lambda_client = boto3.client('lambda', region_name=region)
        lambda_client.invoke(
            FunctionName=names.tile_ingest_lambda,
            InvocationType='Event',
            Payload=json.dumps(msg).encode())
    else:
        print("Chunk not ready for ingest yet: {}".format(chunk_key))

    print("DONE!")
def process(msg, context, region):
    """ Process a single message.

    Records the tile in the chunk's tile index and, when the chunk has all
    of its tiles, enqueues it for ingest and invokes the tile-ingest lambda.

    Args:
        msg (dict): Contents described at the top of the file.
        context (Context): Lambda context object.
        region (str): Lambda execution region.
    """
    job_id = int(msg['ingest_job'])
    chunk_key = msg['chunk_key']
    tile_key = msg['tile_key']
    print("Tile key: {}".format(tile_key))

    proj_info = BossIngestProj.fromTileKey(tile_key)
    # Attach the job id to the project descriptor.
    proj_info.job_id = msg['ingest_job']

    print("Data: {}".format(msg))

    # Update this tile's entry in the dynamo tile index.
    tile_index_db = BossTileIndexDB(proj_info.project_name)
    chunk = tile_index_db.getCuboid(chunk_key, job_id)

    if not chunk:
        # First tile in the chunk
        print("Creating first entry for chunk_key: {}".format(chunk_key))
        try:
            tile_index_db.createCuboidEntry(chunk_key, job_id)
        except ClientError as ex:
            # Under _exceptional_ circumstances another lambda may beat this
            # instance to creating the initial cuboid entry in the index.
            code = ex.response['Error'].get('Code', 'Unknown')
            if code != 'ConditionalCheckFailedException':
                raise
            print('Chunk key entry already created - proceeding.')
        chunk_ready = tile_index_db.markTileAsUploaded(chunk_key, tile_key, job_id)
    elif tile_index_db.cuboidReady(chunk_key, chunk["tile_uploaded_map"]):
        print("Chunk already has all its tiles: {}".format(chunk_key))
        # Fire another ingest lambda anyway so this tile entry gets deleted
        # on successful execution of the ingest lambda.
        chunk_ready = True
    else:
        print("Updating tile index for chunk_key: {}".format(chunk_key))
        chunk_ready = tile_index_db.markTileAsUploaded(chunk_key, tile_key, job_id)

    # Ingest the chunk only once all of its tiles have arrived.
    if chunk_ready:
        print("CHUNK READY SENDING MESSAGE: {}".format(chunk_key))
        # Queue the chunk for ingest, then invoke the ingest lambda
        # asynchronously ('Event' invocation type).
        IngestQueue(proj_info).sendMessage(json.dumps(msg))

        names = AWSNames.from_lambda(context.function_name)
        boto3.client('lambda', region_name=region).invoke(
            FunctionName=names.tile_ingest.lambda_,
            InvocationType='Event',
            Payload=json.dumps(msg).encode())
    else:
        print("Chunk not ready for ingest yet: {}".format(chunk_key))

    print("DONE!")