def lambda_handler(event, context): print("We got the following event:\n", event) try: if "ProxyEncode" in event["Input"]["Media"]: s3bucket = event["Input"]["Media"]["ProxyEncode"]["S3Bucket"] s3key = event["Input"]["Media"]["ProxyEncode"]["S3Key"] elif "Video" in event["Input"]["Media"]: s3bucket = event["Input"]["Media"]["Video"]["S3Bucket"] s3key = event["Input"]["Media"]["Video"]["S3Key"] elif "Image" in event["Input"]["Media"]: s3bucket = event["Input"]["Media"]["Image"]["S3Bucket"] s3key = event["Input"]["Media"]["Image"]["S3Key"] workflow_id = str(event["WorkflowExecutionId"]) asset_id = event['AssetId'] except Exception: output_object.update_workflow_status("Error") output_object.add_workflow_metadata(LabelDetectionError="No valid inputs") raise MasExecutionError(output_object.return_output_object()) print("Processing s3://"+s3bucket+"/"+s3key) valid_video_types = [".avi", ".mp4", ".mov"] valid_image_types = [".png", ".jpg", ".jpeg"] file_type = os.path.splitext(s3key)[1].lower() if file_type in valid_image_types: # Image processing is synchronous. response = detect_labels(s3bucket, urllib.parse.unquote_plus(s3key)) output_object.add_workflow_metadata(AssetId=asset_id,WorkflowExecutionId=workflow_id) dataplane = DataPlane() metadata_upload = dataplane.store_asset_metadata(asset_id, operator_name, workflow_id, response) if "Status" not in metadata_upload: output_object.update_workflow_status("Error") output_object.add_workflow_metadata( LabelDetectionError="Unable to upload metadata for asset: {asset}".format(asset=asset_id)) raise MasExecutionError(output_object.return_output_object()) else: if metadata_upload["Status"] == "Success": print("Uploaded metadata for asset: {asset}".format(asset=asset_id)) output_object.update_workflow_status("Complete") return output_object.return_output_object() elif metadata_upload["Status"] == "Failed": output_object.update_workflow_status("Error") output_object.add_workflow_metadata( LabelDetectionError="Unable to upload metadata for asset: {asset}".format(asset=asset_id)) raise MasExecutionError(output_object.return_output_object()) else: output_object.update_workflow_status("Error") output_object.add_workflow_metadata( LabelDetectionError="Unable to upload metadata for asset: {asset}".format(asset=asset_id)) raise MasExecutionError(output_object.return_output_object()) elif file_type in valid_video_types: # Video processing is asynchronous. job_id = start_label_detection(s3bucket, urllib.parse.unquote_plus(s3key)) output_object.update_workflow_status("Executing") output_object.add_workflow_metadata(JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id) return output_object.return_output_object() else: print("ERROR: invalid file type") output_object.update_workflow_status("Error") output_object.add_workflow_metadata(LabelDetectionError="Not a valid file type") raise MasExecutionError(output_object.return_output_object())
def lambda_handler(event, context): print("We got this event:\n", event) operator_object = MediaInsightsOperationHelper(event) # If Transcribe wasn't run due to silent audio, then we're done if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"] and event[ "Input"]["MetaData"]["Mediainfo_num_audio_tracks"] == "0": operator_object.update_workflow_status("Complete") return operator_object.return_output_object() try: job_id = operator_object.metadata["TranscribeJobId"] workflow_id = operator_object.workflow_execution_id asset_id = operator_object.asset_id except KeyError as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranscribeError="Missing a required metadata key {e}".format(e=e)) raise MasExecutionError(operator_object.return_output_object()) try: response = transcribe.get_transcription_job( TranscriptionJobName=job_id) print(response) except Exception as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(TranscribeError=str(e), TranscribeJobId=job_id) raise MasExecutionError(operator_object.return_output_object()) else: if response["TranscriptionJob"][ "TranscriptionJobStatus"] == "IN_PROGRESS": operator_object.update_workflow_status("Executing") operator_object.add_workflow_metadata( TranscribeJobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id) return operator_object.return_output_object() elif response["TranscriptionJob"][ "TranscriptionJobStatus"] == "FAILED": operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranscribeJobId=job_id, TranscribeError=str( response["TranscriptionJob"]["FailureReason"])) raise MasExecutionError(operator_object.return_output_object()) elif response["TranscriptionJob"][ "TranscriptionJobStatus"] == "COMPLETED": transcribe_uri = response["TranscriptionJob"]["Transcript"][ "TranscriptFileUri"] http = urllib3.PoolManager() transcription = http.request('GET', transcribe_uri) transcription_data = transcription.data.decode("utf-8") transcription_json = json.loads(transcription_data) text_only_transcript = '' for transcripts in transcription_json["results"]["transcripts"]: transcript = transcripts["transcript"] text_only_transcript = text_only_transcript.join(transcript) print(text_only_transcript) dataplane = DataPlane() s3 = boto3.client('s3') transcript_storage_path = dataplane.generate_media_storage_path( asset_id, workflow_id) key = transcript_storage_path['S3Key'] + "transcript.txt" bucket = transcript_storage_path['S3Bucket'] s3.put_object(Bucket=bucket, Key=key, Body=text_only_transcript) transcription_json["TextTranscriptUri"] = { "S3Bucket": bucket, "S3Key": key } metadata_upload = dataplane.store_asset_metadata( asset_id, operator_object.name, workflow_id, transcription_json) if "Status" not in metadata_upload: operator_object.add_workflow_metadata( TranscribeError= "Unable to upload metadata for asset: {asset}".format( asset=asset_id), TranscribeJobId=job_id) operator_object.update_workflow_status("Error") raise MasExecutionError(operator_object.return_output_object()) else: if metadata_upload['Status'] == 'Success': operator_object.add_media_object('Text', metadata_upload['Bucket'], metadata_upload['Key']) operator_object.add_workflow_metadata( TranscribeJobId=job_id) operator_object.update_workflow_status("Complete") return operator_object.return_output_object() else: operator_object.add_workflow_metadata( TranscribeError= "Unable to upload metadata for asset: {asset}".format( asset=asset_id), 
TranscribeJobId=job_id) operator_object.update_workflow_status("Error") raise MasExecutionError( operator_object.return_output_object()) else: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranscribeError="Unable to determine status") raise MasExecutionError(operator_object.return_output_object())
def lambda_handler(event, context): print("We got the following event:\n", event) try: status = event["Status"] asset_id = event['MetaData']['AssetId'] except KeyError as e: output_object.update_workflow_status("Error") output_object.add_workflow_metadata(LabelDetectionError="Missing key {e}".format(e=e)) raise MasExecutionError(output_object.return_output_object()) # Images will have already been processed, so return if job status is already set. if status == "Complete": output_object.update_workflow_status("Complete") return output_object.return_output_object() try: job_id = event["MetaData"]["JobId"] workflow_id = event["MetaData"]["WorkflowExecutionId"] except KeyError as e: output_object.update_workflow_status("Error") output_object.add_workflow_metadata(LabelDetectionError="Missing a required metadata key {e}".format(e=e)) raise MasExecutionError(output_object.return_output_object()) # Check rekognition job status: dataplane = DataPlane() pagination_token = '' is_paginated = False # If pagination token is in event["MetaData"] then use that to start # reading reko results from where this Lambda's previous invocation left off. if ("PageToken" in event["MetaData"]): pagination_token = event["MetaData"]["PageToken"] is_paginated = True # Read and persist 10 reko pages per invocation of this Lambda for page_number in range(11): # Get reko results print("job id: " + job_id + " page token: " + pagination_token) try: response = rek.get_label_detection(JobId=job_id, NextToken=pagination_token) except rek.exceptions.InvalidPaginationTokenException as e: # Trying to reverse seek to the last valid pagination token would be difficult # to implement, so in the rare case that a pagination token expires we'll # just start over by reading from the first page. print(e) print("WARNING: Invalid pagination token found. Restarting read from first page.") pagination_token='' continue # If the reko job is IN_PROGRESS then return. We'll check again after a step function wait. if response['JobStatus'] == "IN_PROGRESS": output_object.update_workflow_status("Executing") output_object.add_workflow_metadata(JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id) return output_object.return_output_object() # If the reko job is FAILED then mark the workflow status as Error and return. elif response['JobStatus'] == "FAILED": output_object.update_workflow_status("Error") output_object.add_workflow_metadata(JobId=job_id, LabelDetectionError=str(response["StatusMessage"])) raise MasExecutionError(output_object.return_output_object()) # If the reko job is SUCCEEDED then save this current reko page result # and continue to next page_number. elif response['JobStatus'] == "SUCCEEDED": # If reko results contain more pages then save this page and continue to the next page if 'NextToken' in response: is_paginated = True # Persist rekognition results (current page) metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=operator_name, workflow_id=workflow_id, results=response, paginate=True, end=False) # If dataplane request succeeded then get the next pagination token and continue. 
if "Status" in metadata_upload and metadata_upload["Status"] == "Success": # Log that this page has been successfully uploaded to the dataplane print("Uploaded metadata for asset: {asset}, job {JobId}, page {page}".format(asset=asset_id, JobId=job_id, page=pagination_token)) # Get the next pagination token: pagination_token = response['NextToken'] # In order to avoid Lambda timeouts, we're only going to persist 10 pages then # pass the pagination token to the workflow metadata and let our step function # invoker restart this Lambda. The pagination token allows this Lambda # continue from where it left off. if page_number == 10: output_object.update_workflow_status("Executing") output_object.add_workflow_metadata(PageToken=pagination_token, JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id) return output_object.return_output_object() # If dataplane request failed then mark workflow as failed else: output_object.update_workflow_status("Error") output_object.add_workflow_metadata(LabelDetectionError="Unable to upload metadata for asset: {asset}".format(asset=asset_id), JobId=job_id) raise MasExecutionError(output_object.return_output_object()) # If reko results contain no more pages then save this page and mark the stage complete else: # If we've been saving pages, then tell dataplane this is the last page if is_paginated: metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=operator_name, workflow_id=workflow_id, results=response, paginate=True, end=True) # If there is only one page then save to dataplane without dataplane options else: metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=operator_name, workflow_id=workflow_id, results=response) # If dataplane request succeeded then mark the stage complete if "Status" in metadata_upload and metadata_upload["Status"] == "Success": print("Uploaded metadata for asset: {asset}, job {JobId}, page {page}".format(asset=asset_id, JobId=job_id, page=pagination_token)) output_object.add_workflow_metadata(JobId=job_id) output_object.update_workflow_status("Complete") return output_object.return_output_object() # If dataplane request failed then mark workflow as failed else: output_object.update_workflow_status("Error") output_object.add_workflow_metadata(LabelDetectionError="Unable to upload metadata for {asset}: {error}".format(asset=asset_id, error=metadata_upload)) output_object.add_workflow_metadata(JobId=job_id) raise MasExecutionError(output_object.return_output_object()) # If reko job failed then mark workflow as failed else: output_object.update_workflow_status("Error") output_object.add_workflow_metadata(LabelDetectionError="Unable to determine status") raise MasExecutionError(output_object.return_output_object())
def lambda_handler(event, context): print("We got the following event:\n", event) operator_object = MediaInsightsOperationHelper(event) try: bucket = operator_object.input["Media"]["Text"]["S3Bucket"] key = operator_object.input["Media"]["Text"]["S3Key"] except KeyError as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranslateError="No valid inputs {e}".format(e=e)) raise MasExecutionError(operator_object.return_output_object()) try: workflow_id = operator_object.workflow_execution_id except KeyError as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranslateError="Missing a required metadata key {e}".format(e=e)) raise MasExecutionError(operator_object.return_output_object()) try: asset_id = operator_object.asset_id except KeyError: print('No asset id for this workflow') asset_id = '' try: source_lang = operator_object.configuration["SourceLanguageCode"] target_lang = operator_object.configuration["TargetLanguageCode"] except KeyError: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranslateError="Language codes are not defined") raise MasExecutionError(operator_object.return_output_object()) try: s3_response = s3.get_object(Bucket=bucket, Key=key) transcribe_metadata = json.loads( s3_response["Body"].read().decode("utf-8")) transcript = transcribe_metadata["results"]["transcripts"][0][ "transcript"] except Exception as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranslateError="Unable to read transcription from S3: {e}".format( e=str(e))) raise MasExecutionError(operator_object.return_output_object()) # If input text is empty then we're done. if len(transcript) < 1: operator_object.update_workflow_status("Complete") return operator_object.return_output_object() # Tell the NLTK data loader to look for files in /tmp/ nltk.data.path.append("/tmp/") # Download NLTK tokenizers to /tmp/ # We use /tmp because that's where AWS Lambda provides write access to the local file system. nltk.download('punkt', download_dir='/tmp/') # Create language tokenizer according to user-specified source language. # Default to English. 
if source_lang == 'fr': print("Using French dictionary to find sentence boundaries.") tokenizer = nltk.data.load('tokenizers/punkt/french.pickle') elif source_lang == 'de': print("Using German dictionary to find sentence boundaries.") tokenizer = nltk.data.load('tokenizers/punkt/german.pickle') elif source_lang == 're': print("Using Russian dictionary to find sentence boundaries.") tokenizer = nltk.data.load('tokenizers/punkt/russian.pickle') elif source_lang == 'it': print("Using Italian dictionary to find sentence boundaries.") tokenizer = nltk.data.load('tokenizers/punkt/italian.pickle') elif source_lang == 'pt': print("Using Portuguese dictionary to find sentence boundaries.") tokenizer = nltk.data.load('tokenizers/punkt/portuguese.pickle') elif source_lang == 'es': print("Using Spanish dictionary to find sentence boundaries.") tokenizer = nltk.data.load('tokenizers/punkt/spanish.pickle') else: print("Using English dictionary to find sentence boundaries.") tokenizer = nltk.data.load('tokenizers/punkt/english.pickle') # Split input text into a list of sentences sentences = tokenizer.tokenize(transcript) print("Input text length: " + str(len(transcript))) print("Number of sentences: " + str(len(sentences))) translated_text = '' transcript_chunk = '' for sentence in sentences: # Translate can handle 5000 unicode characters but we'll process no # more than 1000 just to be on the safe side. # Even by limiting input text to 3000 characters, we've still seen # translate throttling with a RateExceeded exception. # Reducing input text to 1000 characters seemed to fix this. if (len(sentence) + len(transcript_chunk) < 1000): transcript_chunk = transcript_chunk + ' ' + sentence else: try: print("Translation input text length: " + str(len(transcript_chunk))) translation_chunk = translate_client.translate_text( Text=transcript_chunk, SourceLanguageCode=source_lang, TargetLanguageCode=target_lang) print("Translation output text length: " + str(len(translation_chunk))) except Exception as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranslateError="Unable to get response from translate: {e}" .format(e=str(e))) raise MasExecutionError(operator_object.return_output_object()) translated_text = translated_text + ' ' + translation_chunk[ "TranslatedText"] transcript_chunk = sentence print("Translating the final chunk of input text...") try: print("Translation input text length: " + str(len(transcript_chunk))) translation_chunk = translate_client.translate_text( Text=transcript_chunk, SourceLanguageCode=source_lang, TargetLanguageCode=target_lang) print("Translation output text length: " + str(len(translation_chunk))) except Exception as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( TranslateError="Unable to get response from translate: {e}".format( e=str(e))) raise MasExecutionError(operator_object.return_output_object()) translated_text = translated_text + ' ' + translation_chunk[ "TranslatedText"] # Put final result into a JSON object because the MIE dataplane requires it to be so. 
translation_result = {} translation_result["TranslatedText"] = translated_text translation_result["SourceLanguageCode"] = source_lang translation_result["TargetLanguageCode"] = target_lang print("Final translation text length: " + str(len(translated_text))) dataplane = DataPlane() metadata_upload = dataplane.store_asset_metadata(asset_id, operator_object.name, workflow_id, translation_result) if "Status" not in metadata_upload: operator_object.add_workflow_metadata( TranslateError="Unable to upload metadata for asset: {asset}". format(asset=asset_id)) operator_object.update_workflow_status("Error") raise MasExecutionError(operator_object.return_output_object()) else: if metadata_upload['Status'] == 'Success': operator_object.add_media_object('Text', metadata_upload['Bucket'], metadata_upload['Key']) operator_object.update_workflow_status("Complete") return operator_object.return_output_object() else: operator_object.add_workflow_metadata( TranslateError="Unable to upload metadata for asset: {asset}". format(asset=asset_id)) operator_object.update_workflow_status("Error") raise MasExecutionError(operator_object.return_output_object())
def lambda_handler(event, context): print("We got this event:\n", event) operator_object = MediaInsightsOperationHelper(event) try: job_id = operator_object.metadata["comprehend_phrases_job_id"] asset_id = operator_object.asset_id workflow_id = operator_object.workflow_execution_id # If Comprehend wasn't run due to empty text input, then we're done if job_id == "Empty input --> empty output.": operator_object.update_workflow_status("Complete") return operator_object.return_output_object() except KeyError: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( comprehend_error="No valid job id") raise MasExecutionError(operator_object.return_output_object()) try: response = comprehend.list_key_phrases_detection_jobs(Filter={ 'JobName': job_id, }, ) except Exception as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( comprehend_error="Unable to get response from comprehend: {e}". format(e=e)) raise MasExecutionError(operator_object.return_output_object()) else: print(response) comprehend_status = response["KeyPhrasesDetectionJobPropertiesList"][ 0]["JobStatus"] if comprehend_status == "SUBMITTED" or comprehend_status == "IN_PROGRESS": operator_object.add_workflow_metadata( comprehend_phrases_job_id=job_id) operator_object.update_workflow_status("Executing") return operator_object.return_output_object() elif comprehend_status == "COMPLETED": output_uri = response["KeyPhrasesDetectionJobPropertiesList"][0][ "OutputDataConfig"]["S3Uri"] delimeter = '/' bucket = delimeter.join(output_uri.split(delimeter)[2:3]) file_name = output_uri.split(delimeter)[-1] key = delimeter.join( output_uri.split(delimeter)[3:-1]) + '/' + file_name comprehend_tarball = read_from_s3(bucket, key) comprehend_data = { "LanguageCode": response['KeyPhrasesDetectionJobPropertiesList'][0] ['LanguageCode'], "Results": [] } if comprehend_tarball["Status"] == "Success": input_bytes = comprehend_tarball["Object"] with tarfile.open(fileobj=BytesIO(input_bytes)) as tf: for member in tf: if member.isfile(): comprehend_data["Results"].append( tf.extractfile(member).read().decode('utf-8')) dataplane = DataPlane() metadata_upload = dataplane.store_asset_metadata( asset_id, "key_phrases", workflow_id, comprehend_data) if "Status" not in metadata_upload: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( comprehend_error="Unable to store key phrases data {e}" .format(e=metadata_upload)) raise MasExecutionError( operator_object.return_output_object()) else: if metadata_upload["Status"] == "Success": operator_object.add_workflow_metadata( comprehend_entity_job_id=job_id, output_uri=output_uri) operator_object.update_workflow_status("Complete") return operator_object.return_output_object() else: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( comprehend_error= "Unable to store key phrases data {e}".format( e=metadata_upload)) raise MasExecutionError( operator_object.return_output_object()) else: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( comprehend_entity_job_id=job_id, comprehend_error="could not retrieve output from s3: {e}". 
format(e=comprehend_tarball["Message"])) raise MasExecutionError(operator_object.return_output_object()) else: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( comprehend_phrases_job_id=job_id, comprehend_error="comprehend returned as failed: {e}".format( e=response["KeyPhrasesDetectionJobPropertiesList"][0] ["Message"])) raise MasExecutionError(operator_object.return_output_object())
def lambda_handler(event, context):
    operator_object = MediaInsightsOperationHelper(event)
    # Get operator parameters
    try:
        workflow_id = str(event["WorkflowExecutionId"])
        asset_id = event['AssetId']
        if "Video" in operator_object.input["Media"]:
            bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
            key = operator_object.input["Media"]["Video"]["S3Key"]
            file_type = key.split('.')[-1]
        elif "Audio" in operator_object.input["Media"]:
            bucket = operator_object.input["Media"]["Audio"]["S3Bucket"]
            key = operator_object.input["Media"]["Audio"]["S3Key"]
            file_type = key.split('.')[-1]
        elif "Image" in operator_object.input["Media"]:
            bucket = operator_object.input["Media"]["Image"]["S3Bucket"]
            key = operator_object.input["Media"]["Image"]["S3Key"]
            file_type = key.split('.')[-1]
        elif "Text" in operator_object.input["Media"]:
            bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
            key = operator_object.input["Media"]["Text"]["S3Key"]
            file_type = key.split('.')[-1]
    except Exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(GenericDataLookupError="No valid inputs")
        raise MasExecutionError(operator_object.return_output_object())
    # Get the metadata filename
    print("Looking up metadata for s3://" + bucket + "/" + key)
    # Get user-defined location for generic data file
    if "Key" in operator_object.configuration:
        metadata_filename = operator_object.configuration["Key"]
    else:
        operator_object.add_workflow_metadata(GenericDataLookupError="Missing S3 key for data file.")
        operator_object.update_workflow_status("Error")
        raise MasExecutionError(operator_object.return_output_object())
    if "Bucket" in operator_object.configuration:
        metadata_bucket = operator_object.configuration["Bucket"]
    else:
        operator_object.add_workflow_metadata(GenericDataLookupError="Missing S3 bucket for data file.")
        operator_object.update_workflow_status("Error")
        raise MasExecutionError(operator_object.return_output_object())
    # Get metadata
    s3 = boto3.client('s3')
    try:
        print("Getting data from s3://" + metadata_bucket + "/" + metadata_filename)
        data = s3.get_object(Bucket=metadata_bucket, Key=metadata_filename)
        metadata_json = json.loads(data['Body'].read().decode('utf-8'))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(GenericDataLookupError="Unable to read datafile. " + str(e))
        raise MasExecutionError(operator_object.return_output_object())
    # Verify that the metadata is a dict, as required by the dataplane
    if type(metadata_json) != dict:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            GenericDataLookupError="Metadata must be of type dict. Found " + str(type(metadata_json)) + " instead.")
        raise MasExecutionError(operator_object.return_output_object())
    # Save metadata to dataplane
    operator_object.add_workflow_metadata(AssetId=asset_id, WorkflowExecutionId=workflow_id)
    dataplane = DataPlane()
    metadata_upload = dataplane.store_asset_metadata(asset_id, operator_object.name, workflow_id, metadata_json)
    # Validate that the metadata was saved to the dataplane
    if "Status" not in metadata_upload:
        operator_object.add_workflow_metadata(
            GenericDataLookupError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
        operator_object.update_workflow_status("Error")
        raise MasExecutionError(operator_object.return_output_object())
    else:
        # Update the workflow status
        if metadata_upload["Status"] == "Success":
            print("Uploaded metadata for asset: {asset}".format(asset=asset_id))
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
        else:
            operator_object.add_workflow_metadata(
                GenericDataLookupError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
            operator_object.update_workflow_status("Error")
            raise MasExecutionError(operator_object.return_output_object())
def lambda_handler(event, context):
    # output_object, operator_name, and the Rekognition client (rek) are
    # expected to be defined at module scope (not shown here).
    try:
        status = event["Status"]
        asset_id = event['MetaData']['AssetId']
    except KeyError as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(PersonTrackingError="Missing key {e}".format(e=e))
        raise MasExecutionError(output_object.return_output_object())
    # Images will have already been processed, so return if job status is already set.
    if status == "Complete":
        # TODO: Persist rekognition output
        output_object.update_workflow_status("Complete")
        return output_object.return_output_object()
    try:
        job_id = event["MetaData"]["PersonTrackingJobId"]
        workflow_id = event["MetaData"]["WorkflowExecutionId"]
    except KeyError as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(
            PersonTrackingError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(output_object.return_output_object())
    # Check rekognition job status:
    dataplane = DataPlane()
    max_results = 1000
    pagination_token = ''
    finished = False
    # Pagination starts on the 1001st result. This while loop reads each page.
    while not finished:
        response = rek.get_person_tracking(JobId=job_id, MaxResults=max_results, NextToken=pagination_token)
        if response['JobStatus'] == "IN_PROGRESS":
            finished = True
            output_object.update_workflow_status("Executing")
            output_object.add_workflow_metadata(PersonTrackingJobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id)
            return output_object.return_output_object()
        elif response['JobStatus'] == "FAILED":
            finished = True
            output_object.update_workflow_status("Error")
            output_object.add_workflow_metadata(PersonTrackingJobId=job_id, PersonTrackingError=str(response["StatusMessage"]))
            raise MasExecutionError(output_object.return_output_object())
        elif response['JobStatus'] == "SUCCEEDED":
            if 'NextToken' in response:
                pagination_token = response['NextToken']
                # Persist rekognition results (current page)
                metadata_upload = dataplane.store_asset_metadata(asset_id, operator_name, workflow_id, response)
                if "Status" not in metadata_upload:
                    output_object.update_workflow_status("Error")
                    output_object.add_workflow_metadata(
                        PersonTrackingError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
                    raise MasExecutionError(output_object.return_output_object())
                else:
                    if metadata_upload["Status"] == "Success":
                        print("Uploaded metadata for asset: {asset}".format(asset=asset_id))
                    elif metadata_upload["Status"] == "Failed":
                        output_object.update_workflow_status("Error")
                        output_object.add_workflow_metadata(
                            PersonTrackingError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
                        raise MasExecutionError(output_object.return_output_object())
                    else:
                        output_object.update_workflow_status("Error")
                        output_object.add_workflow_metadata(
                            PersonTrackingError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
                        raise MasExecutionError(output_object.return_output_object())
            else:
                finished = True
                # Persist rekognition results (final page)
                metadata_upload = dataplane.store_asset_metadata(asset_id, operator_name, workflow_id, response)
                if "Status" not in metadata_upload:
                    output_object.update_workflow_status("Error")
                    output_object.add_workflow_metadata(
                        PersonTrackingError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
                    raise MasExecutionError(output_object.return_output_object())
                else:
                    if metadata_upload["Status"] == "Success":
                        print("Uploaded metadata for asset: {asset}".format(asset=asset_id))
                        output_object.update_workflow_status("Complete")
                        return output_object.return_output_object()
                    elif metadata_upload["Status"] == "Failed":
                        output_object.update_workflow_status("Error")
                        output_object.add_workflow_metadata(
                            PersonTrackingError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
                        raise MasExecutionError(output_object.return_output_object())
                    else:
                        output_object.update_workflow_status("Error")
                        output_object.add_workflow_metadata(
                            PersonTrackingError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
                        output_object.add_workflow_metadata(PersonTrackingJobId=job_id)
                        raise MasExecutionError(output_object.return_output_object())
        else:
            output_object.update_workflow_status("Error")
            output_object.add_workflow_metadata(PersonTrackingError="Unable to determine status")
            raise MasExecutionError(output_object.return_output_object())
def lambda_handler(event, context): print("We got the following event:\n", event) operator_object = MediaInsightsOperationHelper(event) bucket = '' key = '' try: if "Video" in event["Input"]["Media"]: bucket = event["Input"]["Media"]["Video"]["S3Bucket"] key = event["Input"]["Media"]["Video"]["S3Key"] elif "Image" in event["Input"]["Media"]: bucket = event["Input"]["Media"]["Image"]["S3Bucket"] key = event["Input"]["Media"]["Image"]["S3Key"] workflow_id = str(operator_object.workflow_execution_id) except KeyError as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(MediaconvertError="Missing a required metadata key {e}".format(e=e)) raise MasExecutionError(operator_object.return_output_object()) # Adding in exception block for now since we aren't guaranteed an asset id will be present, should remove later try: asset_id = operator_object.asset_id except KeyError as e: print("No asset id passed in with this workflow", e) asset_id = '' # Get metadata s3_cli = boto3.client("s3", region_name=region, config=Config(signature_version='s3v4', s3={'addressing_style': 'virtual'})) metadata_json = {} try: # The number of seconds that the Signed URL is valid: signed_url_expiration = 300 # Generate a signed URL for reading a file from S3 via HTTPS signed_url = s3_cli.generate_presigned_url('get_object', Params={'Bucket': bucket, 'Key': key}, ExpiresIn=signed_url_expiration) # Launch MediaInfo media_info = MediaInfo.parse(signed_url) # Save the result metadata_json = json.loads(media_info.to_json()) # If there's no Video, Audio, Image, or Text data then delete the file. track_types = [track['track_type'] for track in metadata_json['tracks']] if ('Video' not in track_types and 'Audio' not in track_types and 'Image' not in track_types and 'Text' not in track_types): print("ERROR: File does not contain valid video, audio, image, or text content") print("Deleting file s3://" + bucket + "/" + key) s3_cli.delete_object(Bucket=bucket, Key=key) operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(MediainfoError="File does not contain valid video, audio, image, or text content") raise MasExecutionError(operator_object.return_output_object()) except RuntimeError as e: # If MediaInfo could not run then we assume it is not a valid # media file and delete it print("Exception:\n", e) print("ERROR: File does not contain valid video, audio, image, or text content") print("Deleting file s3://" + bucket + "/" + key) s3_cli.delete_object(Bucket=bucket, Key=key) operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(MediainfoError="File does not contain valid video, audio, image, or text content") raise MasExecutionError(operator_object.return_output_object()) except Exception as e: print("Exception:\n", e) operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(MediainfoError="Unable to get Mediainfo results. " + str(e)) raise MasExecutionError(operator_object.return_output_object()) # Verify that the metadata is a dict, as required by the dataplane if type(metadata_json) != dict: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata( MediainfoError="Metadata must be of type dict. 
Found " + str(type(metadata_json)) + " instead.") raise MasExecutionError(operator_object.return_output_object()) # Pass metadata to downstream operators # Number of audio tracks is used by the Transcribe operator num_audio_tracks = len(list(filter(lambda i: i['track_type'] == 'Audio', metadata_json['tracks']))) operator_object.add_workflow_metadata(Mediainfo_num_audio_tracks=str(num_audio_tracks)) # Save metadata to dataplane operator_object.add_workflow_metadata(AssetId=asset_id, WorkflowExecutionId=workflow_id) dataplane = DataPlane() metadata_upload = dataplane.store_asset_metadata(asset_id, operator_object.name, workflow_id, metadata_json) # Validate that the metadata was saved to the dataplane if "Status" not in metadata_upload: operator_object.add_workflow_metadata( MediainfoError="Unable to upload metadata for asset: {asset}".format(asset=asset_id)) operator_object.update_workflow_status("Error") raise MasExecutionError(operator_object.return_output_object()) else: # Update the workflow status if metadata_upload["Status"] == "Success": print("Uploaded metadata for asset: {asset}".format(asset=asset_id)) operator_object.update_workflow_status("Complete") return operator_object.return_output_object() else: operator_object.add_workflow_metadata( MediainfoError="Unable to upload metadata for asset: {asset}".format(asset=asset_id)) operator_object.update_workflow_status("Error") raise MasExecutionError(operator_object.return_output_object())
def lambda_handler(event, context): print("We got the following event:\n", event) operator_object = MediaInsightsOperationHelper(event) try: bucket = operator_object.input["Media"]["Text"]["S3Bucket"] key = operator_object.input["Media"]["Text"]["S3Key"] except KeyError as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(TranslateError="No valid inputs {e}".format(e=e)) raise MasExecutionError(operator_object.return_output_object()) try: workflow_id = operator_object.workflow_execution_id except KeyError as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(TranslateError="Missing a required metadata key {e}".format(e=e)) raise MasExecutionError(operator_object.return_output_object()) try: asset_id = operator_object.asset_id except KeyError: print('No asset id for this workflow') asset_id = '' try: source_lang = operator_object.configuration["SourceLanguageCode"] target_lang = operator_object.configuration["TargetLanguageCode"] except KeyError: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(TranslateError="Language codes are not defined") raise MasExecutionError(operator_object.return_output_object()) try: s3_response = s3.get_object(Bucket=bucket, Key=key) transcribe_metadata = json.loads(s3_response["Body"].read().decode("utf-8")) transcript = transcribe_metadata["results"]["transcripts"][0]["transcript"] except Exception as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(TranslateError="Unable to read transcription from S3: {e}".format(e=str(e))) raise MasExecutionError(operator_object.return_output_object()) try: translation = translate_client.translate_text( Text=transcript, SourceLanguageCode=source_lang, TargetLanguageCode=target_lang ) except Exception as e: operator_object.update_workflow_status("Error") operator_object.add_workflow_metadata(TranslateError="Unable to get response from translate: {e}".format(e=str(e))) raise MasExecutionError(operator_object.return_output_object()) else: dataplane = DataPlane() metadata_upload = dataplane.store_asset_metadata(asset_id, operator_object.name, workflow_id, translation) if "Status" not in metadata_upload: operator_object.add_workflow_metadata( TranslateError="Unable to upload metadata for asset: {asset}".format(asset=asset_id)) operator_object.update_workflow_status("Error") raise MasExecutionError(operator_object.return_output_object()) else: if metadata_upload['Status'] == 'Success': operator_object.add_media_object('Text', metadata_upload['Bucket'], metadata_upload['Key']) operator_object.update_workflow_status("Complete") return operator_object.return_output_object() else: operator_object.add_workflow_metadata( TranslateError="Unable to upload metadata for asset: {asset}".format(asset=asset_id)) operator_object.update_workflow_status("Error") raise MasExecutionError(operator_object.return_output_object())