def get_webcaptions_json(operator_object, lang):
    """Read the WebCaptions JSON file for one language from the dataplane media bucket.

    :param operator_object: MediaInsightsOperationHelper for the current operator
    :param lang: language code used in the stored object key (WebCaptions_<lang>.json)
    :return: the parsed WebCaptions document
    :raises MasExecutionError: if required metadata is missing or the S3 read fails
    """
    try:
        # FIX: closed the unbalanced "{" in the log format string.
        print("get_webcaptions_json({})".format(lang))
        asset_id = operator_object.asset_id
        workflow_id = operator_object.workflow_execution_id
    # FIX: the exception must be bound with "as e" -- the original referenced
    # an undefined name "e" in the message and raised NameError here instead
    # of reporting the missing key.
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        # Captions live alongside the other workflow media artifacts for this asset.
        webcaptions_storage_path = dataplane.generate_media_storage_path(asset_id, workflow_id)
        bucket = webcaptions_storage_path['S3Bucket']
        key = webcaptions_storage_path['S3Key'] + "WebCaptions" + "_" + lang + ".json"
        print("get object {} {}".format(bucket, key))
        data = s3.get_object(Bucket=bucket, Key=key)
        webcaptions = json.loads(data['Body'].read().decode('utf-8'))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to get webcaptions from dataplane {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    return webcaptions
def PutWebCaptionsCollection(self, operator, collection):
    """Persist the captions collection for this asset and mark the workflow Complete.

    :param operator: operator name (not used directly; the helper's own name is stored)
    :param collection: list of caption metadata entries to store
    :return: the operator output object on success
    :raises MasExecutionError: if the dataplane store call does not succeed
    """
    payload = {"CaptionsCollection": collection}
    response = dataplane.store_asset_metadata(self.asset_id, self.operator_object.name, self.workflow_id, payload)
    # A response without "Status" means the dataplane call failed outright.
    if "Status" not in response:
        self.operator_object.update_workflow_status("Error")
        self.operator_object.add_workflow_metadata(
            WebCaptionsError="Unable to store captions collection metadata {e}".format(e=response))
        raise MasExecutionError(self.operator_object.return_output_object())
    if response["Status"] == "Success":
        self.operator_object.update_workflow_status("Complete")
        return self.operator_object.return_output_object()
    self.operator_object.update_workflow_status("Error")
    self.operator_object.add_workflow_metadata(
        WebCaptionsError="Unable to store captions collection {e}".format(e=response))
    raise MasExecutionError(self.operator_object.return_output_object())
def lambda_handler(event, context):
    """Kick off asynchronous Rekognition technical-cue detection for a proxy-encoded video.

    :param event: operator input event with ProxyEncode media and workflow identifiers
    :param context: Lambda context (unused)
    :return: the operator output object in Executing state
    :raises MasExecutionError: on missing inputs or an unsupported file type
    """
    print("We got the following event:\n", event)
    try:
        s3bucket = event["Input"]["Media"]["ProxyEncode"]["S3Bucket"]
        s3key = event["Input"]["Media"]["ProxyEncode"]["S3Key"]
        workflow_id = str(event["WorkflowExecutionId"])
        asset_id = event['AssetId']
    except Exception:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(TechnicalCueDetectionError="No valid inputs")
        raise MasExecutionError(output_object.return_output_object())
    print("Processing s3://" + s3bucket + "/" + s3key)
    # Only video containers are supported for segment detection.
    extension = os.path.splitext(s3key)[1].lower()
    if extension not in (".avi", ".mp4", ".mov"):
        print("ERROR: invalid file type")
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(TechnicalCueDetectionError="Not a valid file type")
        raise MasExecutionError(output_object.return_output_object())
    # Video processing is asynchronous; hand back the job id and report Executing.
    job_id = start_technical_cue_detection(s3bucket, urllib.parse.unquote_plus(s3key))
    output_object.update_workflow_status("Executing")
    output_object.add_workflow_metadata(JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id)
    return output_object.return_output_object()
def PutWebCaptions(self, webcaptions, language_code=None, source=""):
    """Store a WebCaptions document as operator metadata in the dataplane.

    FIX: removed an unreachable trailing block that built and returned a
    "metadata" dict -- every path above it already returns or raises, so it
    was dead code. If callers ever need that metadata (as they do from
    PutVTT), the early success return would have to change; confirm against
    callers before doing so.

    :param webcaptions: list of webcaption cue dicts to store
    :param language_code: optional language code used to derive the operator name
    :param source: optional source qualifier passed to WebCaptionsOperatorName
    :return: the operator output object on success
    :raises MasExecutionError: if the dataplane store call does not succeed
    """
    webcaptions_operator_name = self.WebCaptionsOperatorName(language_code, source)
    WebCaptions = {"WebCaptions": webcaptions}
    response = dataplane.store_asset_metadata(asset_id=self.asset_id,
                                              operator_name=webcaptions_operator_name,
                                              workflow_id=self.workflow_id,
                                              results=WebCaptions,
                                              paginate=False)
    # A response without "Status" means the dataplane call failed outright.
    if "Status" not in response:
        self.operator_object.update_workflow_status("Error")
        self.operator_object.add_workflow_metadata(WebCaptionsError="Unable to store captions {} {e}".format(webcaptions_operator_name, e=response))
        raise MasExecutionError(self.operator_object.return_output_object())
    if response["Status"] == "Success":
        return self.operator_object.return_output_object()
    self.operator_object.update_workflow_status("Error")
    self.operator_object.add_workflow_metadata(
        WebCaptionsError="Unable to store captions {} {e}".format(webcaptions_operator_name, e=response))
    raise MasExecutionError(self.operator_object.return_output_object())
def lambda_handler(event, context):
    """Start asynchronous Rekognition person tracking for video input.

    Image input is currently a pass-through (marked Complete without work).

    :param event: operator input event with Media and workflow identifiers
    :param context: Lambda context (unused)
    :return: the operator output object
    :raises MasExecutionError: on invalid inputs or an unsupported file type
    """
    print("We got the following event:\n", event)
    try:
        # NOTE(review): the Video branch reads ProxyEncode keys -- presumably
        # the proxy encode stage always runs before this operator; confirm.
        if "Video" in event["Input"]["Media"]:
            s3bucket = event["Input"]["Media"]["ProxyEncode"]["S3Bucket"]
            s3key = event["Input"]["Media"]["ProxyEncode"]["S3Key"]
        elif "Image" in event["Input"]["Media"]:
            s3bucket = event["Input"]["Media"]["Image"]["S3Bucket"]
            s3key = event["Input"]["Media"]["Image"]["S3Key"]
        else:
            # FIX: previously, when neither branch matched, s3bucket/s3key
            # stayed unbound and the function crashed below with an uncaught
            # NameError; raising here routes it into the existing
            # "No valid inputs" error path instead.
            raise ValueError("No Video or Image media in input")
        workflow_id = str(event["WorkflowExecutionId"])
        asset_id = event['AssetId']
    except Exception:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(PersonTrackingError="No valid inputs")
        raise MasExecutionError(output_object.return_output_object())
    print("Processing s3://"+s3bucket+"/"+s3key)
    valid_video_types = [".avi", ".mp4", ".mov"]
    valid_image_types = [".png", ".jpg", ".jpeg"]
    file_type = os.path.splitext(s3key)[1].lower()
    if file_type in valid_image_types:
        # TODO: implement image handling -- images are currently passed through.
        output_object.update_workflow_status("Complete")
        output_object.add_workflow_metadata(WorkflowExecutionId=workflow_id)
        return output_object.return_output_object()
    elif file_type in valid_video_types:
        # Video processing is asynchronous.
        job_id = start_person_tracking(s3bucket, urllib.parse.unquote_plus(s3key))
        output_object.update_workflow_status("Executing")
        output_object.add_workflow_metadata(JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id)
        return output_object.return_output_object()
    else:
        print("ERROR: invalid file type")
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(PersonTrackingError="Not a valid file type")
        raise MasExecutionError(output_object.return_output_object())
def lambda_handler(event, context):
    """Run Rekognition label detection: synchronously for images, as an async job for videos.

    :param event: operator input event with Media and workflow identifiers
    :param context: Lambda context (unused)
    :return: the operator output object
    :raises MasExecutionError: on invalid input, unsupported file type, or a
        failed metadata upload
    """
    print("We got the following event:\n", event)
    try:
        media = event["Input"]["Media"]
        if "ProxyEncode" in media:
            s3bucket = media["ProxyEncode"]["S3Bucket"]
            s3key = media["ProxyEncode"]["S3Key"]
        elif "Video" in media:
            s3bucket = media["Video"]["S3Bucket"]
            s3key = media["Video"]["S3Key"]
        elif "Image" in media:
            s3bucket = media["Image"]["S3Bucket"]
            s3key = media["Image"]["S3Key"]
        else:
            # FIX: previously, falling through every branch left s3bucket and
            # s3key unbound, crashing below with an uncaught NameError;
            # raising here routes it into the "No valid inputs" error path.
            raise ValueError("No ProxyEncode, Video or Image media in input")
        workflow_id = str(event["WorkflowExecutionId"])
        asset_id = event['AssetId']
    except Exception:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(LabelDetectionError="No valid inputs")
        raise MasExecutionError(output_object.return_output_object())
    print("Processing s3://"+s3bucket+"/"+s3key)
    valid_video_types = [".avi", ".mp4", ".mov"]
    valid_image_types = [".png", ".jpg", ".jpeg"]
    file_type = os.path.splitext(s3key)[1].lower()
    if file_type in valid_image_types:
        # Image processing is synchronous.
        response = detect_labels(s3bucket, urllib.parse.unquote_plus(s3key))
        output_object.add_workflow_metadata(AssetId=asset_id, WorkflowExecutionId=workflow_id)
        dataplane = DataPlane()
        # NOTE(review): operator_name is not defined in this function --
        # presumably a module-level constant; confirm.
        metadata_upload = dataplane.store_asset_metadata(asset_id, operator_name, workflow_id, response)
        # Consolidated from three identical error branches in the original
        # (missing Status, Status == "Failed", and any other Status all
        # produced the same error).
        if "Status" not in metadata_upload or metadata_upload["Status"] != "Success":
            output_object.update_workflow_status("Error")
            output_object.add_workflow_metadata(
                LabelDetectionError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
            raise MasExecutionError(output_object.return_output_object())
        print("Uploaded metadata for asset: {asset}".format(asset=asset_id))
        output_object.update_workflow_status("Complete")
        return output_object.return_output_object()
    elif file_type in valid_video_types:
        # Video processing is asynchronous.
        job_id = start_label_detection(s3bucket, urllib.parse.unquote_plus(s3key))
        output_object.update_workflow_status("Executing")
        output_object.add_workflow_metadata(JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id)
        return output_object.return_output_object()
    else:
        print("ERROR: invalid file type")
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(LabelDetectionError="Not a valid file type")
        raise MasExecutionError(output_object.return_output_object())
def check_wait_operation_lambda(event, context):
    '''
    Check if a workflow is still in a Waiting state.

    event is
    - Operation input
    - Operation configuration

    returns: Operation output
    '''
    logger.info(json.dumps(event))
    operator_object = MediaInsightsOperationHelper(event)
    execution_table = DYNAMO_RESOURCE.Table(WORKFLOW_EXECUTION_TABLE_NAME)
    # Consistent read so we do not act on a stale workflow status.
    response = execution_table.get_item(
        Key={
            'Id': operator_object.workflow_execution_id
        },
        ConsistentRead=True)
    if "Item" in response:
        workflow_execution = response["Item"]
    else:
        # FIX: the original formatted an undefined name "e" into this message
        # (raising NameError instead of the intended error) and carried a
        # leftover commented-out ChaliceViewError call; both removed.
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(WaitError="Unable to find Waiting workflow execution {}".format(
            operator_object.workflow_execution_id))
        raise MasExecutionError(operator_object.return_output_object())
    logger.info("workflow_execution: {}".format(
        json.dumps(workflow_execution)))
    if workflow_execution["Status"] == awsmie.WORKFLOW_STATUS_WAITING:
        # Still waiting on the external process; keep the operator Executing.
        operator_object.update_workflow_status("Executing")
        return operator_object.return_output_object()
    elif workflow_execution["Status"] == awsmie.WORKFLOW_STATUS_STARTED:
        # The external process resumed the workflow; this operator is done.
        operator_object.update_workflow_status("Complete")
    else:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(WaitError="Unexpected workflow execution status {}".format(
            workflow_execution["Status"]))
        raise MasExecutionError(operator_object.return_output_object())
    return operator_object.return_output_object()
def start_wait_operation_lambda(event, context):
    '''
    Pause a workflow to wait for external processing

    event is
    - Operation input
    - Operation configuration

    returns: Operation output
    '''
    logger.info(json.dumps(event))
    helper = MediaInsightsOperationHelper(event)
    try:
        # Flip the workflow into the Waiting state; an external caller
        # resumes it later.
        update_workflow_execution_status(helper.workflow_execution_id, awsmie.WORKFLOW_STATUS_WAITING, "")
    except Exception as err:
        helper.update_workflow_status("Error")
        helper.add_workflow_metadata(
            WaitError="Unable to set workflow status to Waiting {e}".format(
                e=str(err)))
        raise MasExecutionError(helper.return_output_object())
    return helper.return_output_object()
def __init__(self, operator_object):
    """
    :param operator_object: the operator helper for the current workflow step
    """
    print("WebCaptions operator_object = {}".format(operator_object))
    self.operator_object = operator_object
    try:
        self.transcribe_operator_name = "Transcribe"
        self.workflow_id = operator_object.workflow_execution_id
        self.asset_id = operator_object.asset_id
        self.marker = "<123>"
        config = self.operator_object.configuration
        # Language codes are optional; the attributes are only set when the
        # corresponding configuration keys are present.
        if "SourceLanguageCode" in config:
            self.source_language_code = config["SourceLanguageCode"]
            self.operator_name_with_lang = self.operator_object.name + "_" + self.source_language_code
        if "TargetLanguageCode" in config:
            self.target_language_code = config["TargetLanguageCode"]
    except KeyError as err:
        self.operator_object.update_workflow_status("Error")
        self.operator_object.add_workflow_metadata(
            WebCaptionsError="No valid inputs {e}".format(e=err))
        raise MasExecutionError(operator_object.return_output_object())
def create_vtt(event, context):
    """Build a VTT caption file for every target language and store the collection."""
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)
    try:
        targetLanguageCodes = webcaptions_object.operator_object.configuration["TargetLanguageCodes"]
    except KeyError as e:
        webcaptions_object.operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            WebCaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    # Convert the stored WebCaptions for each language to VTT and remember
    # where each generated file landed.
    captions_collection = []
    for lang in targetLanguageCodes:
        webcaptions = webcaptions_object.GetWebCaptions(lang)
        vtt = webcaptions_object.WebCaptionsToVTT(webcaptions)
        captions_collection.append(webcaptions_object.PutVTT(lang, vtt))
    webcaptions_object.PutMediaCollection(operator_object.name, {"CaptionsCollection": captions_collection})
    operator_object.update_workflow_status("Complete")
    return operator_object.return_output_object()
def start_translate_webcaptions(event, context):
    """Begin translating source-language WebCaptions into each target language."""
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)
    try:
        source_lang = operator_object.configuration["SourceLanguageCode"]
        target_langs = operator_object.configuration["TargetLanguageCodes"]
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="Language codes are not defined")
        raise MasExecutionError(operator_object.return_output_object())
    # Custom terminologies are optional.
    terminology_names = operator_object.configuration.get("TerminologyNames", [])
    webcaptions = webcaptions_object.GetWebCaptions(source_lang)
    # Amazon Translate takes a list of target languages but only allows one
    # item in it, so the helper iterates over the targets for us.
    webcaptions_object.TranslateWebCaptions(webcaptions, source_lang, target_langs, terminology_names)
    return operator_object.return_output_object()
def vttToWebCaptions(operator_object, vttObject):
    """Parse a VTT file from S3 into a list of webcaption cue dicts.

    :param operator_object: operator helper used for error reporting
    :param vttObject: dict with "Bucket" and "Key" of the VTT file
    :return: list of {"start", "end", "caption"} dicts, times in seconds
    :raises MasExecutionError: if the VTT object cannot be read
    """
    s3 = boto3.client('s3')
    try:
        print("Getting data from s3://" + vttObject["Bucket"] + "/" + vttObject["Key"])
        obj = s3.get_object(Bucket=vttObject["Bucket"], Key=vttObject["Key"])
        vtt = obj['Body'].read().decode('utf-8')
    except Exception as err:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            WebCaptionsError="Unable read VTT file. " + str(err))
        raise MasExecutionError(operator_object.return_output_object())
    # webvtt parses the cue list; convert each cue's timestamps to seconds.
    webcaptions = []
    for caption in webvtt.read_buffer(StringIO(vtt)):
        webcaptions.append({
            "start": formatTimeVTTtoSeconds(caption.start),
            "end": formatTimeVTTtoSeconds(caption.end),
            "caption": caption.text,
        })
    return webcaptions
def __init__(self, operator_object):
    """
    :param operator_object: the operator helper for the current workflow step
    """
    print("WebCaptions operator_object = {}".format(operator_object))
    self.operator_object = operator_object
    try:
        self.transcribe_operator_name = "TranscribeVideo"
        self.workflow_id = operator_object.workflow_execution_id
        self.asset_id = operator_object.asset_id
        self.marker = "<span>"
        self.contentType = "text/html"
        self.existing_subtitles = False
        configuration = self.operator_object.configuration
        # All of the following configuration keys are optional; only set the
        # attributes that were actually supplied.
        if "SourceLanguageCode" in configuration:
            self.source_language_code = configuration["SourceLanguageCode"]
            self.operator_name_with_lang = self.operator_object.name + "_" + self.source_language_code
        if "TargetLanguageCode" in configuration:
            self.target_language_code = configuration["TargetLanguageCode"]
        if "ExistingSubtitlesObject" in configuration:
            self.existing_subtitles_object = configuration["ExistingSubtitlesObject"]
            self.existing_subtitles = True
    except KeyError as err:
        self.operator_object.update_workflow_status("Error")
        self.operator_object.add_workflow_metadata(WebCaptionsError="No valid inputs {e}".format(e=err))
        raise MasExecutionError(operator_object.return_output_object())
def start_face_search(bucket, key, collection_id):
    """Start an asynchronous Rekognition face search job against a face collection.

    :param bucket: S3 bucket holding the video
    :param key: S3 key of the video
    :param collection_id: Rekognition face collection to search against
    :return: the Rekognition job id
    :raises MasExecutionError: if the collection is inaccessible or the job fails to start
    """
    rek = boto3.client('rekognition')
    try:
        # Fail fast if the face collection is missing or inaccessible.
        rek.describe_collection(CollectionId=collection_id)
        response = rek.start_face_search(
            Video={'S3Object': {'Bucket': bucket, 'Name': key}},
            CollectionId=collection_id,
            NotificationChannel={
                'SNSTopicArn': os.environ['REKOGNITION_SNS_TOPIC_ARN'],
                'RoleArn': os.environ['REKOGNITION_ROLE_ARN']
            },
        )
        print('Job Id (face search): ' + response['JobId'])
        return response['JobId']
    except Exception as err:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(FaceSearchError=str(err))
        raise MasExecutionError(output_object.return_output_object())
def lambda_handler(event, context):
    """Generate a VMAP ad-break file from stored slotDetection results.

    :param event: operator input event with Video media and the asset id
    :param context: Lambda context (unused)
    :return: the operator output object with a VMAP media object attached
    :raises MasExecutionError: on missing inputs, metadata retrieval failure,
        or VMAP generation failure
    """
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # Get media metadata from input event
    try:
        asset_id = operator_object.asset_id
        bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            VmapGenerationError="Missing a required metadata key {e}".format(
                e=exception))
        raise MasExecutionError(operator_object.return_output_object())
    # Page through the dataplane with the cursor to collect every
    # slotDetection result for this asset.
    try:
        slots = {}
        params = {"asset_id": asset_id, "operator_name": "slotDetection"}
        while True:
            resp = dataplane.retrieve_asset_metadata(**params)
            if "operator" in resp and resp["operator"] == "slotDetection":
                __update_and_merge_lists(slots, resp["results"])
            if "cursor" not in resp:
                break
            params["cursor"] = resp["cursor"]
        print("slots: {}".format(slots))
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            VmapGenerationError="Unable to retrieve metadata for asset {}: {}".
            format(asset_id, exception))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        # Keep only the highest-scoring slots for ad break placement.
        slots["slots"].sort(key=lambda slot: slot["Score"])
        top_slots = slots["slots"][-top_slots_qty:]
        # Generate VMAP and register it as a media object on the workflow.
        key = 'private/assets/{}/vmap/ad_breaks.vmap'.format(asset_id)
        __write_vmap(top_slots, bucket, key)
        operator_object.add_media_object("VMAP", bucket, key)
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    except Exception as exception:
        print("Exception:\n", exception)
        operator_object.update_workflow_status("Error")
        # FIX: stringify the exception -- every other error path stores a
        # string, and a raw Exception object is not JSON-serializable.
        operator_object.add_workflow_metadata(VmapGenerationError=str(exception))
        raise MasExecutionError(operator_object.return_output_object())
def search_faces_by_image(bucket, key, collection_id):
    """Run a synchronous Rekognition face search against a single image in S3."""
    image = {'S3Object': {'Bucket': bucket, 'Name': key}}
    try:
        response = rek.search_faces_by_image(CollectionId=collection_id, Image=image)
    except Exception as err:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(FaceSearchError=str(err))
        raise MasExecutionError(output_object.return_output_object())
    return response
def detect_text(bucket, key):
    """Run synchronous Rekognition text detection on a single image in S3."""
    image = {'S3Object': {'Bucket': bucket, 'Name': key}}
    try:
        response = rek.detect_text(Image=image)
    except Exception as err:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(TextDetectionError=str(err))
        raise MasExecutionError(output_object.return_output_object())
    return response
def recognize_celebrities(bucket, key):
    """Run synchronous Rekognition celebrity recognition on a single image in S3."""
    image = {'S3Object': {'Bucket': bucket, 'Name': key}}
    try:
        response = rek.recognize_celebrities(Image=image)
    except Exception as err:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(CelebrityRecognitionError=str(err))
        raise MasExecutionError(output_object.return_output_object())
    return response
def detect_moderation_labels(bucket, key):
    """Run synchronous Rekognition content moderation on a single image in S3."""
    # This helper creates its own client rather than using a module-level one.
    rek = boto3.client('rekognition')
    image = {'S3Object': {'Bucket': bucket, 'Name': key}}
    try:
        response = rek.detect_moderation_labels(Image=image)
    except Exception as err:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(ContentModerationError=str(err))
        raise MasExecutionError(output_object.return_output_object())
    return response
def test_lambda_handler(event, context, operator_name, mediaType, status, type):
    """Configurable test operator: emits workflow metadata and a media object
    of the requested type, then completes or fails on request.

    :param event: operator input event
    :param context: Lambda context (unused)
    :param operator_name: unused; the helper's own name is used instead
    :param mediaType: media type to emit ("Video"/"Audio"/"Image"/"Text");
        overridable via the OutputMediaType configuration key
    :param status: "Fail" forces an Error workflow status on the way out
    :param type: unused
    :return: the operator output object
    :raises MasExecutionError: if anything in the happy path throws
    """
    try:
        print(json.dumps(event))
        # set output status, media, and metadata for workflow - these get
        # passed to other stages of the workflow through the control plane
        # NOTE(review): dataplane is never used below; kept in case the
        # constructor has side effects -- confirm and remove if not.
        dataplane = DataPlane()
        operator_object = MediaInsightsOperationHelper(event)
        operator_object.update_workflow_status("Complete")
        metadata = {}
        metadata[operator_object.name] = {
            "Meta": "Workflow metadata for " + operator_object.name
        }
        if "TestCustomConfig" in operator_object.configuration:
            metadata[operator_object.name]["TestCustomConfig"] = operator_object.configuration["TestCustomConfig"]
        operator_object.add_workflow_metadata_json(metadata)
        if "OutputMediaType" in operator_object.configuration:
            mediaType = operator_object.configuration["OutputMediaType"]
        if mediaType == "Video":
            operator_object.add_media_object(
                "Video", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/video".format(operator_object.name))
        elif mediaType == "Audio":
            operator_object.add_media_object(
                "Audio", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/audio".format(operator_object.name))
        elif mediaType == "Image":
            # FIX: this branch previously registered the media object with
            # type "Text" (copy-paste error); it now registers "Image".
            operator_object.add_media_object(
                "Image", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/image".format(operator_object.name))
        elif mediaType == "Text":
            operator_object.add_media_object(
                "Text", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/text".format(operator_object.name))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            Message="Oh no! Something went wrong: {}".format(str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if status == "Fail":
            operator_object.update_workflow_status("Error")
        else:
            operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
def put_webcaptions_json(operator_object, webcaptions, lang):
    """Write a WebCaptions JSON file to S3 and register its location as asset metadata.

    FIX: removed an unreachable final "return operator_metadata" (all paths
    above it already return or raise) and an unused local, and bound the
    KeyError so the handler no longer raises NameError on an undefined "e".

    :param operator_object: MediaInsightsOperationHelper for the current operator
    :param webcaptions: WebCaptions document to serialize and store
    :param lang: language code used in the object key (WebCaptions_<lang>.json)
    :return: the operator output object on success
    :raises MasExecutionError: on missing metadata or a failed metadata store
    """
    try:
        # FIX: closed the unbalanced "{" in the log format string.
        print("put_webcaptions_json({})".format(lang))
        asset_id = operator_object.asset_id
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    webcaptions_storage_path = dataplane.generate_media_storage_path(asset_id, workflow_id)
    bucket = webcaptions_storage_path['S3Bucket']
    key = webcaptions_storage_path['S3Key']+"WebCaptions"+"_"+lang+".json"
    print("put object {} {}".format(bucket, key))
    s3.put_object(Bucket=bucket, Key=key, Body=json.dumps(webcaptions))
    # Register the S3 location of the captions file as operator metadata.
    operator_metadata = {"S3Bucket": bucket, "S3Key": key, "Operator": "WebCaptions"+"_"+lang}
    metadata_upload = dataplane.store_asset_metadata(asset_id, "WebCaptions"+"_"+lang, workflow_id, operator_metadata)
    if "Status" not in metadata_upload:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to store webcaptions file {e}".format(e=metadata_upload))
        raise MasExecutionError(operator_object.return_output_object())
    if metadata_upload["Status"] == "Success":
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    operator_object.update_workflow_status("Error")
    operator_object.add_workflow_metadata(
        CaptionsError="Unable to store webcaptions file {e}".format(e=metadata_upload))
    raise MasExecutionError(operator_object.return_output_object())
def CaptionsOperatorName(self, language_code=None):
    """Build the Captions operator name, suffixed with a language code.

    :param language_code: explicit language code; falls back to the instance's
        source_language_code when omitted
    :return: "Captions_<language code>"
    :raises MasExecutionError: if no language code is available
    """
    # Shouldn't assume WebCaptions operator is WebCaptions, maybe pass it in the configuration?
    operator_name = "Captions"
    if language_code is not None:
        return operator_name + "_" + language_code
    try:
        name = operator_name + "_" + self.source_language_code
    # FIX: a missing source_language_code attribute raises AttributeError,
    # not the KeyError the original caught, and the exception must be bound
    # with "as e" -- the original formatted an undefined name "e".
    except AttributeError as e:
        self.operator_object.update_workflow_status("Error")
        self.operator_object.add_workflow_metadata(WebCaptionsError="Missing language code for WebCaptions {e}".format(e=e))
        raise MasExecutionError(self.operator_object.return_output_object())
    print("CaptionsOperatorName() Name {}".format(name))
    return name
def start_label_detection(bucket, key):
    """Start an asynchronous Rekognition label detection job for a video in S3.

    :return: the Rekognition job id
    :raises MasExecutionError: if the job fails to start
    """
    try:
        response = rek.start_label_detection(
            Video={'S3Object': {'Bucket': bucket, 'Name': key}},
            NotificationChannel={
                'SNSTopicArn': os.environ['REKOGNITION_SNS_TOPIC_ARN'],
                'RoleArn': os.environ['REKOGNITION_ROLE_ARN']
            })
        print('Job Id (label_detection): ' + response['JobId'])
        return response['JobId']
    except Exception as err:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(LabelDetectionError=str(err))
        raise MasExecutionError(output_object.return_output_object())
def start_technical_cue_detection(bucket, key):
    """Start an asynchronous Rekognition segment detection job limited to technical cues.

    :return: the Rekognition job id
    :raises MasExecutionError: if the job fails to start
    """
    try:
        response = rek.start_segment_detection(
            Video={'S3Object': {
                'Bucket': bucket,
                'Name': key
            }},
            NotificationChannel={
                'SNSTopicArn': os.environ['REKOGNITION_SNS_TOPIC_ARN'],
                'RoleArn': os.environ['REKOGNITION_ROLE_ARN']
            },
            SegmentTypes=['TECHNICAL_CUE'])
        # FIX: corrected the "techncal" typo in the log message.
        print('Job Id (technical_cue_detection): ' + response['JobId'])
        return response['JobId']
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(TechnicalCueDetectionError=str(e))
        raise MasExecutionError(output_object.return_output_object())
def start_face_detection(bucket, key):
    """Start an asynchronous Rekognition face detection job (all face attributes).

    :return: the Rekognition job id
    :raises MasExecutionError: if the job fails to start
    """
    # This helper creates its own client rather than using a module-level one.
    rek = boto3.client('rekognition')
    try:
        response = rek.start_face_detection(
            Video={'S3Object': {'Bucket': bucket, 'Name': key}},
            NotificationChannel={
                'SNSTopicArn': os.environ['REKOGNITION_SNS_TOPIC_ARN'],
                'RoleArn': os.environ['REKOGNITION_ROLE_ARN']
            },
            FaceAttributes='ALL')
        print('Job Id (face detection): ' + response['JobId'])
        return response['JobId']
    except Exception as err:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(FaceDetectionError=str(err))
        raise MasExecutionError(output_object.return_output_object())
def lambda_handler(event, context):
    # Submit a MediaConvert job that produces (1) an audio-only MP4 and
    # (2) periodic frame-capture thumbnails for the input video, then report
    # the job id back to the workflow as Executing.
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    try:
        workflow_id = str(operator_object.workflow_execution_id)
        bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
        key = operator_object.input["Media"]["Video"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            MediaconvertError="Missing a required metadata key {e}".format(
                e=e))
        raise MasExecutionError(operator_object.return_output_object())
    # Adding in exception block for now since we aren't guaranteed an asset id will be present, should remove later
    try:
        asset_id = operator_object.asset_id
    except KeyError as e:
        print("No asset id passed in with this workflow", e)
        asset_id = ''
    # Output locations: workflow artifacts under the workflow prefix,
    # thumbnails directly under the asset prefix.
    file_input = "s3://" + bucket + "/" + key
    destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/workflows/" + workflow_id + "/"
    thumbnail_destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/"
    # MediaConvert requires an account-specific endpoint; discover it first.
    try:
        response = mediaconvert.describe_endpoints()
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        mediaconvert_endpoint = response["Endpoints"][0]["Url"]
        customer_mediaconvert = boto3.client(
            "mediaconvert",
            region_name=region,
            endpoint_url=mediaconvert_endpoint)
        try:
            response = customer_mediaconvert.create_job(
                Role=mediaconvert_role,
                Settings={
                    "OutputGroups": [{
                        # Output group 1: audio-only MP4 (AAC stereo).
                        "Name": "File Group",
                        "Outputs": [{
                            "ContainerSettings": {
                                "Container": "MP4",
                                "Mp4Settings": {
                                    "CslgAtom": "INCLUDE",
                                    "FreeSpaceBox": "EXCLUDE",
                                    "MoovPlacement": "PROGRESSIVE_DOWNLOAD"
                                }
                            },
                            "AudioDescriptions": [{
                                "AudioTypeControl": "FOLLOW_INPUT",
                                "AudioSourceName": "Audio Selector 1",
                                "CodecSettings": {
                                    "Codec": "AAC",
                                    "AacSettings": {
                                        "AudioDescriptionBroadcasterMix": "NORMAL",
                                        "Bitrate": 96000,
                                        "RateControlMode": "CBR",
                                        "CodecProfile": "LC",
                                        "CodingMode": "CODING_MODE_2_0",
                                        "RawFormat": "NONE",
                                        "SampleRate": 48000,
                                        "Specification": "MPEG4"
                                    }
                                },
                                "LanguageCodeControl": "FOLLOW_INPUT"
                            }],
                            "Extension": "mp4",
                            "NameModifier": "_audio"
                        }],
                        "OutputGroupSettings": {
                            "Type": "FILE_GROUP_SETTINGS",
                            "FileGroupSettings": {
                                "Destination": destination
                            }
                        }
                    }, {
                        # Output group 2: frame-capture thumbnails.
                        "CustomName": "thumbnail",
                        "Name": "File Group",
                        "Outputs": [{
                            "ContainerSettings": {
                                "Container": "RAW"
                            },
                            "VideoDescription": {
                                "ScalingBehavior": "DEFAULT",
                                "TimecodeInsertion": "DISABLED",
                                "AntiAlias": "ENABLED",
                                "Sharpness": 50,
                                "CodecSettings": {
                                    "Codec": "FRAME_CAPTURE",
                                    "FrameCaptureSettings": {
                                        # One frame every 5 seconds, at most 2 captures.
                                        "FramerateNumerator": 1,
                                        "FramerateDenominator": 5,
                                        "MaxCaptures": 2,
                                        "Quality": 80
                                    }
                                },
                                "DropFrameTimecode": "ENABLED",
                                "ColorMetadata": "INSERT"
                            },
                            "NameModifier": "_thumbnail"
                        }],
                        "OutputGroupSettings": {
                            "Type": "FILE_GROUP_SETTINGS",
                            "FileGroupSettings": {
                                "Destination": thumbnail_destination
                            }
                        }
                    }],
                    "AdAvailOffset": 0,
                    "Inputs": [{
                        "AudioSelectors": {
                            "Audio Selector 1": {
                                "Offset": 0,
                                "DefaultSelection": "DEFAULT",
                                "ProgramSelection": 1
                            }
                        },
                        "VideoSelector": {
                            "ColorSpace": "FOLLOW"
                        },
                        "FilterEnable": "AUTO",
                        "PsiControl": "USE_PSI",
                        "FilterStrength": 0,
                        "DeblockFilter": "DISABLED",
                        "DenoiseFilter": "DISABLED",
                        "TimecodeSource": "EMBEDDED",
                        "FileInput": file_input
                    }]
                })
        # TODO: Add support for boto client error handling
        except Exception as e:
            print("Exception:\n", e)
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(MediaconvertError=str(e))
            raise MasExecutionError(operator_object.return_output_object())
        else:
            # Job submitted; a later status-check operator polls this job id.
            job_id = response['Job']['Id']
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(MediaconvertJobId=job_id,
                                                  MediaconvertInputFile=key,
                                                  AssetId=asset_id,
                                                  WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
def lambda_handler(event, context):
    """Check the status of an Amazon Transcribe job and finalize it on completion.

    While the job is IN_PROGRESS the workflow stays "Executing".  When the job
    is COMPLETED, the transcript JSON is fetched from the Transcribe result
    URI, a plain-text version is written to S3 as transcript.txt, and the full
    transcript JSON (plus a pointer to the text file) is stored in the
    dataplane.  Any failure raises MasExecutionError with the workflow marked
    "Error".

    Bug fix: the plain-text transcript was previously built with
    ``text_only_transcript.join(transcript)``, which interleaves the
    previously-collected text between every character of the next segment
    whenever the result contains more than one transcript segment.  Segments
    are now simply concatenated.
    """
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # If Transcribe wasn't run due to silent audio, then we're done
    if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"] and event[
            "Input"]["MetaData"]["Mediainfo_num_audio_tracks"] == "0":
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    try:
        job_id = operator_object.metadata["TranscribeJobId"]
        workflow_id = operator_object.workflow_execution_id
        asset_id = operator_object.asset_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        response = transcribe.get_transcription_job(
            TranscriptionJobName=job_id)
        print(response)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(TranscribeError=str(e),
                                              TranscribeJobId=job_id)
        raise MasExecutionError(operator_object.return_output_object())
    else:
        status = response["TranscriptionJob"]["TranscriptionJobStatus"]
        if status == "IN_PROGRESS":
            # Still running: report progress and let the workflow re-poll.
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        elif status == "FAILED":
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                TranscribeError=str(
                    response["TranscriptionJob"]["FailureReason"]))
            raise MasExecutionError(operator_object.return_output_object())
        elif status == "COMPLETED":
            # Fetch the transcript JSON that Transcribe wrote for this job.
            transcribe_uri = response["TranscriptionJob"]["Transcript"][
                "TranscriptFileUri"]
            http = urllib3.PoolManager()
            transcription = http.request('GET', transcribe_uri)
            transcription_data = transcription.data.decode("utf-8")
            transcription_json = json.loads(transcription_data)
            # Concatenate all transcript segments into one plain-text string.
            # (Fixed: was str.join(), which corrupted multi-segment results.)
            text_only_transcript = ''
            for transcripts in transcription_json["results"]["transcripts"]:
                text_only_transcript += transcripts["transcript"]
            print(text_only_transcript)
            # Persist the plain-text transcript next to the asset in S3.
            dataplane = DataPlane()
            s3 = boto3.client('s3')
            transcript_storage_path = dataplane.generate_media_storage_path(
                asset_id, workflow_id)
            key = transcript_storage_path['S3Key'] + "transcript.txt"
            bucket = transcript_storage_path['S3Bucket']
            s3.put_object(Bucket=bucket, Key=key, Body=text_only_transcript)
            transcription_json["TextTranscriptUri"] = {
                "S3Bucket": bucket,
                "S3Key": key
            }
            # Store the full transcript JSON in the dataplane.
            metadata_upload = dataplane.store_asset_metadata(
                asset_id, operator_object.name, workflow_id,
                transcription_json)
            if "Status" not in metadata_upload:
                operator_object.add_workflow_metadata(
                    TranscribeError=
                    "Unable to upload metadata for asset: {asset}".format(
                        asset=asset_id),
                    TranscribeJobId=job_id)
                operator_object.update_workflow_status("Error")
                raise MasExecutionError(operator_object.return_output_object())
            if metadata_upload['Status'] == 'Success':
                # Expose the transcript text object to downstream operators.
                operator_object.add_media_object('Text',
                                                 metadata_upload['Bucket'],
                                                 metadata_upload['Key'])
                operator_object.add_workflow_metadata(TranscribeJobId=job_id)
                operator_object.update_workflow_status("Complete")
                return operator_object.return_output_object()
            operator_object.add_workflow_metadata(
                TranscribeError=
                "Unable to upload metadata for asset: {asset}".format(
                    asset=asset_id),
                TranscribeJobId=job_id)
            operator_object.update_workflow_status("Error")
            raise MasExecutionError(operator_object.return_output_object())
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeError="Unable to determine status")
            raise MasExecutionError(operator_object.return_output_object())
def lambda_handler(event, context):
    """Start an Amazon Transcribe transcription job for the workflow's media.

    Picks the media file (preferring ProxyEncode, then Video, then Audio),
    collects optional Transcribe settings from the operator configuration, and
    starts an asynchronous transcription job named "transcribe-<workflow_id>".
    If mediainfo reported zero audio tracks the operator completes immediately
    without starting a job.

    Bug fixes relative to the previous revision:
    - ``language_code`` could be unbound (NameError) when no
      ``TranscribeLanguage`` was configured; it now defaults to ``None`` and
      ``LanguageCode`` is only placed in the job config when known.
    - The post-start status check compared against ``"COMPLETE"``, but the
      Transcribe API reports ``"COMPLETED"`` (and deferred jobs report
      ``"QUEUED"``); both now take the normal "Executing" path instead of
      falling into the unhandled-error branch.
    """
    print("We got this event:\n", event)
    valid_types = ["mp3", "mp4", "wav", "flac"]
    transcribe_job_config = {}
    optional_settings = {}
    model_settings = {}
    job_execution_settings = {}
    content_redaction_settings = {}
    identify_language = False
    language_options = []
    language_code = None  # stays None when no TranscribeLanguage is configured
    operator_object = MediaInsightsOperationHelper(event)
    workflow_id = str(event["WorkflowExecutionId"])
    asset_id = event['AssetId']
    job_id = "transcribe" + "-" + workflow_id
    try:
        # Prefer the proxy encode when available, then original video/audio.
        if "ProxyEncode" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["ProxyEncode"]["S3Bucket"]
            key = event["Input"]["Media"]["ProxyEncode"]["S3Key"]
        elif "Video" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["Video"]["S3Bucket"]
            key = event["Input"]["Media"]["Video"]["S3Key"]
        elif "Audio" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["Audio"]["S3Bucket"]
            key = event["Input"]["Media"]["Audio"]["S3Key"]
        # NameError here (no media matched) is caught below as "No valid inputs".
        file_type = key.split('.')[-1]
    except Exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="No valid inputs")
        raise MasExecutionError(operator_object.return_output_object())
    if file_type not in valid_types:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="Not a valid file type")
        raise MasExecutionError(operator_object.return_output_object())
    if "TranscribeLanguage" in operator_object.configuration:
        language_code = operator_object.configuration["TranscribeLanguage"]
        if language_code == 'auto':
            identify_language = True
    else:
        # No source language configured; let Transcribe identify it.
        identify_language = True
    media_file = 'https://s3.' + region + '.amazonaws.com/' + bucket + '/' + key
    # Read optional transcription job settings:
    for option in ("VocabularyName", "ShowSpeakerLabels", "MaxSpeakerLabels",
                   "ChannelIdentification", "MaxAlternatives",
                   "VocabularyFilterName", "VocabularyFilterMethod"):
        if option in operator_object.configuration:
            optional_settings[option] = operator_object.configuration[option]
    if "LanguageModelName" in operator_object.configuration:
        model_settings["LanguageModelName"] = operator_object.configuration[
            "LanguageModelName"]
    for option in ("AllowDeferredExecution", "DataAccessRoleArn"):
        if option in operator_object.configuration:
            job_execution_settings[option] = operator_object.configuration[
                option]
    for option in ("RedactionType", "RedactionOutput"):
        if option in operator_object.configuration:
            content_redaction_settings[option] = operator_object.configuration[
                option]
    if "IdentifyLanguage" in operator_object.configuration:
        identify_language = operator_object.configuration["IdentifyLanguage"]
    if "LanguageOptions" in operator_object.configuration:
        language_options = operator_object.configuration["LanguageOptions"]
    # Combine all the defined transcription job settings into a single dict:
    transcribe_job_config["TranscriptionJobName"] = job_id
    transcribe_job_config["Media"] = {"MediaFileUri": media_file}
    transcribe_job_config["MediaFormat"] = file_type
    if language_code is not None:
        transcribe_job_config["LanguageCode"] = language_code
    transcribe_job_config["IdentifyLanguage"] = identify_language
    if len(optional_settings) > 0:
        transcribe_job_config["Settings"] = optional_settings
    if len(model_settings) > 0:
        transcribe_job_config["ModelSettings"] = model_settings
    if len(job_execution_settings) > 0:
        transcribe_job_config["JobExecutionSettings"] = job_execution_settings
    if len(content_redaction_settings) > 0:
        transcribe_job_config["ContentRedaction"] = content_redaction_settings
    if len(language_options) > 0:
        transcribe_job_config["LanguageOptions"] = language_options
    # If mediainfo data is available then use it to avoid transcribing silent videos.
    if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"]:
        num_audio_tracks = event["Input"]["MetaData"][
            "Mediainfo_num_audio_tracks"]
        # Check to see if audio tracks were detected by mediainfo
        if num_audio_tracks == "0":
            # If there is no input audio then we're done.
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
    try:
        if identify_language:
            transcribe_job_config['IdentifyLanguage'] = True
            # LanguageCode and IdentifyLanguage are mutually exclusive;
            # pop() tolerates LanguageCode never having been set.
            transcribe_job_config.pop("LanguageCode", None)
        # Run the transcribe job.
        # The ** operator converts the job config dict to keyword arguments.
        response = transcribe.start_transcription_job(**transcribe_job_config)
        print(response)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(transcribe_error=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        status = response["TranscriptionJob"]["TranscriptionJobStatus"]
        if status in ("QUEUED", "IN_PROGRESS", "COMPLETED"):
            # Job accepted; the status-check lambda polls until it finishes.
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        elif status == "FAILED":
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                TranscribeError=str(
                    response["TranscriptionJob"]["FailureReason"]))
            raise MasExecutionError(operator_object.return_output_object())
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                TranscribeError="Unhandled error for this job: {job_id}".
                format(job_id=job_id))
            raise MasExecutionError(operator_object.return_output_object())
def web_to_vtt(event, context):
    """Convert stored WebCaptions into a WebVTT file per target language.

    For each language in the operator's TargetLanguageCodes, the WebCaptions
    JSON is read from S3, rendered as a WEBVTT document (one cue per caption),
    and written back to S3 as Captions_<lang>.vtt.  A collection describing
    every generated file is then stored in the dataplane.

    Fixes relative to the previous revision: removed dead commented-out code
    and unused locals, replaced index-based iteration with direct iteration,
    and corrected the error message (it said "srt" in this VTT operator).
    """
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''
    try:
        targetLanguageCodes = operator_object.configuration["TargetLanguageCodes"]
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    captions_collection = []
    for lang in targetLanguageCodes:
        captions = get_webcaptions_json(operator_object, lang)
        # Build the WebVTT document: header, then one cue per caption.
        vtt = 'WEBVTT\n\n'
        for caption in captions:
            vtt += formatTimeVTT(float(caption["start"])) + ' --> ' + formatTimeVTT(float(caption["end"])) + '\n'
            vtt += caption["caption"] + '\n\n'
        response = dataplane.generate_media_storage_path(asset_id, workflow_id)
        print(json.dumps(response))
        bucket = response["S3Bucket"]
        key = response["S3Key"]+'Captions_'+lang+'.vtt'
        s3_object = s3_resource.Object(bucket, key)
        s3_object.put(Body=vtt)
        # Record where this language's VTT file was written.
        metadata = {
            "OperatorName": "VTTCaptions_"+lang,
            "Results": {"S3Bucket": bucket, "S3Key": key},
            "WorkflowId": workflow_id,
            "LanguageCode": lang
        }
        captions_collection.append(metadata)
    data = {"CaptionsCollection": captions_collection}
    metadata_upload = dataplane.store_asset_metadata(asset_id, operator_object.name, workflow_id, data)
    # A missing "Status" key or anything other than "Success" is a failure.
    if metadata_upload.get("Status") == "Success":
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    operator_object.update_workflow_status("Error")
    operator_object.add_workflow_metadata(
        CaptionsError="Unable to store vtt captions file {e}".format(e=metadata_upload))
    raise MasExecutionError(operator_object.return_output_object())
def web_captions(event, context):
    """Segment a Transcribe result into WebCaptions and store them paginated.

    Reads the transcript JSON (Media.Text input) from S3, groups the word-level
    items into caption lines, and stores the captions in the dataplane under
    "WebCaptions_<SourceLanguageCode>" using paginated writes (one page per
    caption, end=True on the last).

    Segmentation rules applied per word item:
      - a caption line is closed after maxWords words or maxLength characters;
      - a silence longer than maxSilence seconds between words closes the
        current line and starts a new one;
      - punctuation items are appended to the current line without a leading
        space and are skipped entirely at the start of a line.
    """
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="No valid inputs {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        lang = operator_object.configuration["SourceLanguageCode"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="No language codes {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''
    # Load the Transcribe output JSON from S3.
    try:
        s3_response = s3.get_object(Bucket=bucket, Key=key)
        transcribe_metadata = json.loads(s3_response["Body"].read().decode("utf-8"))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to read transcription from S3: {e}".format(e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    # Caption segmentation tuning knobs and loop state.
    endTime = 0.0        # end time (seconds) of the last non-punctuation item
    maxLength = 50       # max characters per caption line
    wordCount = 0        # words accumulated in the current line
    maxWords = 12        # max words per caption line
    maxSilence = 1.5     # seconds of silence that forces a new line
    captions = []
    caption = None       # the caption line currently being built (None = none open)
    for item in transcribe_metadata["results"]["items"]:
        isPunctuation = item["type"] == "punctuation"
        if caption is None:
            # Start of a line with punctuation, just skip it
            # (punctuation items carry no start_time — presumably why they
            # cannot open a line; verify against the Transcribe output schema)
            if isPunctuation:
                continue
            # Create a new caption line
            caption = {
                "start": float(item["start_time"]),
                "caption": "",
                "wordConfidence": []
            }
        if not isPunctuation:
            startTime = float(item["start_time"])
            # Check to see if there has been a long silence
            # between the last recorded word and start a new
            # caption if this is the case, ending the last time
            # as this one starts.
            if (len(caption["caption"]) > 0) and ((endTime + maxSilence) < startTime):
                caption["end"] = startTime
                captions.append(caption)
                caption = {
                    "start": float(startTime),
                    "caption": "",
                    "wordConfidence": []
                }
                wordCount = 0
            endTime = float(item["end_time"])
        # Punctuation attaches directly to the previous word (no space).
        requiresSpace = (not isPunctuation) and (len(caption["caption"]) > 0)
        if requiresSpace:
            caption["caption"] += " "
        # Process tweaks
        text = item["alternatives"][0]["content"]
        confidence = item["alternatives"][0]["confidence"]
        textLower = text.lower()
        caption["caption"] += text
        # Track raw word confidence
        if not isPunctuation:
            caption["wordConfidence"].append(
                {
                    "w": textLower,
                    "c": float(confidence)
                }
            )
            # Count words
            wordCount += 1
        # If we have reached a good amount of text finalize the caption
        if (wordCount >= maxWords) or (len(caption["caption"]) >= maxLength):
            caption["end"] = endTime
            captions.append(caption)
            wordCount = 0
            caption = None
    # Close the last caption if required
    if caption is not None:
        caption["end"] = endTime
        captions.append(caption)
    # Store each caption as one page of a paginated dataplane record;
    # the final page is written with end=True to close the pagination.
    webcaptions_name = "WebCaptions"+"_"+lang
    i=0
    for asset in captions:
        i=i+1
        if i != len(captions):
            # Intermediate page: end=False keeps the record open.
            metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=webcaptions_name, workflow_id=workflow_id, results=asset, paginate=True, end=False)
            if "Status" not in metadata_upload:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                if metadata_upload["Status"] == "Success":
                    pass
                else:
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                    raise MasExecutionError(operator_object.return_output_object())
        else:
            # Last page: end=True finalizes the paginated record.
            metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=webcaptions_name, workflow_id=workflow_id, results=asset, paginate=True, end=True)
            if "Status" not in metadata_upload:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                if metadata_upload["Status"] == "Success":
                    # Surface the stored location to downstream operators.
                    response_json = metadata_upload
                    operator_object.add_workflow_metadata(WebCaptionsS3Bucket=response_json['Bucket'],
                                                          WebCaptionsS3Key=response_json['Key'])
                    operator_object.update_workflow_status("Complete")
                    return operator_object.return_output_object()
                else:
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                    raise MasExecutionError(operator_object.return_output_object())