def update_ddb_stage_control(item, file_status, timestamp):
    try:
        table_stage = dynamodb_resource.Table(DYNAMO_DB_STAGE_TABLE)
        response = table_stage.update_item(
            Key={'s3_object_name_stage': item},
            UpdateExpression="set file_status = :file_status, "
                             "timestamp_step_finished = :timestamp_step_finished",
            ExpressionAttributeValues={
                ':file_status': file_status,
                ':timestamp_step_finished': timestamp
            }
        )
        logger.debug('DynamoDB update_item response: {}'.format(response))
    except Exception as e:
        msg_exception = "DynamoDB Exception: {}".format(e)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN,
            'AWS Lambda: ValidateJobSubmit'
            ' error: Unable to update DynamoDB Item.\nError: {}'.format(e),
            'Datalake:{} Lambda Error'.format(ENVIRONMENT)
        )
        return 'Unable to update Item from table'
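# Usage sketch (hypothetical values; the key format and the DatalakeStatus
# members are taken from the step-state handler further below):
#
#   update_ddb_stage_control(
#       's3://stage-bucket/path/object.parquet',
#       DatalakeStatus.FAILED,
#       time.strftime("%Y-%m-%dT%H:%M:%S-%Z"),
#   )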
def check_files_shutdown_emr(event_cluster_id, context):
    # Check if there are files pending to be processed.
    try:
        table_stage = dynamodb_resource.Table(DYNAMO_DB_STAGE_TABLE)
        results_stage = table_stage.scan()
    except Exception as e:
        logger.error("Error Reading DynamoDB Table: {}".format(e))
        send_notification(
            SNS_TOPIC_ARN,
            'AWS Lambda: {function_name}'
            ' error: Failed shutdown EMR cluster.\nError: {error}'.format(
                function_name=context.function_name,
                error=e
            ),
            'Datalake:{} Lambda Error'.format(ENVIRONMENT)
        )
        return 'Unable to Scan table'
    if not results_stage.get('Items'):
        msg = 'DynamoDB Stage Table {} is empty'.format(DYNAMO_DB_STAGE_TABLE)
        logger.info(msg)
        # Nothing left to process: shut the EMR cluster down.
        response = emr_client.terminate_job_flows(
            JobFlowIds=[event_cluster_id]
        )
        return response
def generate_test_notification_simple_string_notif(value):
    send_notification({
        "xpath": "/test-notifications:string-container-simple-string-changed",
        "values": [{
            "xpath": ("/test-notifications:string-container-simple-string-changed"
                      "/test-notifications:new-value"),
            "value": value,
        }],
    })
def generate_test_notification_container_string_notif(value):
    send_notification({
        "xpath": ("/test-notifications:notification-from-container"
                  "/test-notifications:container-notification-string-changed"),
        "values": [{
            "xpath": ("/test-notifications:notification-from-container"
                      "/test-notifications:container-notification-string-changed"
                      "/test-notifications:new-value"),
            "value": value,
        }],
    })
def check_spark_submit_rule_enabled():
    try:
        resp = events_client.describe_rule(Name=EVENT_SPARK_SUBMIT)
        logger.debug('Describe Rule response: {}'.format(resp))
        rule_status = resp.get('State', 'DISABLED')
    except ClientError as er:
        msg_exception = "Describing Events Rule Exception: {}".format(er)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN,
            "Data Lake:{} Spark Submit Exception".format(ENVIRONMENT),
            "CloudWatch Describe Rule request error: {}".format(msg_exception))
        raise Exception('Unable to request API events:DescribeRule')
    return rule_status
def main():
    config.load_config()
    (options, args) = parser.parse_args()
    if not options.to:
        print("Please specify target address with --to [email protected]")
        sys.exit(1)
    print("Generating templates for {}".format(options.to))
    common.send_notification(options.to, config.this_email(),
                             config.get_random_dataset())
    common.generate_schedule(options.to)
def generate_test_notification_list_foo_string_notif(key, value):
    send_notification({
        "xpath": ("/test-notifications:notification-from-list"
                  "/test-notifications:notification-from-list[name='{}']"
                  "/test-notifications:list-foo-changed").format(key),
        "values": [{
            "xpath": ("/test-notifications:notification-from-list"
                      "/test-notifications:notification-from-list[name='{}']"
                      "/test-notifications:list-foo-changed"
                      "/test-notifications:new-value").format(key),
            "value": value,
        }],
    })
def test_basic_notification(mgr):
    mgr.dispatch(
        to_ele("""
            <create-subscription
                xmlns="urn:ietf:params:xml:ns:netconf:notification:1.0">
              <filter>
                <hardware-state-change
                    xmlns="urn:ietf:params:xml:ns:yang:ietf-hardware" />
              </filter>
            </create-subscription>
        """))
    send_notification({
        "xpath": "/ietf-hardware:hardware-state-change",
        "values": []
    })
    n = mgr.take_notification(timeout=10)
    assert n.notification_ele.xpath("//ietf-hw:hardware-state-change",
                                    namespaces=NS_MAP)
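# NS_MAP is defined elsewhere in the test suite. A minimal sketch consistent
# with the prefix used in the assertion above would be (an assumption, not
# necessarily the suite's actual mapping):
#
#   NS_MAP = {
#       "ietf-hw": "urn:ietf:params:xml:ns:yang:ietf-hardware",
#   }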
def get_object_metadata(bucket, key):
    s3_object_name_raw = None
    try:
        resp = s3_client.head_object(Bucket=bucket, Key=key)
        logger.debug('S3 object head: {}'.format(resp))
        # Iterate with a distinct name so the loop does not shadow the `key`
        # parameter used in the head_object call above.
        for meta_key, value in resp.get('Metadata', {}).items():
            if meta_key == METADATA_OBJECT_NAME_RAW:
                s3_object_name_raw = value
        return s3_object_name_raw
    except Exception as e:
        logger.error("S3 Exception: {}".format(e))
        send_notification(SNS_TOPIC_ARN,
                          "Data Lake: Update DynamoDB Stage Exception",
                          "S3 Head object error: {}".format(e))
        raise e
def get_object_tag(bucket, key):
    s3_object_name_raw = None
    try:
        resp = s3_client.get_object_tagging(Bucket=bucket, Key=key)
        logger.debug('S3 object tags: {}'.format(resp))
        for tag in resp.get('TagSet', []):
            if tag.get('Key') == TAG_OBJECT_NAME_RAW:
                s3_object_name_raw = tag.get('Value')
        return s3_object_name_raw
    except Exception as e:
        logger.error("S3 Exception: {}".format(e))
        send_notification(SNS_TOPIC_ARN,
                          "Data Lake: Update DynamoDB Stage Exception",
                          "S3 Get Tags error: {}".format(e))
        raise e
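# The metadata and tag lookups recover the same attribute from two different
# places on the object; a caller could combine them as a fallback chain
# (hypothetical composition, not taken from this module):
#
#   s3_object_name_raw = (get_object_tag(bucket, key) or
#                         get_object_metadata(bucket, key))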
def generate_test_notification_embedded_list_string_notif(key1, key2, value):
    send_notification({
        "xpath": ("/test-notifications:notification-from-list"
                  "/test-notifications:notification-from-list[name='{}']"
                  "/test-notifications:embedded-list[name='{}']"
                  "/test-notifications:embedded-foo-changed").format(key1, key2),
        "values": [{
            "xpath": ("/test-notifications:notification-from-list"
                      "/test-notifications:notification-from-list[name='{}']"
                      "/test-notifications:embedded-list[name='{}']"
                      "/test-notifications:embedded-foo-changed"
                      "/test-notifications:new-value").format(key1, key2),
            "value": value,
        }],
    })
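# Example invocation (hypothetical keys and value):
#
#   generate_test_notification_embedded_list_string_notif("outer", "inner", "hi")
#
# emits embedded-foo-changed for
# notification-from-list[name='outer']/embedded-list[name='inner'].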
def stage_is_empty():
    try:
        table_stage = dynamodb_client.Table(DYNAMO_DB_STAGE_TABLE)
        results_stage = table_stage.scan()
        return not results_stage.get('Items')
    except Exception as e:
        logger.error("Error Reading DynamoDB Table: {}".format(e))
        send_notification(
            sns_arn=SNS_TOPIC_ARN,
            subject='Datalake:{} Create EMR Cluster error'.format(ENVIRONMENT),
            message=str(e)
        )
        raise e
def main():
    config.load_config()
    print("Notifying users of upcoming items")
    # Hoisted out of the loop: these imports only need to happen once.
    from common import random_date
    from datetime import datetime, timedelta
    for user in config.get_users():
        dataset = config.get_random_dataset()
        start = datetime.now()
        end = datetime.now() + timedelta(days=60)
        date = random_date(start, end)
        address = common.generate_token_and_address(user, dataset.get('name'),
                                                    date)
        common.send_notification(user, "data.gov.uk <{}>".format(address),
                                 dataset)
def set_spark_submit_rule_status(status):
    if status not in ['ENABLED', 'DISABLED']:
        logger.error(
            'Missing status parameter. Must set status to ENABLED or DISABLED')
        raise Exception('Missing status to set the event rule')
    logger.info(
        'We are going to {} the scheduled trigger to continue'.format(status))
    try:
        if status == 'ENABLED':
            api_request = 'events:EnableRule'
            resp = events_client.enable_rule(Name=EVENT_SPARK_SUBMIT)
        else:
            api_request = 'events:DisableRule'
            resp = events_client.disable_rule(Name=EVENT_SPARK_SUBMIT)
        logger.debug('{} Rule response: {}'.format(status, resp))
    except ClientError as er:
        msg_exception = "{} Events Rule Exception: {}".format(status, er)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN,
            "Data Lake: Spark Submit Exception",
            "CloudWatch {} request error: {}".format(api_request,
                                                     msg_exception))
        raise Exception('Unable to request API {}'.format(api_request))
    return
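# Typical pairing, mirroring the AddJobFlowSteps back-pressure handling in the
# spark-submit handler below: when the cluster rejects further steps,
# re-enable the scheduled rule so the remaining jobs are retried later.
#
#   if check_spark_submit_rule_enabled() == 'DISABLED':
#       set_spark_submit_rule_status('ENABLED')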
# -*- coding: utf-8 -*-
"""The watcher script."""
from common import send_notification
import subprocess
import os
import time

send_notification("Waiting...", "Waiting for containers to start...")

my_env = os.environ.copy()
polling_interval = 10  # 10 seconds
max_polling_duration = 10 * 60  # 10 minutes
duration = 0

while True:
    # Sleep between polls so `duration` actually tracks wall-clock time.
    time.sleep(polling_interval)
    duration += polling_interval
    running_containers = subprocess.run(["docker", "compose", "top"],
                                        stdout=subprocess.PIPE, env=my_env)
    print(running_containers.stdout)
    # `docker compose top` prints a process table per running service; more
    # than a few bytes of output means at least one container is up.
    if len(running_containers.stdout) > 10:
        break
    if duration > max_polling_duration:
        send_notification("Failed...", "Container failed to start on time",
                          "Hero")
        exit(1)
def create_cluster():
    logger.info('There is no Cluster created to execute the jobs')
    logger.info('We are going to create a new one to run the jobs.')
    # Base RunJobFlow arguments
    args = {
        "Name": label,
        "LogUri": "s3://{}".format(S3_LOG_URI),
        "ReleaseLabel": EMR_RELEASE,
    }
    if EMR_CUSTOM_AMI:
        args.update({"CustomAmiId": EMR_CUSTOM_AMI_ID})
    args.update({
        "Instances": {
            "InstanceGroups": [
                {
                    "InstanceRole": "MASTER",
                    "InstanceType": str(INSTANCE_TYPE_MASTER),
                    "Name": "Master instance group",
                    "InstanceCount": 1
                },
                {
                    "InstanceRole": "CORE",
                    "InstanceType": str(INSTANCE_TYPE_CORE),
                    "Name": "Core instance group",
                    "InstanceCount": int(INSTANCE_COUNT_CORE_NODE),
                    "EbsConfiguration": {
                        "EbsBlockDeviceConfigs": [{
                            "VolumeSpecification": {
                                "SizeInGB": 500,
                                "VolumeType": "gp2"
                            },
                            "VolumesPerInstance": 1
                        }],
                        "EbsOptimized": True
                    }
                },
                {
                    "InstanceRole": "TASK",
                    "InstanceType": str(INSTANCE_TYPE_TASK),
                    "Name": "Task instance group",
                    "InstanceCount": int(INSTANCE_COUNT_TASK_NODE)
                }
            ],
            "Ec2KeyName": EC2_KEYPAIR,
            "KeepJobFlowAliveWhenNoSteps": True,
            "TerminationProtected": False,
            "Ec2SubnetId": EC2_SUBNET_ID
        },
        "BootstrapActions": [
            {
                'Name': 'Install Libs and Bootstrap Scripts',
                'ScriptBootstrapAction': {
                    'Path': 's3://{}/bootstrap/emr-bootstrap/install_libs.sh'.format(
                        S3_BOOTSTRAP_BUCKET),
                    'Args': [S3_BOOTSTRAP_BUCKET]
                }
            },
            {
                'Name': 'Bootstrap ENI MASTER',
                'ScriptBootstrapAction': {
                    'Path': 's3://{}/bootstrap/emr-bootstrap/emr-eni-proc.sh'.format(
                        S3_BOOTSTRAP_BUCKET),
                    'Args': [ENI_MASTER]
                }
            }
        ],
        "Steps": [{
            'Name': 'Install the Manage cron job to terminate EMR cluster',
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': [
                    '{}/manage_emr_shutdown_install.sh'.format(EMR_HOME_SCRIPTS),
                    '{}/manage_emr_shutdown.sh'.format(EMR_HOME_SCRIPTS),
                    S3_BOOTSTRAP_BUCKET,
                    SNS_TOPIC_ARN
                ]
            }
        }],
        "Applications": [
            {'Name': 'Hadoop'},
            {'Name': 'Hive'},
            {'Name': 'Oozie'},
            {'Name': 'Ganglia'},
            {'Name': 'Tez'},
            {'Name': 'Hue'},
            {'Name': 'Spark'}
        ],
        "Configurations": [
            {
                "Classification": "emrfs-site",
                "Properties": {
                    "fs.s3.consistent.retryPeriodSeconds": "10",
                    "fs.s3.consistent": "true",
                    "fs.s3.consistent.retryCount": "5",
                    "fs.s3.consistent.metadata.tableName": "EmrFSMetadata"
                },
                "Configurations": []
            },
            {
                "Classification": "hive-site",
                "Properties": {
                    "hive.metastore.client.factory.class":
                        "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
                },
                "Configurations": []
            },
            {
                "Classification": "spark-hive-site",
                "Properties": {
                    "hive.metastore.client.factory.class":
                        "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
                },
                "Configurations": []
            },
        ],
        "VisibleToAllUsers": True,
        "JobFlowRole": EMR_EC2_ROLE,
        "ServiceRole": EMR_ROLE,
        "Tags": [
            {'Key': 'Role', 'Value': 'EMR Data Lake'},
            {'Key': 'Environment', 'Value': ENVIRONMENT},
            {'Key': 'Label', 'Value': label},
            {'Key': 'Name', 'Value': label}
        ]
    })

    # Create new EMR cluster
    emr_launch_message = 'Launching new EMR cluster: {}'.format(label)
    logger.info(emr_launch_message)
    send_notification(
        sns_arn=SNS_TOPIC_ARN,
        subject='Datalake:{} Create EMR Cluster message'.format(ENVIRONMENT),
        message=emr_launch_message
    )
    try:
        response = emr_client.run_job_flow(**args)
        return response
    except Exception as e:
        logger.error("RunJobFlow Exception: {}".format(e))
        send_notification(
            sns_arn=SNS_TOPIC_ARN,
            subject='Datalake:{} Create EMR Cluster Error'.format(ENVIRONMENT),
            message='Lambda Create EMR Cluster Error\nError message: {}'.format(e)
        )
        raise e
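# run_job_flow returns the new cluster id under 'JobFlowId', so a caller can
# chain on the response, e.g. (sketch):
#
#   cluster_id = create_cluster().get('JobFlowId')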
def lambda_handler(event, context):
    # Chooses the first cluster which is STARTING, RUNNING or WAITING;
    # could also choose by name, or already have the cluster id.
    skip = None
    if isinstance(event, dict):
        skip = event.get('skip')
    clusters = emr_client.list_clusters(
        ClusterStates=['STARTING', 'RUNNING', 'WAITING'])
    logger.info(clusters)

    # ClusterName
    logger.info(event.values())
    logger.info(event.get('detail', {}).get('name'))
    cluster_value = event.get('detail', {}).get('name')
    if cluster_value != CLUSTER_NAME:
        logger.error("No valid cluster")
        return 'No valid cluster'

    # Choose the correct cluster: take the first one matching CLUSTER_NAME.
    for cluster in clusters.get('Clusters', []):
        if cluster['Name'] == CLUSTER_NAME:
            cluster_id = cluster['Id']
            break
    else:
        logger.error("No valid clusters")
        return 'No valid clusters'

    step_args = [
        SETUP_JOBS,
        "s3://{}/{}".format(S3_BUCKET_PROGRAMS, S3_KEY_PROGRAMS)
    ]
    step = {
        "Name": 'Setup_jobs',
        'ActionOnFailure': 'CONTINUE',
        'HadoopJarStep': {
            'Jar': 's3://{}.elasticmapreduce/libs/script-runner/script-runner.jar'.format(REGION),
            'Args': step_args
        }
    }
    logger.debug("### Debug mode enabled ###")
    logger.debug("EMR Step: {}".format(step))
    logger.debug("EMR Cluster_id: {}".format(cluster_id))
    try:
        action = emr_client.add_job_flow_steps(JobFlowId=cluster_id,
                                               Steps=[step])
        logger.info('EMR action: {}'.format(action))
    except Exception as e:
        msg_exception = "EMR Exception: " + str(e)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
            "Lambda Function Name: {}\n{}".format(context.function_name,
                                                  msg_exception))
        return

    try:
        table_stage = dynamodb_client.Table(DYNAMO_DB_STAGE_TABLE)
        table_job = dynamodb_client.Table(DYNAMO_DB_JOB_CATALOG)
        results = table_stage.scan(
            FilterExpression=Attr('file_status').ne(skip))
    except Exception as e:
        msg_exception = "DynamoDB Scan Exception: {}".format(e)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
            "Lambda Function Name: {}\n{}".format(context.function_name,
                                                  msg_exception))
        return

    for item in results.get('Items'):
        s3_object_name_stage = item.get('s3_object_name_stage')
        partition_date = item.get('partition')
        s3_dir_stage = item.get('s3_dir_stage')
        logger.debug("### Debug mode enabled ###")
        logger.debug("Items: {}".format(item))
        logger.debug("partition_date: {}".format(partition_date))
        logger.debug("s3_dir_stage: {}".format(s3_dir_stage))
        try:
            responses = table_job.get_item(
                Key={'s3_data_source': str(s3_dir_stage)})
        except Exception as e:
            msg_exception = "DynamoDB Job GetItem Exception: {}".format(e)
            logger.error(msg_exception)
            send_notification(
                SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
                "Lambda Function Name: {}\n{}".format(context.function_name,
                                                      msg_exception))
            return
        if responses.get('Item'):
            spark_program_s3_path = responses['Item']['programs']
            spark_program = spark_program_s3_path.split("/")[-1]
            hive_database_raw = responses['Item']['hive_database_raw']
            hive_database_analytics = responses['Item']['hive_database_analytics']
            hive_table_raw = responses['Item']['hive_table_raw']
            hive_table_analytics = responses['Item']['hive_table_analytics']
            s3_target = responses['Item']['s3_target']
            partition_name_stage = responses['Item']['partition_name_stage']
            status_enabled = responses['Item']['Enabled']
            params_type = responses.get('Item', {}).get('params_type')
            params = responses.get('Item', {}).get('params')
            logger.debug("responses: {}".format(responses['Item']))
            logger.debug("spark_program_s3_path: {}".format(spark_program_s3_path))
logger.debug("spark_program: {}".format(spark_program)) logger.debug("hive_database_raw: {}".format(hive_database_raw)) logger.debug( "hive_database_analytics: {}".format(hive_database_analytics)) logger.debug("hive_table_raw: {}".format(hive_table_raw)) logger.debug( "hive_table_analytycs: {}".format(hive_table_analytics)) logger.debug("s3_target: {}".format(s3_target)) logger.debug( "partition_name_stage: {}".format(partition_name_stage)) logger.debug("status_enabled: {}".format(status_enabled)) # code location on your emr master node code_path = "/home/hadoop/code/" # spark configuration example # step_args = ["/usr/bin/spark-submit", "--spark-conf", "your-configuration", # code_path + "your_file.py", '--your-parameters', 'parameters'] step_args = [ "/usr/bin/spark-submit", "--conf", "spark.yarn.appMasterEnv.PYTHONIOENCODING=utf8" ] if params_type and params_type == 'json': step_args.append(code_path + spark_program) elif params_type and params_type == 'cli': step_args.append(code_path + spark_program) for param in params.split(' '): step_args.append(param) else: step_args.append(code_path + spark_program) step_args.append(hive_database_raw) step_args.append(hive_table_raw) step_args.append(s3_dir_stage) step_args.append(hive_database_analytics) step_args.append(hive_table_analytics) step_args.append(s3_target) if partition_name_stage != 'false': step_args.append('{}={}'.format(partition_name_stage, partition_date)) step = { "Name": s3_object_name_stage, 'ActionOnFailure': 'CONTINUE', 'HadoopJarStep': { 'Jar': 'command-runner.jar', 'Args': step_args } } if status_enabled == "True": timestamp_step_submitted = time.strftime( "%Y-%m-%dT%H:%M:%S-%Z") try: action = emr_client.add_job_flow_steps( JobFlowId=cluster_id, Steps=[step]) logger.debug("### Debug mode enabled ###") logger.debug("EMR Step: {}".format(step)) logger.debug("EMR Step timestamp_submitted: {}".format( timestamp_step_submitted)) logger.debug("EMR Cluster_id: {}".format(cluster_id)) logger.debug("Added step: {}".format(action)) except ClientError as e: if e.operation_name == 'AddJobFlowSteps' and e.response[ 'Error']['Message'] == STEPS_EXCEEDED: logger.info( 'The maximum number of steps for cluster exceeded') rule_status = check_spark_submit_rule_enabled() if rule_status == 'DISABLED': set_spark_submit_rule_status('ENABLED') else: logger.info( 'The Spark Submit Rule is enabled, exiting') return else: msg_exception = "EMR Add Steps Exception: {}".format(e) logger.error(msg_exception) send_notification( SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception", "Lambda Function Name: {}\n{}".format( context.function_name, msg_exception)) return 'Error Sending Job Flow Steps' return 'Finished sending step Jobs but with more on queue' # If we were able to send the job we need to update the DDB table with the new Status try: response = table_stage.update_item( TableName=DYNAMO_DB_STAGE_TABLE, Key={'s3_object_name_stage': s3_object_name_stage}, UpdateExpression= "set hive_table_analytics = :hive_table_analytics," "hive_database_analytics = :hive_database_analytics," "s3_target = :s3_target," "timestamp_step_submitted = :timestamp_step_submitted," "file_status = :file_status", ExpressionAttributeValues={ ':hive_table_analytics': hive_table_analytics, ':hive_database_analytics': hive_database_analytics, ':s3_target': s3_target, ':timestamp_step_submitted': timestamp_step_submitted, ':file_status': DatalakeStatus.PROCESSING }) logger.info( 'DynamoDB update response: {}'.format(response)) except Exception as e: msg_exception = "DynamoDB 
Stage Update Item Exception: {}".format( e) logger.error(msg_exception) send_notification( SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception", "Lambda Function Name: {}\n{}".format( context.function_name, msg_exception)) return else: logger.info("The program is not enabled: {}".format( spark_program_s3_path)) else: logger.info('There is no items returned from DynamoDB') if skip: # We are running from a scheduled rule and there is no more jobs to submit # Let's disable the scheduled rule logger.info( 'Disabling Scheduled Event Rule due to no more jobs to submit') set_spark_submit_rule_status('DISABLED') logger.info('Finished processing the Spark Submit function') return
def lambda_handler(event, context):
    step_name = event.get('detail', {}).get('name')
    event_step_message = event.get('detail', {}).get('message')
    event_step_state = event.get('detail', {}).get('state')
    event_step_id = event.get('detail', {}).get('stepId')
    event_cluster_id = event.get('detail', {}).get('clusterId')

    # Guard against events without a step name before the substring check.
    if not step_name or 's3://' not in step_name:
        logger.info("It is not a catalog job.")
        return

    cluster_info = emr_client.describe_cluster(ClusterId=event_cluster_id)
    logger.info(cluster_info)
    cluster_name = cluster_info.get("Cluster", {}).get("Name")
    logger.info(cluster_name)
    timestamp_step_finished = time.strftime("%Y-%m-%dT%H:%M:%S-%Z")

    logger.debug("### Debug mode enabled ###")
    logger.debug("Received event: {}".format(json.dumps(event, indent=2)))
    logger.debug("Step Name: {}".format(step_name))
    logger.debug("Cluster Id: {}".format(event_cluster_id))
    logger.debug("Message event step changed: {}".format(event_step_message))

    if 'COMPLETED' in event_step_state:
        message_step_completed = "job execution completed: Name: {}; ID: {}".format(
            step_name, event_step_id)
        logger.info(message_step_completed)
        try:
            table_stage = dynamodb_resource.Table(DYNAMO_DB_STAGE_TABLE)
            response = table_stage.get_item(
                Key={'s3_object_name_stage': str(step_name)})
            logger.info(response)
        except Exception as e:
            msg_exception = "DynamoDB Exception: {}".format(e)
            logger.error(msg_exception)
            logger.debug(traceback.print_exc())
            send_notification(
                SNS_TOPIC_ARN,
                'AWS Lambda: {function_name}'
                ' error: Unable to get DynamoDB Item.\nError: {error}'.format(
                    function_name=context.function_name,
                    error=e
                ),
                'Datalake:{} Lambda Error'.format(ENVIRONMENT)
            )
            return 'Unable to Get Item from table'

        if response.get('Item'):
            hive_database_analytics = response['Item']['hive_database_analytics']
            hive_table_analytics = response['Item']['hive_table_analytics']
            s3_target = response['Item']['s3_target']
            s3_object_name_raw = response['Item']['s3_object_name_raw']
            logger.debug("### Debug mode enabled ###")
            logger.debug("Updating Table: {}".format(DYNAMO_DB_CONTROL))
            logger.debug("s3_object_name_raw: {}".format(s3_object_name_raw))
            try:
                table_control = dynamodb_resource.Table(DYNAMO_DB_CONTROL)
                response_control = table_control.update_item(
                    Key={'s3_object_name': str(s3_object_name_raw)},
                    UpdateExpression="set file_status = :file_status, "
                                     "timestamp_step_finished = :timestamp_step_finished, "
                                     "hive_table_analytics = :hive_table_analytics, "
                                     "hive_database_analytics = :hive_database_analytics, "
                                     "s3_target = :s3_target",
                    ExpressionAttributeValues={
                        ':file_status': DatalakeStatus.LOADED,
                        ':timestamp_step_finished': str(timestamp_step_finished),
                        ':hive_table_analytics': str(hive_table_analytics),
                        ':hive_database_analytics': str(hive_database_analytics),
                        ':s3_target': str(s3_target)
                    }
                )
                logger.debug('DDB update_item response: {}'.format(response_control))
            except Exception as e:
                msg_exception = "DynamoDB Exception: {}".format(e)
                logger.error(msg_exception)
                logger.debug(traceback.print_exc())
                send_notification(
                    SNS_TOPIC_ARN,
                    'AWS Lambda: {function_name}'
                    ' error: Unable to update DynamoDB Item.\nError: {error}'.format(
                        function_name=context.function_name,
                        error=e
                    ),
                    'Datalake:{} Lambda Error'.format(ENVIRONMENT)
                )
                return 'Unable to Update Item from table'
        else:
            logger.info('There are no items returned from DynamoDB!')
            return 'No items to process'

        # TODO: Create a parameter to Delete or Keep the item in the
        # DynamoDB StageControl table.
        logger.info("Cleaning Table DynamoDB: {}; s3_object_name_stage: {}".format(
            DYNAMO_DB_STAGE_TABLE, step_name))
        try:
            response_stage = table_stage.delete_item(
                Key={'s3_object_name_stage': step_name})
            http_status_code_delete_stage = \
                response_stage['ResponseMetadata']['HTTPStatusCode']
            logger.debug("### Debug mode enabled ###")
            logger.debug(response_stage)
            logger.debug("HTTPStatusCode: {}".format(http_status_code_delete_stage))
        except Exception as e:
            msg_exception = "DynamoDB Exception: {}".format(e)
            logger.error(msg_exception)
            send_notification(
                SNS_TOPIC_ARN,
                'AWS Lambda: {function_name}'
                ' error: Unable to delete DynamoDB Item.\nError: {error}'.format(
                    function_name=context.function_name,
                    error=e
                ),
                'Datalake:{} Lambda Error'.format(ENVIRONMENT)
            )
            return 'Unable to delete Item from table'

        logger.info("Cleaning s3 object stage: {}".format(step_name))
        # step_name is an s3:// URI: s3://<bucket>/<key>
        bucket_stage = step_name.split("/")[2]
        logger.info("bucket: {}".format(bucket_stage))
        key_stage = step_name.split('/', 3)[3]
        logger.info("Key: {}".format(key_stage))
        try:
            s3_client.delete_object(Bucket=bucket_stage, Key=key_stage)
            # Check if there are files pending to be processed; this shuts
            # the cluster down if the StageControl table has no items left.
            check_files_shutdown_emr(event_cluster_id, context)
        except Exception as e:
            logger.error("S3 Exception: {}".format(e))
            return
    elif 'FAILED' in event_step_state:
        message_step_failed = "job execution failed: Name: {}; ID: {}".format(
            step_name, event_step_id)
        logger.info(message_step_failed)
        update_ddb_stage_control(step_name, DatalakeStatus.FAILED,
                                 timestamp_step_finished)
    elif 'CANCELLED' in event_step_state:
        message_step_cancelled = "job execution cancelled: Name: {}; ID: {}".format(
            step_name, event_step_id)
        logger.info(message_step_cancelled)
        update_ddb_stage_control(step_name, DatalakeStatus.CANCELED,
                                 timestamp_step_finished)
    else:
        return