def finalize(connection_name):
    """Create the stream, pipe and task that ingest CloudTrail events.

    Object names are derived from ``connection_name`` as
    ``data.AWS_CLOUDTRAIL_<CONNECTION_NAME>_EVENTS_<SUFFIX>`` (upper-cased).
    The function creates a stream over the staging table, an auto-ingest
    pipe copying from the stage into the staging table, and a task that
    runs every minute to flatten staged records into the landing table.
    It then refreshes the pipe and looks up its notification channel.

    Args:
        connection_name: Name of the connection; interpolated into the
            Snowflake object names.

    Returns:
        A dict with keys ``'newStage'`` and ``'newMessage'``. The stage is
        ``'finalized'`` with a message containing the pipe's SQS ARN, or
        ``'error'`` if ``DESC PIPE`` returns no rows.
    """
    base_name = f'AWS_CLOUDTRAIL_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    # Step two: Configure the remainder once the role is properly configured.
    cloudtrail_ingest_task = f'''
INSERT INTO {landing_table} (
  insert_time,
  raw,
  hash_raw,
  event_time,
  aws_region,
  event_id,
  event_name,
  event_source,
  event_type,
  event_version,
  recipient_account_id,
  request_id,
  request_parameters,
  response_elements,
  source_ip_address,
  user_agent,
  user_identity,
  user_identity_type,
  user_identity_principal_id,
  user_identity_arn,
  user_identity_accountid,
  user_identity_invokedby,
  user_identity_access_key_id,
  user_identity_username,
  user_identity_session_context_attributes_mfa_authenticated,
  user_identity_session_context_attributes_creation_date,
  user_identity_session_context_session_issuer_type,
  user_identity_session_context_session_issuer_principal_id,
  user_identity_session_context_session_issuer_arn,
  user_identity_session_context_session_issuer_account_id,
  user_identity_session_context_session_issuer_user_name,
  error_code,
  error_message,
  additional_event_data,
  api_version,
  read_only,
  resources,
  service_event_details,
  shared_event_id,
  vpc_endpoint_id
)
SELECT CURRENT_TIMESTAMP() insert_time
    , value raw
    , HASH(value) hash_raw
    --- In the rare event of an unparsable timestamp, the following COALESCE keeps the pipeline from failing.
    --- Compare event_time to TRY_TO_TIMESTAMP(raw:eventTime::STRING) to establish if the timestamp was parsed.
    , COALESCE(
        TRY_TO_TIMESTAMP(value:eventTime::STRING)::TIMESTAMP_LTZ(9),
        CURRENT_TIMESTAMP()
      ) event_time
    , value:awsRegion::STRING aws_region
    , value:eventID::STRING event_id
    , value:eventName::STRING event_name
    , value:eventSource::STRING event_source
    , value:eventType::STRING event_type
    , value:eventVersion::STRING event_version
    , value:recipientAccountId::STRING recipient_account_id
    , value:requestID::STRING request_id
    , value:requestParameters::VARIANT request_parameters
    , value:responseElements::VARIANT response_elements
    , value:sourceIPAddress::STRING source_ip_address
    , value:userAgent::STRING user_agent
    , value:userIdentity::VARIANT user_identity
    , value:userIdentity.type::STRING user_identity_type
    , value:userIdentity.principalId::STRING user_identity_principal_id
    , value:userIdentity.arn::STRING user_identity_arn
    , value:userIdentity.accountId::STRING user_identity_accountid
    , value:userIdentity.invokedBy::STRING user_identity_invokedby
    , value:userIdentity.accessKeyId::STRING user_identity_access_key_id
    , value:userIdentity.userName::STRING user_identity_username
    , value:userIdentity.sessionContext.attributes.mfaAuthenticated::STRING user_identity_session_context_attributes_mfa_authenticated
    , value:userIdentity.sessionContext.attributes.creationDate::STRING user_identity_session_context_attributes_creation_date
    , value:userIdentity.sessionContext.sessionIssuer.type::STRING user_identity_session_context_session_issuer_type
    , value:userIdentity.sessionContext.sessionIssuer.principalId::STRING user_identity_session_context_session_issuer_principal_id
    , value:userIdentity.sessionContext.sessionIssuer.arn::STRING user_identity_session_context_session_issuer_arn
    , value:userIdentity.sessionContext.sessionIssuer.accountId::STRING user_identity_session_context_session_issuer_account_id
    , value:userIdentity.sessionContext.sessionIssuer.userName::STRING user_identity_session_context_session_issuer_user_name
    , value:errorCode::STRING error_code
    , value:errorMessage::STRING error_message
    , value:additionalEventData::VARIANT additional_event_data
    , value:apiVersion::STRING api_version
    , value:readOnly::BOOLEAN read_only
    , value:resources::VARIANT resources
    , value:serviceEventDetails::STRING service_event_details
    , value:sharedEventId::STRING shared_event_id
    , value:vpcEndpointId::STRING vpc_endpoint_id
FROM data.{base_name}_STREAM, table(flatten(input => v:Records))
WHERE ARRAY_SIZE(v:Records) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_STREAM',
        target=f'data.{base_name}_STAGING',
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    # The fixed sleep covers only the low end of that window; the retry
    # wrapper is there for the remainder (presumably up to n attempts,
    # one second apart -- confirm against db.retry).
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=f"COPY INTO data.{base_name}_STAGING(v) FROM @data.{base_name}_STAGE/",
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    db.create_task(
        name=f'data.{base_name}_TASK',
        schedule='1 minute',
        warehouse=WAREHOUSE,
        sql=cloudtrail_ingest_task,
    )

    db.execute(f"ALTER PIPE {pipe} REFRESH")

    pipe_description = list(db.fetch(f'DESC PIPE {pipe}'))
    if not pipe_description:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} doesn't exist; please reach out to Snowflake Security for assistance.",
        }

    sqs_arn = pipe_description[0]['notification_channel']
    return {
        'newStage': 'finalized',
        'newMessage': (
            # Adjacent literals are concatenated verbatim, so the trailing
            # space is required to keep "notification channel" two words.
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events: {sqs_arn}"
        ),
    }
def finalize(connection_name):
    """Create the stream, pipe and task that ingest AWS Config events.

    Object names are derived from ``connection_name`` as
    ``data.AWS_CONFIG_<CONNECTION_NAME>_EVENTS_<suffix>``. The function
    creates a stream over the staging table, an auto-ingest pipe that
    copies each staged document together with its file name, and a task
    that runs every minute to flatten ``configurationItems`` into the
    landing table. ``event_time`` is derived from the timestamp embedded
    in the staged file name.

    The pipe is not refreshed here; the returned message tells the user
    how to backfill existing data.

    Args:
        connection_name: Name of the connection; interpolated into the
            Snowflake object names.

    Returns:
        A dict with keys ``'newStage'`` and ``'newMessage'``. The stage is
        ``'finalized'`` with a message containing the pipe's SQS ARN, or
        ``'error'`` if ``DESC PIPE`` returns no rows.
    """
    base_name = f'AWS_CONFIG_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    # Backslashes are doubled because these patterns are embedded inside
    # single-quoted SQL string literals below.
    date_regexp = r'.+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})Z.*'.replace("\\", "\\\\")
    date_iso8601_backreferences = r'\1-\2-\3T\4:\5:\6Z'.replace("\\", "\\\\")

    # The resourceType expression was previously aliased "aws_region" (a
    # copy of the line above it); it feeds the resource_type column, so it
    # is labelled accordingly. INSERT ... SELECT matches columns by
    # position, so the loaded data is unaffected.
    config_ingest_task = f'''
INSERT INTO {landing_table} (
  raw,
  hash_raw,
  event_time,
  configuration_item_capture_time,
  account_id,
  aws_region,
  resource_type,
  arn,
  availability_zone,
  resource_creation_time,
  resource_name,
  resource_Id,
  relationships,
  configuration,
  tags
)
SELECT value raw
    , HASH(value) hash_raw
    , REGEXP_REPLACE(filename, '{date_regexp}', '{date_iso8601_backreferences}')::TIMESTAMP_LTZ event_time
    , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) configuration_item_capture_time
    , value:awsAccountId::STRING account_id
    , value:awsRegion::STRING aws_region
    , value:resourceType::STRING resource_type
    , value:ARN::STRING arn
    , value:availabilityZone::STRING availability_zone
    , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time
    , value:resourceName::STRING resource_name
    , value:resourceId::STRING resource_Id
    , value:relationships::VARIANT relationships
    , value:configuration::VARIANT configuration
    , value:tags::VARIANT tags
FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems)
WHERE ARRAY_SIZE(v:configurationItems) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_stream',
        target=f'data.{base_name}_staging',
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    # The fixed sleep covers only the low end of that window; the retry
    # wrapper is there for the remainder (presumably up to n attempts,
    # one second apart -- confirm against db.retry).
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(
                f"COPY INTO data.{base_name}_staging(v, filename) "
                f"FROM (SELECT $1, metadata$filename FROM @data.{base_name}_stage/)"
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    db.create_task(
        name=f'data.{base_name}_TASK',
        schedule='1 minute',
        warehouse=WAREHOUSE,
        sql=config_ingest_task,
    )

    # Only the first DESC PIPE row is needed; None signals "no such pipe".
    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    sqs_arn = pipe_description['notification_channel']
    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events:\n\n {sqs_arn}\n\n"
            f"To backfill the landing table with existing data, please run:\n\n ALTER PIPE {pipe} REFRESH;\n\n"
        ),
    }
def finalize(connection_name):
    """Create the stream, pipe and task that ingest Config events.

    Object names are derived from ``connection_name`` as
    ``data.CONFIG_<CONNECTION_NAME>_EVENTS_<suffix>``. The function creates
    a stream over the staging table, an auto-ingest pipe copying from the
    stage into the staging table, and a task that runs every minute to
    flatten ``configurationItems`` into the landing table. It then
    refreshes the pipe and looks up its notification channel.

    Args:
        connection_name: Name of the connection; interpolated into the
            Snowflake object names.

    Returns:
        A dict with keys ``'newStage'`` and ``'newMessage'``. The stage is
        ``'finalized'`` with a message containing the pipe's SQS ARN, or
        ``'error'`` if ``DESC PIPE`` returns no rows.
    """
    base_name = f'CONFIG_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    # The resourceType expression was previously aliased "aws_region" (a
    # copy of the line above it); it feeds the resource_type column, so it
    # is labelled accordingly. INSERT ... SELECT matches columns by
    # position, so the loaded data is unaffected.
    config_ingest_task = f'''
INSERT INTO {landing_table} (
  raw,
  hash_raw,
  event_time,
  account_id,
  aws_region,
  resource_type,
  arn,
  availability_zone,
  resource_creation_time,
  resource_name,
  resource_Id,
  relationships,
  configuration,
  tags
)
SELECT value raw
    , HASH(value) hash_raw
    , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) event_time
    , value:awsAccountId::STRING account_id
    , value:awsRegion::STRING aws_region
    , value:resourceType::STRING resource_type
    , value:ARN::STRING arn
    , value:availabilityZone::STRING availability_zone
    , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time
    , value:resourceName::STRING resource_name
    , value:resourceId::STRING resource_Id
    , value:relationships::VARIANT relationships
    , value:configuration::VARIANT configuration
    , value:tags::VARIANT tags
FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems)
WHERE ARRAY_SIZE(v:configurationItems) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_STREAM',
        target=f'data.{base_name}_STAGING',
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    # The fixed sleep covers only the low end of that window; the retry
    # wrapper is there for the remainder (presumably up to n attempts,
    # one second apart -- confirm against db.retry).
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=f"COPY INTO data.{base_name}_staging(v) FROM @data.{base_name}_stage/",
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    db.create_task(
        name=f'data.{base_name}_TASK',
        schedule='1 minute',
        warehouse=WAREHOUSE,
        sql=config_ingest_task,
    )

    db.execute(f"ALTER PIPE {pipe} REFRESH")

    pipe_description = list(db.fetch(f'DESC PIPE {pipe}'))
    if not pipe_description:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    sqs_arn = pipe_description[0]['notification_channel']
    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events: {sqs_arn}"
        ),
    }