def finalize(connection_name): base_name = f'AWS_CONFIG_{connection_name}_EVENTS'.upper() pipe = f'data.{base_name}_PIPE' landing_table = f'data.{base_name}_CONNECTION' DATE_REGEXP = r'.+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})Z.*'.replace("\\", "\\\\") DATE_ISO8601_BACKREFERENCES = r'\1-\2-\3T\4:\5:\6Z'.replace("\\", "\\\\") config_ingest_task = f''' INSERT INTO {landing_table} ( raw, hash_raw, event_time, configuration_item_capture_time, account_id, aws_region, resource_type, arn, availability_zone, resource_creation_time, resource_name, resource_Id, relationships, configuration, tags ) SELECT value raw , HASH(value) hash_raw , REGEXP_REPLACE(filename, '{DATE_REGEXP}', '{DATE_ISO8601_BACKREFERENCES}')::TIMESTAMP_LTZ event_time , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) configuration_item_capture_time , value:awsAccountId::STRING account_id , value:awsRegion::STRING aws_region , value:resourceType::STRING aws_region , value:ARN::STRING arn , value:availabilityZone::STRING availability_zone , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time , value:resourceName::STRING resource_name , value:resourceId::STRING resource_Id , value:relationships::VARIANT relationships , value:configuration::VARIANT configuration , value:tags::VARIANT tags FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems) WHERE ARRAY_SIZE(v:configurationItems) > 0 ''' db.create_stream( name=f'data.{base_name}_stream', target=f'data.{base_name}_staging' ) # IAM change takes 5-15 seconds to take effect sleep(5) db.retry( lambda: db.create_pipe( name=pipe, sql=( f"COPY INTO data.{base_name}_staging(v, filename) " f"FROM (SELECT $1, metadata$filename FROM @data.{base_name}_stage/)" ), replace=True, autoingest=True, ), n=10, sleep_seconds_btw_retry=1 ) db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute', warehouse=WAREHOUSE, sql=config_ingest_task) pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None) if pipe_description is None: return { 'newStage': 'error', 'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance." } else: sqs_arn = pipe_description['notification_channel'] return { 'newStage': 'finalized', 'newMessage': ( f"Please add this SQS Queue ARN to the bucket event notification " f"channel for all object create events:\n\n {sqs_arn}\n\n" f"To backfill the landing table with existing data, please run:\n\n ALTER PIPE {pipe} REFRESH;\n\n" ) }
def finalize(connection_name): base_name = f'AWS_CLOUDTRAIL_{connection_name}_EVENTS'.upper() pipe = f'data.{base_name}_PIPE' landing_table = f'data.{base_name}_CONNECTION' # Step two: Configure the remainder once the role is properly configured. cloudtrail_ingest_task = f''' INSERT INTO {landing_table} ( insert_time, raw, hash_raw, event_time, aws_region, event_id, event_name, event_source, event_type, event_version, recipient_account_id, request_id, request_parameters, response_elements, source_ip_address, user_agent, user_identity, user_identity_type, user_identity_principal_id, user_identity_arn, user_identity_accountid, user_identity_invokedby, user_identity_access_key_id, user_identity_username, user_identity_session_context_attributes_mfa_authenticated, user_identity_session_context_attributes_creation_date, user_identity_session_context_session_issuer_type, user_identity_session_context_session_issuer_principal_id, user_identity_session_context_session_issuer_arn, user_identity_session_context_session_issuer_account_id, user_identity_session_context_session_issuer_user_name, error_code, error_message, additional_event_data, api_version, read_only, resources, service_event_details, shared_event_id, vpc_endpoint_id ) SELECT CURRENT_TIMESTAMP() insert_time , value raw , HASH(value) hash_raw --- In the rare event of an unparsable timestamp, the following COALESCE keeps the pipeline from failing. --- Compare event_time to TRY_TO_TIMESTAMP(raw:eventTime::STRING) to establish if the timestamp was parsed. , COALESCE( TRY_TO_TIMESTAMP(value:eventTime::STRING)::TIMESTAMP_LTZ(9), CURRENT_TIMESTAMP() ) event_time , value:awsRegion::STRING aws_region , value:eventID::STRING event_id , value:eventName::STRING event_name , value:eventSource::STRING event_source , value:eventType::STRING event_type , value:eventVersion::STRING event_version , value:recipientAccountId::STRING recipient_account_id , value:requestID::STRING request_id , value:requestParameters::VARIANT request_parameters , value:responseElements::VARIANT response_elements , value:sourceIPAddress::STRING source_ip_address , value:userAgent::STRING user_agent , value:userIdentity::VARIANT user_identity , value:userIdentity.type::STRING user_identity_type , value:userIdentity.principalId::STRING user_identity_principal_id , value:userIdentity.arn::STRING user_identity_arn , value:userIdentity.accountId::STRING user_identity_accountid , value:userIdentity.invokedBy::STRING user_identity_invokedby , value:userIdentity.accessKeyId::STRING user_identity_access_key_id , value:userIdentity.userName::STRING user_identity_username , value:userIdentity.sessionContext.attributes.mfaAuthenticated::STRING user_identity_session_context_attributes_mfa_authenticated , value:userIdentity.sessionContext.attributes.creationDate::STRING user_identity_session_context_attributes_creation_date , value:userIdentity.sessionContext.sessionIssuer.type::STRING user_identity_session_context_session_issuer_type , value:userIdentity.sessionContext.sessionIssuer.principalId::STRING user_identity_session_context_session_issuer_principal_id , value:userIdentity.sessionContext.sessionIssuer.arn::STRING user_identity_session_context_session_issuer_arn , value:userIdentity.sessionContext.sessionIssuer.accountId::STRING user_identity_session_context_session_issuer_account_id , value:userIdentity.sessionContext.sessionIssuer.userName::STRING user_identity_session_context_session_issuer_user_name , value:errorCode::STRING error_code , value:errorMessage::STRING error_message , value:additionalEventData::VARIANT additional_event_data , value:apiVersion::STRING api_version , value:readOnly::BOOLEAN read_only , value:resources::VARIANT resources , value:serviceEventDetails::STRING service_event_details , value:sharedEventId::STRING shared_event_id , value:vpcEndpointId::STRING vpc_endpoint_id FROM data.{base_name}_STREAM, table(flatten(input => v:Records)) WHERE ARRAY_SIZE(v:Records) > 0 ''' db.create_stream( name=f'data.{base_name}_STREAM', target=f'data.{base_name}_STAGING' ) # IAM change takes 5-15 seconds to take effect sleep(5) db.retry( lambda: db.create_pipe( name=pipe, sql=f"COPY INTO data.{base_name}_STAGING(v) FROM @data.{base_name}_STAGE/", replace=True, autoingest=True ), n=10, sleep_seconds_btw_retry=1 ) db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute', warehouse=WAREHOUSE, sql=cloudtrail_ingest_task) db.execute(f"ALTER PIPE {pipe} REFRESH") pipe_description = list(db.fetch(f'DESC PIPE {pipe}')) if len(pipe_description) < 1: return { 'newStage': 'error', 'newMessage': f"{pipe} doesn't exist; please reach out to Snowflake Security for assistance." } else: sqs_arn = pipe_description[0]['notification_channel'] return { 'newStage': 'finalized', 'newMessage': ( f"Please add this SQS Queue ARN to the bucket event notification" f"channel for all object create events: {sqs_arn}" ) }
def connect(connection_name, options): connection_type = options['connection_type'] base_name = f"azure_log_{connection_name}_{connection_type}" account_name = options['account_name'] container_name = options['container_name'] suffix = options['suffix'] cloud_type = options['cloud_type'] sas_token = options['sas_token'] comment = yaml_dump(module='azure_log') db.create_stage( name=f'data.{base_name}_stage', url=f"azure://{account_name}.blob.{suffix}/{container_name}", cloud='azure', prefix='', credentials=sas_token, file_format=FILE_FORMAT, ) db.execute( f'GRANT USAGE ON STAGE data.{base_name}_stage TO ROLE {SA_ROLE}') db.create_table( name=f'data.{base_name}_connection', cols=LANDING_TABLES_COLUMNS[connection_type], comment=comment, ifnotexists=True, ) db.execute( f'GRANT INSERT, SELECT ON data.{base_name}_connection TO ROLE {SA_ROLE}' ) external_table_columns = [( 'timestamp_part', 'TIMESTAMP_LTZ', GET_TIMESTAMP_FROM_FILENAME_SQL[connection_type], )] db.create_external_table( name=f'data.{base_name}_external', location=f'@data.{base_name}_stage', cols=external_table_columns, partition='timestamp_part', file_format=db.TypeOptions(type='JSON'), ) db.execute(f'GRANT SELECT ON data.{base_name}_external TO ROLE {SA_ROLE}') stored_proc_def = f""" var sql_command = "ALTER EXTERNAL TABLE data.{base_name}_external REFRESH"; try {{ snowflake.execute ({{sqlText: sql_command}}); return "Succeeded."; }} catch (err) {{ return "Failed: " + err; }} """ db.create_stored_procedure( name=f'data.{base_name}_procedure', args=[], return_type='string', executor='OWNER', definition=stored_proc_def, ) refresh_task_sql = f'CALL data.{base_name}_procedure()' db.create_task( name=f'data.{base_name}_refresh_task', warehouse=WAREHOUSE, schedule='5 minutes', sql=refresh_task_sql, ) select_statement_sql = { 'reg': (f"SELECT value " f"FROM data.{base_name}_external " f"WHERE timestamp_part >= DATEADD(HOUR, -2, CURRENT_TIMESTAMP())"), 'gov': (f"SELECT value FROM (" f" SELECT value AS a " f" FROM data.{base_name}_external" f" WHERE timestamp_part >= DATEADD(HOUR, -2, CURRENT_TIMESTAMP())" f"), LATERAL FLATTEN (INPUT => a:records)"), } insert_task_sql = { 'operation': f""" INSERT ( RAW, HASH_RAW, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID, DURATION_MS, IDENTITY, IDENTITY_AUTHORIZATION, IDENTITY_CLAIMS, LEVEL, LOCATION, OPERATION_NAME, PROPERTIES, PROPERTIES_ANCESTORS, PROPERTIES_IS_COMPLIANCE_CHECK, PROPERTIES_POLICIES, PROPERTIES_RESOURCE_LOCATION, RESOURCE_ID, RESULT_SIGNATURE, RESULT_TYPE, EVENT_TIME, LOADED_ON ) VALUES ( VALUE, HASH(VALUE), VALUE:callerIpAddress::STRING, VALUE:category::STRING, VALUE:correlationId::STRING, VALUE:durationMs::NUMBER, VALUE:identity::VARIANT, VALUE:identity.authorization::VARIANT, VALUE:identity.claims::VARIANT, VALUE:level::STRING, VALUE:location::STRING, VALUE:operationName::STRING, VALUE:properties::VARIANT, VALUE:properties.ancestors::STRING, VALUE:properties.isComplianceCheck::STRING, PARSE_JSON(VALUE:properties.policies),VALUE:properties.resourceLocation::STRING, VALUE:resourceId::STRING, VALUE:resultSignature::STRING,VALUE:resultType::STRING, value:time::TIMESTAMP_LTZ, CURRENT_TIMESTAMP() ) """, 'audit': f""" INSERT ( RAW, HASH_RAW, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID, DURATION_MS, LEVEL, OPERATION_NAME, OPERATION_VERSION, PROPERTIES, PROPERTIES_ACTIVITY_DATE_TIME, PROPERTIES_ACTIVITY_DISPLAY_NAME, PROPERTIES_ADDITIONAL_DETAILS, PROPERTIES_CATEGORY, PROPERTIES_ID, PROPERTIES_INITIATED_BY, PROPERTIES_LOGGED_BY_SERVICE, PROPERTIES_OPERATION_TYPE, PROPERTIES_RESULT, PROPERTIES_RESULT_REASON, PROPERTIES_TARGET_RESOURCES, RESOURCE_ID, RESULT_SIGNATURE, TENANT_ID, EVENT_TIME, LOADED_ON ) VALUES ( VALUE, HASH(VALUE), VALUE:callerIpAddress::STRING, VALUE:category::STRING, VALUE:correlationId::STRING, VALUE:durationMs::NUMBER, VALUE:level::STRING, VALUE:operationName::STRING, VALUE:operationVersion::STRING, VALUE:properties::VARIANT, VALUE:properties.activityDateTime::TIMESTAMP_LTZ, VALUE:properties.activityDisplayName::STRING, VALUE:properties.additionalDetails::VARIANT, VALUE:properties.category::STRING, VALUE:properties.id::STRING, VALUE:properties.initiatedBy::VARIANT, VALUE:properties.loggedByService::STRING, VALUE:properties.operationType::STRING, VALUE:properties.result::STRING, VALUE:resultReason::STRING, VALUE:properties.targetResources::VARIANT, VALUE:resourceId::STRING, VALUE:resultSignature::STRING, VALUE:tenantId::STRING, VALUE:time::TIMESTAMP_LTZ, CURRENT_TIMESTAMP() ) """, 'signin': f""" INSERT ( RAW, HASH_RAW, LEVEL, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID, DURATION_MS, IDENTITY, LOCATION, OPERATION_NAME, OPERATION_VERSION, PROPERTIES, PROPERTIES_APP_DISPLAY_NAME, PROPERTIES_APP_ID, PROPERTIES_APPLIED_CONDITIONAL_ACESS_POLICIES, PROPERTIES_AUTHENTICATION_METHODS_USED, PROPERTIES_AUTHENTICATION_PROCESSING_DETAILS, PROPERTIES_CLIENT_APP_USED, PROPERTIES_CONDITIONAL_ACCESS_STATUS, PROPERTIES_CREATED_DATE_TIME, PROPERTIES_DEVICE_DETAIL, PROPERTIES_ID, PROPERTIES_IP_ADDRESS, PROPERTIES_IS_INTERACTIVE, PROPERTIES_LOCATION, PROPERTIES_MFA_DETAIL, PROPERTIES_NETWORK_LOCATION, PROPERTIES_PROCESSING_TIME_IN_MILLISECONDS, PROPERTIES_RESOURCE_DISPLAY_NAME, PROPERTIES_RESOURCE_ID, PROPERTIES_RISK_DETAIL, PROPERTIES_RISK_EVENT_TYPES, PROPERTIES_RISK_LEVEL_AGGREGATED, PROPERTIES_RISK_LEVEL_DURING_SIGNIN, PROPERTIES_RISK_STATE, PROPERTIES_STATUS, PROPERTIES_TOKEN_ISSUER_TYPE, PROPERTIES_USER_DISPLAY_NAME, PROPERTIES_USER_ID, PROPERTIES_USER_PRINCIPAL_NAME, RESOURCE_ID, RESULT_DESCRIPTION, RESULT_SIGNATURE, RESULT_TYPE, TENANT_ID, EVENT_TIME, LOADED_ON ) VALUES ( VALUE, HASH(VALUE), VALUE:Level::NUMBER, VALUE:callerIpAddress::STRING, VALUE:category::STRING, VALUE:correlationId::STRING, VALUE:durationMs, VALUE:identity::STRING, VALUE:location::STRING, VALUE:operationName::STRING, VALUE:operationVersion::STRING, VALUE:properties::VARIANT, VALUE:properties.appDisplayName::STRING, VALUE:properties.appId::STRING, VALUE:properties.appliedConditionalAccessPolicies::VARIANT, VALUE:properties.authenticationMethodsUsed::VARIANT, VALUE:properties.authenticationProcessingDetails::VARIANT, VALUE:properties.clientAppUsed::STRING, VALUE:properties.conditionalAccessStatus::STRING, VALUE:properties.createdDateTime::TIMESTAMP_LTZ, VALUE:properties.deviceDetail::VARIANT, VALUE:properties.id::STRING, VALUE:properties.ipAddress::STRING, VALUE:properties.isInteractive::BOOLEAN, VALUE:properties.location::VARIANT, VALUE:properties.mfaDetail::VARIANT, VALUE:properties.networkLocationDetails::VARIANT, VALUE:properties.processingTimeInMilliseconds::NUMBER, VALUE:properties.resourceDisplayName::STRING, VALUE:properties.resourceId::STRING, VALUE:properties.riskDetail::STRING, VALUE:properties.riskEventTypes::VARIANT, VALUE:properties.riskLevelAggregated::STRING, VALUE:properties.riskLevelDuringSignIn::STRING, VALUE:properties.riskState::VARIANT, VALUE:properties.status::VARIANT, VALUE:properties.tokenIssuerType::STRING, VALUE:properties.userDisplayName::STRING, VALUE:properties.userId::STRING, VALUE:properties.userPrincipalName::STRING, VALUE:resourceId::STRING, VALUE:resultDescription::STRING, VALUE:resultSignature::STRING, VALUE:resultType::STRING, VALUE:tenantId::STRING, VALUE:time::TIMESTAMP_LTZ, CURRENT_TIMESTAMP() ) """, } ingest_task_sql = f""" MERGE INTO data.{base_name}_connection a USING ( {select_statement_sql[cloud_type]} ) b ON a.raw = b.value WHEN NOT MATCHED THEN {insert_task_sql[connection_type]} """ db.create_task( name=f'data.{base_name}_ingest_task', warehouse=WAREHOUSE, schedule=f'AFTER data.{base_name}_refresh_task', sql=ingest_task_sql, ) return { 'newStage': 'finalized', 'newMessage': 'Created Stage, Tables, Stored Procedure, and Tasks.', }
def finalize(connection_name): base_name = f'CONFIG_{connection_name}_EVENTS'.upper() pipe = f'data.{base_name}_PIPE' landing_table = f'data.{base_name}_CONNECTION' config_ingest_task = f''' INSERT INTO {landing_table} ( raw, hash_raw, event_time, account_id, aws_region, resource_type, arn, availability_zone, resource_creation_time, resource_name, resource_Id, relationships, configuration, tags ) SELECT value raw , HASH(value) hash_raw , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) event_time , value:awsAccountId::STRING account_id , value:awsRegion::STRING aws_region , value:resourceType::STRING aws_region , value:ARN::STRING arn , value:availabilityZone::STRING availability_zone , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time , value:resourceName::STRING resource_name , value:resourceId::STRING resource_Id , value:relationships::VARIANT relationships , value:configuration::VARIANT configuration , value:tags::VARIANT tags FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems) WHERE ARRAY_SIZE(v:configurationItems) > 0 ''' db.create_stream( name=f'data.{base_name}_STREAM', target=f'data.{base_name}_STAGING' ) # IAM change takes 5-15 seconds to take effect sleep(5) db.retry( lambda: db.create_pipe( name=pipe, sql=f"COPY INTO data.{base_name}_staging(v) FROM @data.{base_name}_stage/", replace=True, autoingest=True ), n=10, sleep_seconds_btw_retry=1 ) db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute', warehouse=WAREHOUSE, sql=config_ingest_task) db.execute(f"ALTER PIPE {pipe} REFRESH") pipe_description = list(db.fetch(f'DESC PIPE {pipe}')) if len(pipe_description) < 1: return { 'newStage': 'error', 'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance." } else: sqs_arn = pipe_description[0]['notification_channel'] return { 'newStage': 'finalized', 'newMessage': ( f"Please add this SQS Queue ARN to the bucket event notification " f"channel for all object create events: {sqs_arn}" ) }