def finalize(connection_name):
    """Finalize the LDAP logs connection: create its Snowpipe and report the SQS ARN.

    Reads optional overrides (``existing_stage``) from the YAML stored in the
    connection table's comment, creates the pipe with retries, then returns the
    next wizard stage and a user-facing message.
    """
    base_name = f'ldap_{connection_name}'
    pipe = f'data.{base_name}_pipe'

    # Connection options were stored as YAML in the landing table's comment.
    table = next(
        db.fetch(f"SHOW TABLES LIKE '{base_name}_connection' IN data"))
    options = yaml.safe_load(table['comment'])
    stage = options.get('existing_stage', f'data.{base_name}_stage')

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(f"COPY INTO data.{base_name}_connection "
                 f"FROM (SELECT $1, $2, $3, $4,"
                 f" to_timestamp_ltz($5, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($6, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($7, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($8, 'mm/dd/yyyy hh24:mi:ss (UTC)') "
                 f"FROM @{stage}/)"),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    # DESC PIPE exposes the SQS queue that S3 bucket notifications must target.
    sqs_arn = pipe_description['notification_channel']
    return {
        'newStage': 'finalized',
        'newMessage': (f"Please add this SQS Queue ARN to the bucket event notification "
                       f"channel for all object create events:\n\n {sqs_arn}\n\n"
                       f"If you'd like to backfill the table, please run\n\n ALTER PIPE {pipe} REFRESH;"
                       ),
    }
def finalize(connection_name):
    """Finalize the GitHub-webhooks connection: create its Snowpipe and report the SQS ARN."""
    base_name = f'GITHUB_WEBHOOKS_S3_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    # NOTE(review): '$1: before' below contains a stray space after the colon;
    # Snowflake appears to tolerate it, but confirm before "fixing".
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(f"COPY INTO data.{base_name}_connection "
                 f"FROM (SELECT current_timestamp, $1, HASH($1), $1:ref, $1: before, $1:after, $1:created, $1:deleted,"
                 f"$1:forced, $1:base_ref, $1:compare, $1:commits, $1:head_commit,"
                 f"$1:repository, $1:pusher, $1:organization, $1:sender, $1:action, $1:check_run, $1:check_suite, $1:number, $1:pull_request,"
                 f"$1:label, $1:requested_team, $1:ref_type, $1:master_branch, $1:description, $1:pusher_type, $1:review, $1:changes, $1:comment, "
                 f"$1:issue, $1:id, $1:sha, $1:name, $1:target_url, $1:context, $1:state, $1:commit, $1:branches, $1:created_at, $1:updated_at, $1:assignee, "
                 f"$1:release, $1:membership, $1:alert, $1:scope, $1:member, $1:requested_reviewer, $1:team, $1:starred_at, $1:pages, $1:project_card, "
                 f"$1:build, $1:deployment_status, $1:deployment, $1:forkee, $1:milestone, $1:key, $1:project_column, $1:status, $1:avatar_url FROM @data.{base_name}_stage/)"
                 ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    sqs_arn = pipe_description['notification_channel']
    return {
        'newStage': 'finalized',
        'newMessage': (f"Please add this SQS Queue ARN to the bucket event notification "
                       f"channel for all object create events:\n\n {sqs_arn}\n\n"
                       f"To backfill the landing table with existing data, please run:\n\n ALTER PIPE {pipe} REFRESH;\n\n"
                       ),
    }
def finalize(connection_name):
    """Finalize the osquery-log connection: create its Snowpipe and report the SQS ARN.

    Reads optional overrides (``existing_stage``) from the YAML stored in the
    connection table's comment, creates the pipe with retries, and returns the
    next wizard stage plus a user-facing message.
    """
    base_name = f'osquery_log_{connection_name}'
    table = next(
        db.fetch(f"SHOW TABLES LIKE '{base_name}_connection' IN data"))
    # Use safe_load: plain yaml.load without a Loader is deprecated and can
    # construct arbitrary Python objects from the comment text. This also
    # matches the other connectors in this file, which use safe_load.
    options = yaml.safe_load(table['comment'])
    stage = options.get('existing_stage', f'data.{base_name}_stage')
    pipe = f'data.{base_name}_pipe'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(f'COPY INTO data.{base_name}_connection '
                 f'FROM (SELECT PARSE_JSON($1), HASH($1), $1:unixTime::TIMESTAMP_LTZ(9), $1:action, '
                 f'$1:calendarTime, $1:columns, $1:counter, $1:epoch, $1:hostIdentifier, $1:instance_id, '
                 f'$1:name, $1:unixTime, $1:decorations '
                 f'FROM @{stage}/)'),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }
    else:
        # DESC PIPE exposes the SQS queue that bucket notifications must target.
        sqs_arn = pipe_description['notification_channel']
        return {
            'newStage': 'finalized',
            'newMessage': (f"Please add this SQS Queue ARN to the bucket event notification "
                           f"channel for all object create events:\n\n {sqs_arn}\n\n"
                           f"If you'd like to backfill the table, please run\n\n ALTER PIPE {pipe} REFRESH;"
                           ),
        }
def finalize(connection_name):
    """Finalize the VPC flow-log connection: create its Snowpipe and report the SQS ARN."""
    base_name = f'aws_vpc_flow_log_{connection_name}'
    pipe = f'data.{base_name}_pipe'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(
                f'COPY INTO data.{base_name}_connection '
                f'FROM (SELECT $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 '
                f'FROM @data.{base_name}_stage/)'
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    sqs_arn = pipe_description['notification_channel']
    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events:\n\n {sqs_arn}\n\n"
            f"If you'd like to backfill the table, please run\n\n ALTER PIPE {pipe} REFRESH;"
        ),
    }
def finalize(connection_name):
    """Finalize the AWS Config connection: staging stream, Snowpipe, and ingest task.

    The pipe lands raw files into a staging table; a 1-minute task flattens the
    ``configurationItems`` array from the staging stream into the landing table,
    deriving ``event_time`` from the timestamp embedded in the filename.
    """
    base_name = f'AWS_CONFIG_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    # Backslashes are doubled so they survive Snowflake SQL string parsing.
    DATE_REGEXP = r'.+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})Z.*'.replace("\\", "\\\\")
    DATE_ISO8601_BACKREFERENCES = r'\1-\2-\3T\4:\5:\6Z'.replace("\\", "\\\\")

    # Fix: the resourceType column previously carried a copy-pasted alias
    # "aws_region"; the INSERT maps columns positionally so behavior is
    # unchanged, but the alias now matches the target column (resource_type).
    config_ingest_task = f'''
INSERT INTO {landing_table} (
    raw, hash_raw, event_time, configuration_item_capture_time, account_id,
    aws_region, resource_type, arn, availability_zone, resource_creation_time,
    resource_name, resource_Id, relationships, configuration, tags
)
SELECT value raw
     , HASH(value) hash_raw
     , REGEXP_REPLACE(filename, '{DATE_REGEXP}', '{DATE_ISO8601_BACKREFERENCES}')::TIMESTAMP_LTZ event_time
     , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) configuration_item_capture_time
     , value:awsAccountId::STRING account_id
     , value:awsRegion::STRING aws_region
     , value:resourceType::STRING resource_type
     , value:ARN::STRING arn
     , value:availabilityZone::STRING availability_zone
     , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time
     , value:resourceName::STRING resource_name
     , value:resourceId::STRING resource_Id
     , value:relationships::VARIANT relationships
     , value:configuration::VARIANT configuration
     , value:tags::VARIANT tags
FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems)
WHERE ARRAY_SIZE(v:configurationItems) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_stream',
        target=f'data.{base_name}_staging'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(
                f"COPY INTO data.{base_name}_staging(v, filename) "
                f"FROM (SELECT $1, metadata$filename FROM @data.{base_name}_stage/)"
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=config_ingest_task)

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance."
        }
    else:
        # DESC PIPE exposes the SQS queue that bucket notifications must target.
        sqs_arn = pipe_description['notification_channel']
        return {
            'newStage': 'finalized',
            'newMessage': (
                f"Please add this SQS Queue ARN to the bucket event notification "
                f"channel for all object create events:\n\n {sqs_arn}\n\n"
                f"To backfill the landing table with existing data, please run:\n\n ALTER PIPE {pipe} REFRESH;\n\n"
            )
        }
def finalize(connection_name):
    """Finalize the CloudTrail connection: staging stream, Snowpipe, and ingest task.

    The pipe lands raw CloudTrail files into a staging table; a 1-minute task
    flattens the ``Records`` array from the staging stream into the landing
    table. Fix applied: the returned user message previously concatenated
    "notification" and "channel" with no separating space.
    """
    base_name = f'AWS_CLOUDTRAIL_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    # Step two: Configure the remainder once the role is properly configured.
    cloudtrail_ingest_task = f'''
INSERT INTO {landing_table} (
    insert_time, raw, hash_raw, event_time, aws_region, event_id, event_name,
    event_source, event_type, event_version, recipient_account_id, request_id,
    request_parameters, response_elements, source_ip_address, user_agent,
    user_identity, user_identity_type, user_identity_principal_id,
    user_identity_arn, user_identity_accountid, user_identity_invokedby,
    user_identity_access_key_id, user_identity_username,
    user_identity_session_context_attributes_mfa_authenticated,
    user_identity_session_context_attributes_creation_date,
    user_identity_session_context_session_issuer_type,
    user_identity_session_context_session_issuer_principal_id,
    user_identity_session_context_session_issuer_arn,
    user_identity_session_context_session_issuer_account_id,
    user_identity_session_context_session_issuer_user_name,
    error_code, error_message, additional_event_data, api_version, read_only,
    resources, service_event_details, shared_event_id, vpc_endpoint_id
)
SELECT CURRENT_TIMESTAMP() insert_time
     , value raw
     , HASH(value) hash_raw
     --- In the rare event of an unparsable timestamp, the following COALESCE keeps the pipeline from failing.
     --- Compare event_time to TRY_TO_TIMESTAMP(raw:eventTime::STRING) to establish if the timestamp was parsed.
     , COALESCE(
         TRY_TO_TIMESTAMP(value:eventTime::STRING)::TIMESTAMP_LTZ(9),
         CURRENT_TIMESTAMP()
       ) event_time
     , value:awsRegion::STRING aws_region
     , value:eventID::STRING event_id
     , value:eventName::STRING event_name
     , value:eventSource::STRING event_source
     , value:eventType::STRING event_type
     , value:eventVersion::STRING event_version
     , value:recipientAccountId::STRING recipient_account_id
     , value:requestID::STRING request_id
     , value:requestParameters::VARIANT request_parameters
     , value:responseElements::VARIANT response_elements
     , value:sourceIPAddress::STRING source_ip_address
     , value:userAgent::STRING user_agent
     , value:userIdentity::VARIANT user_identity
     , value:userIdentity.type::STRING user_identity_type
     , value:userIdentity.principalId::STRING user_identity_principal_id
     , value:userIdentity.arn::STRING user_identity_arn
     , value:userIdentity.accountId::STRING user_identity_accountid
     , value:userIdentity.invokedBy::STRING user_identity_invokedby
     , value:userIdentity.accessKeyId::STRING user_identity_access_key_id
     , value:userIdentity.userName::STRING user_identity_username
     , value:userIdentity.sessionContext.attributes.mfaAuthenticated::STRING user_identity_session_context_attributes_mfa_authenticated
     , value:userIdentity.sessionContext.attributes.creationDate::STRING user_identity_session_context_attributes_creation_date
     , value:userIdentity.sessionContext.sessionIssuer.type::STRING user_identity_session_context_session_issuer_type
     , value:userIdentity.sessionContext.sessionIssuer.principalId::STRING user_identity_session_context_session_issuer_principal_id
     , value:userIdentity.sessionContext.sessionIssuer.arn::STRING user_identity_session_context_session_issuer_arn
     , value:userIdentity.sessionContext.sessionIssuer.accountId::STRING user_identity_session_context_session_issuer_account_id
     , value:userIdentity.sessionContext.sessionIssuer.userName::STRING user_identity_session_context_session_issuer_user_name
     , value:errorCode::STRING error_code
     , value:errorMessage::STRING error_message
     , value:additionalEventData::VARIANT additional_event_data
     , value:apiVersion::STRING api_version
     , value:readOnly::BOOLEAN read_only
     , value:resources::VARIANT resources
     , value:serviceEventDetails::STRING service_event_details
     , value:sharedEventId::STRING shared_event_id
     , value:vpcEndpointId::STRING vpc_endpoint_id
FROM data.{base_name}_STREAM, table(flatten(input => v:Records))
WHERE ARRAY_SIZE(v:Records) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_STREAM',
        target=f'data.{base_name}_STAGING'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=f"COPY INTO data.{base_name}_STAGING(v) FROM @data.{base_name}_STAGE/",
            replace=True,
            autoingest=True
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=cloudtrail_ingest_task)

    # Backfill: ingest any files already present in the stage.
    db.execute(f"ALTER PIPE {pipe} REFRESH")

    pipe_description = list(db.fetch(f'DESC PIPE {pipe}'))
    if len(pipe_description) < 1:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} doesn't exist; please reach out to Snowflake Security for assistance."
        }
    else:
        sqs_arn = pipe_description[0]['notification_channel']
        return {
            'newStage': 'finalized',
            'newMessage': (
                f"Please add this SQS Queue ARN to the bucket event notification "
                f"channel for all object create events: {sqs_arn}"
            )
        }
def connect(connection_name, options):
    """Create the Azure landing table, stage, and (paused) pipe for one log type.

    ``options['connection_type']`` selects which of the three COPY statements
    ('operation', 'audit', 'signin') the pipe runs. The SAS token is encrypted
    and recorded, with the other options, as YAML in the table comment.
    """
    connection_type = options['connection_type']
    base_name = f"azure_{connection_name}_{connection_type}"
    account_name = options['account_name']
    container_name = options['container_name']
    suffix = options['suffix']
    sas_token = options['sas_token']
    sas_token_ct = vault.encrypt(sas_token)

    comment = f'''
---
module: azure
storage_account: {account_name}
container_name: {container_name}
suffix: {suffix}
sas_token: {sas_token_ct}
sa_user: {USER}
snowflake_account: {ACCOUNT}
database: {DATABASE}
'''

    db.create_stage(
        name=f'data.{base_name}_STAGE',
        url=f"azure://{account_name}.blob.{suffix}/{container_name}",
        cloud='azure',
        prefix='',
        credentials=sas_token,
        file_format=FILE_FORMAT)

    db.execute(
        f'GRANT USAGE ON STAGE data.{base_name}_STAGE TO ROLE {SA_ROLE}')

    db.create_table(
        name=f'data.{base_name}_CONNECTION',
        cols=LANDING_TABLES_COLUMNS[connection_type],
        comment=comment)

    db.execute(
        f'GRANT INSERT, SELECT ON data.{base_name}_CONNECTION TO ROLE {SA_ROLE}'
    )

    # NOTE(review): PROPERTIES_RESOURCE_LOCAATION ('operation'),
    # PROPERTIES_APPLIED_CONDITIONAL_ACESS_POLICIES ('signin'), and the
    # '$1:resultReason' path ('audit' — possibly meant $1:properties.resultReason)
    # look like typos, but the column names must match LANDING_TABLES_COLUMNS;
    # confirm there before renaming anything.
    pipe_sql = {
        'operation': f'''
COPY INTO DATA.{base_name}_CONNECTION(RAW, HASH_RAW, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID,
    DURATION_MS, IDENTITY, IDENTITY_AUTHORIZATION, IDENTITY_CLAIMS, LEVEL, LOCATION, OPERATION_NAME,
    PROPERTIES, PROPERTIES_ANCESTORS, PROPERTIES_IS_COMPLIANCE_CHECK, PROPERTIES_POLICIES,
    PROPERTIES_RESOURCE_LOCAATION, RESOURCE_ID, RESULT_SIGNATURE, RESULT_TYPE, EVENT_TIME, LOADED_ON)
FROM (
    SELECT $1, HASH($1), $1:callerIpAddress::STRING, $1:category::STRING, $1:correlationId::STRING,
        $1:durationMs::NUMBER, $1:identity::VARIANT, $1:identity.authorization::VARIANT,
        $1:identity.claims::VARIANT, $1:level::STRING, $1:location::STRING, $1:operationName::STRING,
        $1:properties::VARIANT, $1:properties.ancestors::STRING, $1:properties.isComplianceCheck::STRING,
        PARSE_JSON($1:properties.policies), $1:properties.resourceLocation::STRING, $1:resourceId::STRING,
        $1:resultSignature::STRING, $1:resultType::STRING, $1:time::TIMESTAMP_LTZ, CURRENT_TIMESTAMP()
    FROM @DATA.{base_name}_STAGE)
''',
        'audit': f'''
COPY INTO data.{base_name}_CONNECTION (RAW, HASH_RAW, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID,
    DURATION_MS, LEVEL, OPERATION_NAME, OPERATION_VERSION, PROPERTIES, PROPERTIES_ACTIVITY_DATE_TIME,
    PROPERTIES_ACTIVITY_DISPLAY_NAME, PROPERTIES_ADDITIONAL_DETAILS, PROPERTIES_CATEGORY,
    PROPERTIES_ID, PROPERTIES_INITIATED_BY, PROPERTIES_LOGGED_BY_SERVICE, PROPERTIES_OPERATION_TYPE,
    PROPERTIES_RESULT, PROPERTIES_RESULT_REASON, PROPERTIES_TARGET_RESOURCES, RESOURCE_ID,
    RESULT_SIGNATURE, TENANT_ID, EVENT_TIME, LOADED_ON)
FROM (
    SELECT $1, HASH($1), $1:callerIpAddress::STRING, $1:category::STRING, $1:correlationId::STRING,
        $1:durationMs::NUMBER, $1:level::STRING, $1:operationName::STRING, $1:operationVersion::STRING,
        $1:properties::VARIANT, $1:properties.activityDateTime::TIMESTAMP_LTZ,
        $1:properties.activityDisplayName::STRING, $1:properties.additionalDetails::VARIANT,
        $1:properties.category::STRING, $1:properties.id::STRING, $1:properties.initiatedBy::VARIANT,
        $1:properties.loggedByService::STRING, $1:properties.operationType::STRING,
        $1:properties.result::STRING, $1:resultReason::STRING, $1:properties.targetResources::VARIANT,
        $1:resourceId::STRING, $1:resultSignature::STRING, $1:tenantId::STRING,
        $1:time::TIMESTAMP_LTZ, CURRENT_TIMESTAMP()
    FROM @data.{base_name}_STAGE
)
''',
        'signin': f'''
COPY INTO DATA.{base_name}_CONNECTION (
    RAW, HASH_RAW, LEVEL, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID, DURATION_MS, IDENTITY,
    LOCATION, OPERATION_NAME, OPERATION_VERSION, PROPERTIES, PROPERTIES_APP_DISPLAY_NAME,
    PROPERTIES_APP_ID, PROPERTIES_APPLIED_CONDITIONAL_ACESS_POLICIES,
    PROPERTIES_AUTHENTICATION_METHODS_USED, PROPERTIES_AUTHENTICATION_PROCESSING_DETAILS,
    PROPERTIES_CLIENT_APP_USED, PROPERTIES_CONDITIONAL_ACCESS_STATUS, PROPERTIES_CREATED_DATE_TIME,
    PROPERTIES_DEVICE_DETAIL, PROPERTIES_ID, PROPERTIES_IP_ADDRESS, PROPERTIES_IS_INTERACTIVE,
    PROPERTIES_LOCATION, PROPERTIES_MFA_DETAIL, PROPERTIES_NETWORK_LOCATION,
    PROPERTIES_PROCESSING_TIME_IN_MILLISECONDS, PROPERTIES_RESOURCE_DISPLAY_NAME,
    PROPERTIES_RESOURCE_ID, PROPERTIES_RISK_DETAIL, PROPERTIES_RISK_EVENT_TYPES,
    PROPERTIES_RISK_LEVEL_AGGREGATED, PROPERTIES_RISK_LEVEL_DURING_SIGNIN, PROPERTIES_RISK_STATE,
    PROPERTIES_STATUS, PROPERTIES_TOKEN_ISSUER_TYPE, PROPERTIES_USER_DISPLAY_NAME,
    PROPERTIES_USER_ID, PROPERTIES_USER_PRINCIPAL_NAME, RESOURCE_ID, RESULT_DESCRIPTION,
    RESULT_SIGNATURE, RESULT_TYPE, TENANT_ID, EVENT_TIME, LOADED_ON
)
FROM (
    SELECT $1, HASH($1), $1:Level::NUMBER, $1:callerIpAddress::STRING, $1:category::STRING,
        $1:correlationId::STRING, $1:durationMs, $1:identity::STRING, $1:location::STRING,
        $1:operationName::STRING, $1:operationVersion::STRING, $1:properties::VARIANT,
        $1:properties.appDisplayName::STRING, $1:properties.appId::STRING,
        $1:properties.appliedConditionalAccessPolicies::VARIANT,
        $1:properties.authenticationMethodsUsed::VARIANT,
        $1:properties.authenticationProcessingDetails::VARIANT, $1:properties.clientAppUsed::STRING,
        $1:properties.conditionalAccessStatus::STRING, $1:properties.createdDateTime::TIMESTAMP_LTZ,
        $1:properties.deviceDetail::VARIANT, $1:properties.id::STRING, $1:properties.ipAddress::STRING,
        $1:properties.isInteractive::BOOLEAN, $1:properties.location::VARIANT,
        $1:properties.mfaDetail::VARIANT, $1:properties.networkLocationDetails::VARIANT,
        $1:properties.processingTimeInMilliseconds::NUMBER, $1:properties.resourceDisplayName::STRING,
        $1:properties.resourceId::STRING, $1:properties.riskDetail::STRING,
        $1:properties.riskEventTypes::VARIANT, $1:properties.riskLevelAggregated::STRING,
        $1:properties.riskLevelDuringSignIn::STRING, $1:properties.riskState::VARIANT,
        $1:properties.status::VARIANT, $1:properties.tokenIssuerType::STRING,
        $1:properties.userDisplayName::STRING, $1:properties.userId::STRING,
        $1:properties.userPrincipalName::STRING, $1:resourceId::STRING, $1:resultDescription::STRING,
        $1:resultSignature::STRING, $1:resultType::STRING, $1:tenantId::STRING,
        $1:time::TIMESTAMP_LTZ, CURRENT_TIMESTAMP()
    FROM @DATA.{base_name}_STAGE
)
'''
    }

    db.create_pipe(
        name=f"data.{base_name}_PIPE",
        sql=pipe_sql[options['connection_type']],
        replace=True)

    # The pipe stays paused until the finalize step enables ingestion.
    db.execute(
        f'ALTER PIPE data.{base_name}_PIPE SET PIPE_EXECUTION_PAUSED=true')
    db.execute(
        f'GRANT OWNERSHIP ON PIPE data.{base_name}_PIPE TO ROLE {SA_ROLE}')

    return {
        'newStage': 'finalized',
        'newMessage': 'Table, Stage, and Pipe created'
    }
def finalize(connection_name):
    """Finalize the Config connection: staging stream, Snowpipe, and ingest task.

    The pipe lands raw files into a staging table; a 1-minute task flattens the
    ``configurationItems`` array from the staging stream into the landing table.
    """
    base_name = f'CONFIG_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    # Fix: the resourceType column previously carried a copy-pasted alias
    # "aws_region"; the INSERT maps columns positionally so behavior is
    # unchanged, but the alias now matches the target column (resource_type).
    config_ingest_task = f'''
INSERT INTO {landing_table} (
    raw, hash_raw, event_time, account_id, aws_region, resource_type, arn,
    availability_zone, resource_creation_time, resource_name, resource_Id,
    relationships, configuration, tags
)
SELECT value raw
     , HASH(value) hash_raw
     , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) event_time
     , value:awsAccountId::STRING account_id
     , value:awsRegion::STRING aws_region
     , value:resourceType::STRING resource_type
     , value:ARN::STRING arn
     , value:availabilityZone::STRING availability_zone
     , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time
     , value:resourceName::STRING resource_name
     , value:resourceId::STRING resource_Id
     , value:relationships::VARIANT relationships
     , value:configuration::VARIANT configuration
     , value:tags::VARIANT tags
FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems)
WHERE ARRAY_SIZE(v:configurationItems) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_STREAM',
        target=f'data.{base_name}_STAGING'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=f"COPY INTO data.{base_name}_staging(v) FROM @data.{base_name}_stage/",
            replace=True,
            autoingest=True
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=config_ingest_task)

    # Backfill: ingest any files already present in the stage.
    db.execute(f"ALTER PIPE {pipe} REFRESH")

    pipe_description = list(db.fetch(f'DESC PIPE {pipe}'))
    if len(pipe_description) < 1:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance."
        }
    else:
        sqs_arn = pipe_description[0]['notification_channel']
        return {
            'newStage': 'finalized',
            'newMessage': (
                f"Please add this SQS Queue ARN to the bucket event notification "
                f"channel for all object create events: {sqs_arn}"
            )
        }
def finalize(connection_name):
    """Finalize the nginx-log connection: access-log and error-log pipes plus SQS ARNs.

    Creates two pipes — one over the stage's ``/access`` prefix and one over
    ``/error`` — then reports both notification-channel ARNs so the user can
    wire up S3 bucket notifications for each prefix.
    """
    base_name = f'nginx_log_{connection_name}'

    # Connection options were stored as YAML in the landing table's comment.
    table = next(db.fetch(f"SHOW TABLES LIKE '{base_name}_connection' IN data"))
    options = yaml.safe_load(table['comment'])
    stage = options.get('existing_stage', f'data.{base_name}_stage')
    pipe = f'data.{base_name}_pipe'
    error_pipe = f'data.{base_name}_error_pipe'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(
                f'COPY INTO data.{base_name}_connection '
                f"FROM (SELECT PARSE_JSON($1), HASH($1), regexp_substr($1:request::string, '.*request_?[iI]+d=([^\\\\&\\\\s]+)', 1, 1, 'e')"
                f", $1:time::timestamp_ltz, $1:bytes_sent::int, $1:connection::int, $1:connection_requests::int"
                f", nullif($1:deployment_cluster, '-')::string, nullif($1:gzip_ratio, '-')::float, $1:host_header::string"
                f", upper(split($1:\"host_header\"::string, '.')[0]), nullif(split(split($1:http_user_agent, '(' )[0], '/')[0], '-')::string , nullif($1:http_user_agent,'-')::string"
                f", nullif($1:http_xff, '-')::string, nullif($1:http_referer, '-')::string, regexp_substr($1:request, '^([A-Z]{{3,4}})\\\\s+.*', 1, 1, 'e')"
                f", $1:instance_id::string, $1:redirect_counter::int, $1:remote_address::string, $1:request::string, $1:request_time::float, $1:requests_length::int"
                f", nullif(strip_null_value($1:ssl_session_id),'-')::varchar, nullif(strip_null_value($1:ssl_session_reused),'-')::varchar, $1:status::int, $1:time"
                f", regexp_count($1:upstream_status, ' : ') + regexp_count($1:upstream_status, ', ') + 1::int "
                f", nullif(array_slice(split(array_slice(split($1:upstream_address, ' : '),-1,2)[0],', '),-1,2)[0],'-')::varchar "
                f", nullif(array_slice(split(array_slice(split($1:upstream_response_length, ' : '),-1,2)[0],', '),-1,2)[0],'-')::int "
                f", nullif(array_slice(split(array_slice(split($1:upstream_response_time, ' : '),-1,2)[0],', '),-1,2)[0],'-')::float "
                f", nullif(array_slice(split(array_slice(split($1:upstream_status, ' : '),-1,2)[0],', '),-1,2)[0],'-')::int "
                f'FROM @{stage}/access)'
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )
    db.retry(
        lambda: db.create_pipe(
            name=error_pipe,
            sql=(
                f"COPY INTO data.{base_name}_error_connection "
                f"FROM (SELECT PARSE_JSON($1), HASH($1), $1:instance_id::string, $1:log_level::string,"
                f" $1:message::string, $1:time::timestamp_ltz, $1:pid::int, $1:tid::int "
                f"FROM @{stage}/error)"
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    # The stage URL's fourth '/'-separated part is the bucket prefix; it is
    # echoed back to the user so they notify on the right folders.
    # NOTE(review): filter=('URL') is a plain string, not a 1-tuple — confirm
    # fetch_props accepts that before changing it.
    stage_props = db.fetch_props(f'DESC STAGE {stage}', filter=('URL'))
    stage_prefix = stage_props['URL'].split('/')[3]

    log_pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    error_pipe_description = next(db.fetch(f'DESC PIPE {error_pipe}'), None)

    if log_pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    if error_pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{error_pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    log_sqs_arn = log_pipe_description['notification_channel']
    error_sqs_arn = error_pipe_description['notification_channel']
    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events in the {stage_prefix}/access "
            f"folder of the bucket:\n\n {log_sqs_arn}\n\n"
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events in the {stage_prefix}/error "
            f"folder of the bucket:\n\n {error_sqs_arn}\n\n"
            f"Note that the two SQS Queue ARNs may be identical; this is normal.\n\n"
            f"If you'd like to backfill the table, please run\n\n"
            f" ALTER PIPE {pipe} REFRESH;\n"
            f" ALTER PIPE {error_pipe} REFRESH;"
        ),
    }