Пример #1
0
def finalize(connection_name):
    base_name = f'ldap_{connection_name}'
    pipe = f'data.{base_name}_pipe'
    table = next(
        db.fetch(f"SHOW TABLES LIKE '{base_name}_connection' IN data"))
    options = yaml.safe_load(table['comment'])
    stage = options.get('existing_stage', f'data.{base_name}_stage')

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(f"COPY INTO data.{base_name}_connection "
                 f"FROM (SELECT $1, $2, $3, $4,"
                 f" to_timestamp_ltz($5, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($6, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($7, 'mm/dd/yyyy hh24:mi:ss (UTC)'),"
                 f" to_timestamp_ltz($8, 'mm/dd/yyyy hh24:mi:ss (UTC)') "
                 f"FROM @{stage}/)"),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage':
            'error',
            'newMessage':
            f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    else:
        sqs_arn = pipe_description['notification_channel']
        return {
            'newStage':
            'finalized',
            'newMessage':
            (f"Please add this SQS Queue ARN to the bucket event notification "
             f"channel for all object create events:\n\n  {sqs_arn}\n\n"
             f"If you'd like to backfill the table, please run\n\n  ALTER PIPE {pipe} REFRESH;"
             ),
        }
Пример #2
0
def finalize(connection_name):
    base_name = f'GITHUB_WEBHOOKS_S3_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql
            =(f"COPY INTO data.{base_name}_connection "
              f"FROM (SELECT current_timestamp, $1, HASH($1), $1:ref, $1: before, $1:after, $1:created, $1:deleted,"
              f"$1:forced, $1:base_ref, $1:compare, $1:commits, $1:head_commit,"
              f"$1:repository, $1:pusher, $1:organization, $1:sender, $1:action, $1:check_run, $1:check_suite, $1:number, $1:pull_request,"
              f"$1:label, $1:requested_team, $1:ref_type, $1:master_branch, $1:description, $1:pusher_type, $1:review, $1:changes, $1:comment, "
              f"$1:issue, $1:id, $1:sha, $1:name, $1:target_url, $1:context, $1:state, $1:commit, $1:branches, $1:created_at, $1:updated_at, $1:assignee, "
              f"$1:release, $1:membership, $1:alert, $1:scope, $1:member, $1:requested_reviewer, $1:team, $1:starred_at, $1:pages, $1:project_card, "
              f"$1:build, $1:deployment_status, $1:deployment, $1:forkee, $1:milestone, $1:key, $1:project_column, $1:status, $1:avatar_url FROM @data.{base_name}_stage/)"
              ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage':
            'error',
            'newMessage':
            f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }
    else:
        sqs_arn = pipe_description['notification_channel']

    return {
        'newStage':
        'finalized',
        'newMessage':
        (f"Please add this SQS Queue ARN to the bucket event notification "
         f"channel for all object create events:\n\n  {sqs_arn}\n\n"
         f"To backfill the landing table with existing data, please run:\n\n  ALTER PIPE {pipe} REFRESH;\n\n"
         ),
    }
Пример #3
0
def finalize(connection_name):
    base_name = f'osquery_log_{connection_name}'
    table = next(
        db.fetch(f"SHOW TABLES LIKE '{base_name}_connection' IN data"))
    options = yaml.load(table['comment'])
    stage = options.get('existing_stage', f'data.{base_name}_stage')
    pipe = f'data.{base_name}_pipe'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql
            =(f'COPY INTO data.{base_name}_connection '
              f'FROM (SELECT PARSE_JSON($1), HASH($1), $1:unixTime::TIMESTAMP_LTZ(9), $1:action, '
              f'$1:calendarTime, $1:columns, $1:counter, $1:epoch, $1:hostIdentifier, $1:instance_id, '
              f'$1:name, $1:unixTime, $1:decorations '
              f'FROM @{stage}/)'),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage':
            'error',
            'newMessage':
            f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    else:
        sqs_arn = pipe_description['notification_channel']
        return {
            'newStage':
            'finalized',
            'newMessage':
            (f"Please add this SQS Queue ARN to the bucket event notification "
             f"channel for all object create events:\n\n  {sqs_arn}\n\n"
             f"If you'd like to backfill the table, please run\n\n  ALTER PIPE {pipe} REFRESH;"
             ),
        }
Пример #4
0
def finalize(connection_name):
    base_name = f'aws_vpc_flow_log_{connection_name}'
    pipe = f'data.{base_name}_pipe'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(
                f'COPY INTO data.{base_name}_connection '
                f'FROM (SELECT $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 '
                f'FROM @data.{base_name}_stage/)'
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    else:
        sqs_arn = pipe_description['notification_channel']
        return {
            'newStage': 'finalized',
            'newMessage': (
                f"Please add this SQS Queue ARN to the bucket event notification "
                f"channel for all object create events:\n\n  {sqs_arn}\n\n"
                f"If you'd like to backfill the table, please run\n\n  ALTER PIPE {pipe} REFRESH;"
            ),
        }
Пример #5
0
def finalize(connection_name):
    base_name = f'AWS_CONFIG_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    DATE_REGEXP = r'.+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})Z.*'.replace("\\", "\\\\")
    DATE_ISO8601_BACKREFERENCES = r'\1-\2-\3T\4:\5:\6Z'.replace("\\", "\\\\")

    config_ingest_task = f'''
INSERT INTO {landing_table} (
  raw, hash_raw, event_time, configuration_item_capture_time, account_id, aws_region, resource_type, arn,
  availability_zone, resource_creation_time, resource_name, resource_Id, relationships, configuration, tags
)
SELECT value raw
    , HASH(value) hash_raw
    , REGEXP_REPLACE(filename, '{DATE_REGEXP}', '{DATE_ISO8601_BACKREFERENCES}')::TIMESTAMP_LTZ event_time
    , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) configuration_item_capture_time
    , value:awsAccountId::STRING account_id
    , value:awsRegion::STRING aws_region
    , value:resourceType::STRING aws_region
    , value:ARN::STRING arn
    , value:availabilityZone::STRING availability_zone
    , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time
    , value:resourceName::STRING resource_name
    , value:resourceId::STRING resource_Id
    , value:relationships::VARIANT relationships
    , value:configuration::VARIANT configuration
    , value:tags::VARIANT tags
FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems)
WHERE ARRAY_SIZE(v:configurationItems) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_stream',
        target=f'data.{base_name}_staging'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(
                f"COPY INTO data.{base_name}_staging(v, filename) "
                f"FROM (SELECT $1, metadata$filename FROM @data.{base_name}_stage/)"
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=config_ingest_task)

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance."
        }
    else:
        sqs_arn = pipe_description['notification_channel']

    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events:\n\n  {sqs_arn}\n\n"
            f"To backfill the landing table with existing data, please run:\n\n  ALTER PIPE {pipe} REFRESH;\n\n"
        )
    }
Пример #6
0
def finalize(connection_name):
    base_name = f'AWS_CLOUDTRAIL_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    # Step two: Configure the remainder once the role is properly configured.
    cloudtrail_ingest_task = f'''
INSERT INTO {landing_table} (
  insert_time, raw, hash_raw, event_time, aws_region, event_id, event_name, event_source, event_type,
  event_version, recipient_account_id, request_id, request_parameters, response_elements, source_ip_address,
  user_agent, user_identity, user_identity_type, user_identity_principal_id, user_identity_arn,
  user_identity_accountid, user_identity_invokedby, user_identity_access_key_id, user_identity_username,
  user_identity_session_context_attributes_mfa_authenticated, user_identity_session_context_attributes_creation_date,
  user_identity_session_context_session_issuer_type, user_identity_session_context_session_issuer_principal_id,
  user_identity_session_context_session_issuer_arn, user_identity_session_context_session_issuer_account_id,
  user_identity_session_context_session_issuer_user_name, error_code, error_message, additional_event_data,
  api_version, read_only, resources, service_event_details, shared_event_id, vpc_endpoint_id
)
SELECT CURRENT_TIMESTAMP() insert_time
    , value raw
    , HASH(value) hash_raw
    --- In the rare event of an unparsable timestamp, the following COALESCE keeps the pipeline from failing.
    --- Compare event_time to TRY_TO_TIMESTAMP(raw:eventTime::STRING) to establish if the timestamp was parsed.
    , COALESCE(
        TRY_TO_TIMESTAMP(value:eventTime::STRING)::TIMESTAMP_LTZ(9),
        CURRENT_TIMESTAMP()
      ) event_time
    , value:awsRegion::STRING aws_region
    , value:eventID::STRING event_id
    , value:eventName::STRING event_name
    , value:eventSource::STRING event_source
    , value:eventType::STRING event_type
    , value:eventVersion::STRING event_version
    , value:recipientAccountId::STRING recipient_account_id
    , value:requestID::STRING request_id
    , value:requestParameters::VARIANT request_parameters
    , value:responseElements::VARIANT response_elements
    , value:sourceIPAddress::STRING source_ip_address
    , value:userAgent::STRING user_agent
    , value:userIdentity::VARIANT user_identity
    , value:userIdentity.type::STRING user_identity_type
    , value:userIdentity.principalId::STRING user_identity_principal_id
    , value:userIdentity.arn::STRING user_identity_arn
    , value:userIdentity.accountId::STRING user_identity_accountid
    , value:userIdentity.invokedBy::STRING user_identity_invokedby
    , value:userIdentity.accessKeyId::STRING user_identity_access_key_id
    , value:userIdentity.userName::STRING user_identity_username
    , value:userIdentity.sessionContext.attributes.mfaAuthenticated::STRING user_identity_session_context_attributes_mfa_authenticated
    , value:userIdentity.sessionContext.attributes.creationDate::STRING user_identity_session_context_attributes_creation_date
    , value:userIdentity.sessionContext.sessionIssuer.type::STRING user_identity_session_context_session_issuer_type
    , value:userIdentity.sessionContext.sessionIssuer.principalId::STRING user_identity_session_context_session_issuer_principal_id
    , value:userIdentity.sessionContext.sessionIssuer.arn::STRING user_identity_session_context_session_issuer_arn
    , value:userIdentity.sessionContext.sessionIssuer.accountId::STRING user_identity_session_context_session_issuer_account_id
    , value:userIdentity.sessionContext.sessionIssuer.userName::STRING user_identity_session_context_session_issuer_user_name
    , value:errorCode::STRING error_code
    , value:errorMessage::STRING error_message
    , value:additionalEventData::VARIANT additional_event_data
    , value:apiVersion::STRING api_version
    , value:readOnly::BOOLEAN read_only
    , value:resources::VARIANT resources
    , value:serviceEventDetails::STRING service_event_details
    , value:sharedEventId::STRING shared_event_id
    , value:vpcEndpointId::STRING vpc_endpoint_id
FROM data.{base_name}_STREAM, table(flatten(input => v:Records))
WHERE ARRAY_SIZE(v:Records) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_STREAM',
        target=f'data.{base_name}_STAGING'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=f"COPY INTO data.{base_name}_STAGING(v) FROM @data.{base_name}_STAGE/",
            replace=True,
            autoingest=True
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=cloudtrail_ingest_task)

    db.execute(f"ALTER PIPE {pipe} REFRESH")

    pipe_description = list(db.fetch(f'DESC PIPE {pipe}'))
    if len(pipe_description) < 1:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} doesn't exist; please reach out to Snowflake Security for assistance."
        }
    else:
        sqs_arn = pipe_description[0]['notification_channel']

    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification"
            f"channel for all object create events: {sqs_arn}"
        )
    }
Пример #7
0
def connect(connection_name, options):
    connection_type = options['connection_type']

    base_name = f"azure_{connection_name}_{connection_type}"
    account_name = options['account_name']
    container_name = options['container_name']
    suffix = options['suffix']
    sas_token = options['sas_token']
    sas_token_ct = vault.encrypt(sas_token)

    comment = f'''
---
module: azure
storage_account: {account_name}
container_name: {container_name}
suffix: {suffix}
sas_token: {sas_token_ct}
sa_user: {USER}
snowflake_account: {ACCOUNT}
database: {DATABASE}
'''

    db.create_stage(
        name=f'data.{base_name}_STAGE',
        url=f"azure://{account_name}.blob.{suffix}/{container_name}",
        cloud='azure',
        prefix='',
        credentials=sas_token,
        file_format=FILE_FORMAT)

    db.execute(
        f'GRANT USAGE ON STAGE data.{base_name}_STAGE TO ROLE {SA_ROLE}')

    db.create_table(name=f'data.{base_name}_CONNECTION',
                    cols=LANDING_TABLES_COLUMNS[connection_type],
                    comment=comment)

    db.execute(
        f'GRANT INSERT, SELECT ON data.{base_name}_CONNECTION TO ROLE {SA_ROLE}'
    )

    pipe_sql = {
        'operation':
        f'''
COPY INTO DATA.{base_name}_CONNECTION(RAW, HASH_RAW, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID, DURATION_MS,
                                 IDENTITY, IDENTITY_AUTHORIZATION, IDENTITY_CLAIMS, LEVEL, LOCATION,
                                 OPERATION_NAME, PROPERTIES, PROPERTIES_ANCESTORS, PROPERTIES_IS_COMPLIANCE_CHECK,
                                 PROPERTIES_POLICIES, PROPERTIES_RESOURCE_LOCAATION, RESOURCE_ID, RESULT_SIGNATURE,
                                 RESULT_TYPE, EVENT_TIME, LOADED_ON)
FROM (
    SELECT $1, HASH($1), $1:callerIpAddress::STRING, $1:category::STRING, $1:correlationId::STRING,
        $1:durationMs::NUMBER, $1:identity::VARIANT, $1:identity.authorization::VARIANT, $1:identity.claims::VARIANT,
        $1:level::STRING, $1:location::STRING, $1:operationName::STRING, $1:properties::VARIANT,
        $1:properties.ancestors::STRING, $1:properties.isComplianceCheck::STRING, PARSE_JSON($1:properties.policies),
        $1:properties.resourceLocation::STRING, $1:resourceId::STRING, $1:resultSignature::STRING,
        $1:resultType::STRING, $1:time::TIMESTAMP_LTZ, CURRENT_TIMESTAMP()
    FROM @DATA.{base_name}_STAGE)
''',
        'audit':
        f'''
COPY INTO data.{base_name}_CONNECTION (RAW, HASH_RAW, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID,
                                  DURATION_MS, LEVEL, OPERATION_NAME, OPERATION_VERSION, PROPERTIES,
                                  PROPERTIES_ACTIVITY_DATE_TIME, PROPERTIES_ACTIVITY_DISPLAY_NAME,
                                  PROPERTIES_ADDITIONAL_DETAILS, PROPERTIES_CATEGORY, PROPERTIES_ID,
                                  PROPERTIES_INITIATED_BY, PROPERTIES_LOGGED_BY_SERVICE, PROPERTIES_OPERATION_TYPE,
                                  PROPERTIES_RESULT, PROPERTIES_RESULT_REASON, PROPERTIES_TARGET_RESOURCES,
                                  RESOURCE_ID, RESULT_SIGNATURE, TENANT_ID, EVENT_TIME, LOADED_ON)
FROM (
    SELECT $1, HASH($1), $1:callerIpAddress::STRING, $1:category::STRING, $1:correlationId::STRING,
        $1:durationMs::NUMBER, $1:level::STRING, $1:operationName::STRING, $1:operationVersion::STRING,
        $1:properties::VARIANT, $1:properties.activityDateTime::TIMESTAMP_LTZ,
        $1:properties.activityDisplayName::STRING, $1:properties.additionalDetails::VARIANT,
        $1:properties.category::STRING, $1:properties.id::STRING, $1:properties.initiatedBy::VARIANT,
        $1:properties.loggedByService::STRING, $1:properties.operationType::STRING, $1:properties.result::STRING,
        $1:resultReason::STRING, $1:properties.targetResources::VARIANT, $1:resourceId::STRING,
        $1:resultSignature::STRING, $1:tenantId::STRING, $1:time::TIMESTAMP_LTZ, CURRENT_TIMESTAMP()
  FROM @data.{base_name}_STAGE
)
''',
        'signin':
        f'''
COPY INTO DATA.{base_name}_CONNECTION (
    RAW, HASH_RAW, LEVEL, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID, DURATION_MS,
    IDENTITY, LOCATION, OPERATION_NAME, OPERATION_VERSION, PROPERTIES,
    PROPERTIES_APP_DISPLAY_NAME, PROPERTIES_APP_ID,
    PROPERTIES_APPLIED_CONDITIONAL_ACESS_POLICIES, PROPERTIES_AUTHENTICATION_METHODS_USED,
    PROPERTIES_AUTHENTICATION_PROCESSING_DETAILS, PROPERTIES_CLIENT_APP_USED,
    PROPERTIES_CONDITIONAL_ACCESS_STATUS, PROPERTIES_CREATED_DATE_TIME,
    PROPERTIES_DEVICE_DETAIL, PROPERTIES_ID, PROPERTIES_IP_ADDRESS, PROPERTIES_IS_INTERACTIVE, PROPERTIES_LOCATION,
    PROPERTIES_MFA_DETAIL, PROPERTIES_NETWORK_LOCATION, PROPERTIES_PROCESSING_TIME_IN_MILLISECONDS,
    PROPERTIES_RESOURCE_DISPLAY_NAME, PROPERTIES_RESOURCE_ID, PROPERTIES_RISK_DETAIL,
    PROPERTIES_RISK_EVENT_TYPES, PROPERTIES_RISK_LEVEL_AGGREGATED, PROPERTIES_RISK_LEVEL_DURING_SIGNIN,
    PROPERTIES_RISK_STATE, PROPERTIES_STATUS, PROPERTIES_TOKEN_ISSUER_TYPE, PROPERTIES_USER_DISPLAY_NAME,
    PROPERTIES_USER_ID, PROPERTIES_USER_PRINCIPAL_NAME, RESOURCE_ID, RESULT_DESCRIPTION, RESULT_SIGNATURE,
    RESULT_TYPE, TENANT_ID, EVENT_TIME, LOADED_ON
)
FROM (
    SELECT $1, HASH($1), $1:Level::NUMBER, $1:callerIpAddress::STRING, $1:category::STRING, $1:correlationId::STRING,
        $1:durationMs, $1:identity::STRING, $1:location::STRING, $1:operationName::STRING,
        $1:operationVersion::STRING, $1:properties::VARIANT, $1:properties.appDisplayName::STRING,
        $1:properties.appId::STRING, $1:properties.appliedConditionalAccessPolicies::VARIANT,
        $1:properties.authenticationMethodsUsed::VARIANT, $1:properties.authenticationProcessingDetails::VARIANT,
        $1:properties.clientAppUsed::STRING, $1:properties.conditionalAccessStatus::STRING,
        $1:properties.createdDateTime::TIMESTAMP_LTZ, $1:properties.deviceDetail::VARIANT, $1:properties.id::STRING,
        $1:properties.ipAddress::STRING, $1:properties.isInteractive::BOOLEAN, $1:properties.location::VARIANT,
        $1:properties.mfaDetail::VARIANT, $1:properties.networkLocationDetails::VARIANT,
        $1:properties.processingTimeInMilliseconds::NUMBER, $1:properties.resourceDisplayName::STRING,
        $1:properties.resourceId::STRING, $1:properties.riskDetail::STRING, $1:properties.riskEventTypes::VARIANT,
        $1:properties.riskLevelAggregated::STRING, $1:properties.riskLevelDuringSignIn::STRING,
        $1:properties.riskState::VARIANT, $1:properties.status::VARIANT, $1:properties.tokenIssuerType::STRING,
        $1:properties.userDisplayName::STRING, $1:properties.userId::STRING, $1:properties.userPrincipalName::STRING,
        $1:resourceId::STRING, $1:resultDescription::STRING, $1:resultSignature::STRING, $1:resultType::STRING,
        $1:tenantId::STRING, $1:time::TIMESTAMP_LTZ,
        CURRENT_TIMESTAMP()
    FROM @DATA.{base_name}_STAGE
)
'''
    }

    db.create_pipe(name=f"data.{base_name}_PIPE",
                   sql=pipe_sql[options['connection_type']],
                   replace=True)

    db.execute(
        f'ALTER PIPE data.{base_name}_PIPE SET PIPE_EXECUTION_PAUSED=true')
    db.execute(
        f'GRANT OWNERSHIP ON PIPE data.{base_name}_PIPE TO ROLE {SA_ROLE}')

    return {
        'newStage': 'finalized',
        'newMessage': 'Table, Stage, and Pipe created'
    }
Пример #8
0
def finalize(connection_name):
    base_name = f'CONFIG_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    config_ingest_task = f'''
INSERT INTO {landing_table} (
  raw, hash_raw, event_time, account_id, aws_region, resource_type, arn, availability_zone,
  resource_creation_time, resource_name, resource_Id, relationships, configuration, tags
)
SELECT value raw
    , HASH(value) hash_raw
    , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) event_time
    , value:awsAccountId::STRING account_id
    , value:awsRegion::STRING aws_region
    , value:resourceType::STRING aws_region
    , value:ARN::STRING arn
    , value:availabilityZone::STRING availability_zone
    , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time
    , value:resourceName::STRING resource_name
    , value:resourceId::STRING resource_Id
    , value:relationships::VARIANT relationships
    , value:configuration::VARIANT configuration
    , value:tags::VARIANT tags
FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems)
WHERE ARRAY_SIZE(v:configurationItems) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_STREAM',
        target=f'data.{base_name}_STAGING'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=f"COPY INTO data.{base_name}_staging(v) FROM @data.{base_name}_stage/",
            replace=True,
            autoingest=True
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=config_ingest_task)

    db.execute(f"ALTER PIPE {pipe} REFRESH")

    pipe_description = list(db.fetch(f'DESC PIPE {pipe}'))
    if len(pipe_description) < 1:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance."
        }
    else:
        sqs_arn = pipe_description[0]['notification_channel']

    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events: {sqs_arn}"
        )
    }
Пример #9
0
def finalize(connection_name):
    base_name = f'nginx_log_{connection_name}'
    table = next(db.fetch(f"SHOW TABLES LIKE '{base_name}_connection' IN data"))
    options = yaml.safe_load(table['comment'])
    stage = options.get('existing_stage', f'data.{base_name}_stage')
    pipe = f'data.{base_name}_pipe'
    error_pipe = f'data.{base_name}_error_pipe'

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(
                f'COPY INTO data.{base_name}_connection '
                f"FROM (SELECT PARSE_JSON($1), HASH($1), regexp_substr($1:request::string, '.*request_?[iI]+d=([^\\\\&\\\\s]+)', 1, 1, 'e')"
                f", $1:time::timestamp_ltz, $1:bytes_sent::int, $1:connection::int, $1:connection_requests::int"
                f", nullif($1:deployment_cluster, '-')::string, nullif($1:gzip_ratio, '-')::float, $1:host_header::string"
                f", upper(split($1:\"host_header\"::string, '.')[0]), nullif(split(split($1:http_user_agent, '(' )[0], '/')[0], '-')::string , nullif($1:http_user_agent,'-')::string"
                f", nullif($1:http_xff, '-')::string, nullif($1:http_referer, '-')::string, regexp_substr($1:request, '^([A-Z]{{3,4}})\\\\s+.*', 1, 1, 'e')"
                f", $1:instance_id::string, $1:redirect_counter::int, $1:remote_address::string, $1:request::string, $1:request_time::float, $1:requests_length::int"
                f", nullif(strip_null_value($1:ssl_session_id),'-')::varchar, nullif(strip_null_value($1:ssl_session_reused),'-')::varchar, $1:status::int, $1:time"
                f", regexp_count($1:upstream_status, ' : ') + regexp_count($1:upstream_status, ', ') + 1::int "
                f", nullif(array_slice(split(array_slice(split($1:upstream_address, ' : '),-1,2)[0],', '),-1,2)[0],'-')::varchar "
                f", nullif(array_slice(split(array_slice(split($1:upstream_response_length, ' : '),-1,2)[0],', '),-1,2)[0],'-')::int "
                f", nullif(array_slice(split(array_slice(split($1:upstream_response_time, ' : '),-1,2)[0],', '),-1,2)[0],'-')::float "
                f", nullif(array_slice(split(array_slice(split($1:upstream_status, ' : '),-1,2)[0],', '),-1,2)[0],'-')::int "
                f'FROM @{stage}/access)'
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    db.retry(
        lambda: db.create_pipe(
            name=error_pipe,
            sql=(
                f"COPY INTO data.{base_name}_error_connection "
                f"FROM (SELECT PARSE_JSON($1), HASH($1), $1:instance_id::string, $1:log_level::string,"
                f" $1:message::string, $1:time::timestamp_ltz, $1:pid::int, $1:tid::int "
                f"FROM @{stage}/error)"
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1,
    )

    stage_props = db.fetch_props(f'DESC STAGE {stage}', filter=('URL'))
    stage_prefix = stage_props['URL'].split('/')[3]
    log_pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    error_pipe_description = next(db.fetch(f'DESC PIPE {error_pipe}'), None)
    if log_pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }
    elif error_pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{error_pipe} does not exist; please reach out to Snowflake Security for assistance.",
        }

    else:
        log_sqs_arn = log_pipe_description['notification_channel']
        error_sqs_arn = error_pipe_description['notification_channel']
        return {
            'newStage': 'finalized',
            'newMessage': (
                f"Please add this SQS Queue ARN to the bucket event notification "
                f"channel for all object create events in the {stage_prefix}/access "
                f"folder of the bucket:\n\n  {log_sqs_arn}\n\n"
                f"Please add this SQS Queue ARN to the bucket event notification "
                f"channel for all object create events in the {stage_prefix}/error "
                f"folder of the bucket:\n\n   {error_sqs_arn}\n\n"
                f"Note that the two SQS Queue ARNs may be identical; this is normal.\n\n"
                f"If you'd like to backfill the table, please run\n\n"
                f"  ALTER PIPE {pipe} REFRESH;\n"
                f"  ALTER PIPE {error_pipe} REFRESH;"
            ),
        }