Example #1
def test_enabled_firehose_logs_disabled():
    """CLI - Terraform Common - Expected Firehose Logs - Disable"""
    CONFIG['global']['infrastructure']['firehose'] = {
        'enabled': True,
        'disabled_logs': [
            'test_log_type_json',
            'test_log_type_csv',
            'test_log_type_syslog'
        ]
    }
    firehose_logs = set(_common.enabled_firehose_logs(CONFIG))

    expected_logs = {
        'cloudwatch_test_match_types',
        # 'test_log_type_csv',
        'test_log_type_csv_nested',
        'test_log_type_json_nested',
        'test_log_type_json_nested_with_data',
        # 'test_log_type_json',
        'test_log_type_kv_auditd',
        'test_multiple_schemas_01',
        'test_multiple_schemas_02',
        'test_log_type_json_2',
        'test_log_type_json_nested_osquery',
        # 'test_log_type_syslog',
        'test_cloudtrail',
        'unit_test_simple_log'
    }

    assert_equal(firehose_logs, expected_logs)
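
Read alongside Example #3 below, this test implies that enabled_firehose_logs
starts from every log name in config['logs'] (with ':' flattened to '_', matching
the table_name.replace('_', ':', 1) calls in the athena_handler examples further
down) and subtracts the disabled_logs set. The helper's body is not part of this
corpus, so the following is only a sketch of that presumed behavior:

def enabled_firehose_logs_sketch(config):
    """Hypothetical reimplementation of enabled_firehose_logs, for illustration only"""
    firehose_config = config['global']['infrastructure'].get('firehose', {})
    disabled_logs = set(firehose_config.get('disabled_logs', []))
    # Log names such as 'test:log_type_json' flatten to 'test_log_type_json'
    all_logs = set(log_name.replace(':', '_') for log_name in config['logs'])
    return sorted(all_logs - disabled_logs)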
Example #2
def setup_mock_firehose_delivery_streams(config):
    """Mock Kinesis Firehose Streams for rule testing

    Args:
        config (CLIConfig): The StreamAlert config
    """
    region = config['global']['account']['region']
    for log_type in enabled_firehose_logs(config):
        stream_name = 'streamalert_data_{}'.format(log_type)
        prefix = '{}/'.format(log_type)
        create_delivery_stream(region, stream_name, prefix)
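
create_delivery_stream is called but not defined in this snippet; Example #4 below
inlines what is presumably the same logic, so a hedged sketch of the helper could
look like this (the role, bucket, and buffering values are Example #4's test
fixtures, not production settings):

import boto3

def create_delivery_stream(region, stream_name, prefix):
    """Hypothetical helper body, mirroring the inline boto3 call in Example #4"""
    firehose_client = boto3.client('firehose', region_name=region)
    firehose_client.create_delivery_stream(
        DeliveryStreamName=stream_name,
        S3DestinationConfiguration={
            'RoleARN': 'arn:aws:iam::123456789012:role/firehose_delivery_role',
            'BucketARN': 'arn:aws:s3:::kinesis-test',
            'Prefix': prefix,
            'BufferingHints': {
                'SizeInMBs': 123,
                'IntervalInSeconds': 124
            },
            'CompressionFormat': 'Snappy',
        })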
Example #3
def test_enabled_firehose_logs():
    """CLI - Terraform Common - Expected Firehose Logs """
    firehose_logs = set(_common.enabled_firehose_logs(CONFIG))

    expected_logs = {
        'cloudwatch_test_match_types', 'test_log_type_csv',
        'test_log_type_csv_nested', 'test_log_type_json_nested',
        'test_log_type_json_nested_with_data', 'test_log_type_json',
        'test_log_type_kv_auditd', 'test_multiple_schemas_01',
        'test_multiple_schemas_02', 'test_log_type_json_2',
        'test_log_type_json_nested_osquery', 'test_log_type_syslog',
        'test_cloudtrail', 'unit_test_simple_log'
    }

    assert_equal(firehose_logs, expected_logs)
Example #4
def setup_mock_firehose_delivery_streams(config):
    """Mock Kinesis Firehose Streams for rule testing

    Args:
        config (CLIConfig): The StreamAlert config
    """
    region = config['global']['account']['region']
    firehose_client = boto3.client('firehose', region_name=region)
    for log_type in enabled_firehose_logs(config):
        firehose_client.create_delivery_stream(
            DeliveryStreamName='streamalert_data_{}'.format(log_type),
            S3DestinationConfiguration={
                'RoleARN':
                'arn:aws:iam::123456789012:role/firehose_delivery_role',
                'BucketARN': 'arn:aws:s3:::kinesis-test',
                'Prefix': '{}/'.format(log_type),
                'BufferingHints': {
                    'SizeInMBs': 123,
                    'IntervalInSeconds': 124
                },
                'CompressionFormat': 'Snappy',
            })
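
These streams are only safe to create against a faked endpoint; in the test suite
this function would presumably run under a moto mock so no real AWS calls are made.
A sketch of that usage (the decorator name is an assumption; older moto releases
exposed Firehose support under mock_kinesis instead):

import boto3
from moto import mock_firehose  # assumed decorator name

@mock_firehose
def test_mock_firehose_streams_created():
    setup_mock_firehose_delivery_streams(CONFIG)  # CONFIG as in Examples #1 and #3
    client = boto3.client(
        'firehose', region_name=CONFIG['global']['account']['region'])
    streams = client.list_delivery_streams(Limit=100)['DeliveryStreamNames']
    assert 'streamalert_data_test_log_type_json' in streams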
Example #5
def generate_main(**kwargs):
    """Generate the main.tf.json Terraform dict

    Keyword Args:
        init (bool): If Terraform is running in the init phase or not
        config (CLIConfig): The loaded CLI config

    Returns:
        dict: main.tf.json Terraform dict
    """
    init = kwargs.get('init')
    config = kwargs['config']
    main_dict = infinitedict()

    # Configure provider along with the minimum version
    main_dict['provider']['aws'] = {'version': '~> 0.1.4'}

    # Configure Terraform version requirement
    main_dict['terraform']['required_version'] = '~> 0.10.6'

    # Set up the backend depending on the deployment phase.
    # When first setting up StreamAlert, the Terraform statefile
    # is stored locally. After the first dependencies are created,
    # this moves to S3.
    if init:
        main_dict['terraform']['backend']['local'] = {
            'path': 'terraform.tfstate'}
    else:
        main_dict['terraform']['backend']['s3'] = {
            'bucket': '{}.streamalert.terraform.state'.format(
                config['global']['account']['prefix']),
            'key': 'stream_alert_state/terraform.tfstate',
            'region': config['global']['account']['region'],
            'encrypt': True,
            'acl': 'private',
            'kms_key_id': 'alias/{}'.format(config['global']['account']['kms_key_alias'])}

    logging_bucket = '{}.streamalert.s3-logging'.format(
        config['global']['account']['prefix'])
    logging_bucket_lifecycle = {
        'prefix': '/',
        'enabled': True,
        'transition': {
            'days': 30,
            'storage_class': 'GLACIER'}}

    # Configure initial S3 buckets
    main_dict['resource']['aws_s3_bucket'] = {
        'lambda_source': generate_s3_bucket(
            bucket=config['lambda']['rule_processor_config']['source_bucket'],
            logging=logging_bucket
        ),
        'stream_alert_secrets': generate_s3_bucket(
            bucket='{}.streamalert.secrets'.format(config['global']['account']['prefix']),
            logging=logging_bucket
        ),
        'terraform_remote_state': generate_s3_bucket(
            bucket=config['global']['terraform']['tfstate_bucket'],
            logging=logging_bucket
        ),
        'logging_bucket': generate_s3_bucket(
            bucket=logging_bucket,
            acl='log-delivery-write',
            logging=logging_bucket,
            lifecycle_rule=logging_bucket_lifecycle
        ),
        'streamalerts': generate_s3_bucket(
            bucket='{}.streamalerts'.format(config['global']['account']['prefix']),
            logging=logging_bucket
        )
    }

    # Conditionally configure Firehose
    if config['global']['infrastructure'].get('firehose', {}).get('enabled'):
        firehose_config = config['global']['infrastructure']['firehose']
        firehose_s3_bucket_suffix = firehose_config.get('s3_bucket_suffix',
                                                        'streamalert.data')
        firehose_s3_bucket_name = '{}.{}'.format(config['global']['account']['prefix'],
                                                 firehose_s3_bucket_suffix)

        # Add the main Firehose module
        main_dict['module']['kinesis_firehose'] = {
            'source': 'modules/tf_stream_alert_kinesis_firehose',
            'account_id': config['global']['account']['aws_account_id'],
            'region': config['global']['account']['region'],
            'prefix': config['global']['account']['prefix'],
            'logs': enabled_firehose_logs(config),
            'buffer_size': firehose_config.get('buffer_size', 64),
            'buffer_interval': firehose_config.get('buffer_interval', 300),
            'compression_format': firehose_config.get('compression_format', 'GZIP'),
            's3_logging_bucket': logging_bucket,
            's3_bucket_name': firehose_s3_bucket_name
        }

    # KMS Key and Alias creation
    main_dict['resource']['aws_kms_key']['stream_alert_secrets'] = {
        'enable_key_rotation': True,
        'description': 'StreamAlert secret management'
    }
    main_dict['resource']['aws_kms_alias']['stream_alert_secrets'] = {
        'name': 'alias/{}'.format(config['global']['account']['kms_key_alias']),
        'target_key_id': '${aws_kms_key.stream_alert_secrets.key_id}'
    }

    # Global infrastructure settings
    infrastructure_config = config['global'].get('infrastructure')
    if infrastructure_config and 'monitoring' in infrastructure_config:
        if infrastructure_config['monitoring'].get('create_sns_topic'):
            main_dict['resource']['aws_sns_topic']['stream_alert_monitoring'] = {
                'name': DEFAULT_SNS_MONITORING_TOPIC
            }

    # Add any global cloudwatch alarms to the main.tf
    monitoring_config = config['global']['infrastructure'].get('monitoring')
    if not monitoring_config:
        return main_dict

    global_metrics = monitoring_config.get('metric_alarms')
    if not global_metrics:
        return main_dict

    topic_name = (DEFAULT_SNS_MONITORING_TOPIC
                  if monitoring_config.get('create_sns_topic')
                  else monitoring_config.get('sns_topic_name'))

    sns_topic_arn = 'arn:aws:sns:{region}:{account_id}:{topic}'.format(
        region=config['global']['account']['region'],
        account_id=config['global']['account']['aws_account_id'],
        topic=topic_name
    )

    formatted_alarms = {}
    # Add global metric alarms for the rule and alert processors
    for func in FUNC_PREFIXES:
        if func not in global_metrics:
            continue

        for name, settings in global_metrics[func].iteritems():
            alarm_info = settings.copy()
            alarm_info['alarm_name'] = name
            alarm_info['namespace'] = 'StreamAlert'
            alarm_info['alarm_actions'] = [sns_topic_arn]
            # Terraform only allows certain characters in resource names
            acceptable_chars = ''.join([string.digits, string.letters, '_-'])
            name = filter(acceptable_chars.__contains__, name)
            formatted_alarms['metric_alarm_{}'.format(name)] = alarm_info

    if formatted_alarms:
        main_dict['resource']['aws_cloudwatch_metric_alarm'] = formatted_alarms

    return main_dict
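
generate_main only builds the dict; per its docstring the result becomes the
main.tf.json consumed by Terraform. A minimal sketch of that serialization step,
assuming a simple local write (the CLI's real write path is not shown in this
corpus):

import json

def write_main_tf_json(config, init=False):
    """Hypothetical serialization step for the dict built by generate_main"""
    main_dict = generate_main(config=config, init=init)
    with open('main.tf.json', 'w') as tf_file:
        # sort_keys keeps the generated file stable across runs
        json.dump(main_dict, tf_file, indent=2, sort_keys=True)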
Example #6
def athena_handler(options):
    """Handle Athena operations"""
    athena_client = StreamAlertAthenaClient(
        CONFIG, results_key_prefix='stream_alert_cli')

    if options.subcommand == 'init':
        CONFIG.generate_athena()

    elif options.subcommand == 'enable':
        CONFIG.set_athena_lambda_enable()

    elif options.subcommand == 'create-db':
        if athena_client.check_database_exists():
            LOGGER_CLI.info(
                'The \'streamalert\' database already exists, nothing to do')
            return

        create_db_success, create_db_result = athena_client.run_athena_query(
            query='CREATE DATABASE streamalert')

        if create_db_success and create_db_result['ResultSet'].get('Rows'):
            LOGGER_CLI.info('streamalert database successfully created!')
            LOGGER_CLI.info('results: %s',
                            create_db_result['ResultSet']['Rows'])

    elif options.subcommand == 'create-table':
        if not options.bucket:
            LOGGER_CLI.error('Missing command line argument --bucket')
            return

        if not options.refresh_type:
            LOGGER_CLI.error('Missing command line argument --refresh_type')
            return

        # Ensure 'query' is always defined before the 'if query:' check below
        query = None
        if options.type == 'data':
            if not options.table_name:
                LOGGER_CLI.error('Missing command line argument --table_name')
                return

            if options.table_name not in enabled_firehose_logs(CONFIG):
                LOGGER_CLI.error(
                    'Table name %s missing from configuration or '
                    'is not enabled.', options.table_name)
                return

            if athena_client.check_table_exists(options.table_name):
                LOGGER_CLI.info('The \'%s\' table already exists.',
                                options.table_name)
                return

            log_info = CONFIG['logs'][options.table_name.replace('_', ':', 1)]
            schema = dict(log_info['schema'])
            schema_statement = ''

            sanitized_schema = StreamAlert.sanitize_keys(schema)

            athena_schema = {}
            schema_type_mapping = {
                'string': 'string',
                'integer': 'int',
                'boolean': 'boolean',
                'float': 'decimal',
                dict: 'map<string, string>',
                list: 'array<string>'
            }
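
            # Worked example of the mapping (illustrative only): a log schema of
            #   {'name': 'string', 'count': 'integer', 'tags': [], 'meta': {}}
            # would become the Athena columns
            #   `name` string, `count` int, `tags` array<string>, `meta` map<string, string>
            # once add_to_athena_schema below translates {} and [] into the
            # hashable dict / list keys.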

            def add_to_athena_schema(schema, root_key=''):
                """Helper function to add sanitized schemas to the Athena table schema"""
                # Setup the root_key dict
                if root_key and not athena_schema.get(root_key):
                    athena_schema[root_key] = {}

                for key_name, key_type in schema.iteritems():
                    # When using special characters in the beginning or end
                    # of a column name, they have to be wrapped in backticks
                    key_name = '`{}`'.format(key_name)

                    special_key = None
                    # Transform the {} or [] into hashable types
                    if key_type == {}:
                        special_key = dict
                    elif key_type == []:
                        special_key = list
                    # Cast nested dict as a string for now
                    # TODO(jacknagz): support recursive schemas
                    elif isinstance(key_type, dict):
                        special_key = 'string'

                    # Account for envelope keys
                    if root_key:
                        if special_key is not None:
                            athena_schema[root_key][
                                key_name] = schema_type_mapping[special_key]
                        else:
                            athena_schema[root_key][
                                key_name] = schema_type_mapping[key_type]
                    else:
                        if special_key is not None:
                            athena_schema[key_name] = schema_type_mapping[
                                special_key]
                        else:
                            athena_schema[key_name] = schema_type_mapping[
                                key_type]

            add_to_athena_schema(sanitized_schema)

            # Support envelope keys
            configuration_options = log_info.get('configuration')
            if configuration_options:
                envelope_keys = configuration_options.get('envelope_keys')
                if envelope_keys:
                    sanitized_envelope_keys = StreamAlert.sanitize_keys(
                        envelope_keys)
                    # Note: this key is wrapped in backticks to be Hive compliant
                    add_to_athena_schema(sanitized_envelope_keys,
                                         '`streamalert:envelope_keys`')

            for key_name, key_type in athena_schema.iteritems():
                # Account for nested structs
                if isinstance(key_type, dict):
                    struct_schema = ''.join([
                        '{0}:{1},'.format(sub_key, sub_type)
                        for sub_key, sub_type in key_type.iteritems()
                    ])
                    nested_schema_statement = '{0} struct<{1}>, '.format(
                        key_name,
                        # Use the minus index to remove the last comma
                        struct_schema[:-1])
                    schema_statement += nested_schema_statement
                else:
                    schema_statement += '{0} {1},'.format(key_name, key_type)

            query = (
                'CREATE EXTERNAL TABLE {table_name} ({schema}) '
                'PARTITIONED BY (dt string) '
                'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\' '
                'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                    table_name=options.table_name,
                    # Use the minus index to remove the last comma
                    schema=schema_statement[:-1],
                    bucket=options.bucket))

        elif options.type == 'alerts':
            if athena_client.check_table_exists(options.type):
                LOGGER_CLI.info('The \'alerts\' table already exists.')
                return

            query = ('CREATE EXTERNAL TABLE alerts ('
                     'log_source string,'
                     'log_type string,'
                     'outputs array<string>,'
                     'record string,'
                     'rule_description string,'
                     'rule_name string,'
                     'source_entity string,'
                     'source_service string)'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/alerts/\''.format(
                         bucket=options.bucket))

        if query:
            create_table_success, _ = athena_client.run_athena_query(
                query=query, database='streamalert')

            if create_table_success:
                CONFIG['lambda']['athena_partition_refresh_config'] \
                      ['refresh_type'][options.refresh_type][options.bucket] = options.type
                CONFIG.write()
                table_name = options.type if options.type == 'alerts' else options.table_name
                LOGGER_CLI.info('The %s table was successfully created!',
                                table_name)
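
For orientation, the DDL assembled by the 'data' branch above comes out roughly as
follows, for a hypothetical two-column schema (table, bucket, and column names are
made up; the real statement is emitted as a single line):

    CREATE EXTERNAL TABLE test_log_type_json (`name` string,`count` int)
    PARTITIONED BY (dt string)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    LOCATION 's3://example-data-bucket/test_log_type_json/'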
Example #7
def athena_handler(options):
    """Handle Athena operations"""
    athena_client = StreamAlertAthenaClient(
        CONFIG, results_key_prefix='stream_alert_cli')

    if options.subcommand == 'init':
        CONFIG.generate_athena()

    elif options.subcommand == 'enable':
        CONFIG.set_athena_lambda_enable()

    elif options.subcommand == 'create-db':
        if athena_client.check_database_exists():
            LOGGER_CLI.info(
                'The \'streamalert\' database already exists, nothing to do')
            return

        create_db_success, create_db_result = athena_client.run_athena_query(
            query='CREATE DATABASE streamalert')

        if create_db_success and create_db_result['ResultSet'].get('Rows'):
            LOGGER_CLI.info('streamalert database successfully created!')
            LOGGER_CLI.info('results: %s',
                            create_db_result['ResultSet']['Rows'])

    elif options.subcommand == 'create-table':
        if not options.bucket:
            LOGGER_CLI.error('Missing command line argument --bucket')
            return

        if not options.refresh_type:
            LOGGER_CLI.error('Missing command line argument --refresh_type')
            return

        # Ensure 'query' is always defined before the 'if query:' check below
        query = None
        if options.type == 'data':
            if not options.table_name:
                LOGGER_CLI.error('Missing command line argument --table_name')
                return

            if options.table_name not in enabled_firehose_logs(CONFIG):
                LOGGER_CLI.error(
                    'Table name %s missing from configuration or '
                    'is not enabled.', options.table_name)
                return

            if athena_client.check_table_exists(options.table_name):
                LOGGER_CLI.info('The \'%s\' table already exists.',
                                options.table_name)
                return

            schema = CONFIG['logs'][options.table_name.replace('_',
                                                               ':')]['schema']
            sanitized_schema = StreamAlert.sanitize_keys(schema)

            athena_schema = {}
            schema_type_mapping = {
                'string': 'string',
                'integer': 'int',
                'boolean': 'boolean',
                'float': 'decimal',
                dict: 'map<string, string>',
                list: 'array<string>'
            }

            for key_name, key_type in sanitized_schema.iteritems():
                # Transform the {} or [] into hashable types
                if key_type == {}:
                    key_type = dict
                elif key_type == []:
                    key_type = list

                athena_schema[key_name] = schema_type_mapping[key_type]

            schema_statement = ''.join([
                '{0} {1},'.format(key_name, key_type)
                for key_name, key_type in athena_schema.iteritems()
            ])[:-1]
            query = ('CREATE EXTERNAL TABLE {table_name} ({schema})'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                         table_name=options.table_name,
                         schema=schema_statement,
                         bucket=options.bucket))

        elif options.type == 'alerts':
            if athena_client.check_table_exists(options.type):
                LOGGER_CLI.info('The \'alerts\' table already exists.')
                return

            query = ('CREATE EXTERNAL TABLE alerts ('
                     'log_source string,'
                     'log_type string,'
                     'outputs array<string>,'
                     'record string,'
                     'rule_description string,'
                     'rule_name string,'
                     'source_entity string,'
                     'source_service string)'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/alerts/\''.format(
                         bucket=options.bucket))

        if query:
            create_table_success, _ = athena_client.run_athena_query(
                query=query, database='streamalert')

            if create_table_success:
                CONFIG['lambda']['athena_partition_refresh_config'] \
                      ['refresh_type'][options.refresh_type][options.bucket] = options.type
                CONFIG.write()
                LOGGER_CLI.info('The %s table was successfully created!',
                                options.type)
Example #8
def create_table(athena_client, options, config):
    """Create a 'streamalert' Athena table

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
        options (namedtuple): The parsed args passed from the CLI
        config (CLIConfig): The loaded StreamAlert CLI config
    """
    if not options.bucket:
        LOGGER_CLI.error('Missing command line argument --bucket')
        return

    if not options.refresh_type:
        LOGGER_CLI.error('Missing command line argument --refresh_type')
        return

    # Ensure 'query' is always defined before the 'if query:' check below
    query = None
    if options.type == 'data':
        if not options.table_name:
            LOGGER_CLI.error('Missing command line argument --table_name')
            return

        if options.table_name not in terraform_cli_helpers.enabled_firehose_logs(
                config):
            LOGGER_CLI.error(
                'Table name %s missing from configuration or '
                'is not enabled.', options.table_name)
            return

        if athena_client.check_table_exists(options.table_name):
            LOGGER_CLI.info('The \'%s\' table already exists.',
                            options.table_name)
            return

        log_info = config['logs'][options.table_name.replace('_', ':', 1)]
        schema = dict(log_info['schema'])
        schema_statement = ''

        sanitized_schema = StreamAlert.sanitize_keys(schema)
        athena_schema = {}

        _add_to_athena_schema(sanitized_schema, athena_schema)

        # Support envelope keys
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = StreamAlert.sanitize_keys(
                    envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                _add_to_athena_schema(sanitized_envelope_key_schema,
                                      athena_schema,
                                      '`streamalert:envelope_keys`')

        for key_name, key_type in athena_schema.iteritems():
            # Account for nested structs
            if isinstance(key_type, dict):
                struct_schema = ''.join([
                    '{0}:{1},'.format(sub_key, sub_type)
                    for sub_key, sub_type in key_type.iteritems()
                ])
                nested_schema_statement = '{0} struct<{1}>, '.format(
                    key_name,
                    # Use the minus index to remove the last comma
                    struct_schema[:-1])
                schema_statement += nested_schema_statement
            else:
                schema_statement += '{0} {1},'.format(key_name, key_type)

        query = (
            'CREATE EXTERNAL TABLE {table_name} ({schema}) '
            'PARTITIONED BY (dt string) '
            'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\' '
            'WITH SERDEPROPERTIES ( \'ignore.malformed.json\' = \'true\') '
            'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                table_name=options.table_name,
                # Use the minus index to remove the last comma
                schema=schema_statement[:-1],
                bucket=options.bucket))

    elif options.type == 'alerts':
        if athena_client.check_table_exists(options.type):
            LOGGER_CLI.info('The \'alerts\' table already exists.')
            return

        query = ('CREATE EXTERNAL TABLE alerts ('
                 'log_source string,'
                 'log_type string,'
                 'outputs array<string>,'
                 'record string,'
                 'rule_description string,'
                 'rule_name string,'
                 'source_entity string,'
                 'source_service string)'
                 'PARTITIONED BY (dt string)'
                 'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                 'LOCATION \'s3://{bucket}/alerts/\''.format(
                     bucket=options.bucket))

    if query:
        create_table_success, _ = athena_client.run_athena_query(
            query=query, database='streamalert')

        if create_table_success:
            # Update the CLI config
            config['lambda']['athena_partition_refresh_config'] \
                  ['refresh_type'][options.refresh_type][options.bucket] = options.type
            config.write()

            table_name = options.type if options.type == 'alerts' else options.table_name
            LOGGER_CLI.info('The %s table was successfully created!',
                            table_name)
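
create_table only touches a handful of attributes on options, so it can be
exercised with a simple namedtuple. A hedged sketch (field names are inferred from
the attribute accesses above; athena_client and config are assumed to be an
instantiated AthenaClient and a loaded CLIConfig):

from collections import namedtuple

AthenaOptions = namedtuple(
    'AthenaOptions', ['bucket', 'refresh_type', 'type', 'table_name'])

options = AthenaOptions(
    bucket='example-data-bucket',       # hypothetical bucket name
    refresh_type='add_hive_partition',  # assumed to be a valid refresh type
    type='alerts',
    table_name=None)

create_table(athena_client, options, config)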