Example #1
    def __init__(self):
        config = load_config(include={'lambda.json', 'global.json'})
        prefix = config['global']['account']['prefix']
        athena_config = config['lambda']['athena_partitioner_config']
        self._file_format = get_data_file_format(config)

        if self._file_format == 'parquet':
            self._alerts_regex = self.ALERTS_REGEX_PARQUET
            self._data_regex = self.DATA_REGEX_PARQUET
        elif self._file_format == 'json':
            self._alerts_regex = self.ALERTS_REGEX
            self._data_regex = self.DATA_REGEX
        else:
            message = (
                'file format "{}" is not supported. Supported file formats are '
                '"parquet" and "json". Please update the setting in '
                'athena_partitioner_config in "conf/lambda.json"'.format(self._file_format))
            raise ConfigError(message)

        self._athena_buckets = athena_partition_buckets(config)

        db_name = get_database_name(config)

        # Get the S3 bucket to store Athena query results
        results_bucket = athena_config.get(
            'results_bucket',
            's3://{}-streamalert-athena-results'.format(prefix))

        self._s3_buckets_and_keys = defaultdict(set)

        self._create_client(db_name, results_bucket)
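A minimal, standalone sketch of the same file-format dispatch used in this constructor; the regex patterns and the ConfigError stand-in below are hypothetical placeholders, not StreamAlert's actual values.

import re

class ConfigError(Exception):
    """Stand-in for StreamAlert's ConfigError"""

# Hypothetical partition-path patterns, for illustration only
DATA_REGEX = re.compile(r'(?P<year>\d{4})/(?P<month>\d{2})/(?P<day>\d{2})')
DATA_REGEX_PARQUET = re.compile(r'dt=(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})')

def select_data_regex(file_format):
    """Mirror the parquet/json branching from the constructor above"""
    if file_format == 'parquet':
        return DATA_REGEX_PARQUET
    if file_format == 'json':
        return DATA_REGEX
    raise ConfigError(
        'file format "{}" is not supported. Supported file formats are '
        '"parquet" and "json"'.format(file_format))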
Example #2
def get_athena_client(config):
    """Get an athena client using the current config settings

    Args:
        config (CLIConfig): Loaded StreamAlert config

    Returns:
        AthenaClient: instantiated client for performing Athena actions
    """
    prefix = config['global']['account']['prefix']
    athena_config = config['lambda']['athena_partitioner_config']

    db_name = get_database_name(config)

    # Get the S3 bucket to store Athena query results
    results_bucket = athena_config.get(
        'results_bucket',
        's3://{}-streamalert-athena-results'.format(prefix)
    )

    return AthenaClient(
        db_name,
        results_bucket,
        'streamalert_cli',
        region=config['global']['account']['region']
    )
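A hedged usage sketch: with a minimal config shaped like StreamAlert's conf/ files (the values below are illustrative; the CLI actually loads a CLIConfig), the results bucket falls back to a name derived from the account prefix when no 'results_bucket' override is set.

sample_config = {
    'global': {'account': {'prefix': 'acme', 'region': 'us-east-1'}},
    'lambda': {'athena_partitioner_config': {}},  # no 'results_bucket' override
}

prefix = sample_config['global']['account']['prefix']
results_bucket = sample_config['lambda']['athena_partitioner_config'].get(
    'results_bucket', 's3://{}-streamalert-athena-results'.format(prefix))
assert results_bucket == 's3://acme-streamalert-athena-results'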
Example #3
def _generate_global_module(config):
    # 2019-08-22 (Ryxias)
    #   In version 3.0.0+, StreamAlert will default to always using the prefix, when "use_prefix"
    #   is not present.
    #
    #   Refer to this PR for more information: https://github.com/airbnb/streamalert/pull/979
    use_prefix = config['global']['infrastructure'].get(
        'classifier_sqs', {}).get('use_prefix', True)

    global_module = {
        'source': './modules/tf_globals',
        'account_id': config['global']['account']['aws_account_id'],
        'region': config['global']['account']['region'],
        'prefix': config['global']['account']['prefix'],
        'kms_key_arn': '${aws_kms_key.server_side_encryption.arn}',
        'rules_engine_timeout': config['lambda']['rules_engine_config']['timeout'],
        'sqs_use_prefix': use_prefix,
        'alerts_db_name': get_database_name(config),
        'alerts_file_format': get_data_file_format(config),
        'alerts_schema': generate_alerts_table_schema()
    }

    # The code below applies settings for resources only if the settings are explicitly
    # defined. This is because these resources have defaults defined in the
    # ./modules/tf_globals module. This allows for overriding these settings, but
    # avoids storing defaults in multiple locations
    if 'alerts_table' in config['global']['infrastructure']:
        for setting in {'read_capacity', 'write_capacity'}:
            value = config['global']['infrastructure']['alerts_table'].get(setting)
            if value:
                global_module['alerts_table_{}'.format(setting)] = value

    alert_fh_settings_with_defaults = {
        'bucket_name', 'buffer_size', 'buffer_interval',
        'cloudwatch_log_retention'
    }

    if 'alerts_firehose' in config['global']['infrastructure']:
        for setting in alert_fh_settings_with_defaults:
            value = config['global']['infrastructure']['alerts_firehose'].get(setting)
            if not value:
                continue

            global_module['alerts_firehose_{}'.format(setting)] = value

    if 'rule_staging' in config['global']['infrastructure']:
        if config['global']['infrastructure']['rule_staging'].get('enabled'):
            global_module['enable_rule_staging'] = True
            for setting in {'table_read_capacity', 'table_write_capacity'}:
                value = config['global']['infrastructure']['rule_staging'].get(setting)
                if value:
                    # Only set when explicitly defined; defaults live in the Terraform module
                    global_module['rules_{}'.format(setting)] = value

    return global_module
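The "only set when explicitly configured" pattern above repeats for the alerts table, the alerts Firehose, and rule staging. A sketch of how it could be factored out; apply_overrides is a hypothetical helper, not part of StreamAlert.

def apply_overrides(target, source, settings, key_format):
    """Copy only explicitly set (truthy) settings into the module dict so the
    Terraform module defaults remain the single source of truth
    """
    for setting in settings:
        value = source.get(setting)
        if value:
            target[key_format.format(setting)] = value

# For example, the alerts_table block above would reduce to:
# apply_overrides(global_module,
#                 config['global']['infrastructure'].get('alerts_table', {}),
#                 {'read_capacity', 'write_capacity'},
#                 'alerts_table_{}')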
Example #4
def generate_artifact_extractor(config):
    """Generate Terraform for the Artifact Extractor Lambda function
    Args:
        config (dict): The loaded config from the 'conf/' directory
    Returns:
        dict: Artifact Extractor Terraform definition to be marshaled to JSON
    """
    if not artifact_extractor_enabled(config):
        return

    result = infinitedict()

    ae_config = config['global']['infrastructure']['artifact_extractor']
    stream_name = FirehoseClient.artifacts_firehose_stream_name(config)

    # Set variables for the artifact extractor module
    result['module']['artifact_extractor'] = {
        'source': './modules/tf_artifact_extractor',
        'account_id': config['global']['account']['aws_account_id'],
        'prefix': config['global']['account']['prefix'],
        'region': config['global']['account']['region'],
        'glue_catalog_db_name': get_database_name(config),
        'glue_catalog_table_name': ae_config.get('table_name', DEFAULT_ARTIFACTS_TABLE_NAME),
        's3_bucket_name': firehose_data_bucket(config),
        'stream_name': stream_name,
        'buffer_size': ae_config.get('firehose_buffer_size', 128),
        'buffer_interval': ae_config.get('firehose_buffer_interval', 900),
        'kms_key_arn': '${aws_kms_key.server_side_encryption.arn}',
        'schema': generate_artifacts_table_schema()
    }

    return result
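The result container, infinitedict(), lets the function assign result['module']['artifact_extractor'] without creating the intermediate 'module' key first. A minimal sketch of such a helper (StreamAlert's actual implementation may differ) and of marshaling the result to Terraform JSON:

import json
from collections import defaultdict

def infinitedict():
    """A dict that transparently creates nested dicts on first access"""
    return defaultdict(infinitedict)

result = infinitedict()
result['module']['artifact_extractor'] = {'source': './modules/tf_artifact_extractor'}

# defaultdicts serialize like plain dicts when marshaled to Terraform JSON
print(json.dumps(result, indent=2, sort_keys=True))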
Example #5
    def __init__(self):
        self._config = load_config()
        prefix = self._config['global']['account']['prefix']

        # Create the rule table class for getting staging information
        self._rule_table = RuleTable('{}_streamalert_rules'.format(prefix))

        athena_config = self._config['lambda']['athena_partitioner_config']

        # Get the name of the Athena database to access
        db_name = athena_config.get('database_name', get_database_name(self._config))

        # Get the S3 bucket to store Athena query results
        results_bucket = athena_config.get(
            'results_bucket',
            's3://{}-streamalert-athena-results'.format(prefix))

        self._athena_client = AthenaClient(db_name, results_bucket,
                                           self.ATHENA_S3_PREFIX)
        self._current_time = datetime.utcnow()
        self._staging_stats = dict()
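Note that examples #1, #2, and #5 all derive the same default results bucket from the account prefix. A one-line helper capturing that convention (the helper name is mine, not StreamAlert's):

def default_results_bucket(prefix):
    """Default Athena results bucket derived from the account prefix"""
    return 's3://{}-streamalert-athena-results'.format(prefix)

assert default_results_bucket('acme') == 's3://acme-streamalert-athena-results'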
Example #6
def generate_firehose(logging_bucket, main_dict, config):
    """Generate the Firehose Terraform modules

    Args:
        logging_bucket (str): The name of the global logging bucket
        main_dict (infinitedict): The Dict to marshal to a file
        config (CLIConfig): The loaded StreamAlert Config
    """
    if not config['global']['infrastructure'].get('firehose', {}).get('enabled'):
        return

    prefix = config['global']['account']['prefix']

    # This can return False, but the check above ensures that should never happen
    firehose_s3_bucket_name = firehose_data_bucket(config)

    firehose_conf = config['global']['infrastructure']['firehose']

    # Firehose Setup module
    main_dict['module']['kinesis_firehose_setup'] = {
        'source': './modules/tf_kinesis_firehose_setup',
        'account_id': config['global']['account']['aws_account_id'],
        'prefix': prefix,
        'region': config['global']['account']['region'],
        's3_logging_bucket': logging_bucket,
        's3_bucket_name': firehose_s3_bucket_name,
        'kms_key_id': '${aws_kms_key.server_side_encryption.key_id}'
    }

    enabled_logs = FirehoseClient.load_enabled_log_sources(
        firehose_conf, config['logs'], force_load=True)

    log_alarms_config = firehose_conf.get('enabled_logs', {})

    db_name = get_database_name(config)

    firehose_prefix = prefix if firehose_conf.get('use_prefix', True) else ''

    # Add the Delivery Streams individually
    for log_stream_name, log_type_name in enabled_logs.items():
        module_dict = {
            'source': './modules/tf_kinesis_firehose_delivery_stream',
            'buffer_size': firehose_conf.get('buffer_size'),
            'buffer_interval': firehose_conf.get('buffer_interval', 300),
            'file_format': get_data_file_format(config),
            'stream_name': FirehoseClient.generate_firehose_name(
                firehose_prefix, log_stream_name),
            'role_arn': '${module.kinesis_firehose_setup.firehose_role_arn}',
            's3_bucket_name': firehose_s3_bucket_name,
            'kms_key_arn': '${aws_kms_key.server_side_encryption.arn}',
            'glue_catalog_db_name': db_name,
            'glue_catalog_table_name': log_stream_name,
            'schema': generate_data_table_schema(config, log_type_name)
        }

        # Try to get alarm info for this specific log type
        alarm_info = log_alarms_config.get(log_type_name)
        if not alarm_info and ':' in log_type_name:
            # Fallback on looking for alarm info for the parent log type
            alarm_info = log_alarms_config.get(log_type_name.split(':')[0])

        if alarm_info and alarm_info.get('enable_alarm'):
            module_dict['enable_alarm'] = True

            # There are defaults of these defined in the terraform module, so do
            # not set the variable values unless explicitly specified
            if alarm_info.get('log_min_count_threshold'):
                module_dict['alarm_threshold'] = alarm_info['log_min_count_threshold']

            if alarm_info.get('evaluation_periods'):
                module_dict['evaluation_periods'] = alarm_info['evaluation_periods']

            if alarm_info.get('period_seconds'):
                module_dict['period_seconds'] = alarm_info['period_seconds']

            alarm_actions = alarm_info.get('alarm_actions')
            if alarm_actions:
                if not isinstance(alarm_actions, list):
                    alarm_actions = [alarm_actions]
                module_dict['alarm_actions'] = alarm_actions
            else:
                module_dict['alarm_actions'] = [monitoring_topic_arn(config)]

        main_dict['module']['kinesis_firehose_{}'.format(log_stream_name)] = module_dict
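A small runnable sketch of the alarm lookup fallback above: for a nested log type such as 'osquery:info', alarm settings fall back to the parent 'osquery' entry when no exact match exists. The config values are illustrative only.

# Illustrative only: parent-type fallback for alarm settings
log_alarms_config = {'osquery': {'enable_alarm': True, 'period_seconds': 3600}}

log_type_name = 'osquery:info'
alarm_info = log_alarms_config.get(log_type_name)
if not alarm_info and ':' in log_type_name:
    # Fall back on the parent log type, mirroring the code above
    alarm_info = log_alarms_config.get(log_type_name.split(':')[0])

assert alarm_info == {'enable_alarm': True, 'period_seconds': 3600}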