Example #1
def test_pre_parse_s3_debug(s3_mock, log_mock, _):
    """S3Payload - Pre Parse, Debug On"""
    with patch.object(payload, 'LOGGER_DEBUG_ENABLED', True):

        records = ['_first_line_test_' * 10, '_second_line_test_' * 10]

        s3_mock.side_effect = [((100, records[0]), (200, records[1]))]

        raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name')
        s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)
        S3Payload.s3_object_size = 350

        # Exhaust the pre_parse generator so the progress debug messages are emitted
        list(s3_payload.pre_parse())

        calls = [
            call(
                'Processed %s S3 records out of an approximate total of %s '
                '(average record size: %s bytes, total size: %s bytes)', 100,
                350, 1, 350),
            call(
                'Processed %s S3 records out of an approximate total of %s '
                '(average record size: %s bytes, total size: %s bytes)', 200,
                350, 1, 350)
        ]

        log_mock.assert_has_calls(calls)
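The `s3_mock`, `log_mock`, and trailing `_` parameters imply a stack of `mock.patch` decorators that was trimmed from this snippet. A minimal sketch of what that setup could look like is below; the patch targets and module path are assumptions for illustration, not taken from the source.

from mock import call, patch  # the mock backport used by Python 2 test suites

# Decorators apply bottom-up, so the bottom-most patch maps to the first argument.
# All three target paths are hypothetical.
@patch('stream_alert.rule_processor.payload.S3Payload._download_object')            # -> _
@patch('stream_alert.rule_processor.payload.LOGGER.debug')                          # -> log_mock
@patch('stream_alert.rule_processor.payload.S3Payload._read_downloaded_s3_object')  # -> s3_mock
def test_pre_parse_s3_debug(s3_mock, log_mock, _):
    ...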
Example #2
def test_s3_object_too_large():
    """S3Payload - S3ObjectSizeError, Object too Large"""
    raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name')
    s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)
    S3Payload.s3_object_size = (128 * 1024 * 1024) + 10

    # The docstring implies this call raises S3ObjectSizeError; the decorator or
    # context manager asserting the exception was trimmed from this snippet.
    s3_payload._download_object('region', 'bucket', 'key')
Example #3
    def test_mult_schema_match(self, log_mock):
        """StreamClassifier - Multiple Schema Matching with Log Patterns"""
        kinesis_data = json.dumps({
            'name': 'file removal test',
            'identifier': 'host4.this.test.also',
            'time': 'Jan 01 2017',
            'type': 'random',
            'message': 'bad_001.txt was removed'
        })
        sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING = True

        service, entity = 'kinesis', 'test_stream_2'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_stream_payload(service, entity, raw_record)

        self.classifier.load_sources(service, entity)

        payload = list(payload.pre_parse())[0]

        schema_matches = self.classifier._process_log_schemas(payload)

        assert_equal(len(schema_matches), 2)
        self.classifier._check_schema_match(schema_matches)

        calls = [
            call('Log classification matched for multiple schemas: %s',
                 'test_multiple_schemas:01, test_multiple_schemas:02'),
            call('Proceeding with schema for: %s', 'test_multiple_schemas:01')
        ]

        log_mock.assert_has_calls(calls)
Example #4
    def test_mult_schema_match_success(self):
        """StreamClassifier - Multiple Schema Matching with Log Patterns, Success"""
        kinesis_data = json.dumps({
            'name': 'file added test',
            'identifier': 'host4.this.test',
            'time': 'Jan 01 2017',
            'type': 'lol_file_added_event_test',
            'message': 'bad_001.txt was added'
        })
        # Make sure support for multiple schema matching is ON
        sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING = True

        service, entity = 'kinesis', 'test_stream_2'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_stream_payload(service, entity, raw_record)

        self.classifier.load_sources(service, entity)

        payload = list(payload.pre_parse())[0]

        schema_matches = self.classifier._process_log_schemas(payload)

        assert_equal(len(schema_matches), 2)
        assert_equal(schema_matches[0].log_name, 'test_multiple_schemas:01')
        assert_equal(schema_matches[1].log_name, 'test_multiple_schemas:02')
        schema_match = self.classifier._check_schema_match(schema_matches)

        assert_equal(schema_match.log_name, 'test_multiple_schemas:01')
Example #5
def test_pre_parse_s3_debug(s3_mock, log_mock, _):
    """S3Payload - Pre Parse, Debug On"""
    # Cache the logger level
    log_level = LOGGER.getEffectiveLevel()

    # Increase the logger level to debug
    LOGGER.setLevel(logging.DEBUG)

    records = ['_first_line_test_' * 10, '_second_line_test_' * 10]

    s3_mock.side_effect = [((100, records[0]), (200, records[1]))]

    raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name')
    s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)
    S3Payload.s3_object_size = 350

    # Exhaust the pre_parse generator so the progress debug messages are emitted
    list(s3_payload.pre_parse())

    calls = [
        call(
            'Processed %s S3 records out of an approximate total of %s '
            '(average record size: %s bytes, total size: %s bytes)', 100, 350,
            1, 350),
        call(
            'Processed %s S3 records out of an approximate total of %s '
            '(average record size: %s bytes, total size: %s bytes)', 200, 350,
            1, 350)
    ]

    log_mock.assert_has_calls(calls)

    # Reset the logger level and stop the patchers
    LOGGER.setLevel(log_level)
Example #6
    def test_mult_schema_match_failure(self, log_mock):
        """StreamClassifier - Multiple Schema Matching with Log Patterns, Fail"""
        kinesis_data = json.dumps({
            'name': 'file removal test',
            'identifier': 'host4.this.test.also',
            'time': 'Jan 01 2017',
            'type': 'file_removed_event_test_file_added_event',
            'message': 'bad_001.txt was removed'
        })
        sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING = True

        service, entity = 'kinesis', 'test_stream_2'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_stream_payload(service, entity, raw_record)

        self.classifier.load_sources(service, entity)

        payload = payload.pre_parse().next()

        schema_matches = self.classifier._process_log_schemas(payload)

        assert_equal(len(schema_matches), 2)
        self.classifier._check_schema_match(schema_matches)

        log_mock.assert_called_with('Proceeding with schema for: %s',
                                    'test_multiple_schemas:01')
Example #7
def test_s3_download_object_zero_size(*_):
    """S3Payload - Download Object of Zero Size"""
    raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name', 0)
    s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)

    assert_is_none(
        s3_payload._download_object('us-east-1', 'unit_bucket_name',
                                    'unit_key_name'))
Example #8
def test_get_object_ioerror(download_object_mock):
    """S3Payload - IOError Test"""
    download_object_mock.side_effect = IOError('No space left on device')
    raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name')
    s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)

    result = s3_payload._get_object()

    assert_equal(result, None)
Example #9
def test_get_object(log_mock, _):
    """S3Payload - Get S3 Info from Raw Record"""
    raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name')
    s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)

    s3_payload._get_object()
    log_mock.assert_called_with(
        'Pre-parsing record from S3. Bucket: %s, Key: %s, Size: %d',
        'unit_bucket_name', 'unit_key_name', 100)
Example #10
def test_s3_download_object(log_mock, *_):
    """S3Payload - Download Object"""
    raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name')
    s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)
    s3_payload._download_object('us-east-1', 'unit_bucket_name',
                                'unit_key_name')

    assert_equal(log_mock.call_args_list[1][0][0],
                 'Completed download in %s seconds')
Example #11
    def _prepare_and_classify_payload(self, service, entity, raw_record):
        """Helper method to return a preparsed and classified payload"""
        payload = load_stream_payload(service, entity, raw_record)

        payload = list(payload.pre_parse())[0]
        self.classifier.load_sources(service, entity)
        self.classifier.classify_record(payload)

        return payload
Example #12
def mock_normalized_records(default_data=None):
    """Morck records which have been normalized"""
    if not default_data:
        default_data = [{
            'account': 12345,
            'region': '123456123456',
            'detail': {
                'eventName': 'ConsoleLogin',
                'userIdentity': {
                    'userName': '******',
                    'accountId': '12345'
                },
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '12345'
            },
            'source': '1.1.1.2',
            'streamalert:normalization': {
                'sourceAddress': [['detail', 'sourceIPAddress'], ['source']],
                'usernNme': [['detail', 'userIdentity', 'userName']]
            }
        }, {
            'domain': 'evil.com',
            'pc_name': 'test-pc',
            'date': 'Dec 1st, 2016',
            'data': 'ABCDEF',
            'streamalert:normalization': {
                'destinationDomain': [['domain']]
            }
        }, {
            'domain': 'evil2.com',
            'pc_name': 'test-pc',
            'date': 'Dec 1st, 2016',
            'data': 'ABCDEF',
            'streamalert:normalization': {
                'destinationDomain': [['domain']]
            }
        }, {
            'process_md5': 'abcdef0123456789',
            'server': 'test-server',
            'date': 'Dec 2nd, 2016',
            'data': 'Foo',
            'streamalert:normalization': {
                'fileHash': [['process_md5']]
            }
        }]

    kinesis_payload = []
    for record in default_data:
        entity = 'unit_test_entity'
        raw_record = make_kinesis_raw_record(entity, 'None')
        payload = load_stream_payload('kinesis', entity, raw_record)
        payload = payload.pre_parse().next()
        payload.pre_parsed_record = record
        kinesis_payload.append(payload)

    return kinesis_payload
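A brief usage sketch for the helper above; the assertions are illustrative, but the values they check come straight from the fixture data the helper builds.

from nose.tools import assert_equal

payloads = mock_normalized_records()

# Four fixtures in, four pre-parsed Kinesis payloads out, each carrying the
# normalized record in place of the decoded Kinesis data
assert_equal(len(payloads), 4)
assert_equal(payloads[1].pre_parsed_record['domain'], 'evil.com')
assert_equal(payloads[3].pre_parsed_record['streamalert:normalization'],
             {'fileHash': [['process_md5']]})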
Example #13
def test_s3_download_object(*_):
    """S3Payload - Download Object"""
    key = 'test/unit/s3-object.gz'
    raw_record = make_s3_raw_record('unit_bucket_name', key)
    s3_payload = load_stream_payload('s3', key, raw_record)
    S3Payload.s3_object_size = (1024 * 1024)
    downloaded_path = s3_payload._download_object('us-east-1',
                                                  'unit_bucket_name', key)

    assert_true(downloaded_path.endswith('test-unit-s3-object.gz'))
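The assertion above suggests the downloader flattens the S3 key into a single local file name. One plausible way to produce that suffix is sketched below; this is an assumption about the implementation, shown only to explain the expected value.

import os
import tempfile

key = 'test/unit/s3-object.gz'
local_name = key.replace('/', '-')                            # 'test-unit-s3-object.gz'
local_path = os.path.join(tempfile.gettempdir(), local_name)  # e.g. /tmp/test-unit-s3-object.gz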
Example #14
def test_pre_parse_s3(s3_mock, *_):
    """S3Payload - Pre Parse"""
    records = ['{"record01": "value01"}', '{"record02": "value02"}']
    s3_mock.side_effect = [((0, records[0]), (1, records[1]))]

    raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name')
    s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)

    for index, record in enumerate(s3_payload.pre_parse()):
        assert_equal(record.pre_parsed_record, records[index])
Example #15
def load_and_classify_payload(config, service, entity, raw_record):
    """Return a loaded and classified payload."""
    # prepare the payloads
    payload = load_stream_payload(service, entity, raw_record)

    payload = list(payload.pre_parse())[0]
    classifier = StreamClassifier(config=config)
    classifier.load_sources(service, entity)
    classifier.classify_record(payload)

    return payload
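An illustrative call to the helper above; the config loader, stream name, and schema keys are assumptions rather than values taken from the snippet.

from nose.tools import assert_true

config = load_config('test/unit/conf')  # hypothetical config fixture loader and path
kinesis_data = json.dumps({'unit_key_01': 100, 'unit_key_02': 'valid string'})
raw_record = make_kinesis_raw_record('unit_test_default_stream', kinesis_data)

payload = load_and_classify_payload(
    config, 'kinesis', 'unit_test_default_stream', raw_record)

assert_true(payload.valid)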
Example #16
def test_s3_download_object_mb(log_mock, *_):
    """S3Payload - Download Object, Size in MB"""
    raw_record = make_s3_raw_record('unit_bucket_name', 'unit_key_name')
    s3_payload = load_stream_payload('s3', 'unit_key_name', raw_record)
    S3Payload.s3_object_size = (127.8 * 1024 * 1024)
    s3_payload._download_object('us-east-1', 'unit_bucket_name', 'unit_key_name')

    assert_equal(log_mock.call_args_list[0],
                 call('[S3Payload] Starting download from S3: %s/%s [%s]',
                      'unit_bucket_name', 'unit_key_name', '127.8MB'))

    assert_equal(log_mock.call_args_list[1][0][0], 'Completed download in %s seconds')
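For context, the '127.8MB' string in the expected call can be derived from the byte count set above. The conversion below is only a sketch of the likely formatting, not the project's code.

size = 127.8 * 1024 * 1024
display_size = '{}MB'.format(round(size / 1024.0 / 1024.0, 1))  # '127.8MB'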
Example #17
def test_repr_string():
    """StreamPayload - String Representation"""
    s3_payload = load_stream_payload('s3', 'entity', 'record')

    # Set some values that are different than the defaults
    s3_payload.type = 'unit_type'
    s3_payload.log_source = 'unit_source'
    s3_payload.records = ['rec1', 'rec2']
    print_value = ('<S3Payload valid:False log_source:unit_source '
                   'entity:entity type:unit_type '
                   'record:[\'rec1\', \'rec2\']>')

    output_print = s3_payload.__repr__()
    assert_equal(output_print, print_value)
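A __repr__ that produces the expected string might look like the sketch below; the attribute names come from the test itself, but the exact implementation is an assumption.

def __repr__(self):
    # Format mirrors the expected string in the test above
    return '<{} valid:{} log_source:{} entity:{} type:{} record:{}>'.format(
        self.__class__.__name__, self.valid, self.log_source,
        self.entity, self.type, self.records)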
Example #18
def test_pre_parse_sns(log_mock):
    """SNSPayload - Pre Parse"""
    sns_data = json.dumps({'test': 'value'})
    raw_record = make_sns_raw_record('unit_topic', sns_data)
    sns_payload = load_stream_payload('sns', 'entity', raw_record)

    sns_payload = sns_payload.pre_parse().next()

    assert_equal(sns_payload.pre_parsed_record, '{"test": "value"}')

    log_mock.assert_called_with(
        'Pre-parsing record from SNS. '
        'MessageId: %s, EventSubscriptionArn: %s', 'unit test message id',
        'arn:aws:sns:us-east-1:123456789012:unit_topic')
Example #19
def test_pre_parse_kinesis(log_mock):
    """KinesisPayload - Pre Parse"""
    kinesis_data = json.dumps({'test': 'value'})
    entity = 'unit_test_entity'
    raw_record = make_kinesis_raw_record(entity, kinesis_data)
    kinesis_payload = load_stream_payload('kinesis', entity, raw_record)

    kinesis_payload = kinesis_payload.pre_parse().next()

    assert_equal(kinesis_payload.pre_parsed_record, '{"test": "value"}')

    log_mock.assert_called_with(
        'Pre-parsing record from Kinesis. '
        'eventID: %s, eventSourceARN: %s', 'unit test event id',
        'arn:aws:kinesis:us-east-1:123456789012:stream/{}'.format(entity))
Example #20
def test_refresh_record():
    """StreamPayload - Refresh Record"""
    s3_payload = load_stream_payload('s3', 'entity', 'record')

    # Set some values that are different than the defaults
    s3_payload.type = 'unit_type'
    s3_payload.log_source = 'unit_source'
    s3_payload.records = ['rec1']
    s3_payload.valid = True

    s3_payload._refresh_record('new pre_parsed_record')

    assert_equal(s3_payload.pre_parsed_record, 'new pre_parsed_record')
    assert_is_none(s3_payload.type)
    assert_is_none(s3_payload.log_source)
    assert_is_none(s3_payload.records)
    assert_false(s3_payload.valid)
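The assertions pin down what _refresh_record must do. A minimal sketch consistent with them follows; it is an assumption, not the project's actual implementation.

def _refresh_record(self, new_record):
    # Swap in the new pre-parsed record and reset all classification state
    self.pre_parsed_record = new_record
    self.type = None
    self.log_source = None
    self.records = None
    self.valid = False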
Example #21
    def _validate_test_records(self, rule_name, test_record, formatted_record,
                               print_header_line):
        """Function to validate test records and log any errors

        Args:
            rule_name (str): The rule name being tested
            test_record (dict): A single record to test
            formatted_record (dict): A dictionary that includes the 'data' from the
                test record, formatted into a structure that resembles how an
                incoming record from the service would be formatted.
                See test/integration/templates for examples of how each service
                formats records.
            print_header_line (bool): Whether to print the rule name as a header
                line before reporting results for its records
        """
        service, entity = self.processor.classifier.extract_service_and_entity(
            formatted_record)

        if not self.processor.classifier.load_sources(service, entity):
            self.all_tests_passed = False
            return

        # Create the StreamPayload to use for encapsulating parsed info
        payload = load_stream_payload(service, entity, formatted_record)
        if not payload:
            self.all_tests_passed = False
            return

        if print_header_line:
            print '\n{}'.format(rule_name)

        for record in payload.pre_parse():
            self.processor.classifier.classify_record(record)

            if not record.valid:
                self.all_tests_passed = False
                self.analyze_record_delta(rule_name, test_record)

            report_output(record.valid, [
                '[log=\'{}\']'.format(record.log_source or 'unknown'),
                'validation',
                record.service(), test_record['description']
            ])
Example #22
    def _validate_test_record(self, file_name, test_event, formatted_record,
                              print_header_line):
        """Function to validate test records and log any errors

        Args:
            file_name (str): The base name of the test event file.
            test_event (dict): A single test event containing the record and other details
            formatted_record (dict): A dictionary that includes the 'data' from the
                test record, formatted into a structure that resembles how an
                incoming record from the service would be formatted.
                See test/integration/templates for examples of how each service
                formats records.
            print_header_line (bool): Indicates if this is the first record from
                a test file, and therefore we should print some header information
        """
        service, entity = self.processor.classifier.extract_service_and_entity(
            formatted_record)

        if not self.processor.classifier.load_sources(service, entity):
            return False

        # Create the StreamPayload to use for encapsulating parsed info
        payload = load_stream_payload(service, entity, formatted_record)
        if not payload:
            return False

        if print_header_line:
            print '\n{}'.format(file_name)

        for record in payload.pre_parse():
            self.processor.classifier.classify_record(record)

            if not record.valid:
                self.all_tests_passed = False
                self.analyze_record_delta(file_name, test_event)

            report_output(record.valid, [
                '[log=\'{}\']'.format(record.log_source or 'unknown'),
                'validation',
                record.service(), test_event['description']
            ])
Example #23
    def test_parse_convert_fail(self, log_mock):
        """StreamClassifier - Convert Failed"""
        service, entity = 'kinesis', 'unit_test_default_stream'

        result = self.classifier.load_sources(service, entity)

        assert_true(result)

        kinesis_data = json.dumps({
            'unit_key_01': 'not an integer',
            'unit_key_02': 'valid string'
        })

        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_stream_payload(service, entity, raw_record)
        payload = list(payload.pre_parse())[0]

        result = self.classifier._parse(payload)

        assert_false(result)

        log_mock.assert_called_with(
            'Invalid schema. Value for key [%s] is not an int: %s',
            'unit_key_01', 'not an integer')
Example #24
def test_get_service_sns():
    """StreamPayload - Get Service, SNS"""
    sns_payload = load_stream_payload('sns', 'entity', 'record')

    assert_equal(sns_payload.service(), 'sns')
Example #25
def test_get_service_s3():
    """StreamPayload - Get Service, S3"""
    s3_payload = load_stream_payload('s3', 'entity', 'record')

    assert_equal(s3_payload.service(), 's3')
Example #26
def test_get_service_kinesis():
    """StreamPayload - Get Service, Kinesis"""
    kinesis_payload = load_stream_payload('kinesis', 'entity', 'record')

    assert_equal(kinesis_payload.service(), 'kinesis')
Example #27
def test_load_payload_invalid(log_mock):
    """StreamPayload - Loading Stream Payload, Invalid"""
    load_stream_payload('blah', 'entity', 'record')

    log_mock.assert_called_with('Service payload not supported: %s', 'blah')
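Taken together, the loader tests above (a valid S3 payload, the per-service lookups, and the unsupported 'blah' case) suggest a simple dispatch table. The sketch below assumes per-service payload classes and constructor keywords with these names; only S3Payload is confirmed by the tests.

def load_stream_payload(service, entity, raw_record):
    """Return a payload instance for the given service, or None if unsupported"""
    # Class names and keyword arguments here are assumptions for illustration
    payload_map = {'kinesis': KinesisPayload, 's3': S3Payload, 'sns': SnsPayload}
    if service not in payload_map:
        LOGGER.error('Service payload not supported: %s', service)
        return
    return payload_map[service](entity=entity, raw_record=raw_record)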
Example #28
    def run(self, event):
        """StreamAlert Lambda function handler.

        Loads the configuration for the StreamAlert function which contains
        available data sources, log schemas, normalized types, and outputs.
        Classifies incoming logs into a parsed type.
        Matches records against rules.

        Args:
            event (dict): An AWS event mapped to a specific source/entity
                containing data read by Lambda.

        Returns:
            bool: True if all logs being parsed match a schema
        """
        records = event.get('Records', [])
        LOGGER.debug('Number of incoming records: %d', len(records))
        if not records:
            return False

        firehose_config = self.config['global'].get('infrastructure',
                                                    {}).get('firehose', {})
        if firehose_config.get('enabled'):
            self._firehose_client = StreamAlertFirehose(
                self.env['lambda_region'], firehose_config,
                self.config['logs'])

        payload_with_normalized_records = []
        for raw_record in records:
            # Get the service and entity from the payload. If the service/entity
            # is not in our config, log an error and move on to the next record
            service, entity = self.classifier.extract_service_and_entity(
                raw_record)
            if not service:
                LOGGER.error(
                    'No valid service found in payload\'s raw record. Skipping '
                    'record: %s', raw_record)
                continue

            if not entity:
                LOGGER.error(
                    'Unable to extract entity from payload\'s raw record for service %s. '
                    'Skipping record: %s', service, raw_record)
                continue

            # Cache the log sources for this service and entity on the classifier
            if not self.classifier.load_sources(service, entity):
                continue

            # Create the StreamPayload to use for encapsulating parsed info
            payload = load_stream_payload(service, entity, raw_record)
            if not payload:
                continue

            payload_with_normalized_records.extend(
                self._process_alerts(payload))

        # Log normalized records metric
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.NORMALIZED_RECORDS,
                                len(payload_with_normalized_records))

        # Apply Threat Intel to normalized records in the end of Rule Processor invocation
        record_alerts = self._rules_engine.threat_intel_match(
            payload_with_normalized_records)
        self._alerts.extend(record_alerts)
        if record_alerts:
            self.alert_forwarder.send_alerts(record_alerts)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS,
                                self._processed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.TOTAL_PROCESSED_SIZE,
                                self._processed_size)

        LOGGER.debug('Invalid record count: %d', self._failed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                                self._failed_record_count)

        LOGGER.debug('%s alerts triggered', len(self._alerts))

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                                len(self._alerts))

        # Check if debugging logging is on before json dumping alerts since
        # this can be time consuming if there are a lot of alerts
        if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
            LOGGER.debug(
                'Alerts:\n%s',
                json.dumps([alert.output_dict() for alert in self._alerts],
                           indent=2,
                           sort_keys=True))

        if self._firehose_client:
            self._firehose_client.send()

        # Only log rule info here if this is not running tests
        # During testing, this gets logged at the end and printing here could be confusing
        # since stress testing calls this method multiple times
        if self.env['lambda_alias'] != 'development':
            stats.print_rule_stats(True)

        return self._failed_record_count == 0
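A hedged invocation sketch for the handler method above; the wrapping class name, constructor argument, and Lambda entry point are assumptions based only on the signature shown.

def handler(event, context):
    """Hypothetical Lambda entry point wiring the event into run()"""
    if not StreamAlert(context).run(event):
        LOGGER.error('Rule processor did not successfully parse all records')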
Example #29
def test_load_payload_valid():
    """StreamPayload - Loading Stream Payload, Valid"""
    payload = load_stream_payload('s3', 'entity', 'record')

    assert_is_instance(payload, S3Payload)
Example #30
    def run(self, event):
        """StreamAlert Lambda function handler.

        Loads the configuration for the StreamAlert function which contains
        available data sources, log schemas, normalized types, and outputs.
        Classifies incoming logs into a parsed type.
        Matches records against rules.

        Args:
            event (dict): An AWS event mapped to a specific source/entity
                containing data read by Lambda.

        Returns:
            bool: True if all logs being parsed match a schema
        """
        records = event.get('Records', [])
        LOGGER.debug('Number of Records: %d', len(records))
        if not records:
            return False

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS, len(records))

        firehose_config = self.config['global'].get(
            'infrastructure', {}).get('firehose', {})
        if firehose_config.get('enabled'):
            self.firehose_client = boto3.client('firehose',
                                                region_name=self.env['lambda_region'])

        for raw_record in records:
            # Get the service and entity from the payload. If the service/entity
            # is not in our config, log an error and move on to the next record
            service, entity = self.classifier.extract_service_and_entity(raw_record)
            if not service:
                LOGGER.error('No valid service found in payload\'s raw record. Skipping '
                             'record: %s', raw_record)
                continue

            if not entity:
                LOGGER.error(
                    'Unable to extract entity from payload\'s raw record for service %s. '
                    'Skipping record: %s', service, raw_record)
                continue

            # Cache the log sources for this service and entity on the classifier
            if not self.classifier.load_sources(service, entity):
                continue

            # Create the StreamPayload to use for encapsulating parsed info
            payload = load_stream_payload(service, entity, raw_record)
            if not payload:
                continue

            self._process_alerts(payload)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.TOTAL_PROCESSED_SIZE,
                                self._processed_size)

        LOGGER.debug('Invalid record count: %d', self._failed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FAILED_PARSES,
                                self._failed_record_count)

        LOGGER.debug('%s alerts triggered', len(self._alerts))

        MetricLogger.log_metric(
            FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS, len(
                self._alerts))

        # Check if debugging logging is on before json dumping alerts since
        # this can be time consuming if there are a lot of alerts
        if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
            LOGGER.debug('Alerts:\n%s', json.dumps(self._alerts, indent=2))

        if self.firehose_client:
            self._send_to_firehose()

        return self._failed_record_count == 0