Example #1
    def _dispatch_alert(self, alert):
        """Dispatch a single alert to the alert processor."""
        alert.attempts += 1
        LOGGER.info('Dispatching %s to %s (attempt %d)', alert,
                    self.alert_proc, alert.attempts)
        MetricLogger.log_metric(ALERT_MERGER_NAME, MetricLogger.ALERT_ATTEMPTS,
                                alert.attempts)

        record_payload = json.dumps(alert.dynamo_record(),
                                    cls=Alert.AlertEncoder,
                                    separators=(',', ':'))
        if len(record_payload) <= self.MAX_LAMBDA_PAYLOAD_SIZE:
            # The entire alert fits in the Lambda payload - send it all
            payload = record_payload
        else:
            # The alert is too big - the alert processor will have to pull it from Dynamo
            payload = json.dumps(alert.dynamo_key)

        self.lambda_client.invoke(FunctionName=self.alert_proc,
                                  InvocationType='Event',
                                  Payload=payload,
                                  Qualifier='production')

        alert.dispatched = datetime.utcnow()
        self.table.mark_as_dispatched(alert)
Example #2
    def _log_failed(cls, count):
        """Helper to log the failed Firehose records metric

        Args:
            count (int): Number of failed records
        """
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FIREHOSE_FAILED_RECORDS, count)
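MetricLogger itself is not shown on this page. As a rough illustration only, a compatible log_metric could simply emit a structured log line that downstream tooling (for example, a CloudWatch Logs metric filter) turns into a custom metric; the class layout, constant names, and JSON format below are assumptions, not the project's actual implementation:

import json
import logging

LOGGER = logging.getLogger('metric_logger_sketch')


class MetricLogger(object):
    """Illustrative sketch only; constant names and log format are assumptions"""

    # A couple of the metric name constants referenced in the examples on this page
    FIREHOSE_FAILED_RECORDS = 'FirehoseFailedRecords'
    TRIGGERED_ALERTS = 'TriggeredAlerts'

    @classmethod
    def log_metric(cls, function_name, metric_name, value):
        # Emit one structured log line per metric; downstream tooling (assumed
        # to be a CloudWatch Logs metric filter) extracts the name and value
        LOGGER.info('Metric data for %s: %s', function_name,
                    json.dumps({'metric_name': metric_name, 'metric_value': value},
                               separators=(',', ':')))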
Example #3
    def _finalize(cls, response, stream_name, size):
        """Perform any final operations for this response, such as metric logging, etc

        Args:
            response (dict): boto3 client response object
            stream_name (str): The name of the Delivery Stream to send to
            size (int): The original size of the batch being sent
        """
        if not response:
            return  # Could happen in the case of backoff failing entirely

        # Check for failures that occurred in PutRecordBatch after several backoff attempts
        if response.get('FailedPutCount'):
            failed_records = [
                failed for failed in response['RequestResponses']
                if failed.get('ErrorCode')
            ]
            cls._log_failed(response['FailedPutCount'])

            # Only print the first 1024 failed records to CloudWatch logs
            LOGGER.error(
                'Failed to put the following records to firehose %s: %s',
                stream_name,
                json.dumps(failed_records[:1024], indent=2)
            )
            return

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FIREHOSE_RECORDS_SENT, size)
        LOGGER.info(
            'Successfully sent %d message(s) to firehose %s with RequestId \'%s\'',
            size,
            stream_name,
            response.get('ResponseMetadata', {}).get('RequestId', '')
        )
Example #4
    def _segment_records(cls, records):
        """Segment the records into batches that fit within the SQS message size limit

        Records are sized as they would appear in the final JSON array payload,
        including the surrounding brackets and the commas between records. Any
        single record that exceeds the limit is dropped and logged as failed.

        Yields:
            tuple: (list) copy of the current batch of records,
                (int) number of records in that batch
        """
        batch_size = 2  # for [] chars on array
        batch = []
        record_count = len(records)
        for idx, record in enumerate(records, start=1):
            # for , between records
            size = len(record) + (1 if idx != record_count and batch else 0)
            if size + 2 > cls.MAX_SIZE:
                LOGGER.error('Record is too large to send to SQS:\n%s', record)
                MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.SQS_FAILED_RECORDS, 1)
                continue

            if idx == record_count or size + batch_size >= cls.MAX_SIZE:
                if size + batch_size >= cls.MAX_SIZE:
                    yield batch[:], len(batch)

                if idx == record_count:  # the end of the records
                    if size + batch_size < cls.MAX_SIZE:  # this record fits on current batch
                        batch.append(record)
                        yield batch[:], len(batch)
                    else:
                        yield [record], 1
                    return

                del batch[:]
                batch_size = 2

            batch.append(record)
            batch_size += size
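For reference, the size accounting above (2 bytes for the array brackets plus 1 byte for each separating comma) can be checked with a few lines of plain Python; the sample records below are made up:

records = ['{"a":1}', '{"b":22}', '{"c":333}']

# The final payload is the records joined by commas and wrapped in brackets,
# which is exactly what the running batch_size above accounts for
payload = '[' + ','.join(records) + ']'
assert len(payload) == sum(len(r) for r in records) + (len(records) - 1) + 2
print('%d bytes (SQS allows up to 256 KiB per message)' % len(payload))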
Example #5
    def _download_object(self, region, bucket, key):
        """Download an object from S3.

        Verifies the S3 object is less than or equal to 128MB, and
        downloads it into a temp file.  Lambda can only execute for a
        maximum of 300 seconds, and the file to download
        greatly impacts that time.

        Args:
            region (str): AWS region to use for boto client instance.
            bucket (str): S3 bucket to download object from.
            key (str): Key of s3 object.

        Returns:
            str: The downloaded path of the S3 object.
        """
        size_kb = round(self.s3_object_size / 1024.0, 2)
        size_mb = round(size_kb / 1024.0, 2)
        display_size = '{}MB'.format(size_mb) if size_mb else '{}KB'.format(
            size_kb)

        # File size checks before downloading
        if size_kb == 0:
            return
        elif size_mb > 128:
            raise S3ObjectSizeError(
                '[S3Payload] The S3 object {}/{} is too large [{}] to download '
                'from S3'.format(bucket, key, display_size))

        # Shred the temp dir before downloading
        self._shred_temp_directory()
        # Bandit warns about using a shell process, ignore with #nosec
        LOGGER.debug(
            os.popen('df -h /{} | tail -1'.format(  #nosec
                tempfile.gettempdir())).read().strip())
        LOGGER.info('[S3Payload] Starting download from S3: %s/%s [%s]',
                    bucket, key, display_size)

        # Convert the S3 object name to store as a file in the Lambda container
        suffix = key.replace('/', '-')
        file_descriptor, downloaded_s3_object = tempfile.mkstemp(suffix=suffix)

        with open(downloaded_s3_object, 'wb') as data:
            client = boto3.client('s3', region_name=region)
            start_time = time.time()
            client.download_fileobj(bucket, key, data)

        # Explicitly call os.close on the underlying open file descriptor
        # Addresses https://github.com/airbnb/streamalert/issues/587
        os.close(file_descriptor)

        total_time = time.time() - start_time
        LOGGER.info('Completed download in %s seconds', round(total_time, 2))

        # Log a metric on how long this object took to download
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.S3_DOWNLOAD_TIME,
                                total_time)

        return downloaded_s3_object
Example #6
    def run(self, records):
        """Run rules against the records sent from the Classifier function

        Args:
            records (list): Dictionaries of records sent from the classifier function
                Record Format:
                    {
                        'cluster': 'prod',
                        'log_schema_type': 'cloudwatch:cloudtrail',
                        'record': {
                            'key': 'value'
                        },
                        'service': 'kinesis',
                        'resource': 'kinesis_stream_name',
                        'data_type': 'json'
                    }

        Returns:
            list: Alerts that have been triggered by this data
        """
        LOGGER.info('Processing %d records', len(records))

        # Extract any threat intelligence matches from the records
        self._extract_threat_intel(records)

        alerts = []
        for payload in records:
            rules = Rule.rules_for_log_type(payload['log_schema_type'])
            if not rules:
                LOGGER.debug('No rules to process for %s', payload)
                continue

            for rule in rules:
                # subkey check
                if not self._process_subkeys(payload['record'], rule):
                    continue

                # matcher check
                if not rule.check_matchers(payload['record']):
                    continue

                alert = self._rule_analysis(payload, rule)
                if alert:
                    alerts.append(alert)

        self._alert_forwarder.send_alerts(alerts)

        # Only log rule info here if this is deployed in Lambda
        # During testing, this gets logged at the end and printing here could be confusing
        # since stress testing calls this method multiple times
        if self._in_lambda:
            LOGGER.info(get_rule_stats(True))

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                                len(alerts))

        return alerts
Example #7
    def _check_record_batch(batch):
        """Helper function to verify record size"""
        # Iterate in reverse so that popping an oversized record does not
        # shift the indices of the records that have not been checked yet
        for index, record in reversed(list(enumerate(batch))):
            if len(str(record)) > MAX_RECORD_SIZE:
                # Show the first 1k bytes in order to not overload
                # CloudWatch logs
                LOGGER.error('The following record is too large to '
                             'be sent to Firehose: %s', str(record)[:1000])
                MetricLogger.log_metric(FUNCTION_NAME,
                                        MetricLogger.FIREHOSE_FAILED_RECORDS,
                                        1)
                batch.pop(index)
Example #8
    def _download_object(self, region, bucket, key):
        """Download an object from S3.

        Verifies the S3 object is less than or equal to 128MB, and
        downloads it into a temp file.  Lambda can only execute for a
        maximum of 300 seconds, and the file to download
        greatly impacts that time.

        Args:
            region (str): AWS region to use for boto client instance.
            bucket (str): S3 bucket to download object from.
            key (str): Key of s3 object.

        Returns:
            str: The downloaded path of the S3 object.
        """
        size_kb = self.s3_object_size / 1024.0
        size_mb = size_kb / 1024.0

        # File size checks before downloading
        if size_kb == 0:
            return
        elif size_mb > 128:
            raise S3ObjectSizeError('S3 object to download is above 128MB')

        # Bandit warns about using a shell process, ignore with #nosec
        LOGGER.debug(os.popen('df -h /tmp | tail -1').read().strip())  # nosec

        display_size = '{}MB'.format(size_mb) if size_mb else '{}KB'.format(
            size_kb)

        LOGGER.info('Starting download from S3: %s/%s [%s]', bucket, key,
                    display_size)

        # Convert the S3 object name to store as a file in the Lambda container
        suffix = key.replace('/', '-')
        file_descriptor, downloaded_s3_object = tempfile.mkstemp(suffix=suffix)

        with open(downloaded_s3_object, 'wb') as data:
            client = boto3.client('s3', region_name=region)
            start_time = time.time()
            client.download_fileobj(bucket, key, data)

        # Explicitly close the file descriptor returned by mkstemp so it is
        # not leaked by the Lambda container
        os.close(file_descriptor)

        total_time = time.time() - start_time
        LOGGER.info('Completed download in %s seconds', round(total_time, 2))

        # Log a metric on how long this object took to download
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.S3_DOWNLOAD_TIME,
                                total_time)

        return downloaded_s3_object
Example #9
    def _pre_parse(self):
        """Pre-parsing method for incoming app records

        This iterates over all the incoming logs in the 'logs' list.

        Yields:
            Instances of PayloadRecord back to the caller containing the current log data
        """
        for data in self.raw_record['logs']:
            yield PayloadRecord(data)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.TOTAL_STREAM_ALERT_APP_RECORDS,
                                len(self.raw_record['logs']))
Example #10
    def send_alerts(self, alerts):
        """Send alerts to the Alert Processor and to the alerts Dynamo table.

        Args:
            alerts (list): A list of dictionaries representing json alerts.
        """
        try:
            self._send_to_dynamo(alerts)
        except ClientError:
            # The batch_writer() automatically retries transient errors - any raised ClientError
            # is likely unrecoverable. Log an exception and metric
            LOGGER.exception('Error saving alerts to Dynamo')
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FAILED_DYNAMO_WRITES, 1)
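The _send_to_dynamo helper is not part of this example. A minimal sketch of what such a batch write could look like with boto3 follows (the function name, table name, and item shape are made up); the relevant point is that batch_writer() buffers the writes and automatically resends unprocessed items, which is why only a raised ClientError needs handling above:

import boto3


def send_alerts_to_dynamo(alerts, table_name='alerts-table-sketch'):
    """Hypothetical sketch only; table name and alert/item shape are assumptions"""
    table = boto3.resource('dynamodb').Table(table_name)
    # batch_writer() groups put_item calls into BatchWriteItem requests and
    # retries any unprocessed items that DynamoDB returns
    with table.batch_writer() as batch:
        for alert in alerts:
            batch.put_item(Item=alert)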
Example #11
    def _dispatch_alert(self, alert):
        """Dispatch all alerts which need to be sent to the rule processor."""
        this_attempt_num = alert.get('Attempts', 0) + 1
        LOGGER.info('Dispatching alert %s to %s (attempt %d)',
                    alert['AlertID'], self.alert_proc, this_attempt_num)
        MetricLogger.log_metric(ALERT_MERGER_NAME, MetricLogger.ALERT_ATTEMPTS,
                                this_attempt_num)

        self.lambda_client.invoke(FunctionName=self.alert_proc,
                                  InvocationType='Event',
                                  Payload=json.dumps(alert,
                                                     cls=AlertEncoder,
                                                     separators=(',', ':')),
                                  Qualifier='production')
        self.alerts_db.mark_as_dispatched(alert['RuleName'], alert['AlertID'])
Example #12
    def _limit_record_size(cls, batch):
        """Limits the batch size sent to Firehose by popping large records

        Args:
            batch (list): Record batch to iterate on
        """
        # Iterate in reverse so that popping an oversized record does not
        # shift the indices of the records that have not been checked yet
        for index, record in reversed(list(enumerate(batch))):
            if len(json.dumps(record, separators=(",", ":"))) > cls.MAX_RECORD_SIZE:
                # Show the first 1k bytes in order to not overload CloudWatch logs
                LOGGER.error('The following record is too large to '
                             'be sent to Firehose: %s', str(record)[:1000])
                MetricLogger.log_metric(FUNCTION_NAME,
                                        MetricLogger.FIREHOSE_FAILED_RECORDS,
                                        1)
                batch.pop(index)
Example #13
    def send_alerts(self, alerts):
        """Send alerts to the Dynamo table.

        Args:
            alerts (list): A list of Alert instances to save to Dynamo.
        """
        try:
            self._table.add_alerts(alerts)
            LOGGER.info('Successfully sent %d alert(s) to dynamo:%s',
                        len(alerts), self._table.name)
        except ClientError:
            # add_alerts() automatically retries transient errors - any raised ClientError
            # is likely unrecoverable. Log an exception and metric
            LOGGER.exception('Error saving alerts to Dynamo')
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FAILED_DYNAMO_WRITES, 1)
Example #14
    def pre_parse(self):
        """Pre-parsing method for incoming app records that iterates over all the
        incoming logs in the 'logs' list.

        Yields:
            Instances of `self` back to the caller with the proper
                `pre_parsed_record` set to the current log data. This conforms
                to the interface of returning a generator, providing the ability
                to support multiple records like this.
        """
        for data in self.raw_record['logs']:

            self._refresh_record(data)
            yield self

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_STREAM_ALERT_APP_RECORDS,
                                len(self.raw_record['logs']))
Example #15
    def _pre_parse(self):
        """Pre-parsing method for S3 objects

        Downloads the s3 object into the system's temp directory for reading. The
        file is kept open as a tempfile.TemporaryFile to ensure proper cleanup
        when reading finishes.

        Yields:
            Instances of PayloadRecord back to the caller containing the current log data
        """
        self._check_size()

        line_num = 0
        for line_num, data in self._read_file():
            yield PayloadRecord(data)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_S3_RECORDS,
                                line_num)
Example #16
    def _finalize(self, response, count):
        """Perform any final operations for this response, such as metric logging, etc

        Args:
            response (string|bool): MessageId or False if this request failed
            count (int): The size of the batch being sent to be logged as successful or failed
        """
        if not response:  # Could happen in the case of backoff failing entirely
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.SQS_FAILED_RECORDS, count)
            return

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.SQS_RECORDS_SENT,
                                count)

        LOGGER.debug(
            'Successfully sent message with %d records to %s with MessageId %s',
            count, self.queue.url, response)
Example #17
    def pre_parse(self):
        """Pre-parsing method for S3 objects that will download the s3 object,
        open it for reading and iterate over lines (records) in the file.
        This yields back references of this S3Payload instance to the caller
        with a properly set `pre_parsed_record` for this record.

        Yields:
            Instances of `self` back to the caller with the
                proper `pre_parsed_record` set. Conforms to the interface of
                returning a generator, providing the ability to support
                multi-record like this (s3).
        """
        s3_file_path = self._get_object()
        if not s3_file_path:
            return

        line_num, processed_size = 0, 0
        for line_num, data in self._read_downloaded_s3_object(s3_file_path):

            self._refresh_record(data)
            yield self

            # Only do the extra calculations below if debug logging is enabled
            if not LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
                continue

            # Add the current data to the total processed size
            # +1 to account for line feed
            processed_size += (len(data) + 1)

            # Log a debug message on every 100 lines processed
            if line_num % 100 == 0:
                avg_record_size = ((processed_size - 1) / line_num)
                if avg_record_size:
                    approx_record_count = self.s3_object_size / avg_record_size
                    LOGGER.debug(
                        'Processed %s S3 records out of an approximate total of %s '
                        '(average record size: %s bytes, total size: %s bytes)',
                        line_num,
                        approx_record_count,
                        avg_record_size,
                        self.s3_object_size)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_S3_RECORDS, line_num)
Example #18
    def _read_file(self):
        """Download and read the contents of the S3 file

        Yields:
            tuple: line number, contents of the line being read
        """
        bucket = self._unquote(self.bucket)
        key = self._unquote(self.key)

        # Use tempfile.TemporaryFile to do the download
        # This will automatically close/get garbage collected upon completion
        with tempfile.TemporaryFile() as download:
            client = boto3.resource('s3',
                                    region_name=self.region).Bucket(bucket)
            start_time = time.time()
            LOGGER.info('[S3Payload] Starting download from S3: %s/%s [%s]',
                        bucket, key, self.size)

            try:
                client.download_fileobj(key, download)
            except (IOError, ClientError):
                LOGGER.exception('Failed to download object from S3')
                return

            total_time = time.time() - start_time
            LOGGER.info('Completed download in %s seconds',
                        round(total_time, 2))

            # Log a metric on how long this object took to download
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.S3_DOWNLOAD_TIME, total_time)

            for line_num, line in self._read_downloaded_object(download):
                yield line_num, line

            # Reading was a success, so truncate the file contents and return
            download.seek(0)
            download.truncate()
Example #19
    def _finalize(self, response, batch):
        """Perform any final operations for this response, such as metric logging, etc

        Args:
            response (dict): boto3 client response object
            batch (list): List of JSON records that were sent to SQS
        """
        if not response:
            return  # Could happen in the case of backoff failing entirely

        # Check for failures that occurred in PutRecordBatch after several backoff attempts
        # And log the actual record from the batch
        failed = self._check_failures(response, batch=batch)

        # Remove the failed messages in this batch for an accurate metric
        successful_records = len(batch) - failed

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.SQS_RECORDS_SENT, successful_records)
        LOGGER.info(
            'Successfully sent %d message(s) to queue %s',
            successful_records,
            self.queue.url
        )
Example #20
    def _firehose_request_helper(self, stream_name, record_batch):
        """Send record batches to Firehose

        Args:
            stream_name (str): The name of the Delivery Stream to send to
            record_batch (list): The records to send
        """
        record_batch_size = len(record_batch)
        resp = {}

        try:
            LOGGER.debug('Sending %d records to Firehose:%s',
                         record_batch_size,
                         stream_name)
            resp = self.firehose_client.put_record_batch(
                DeliveryStreamName=stream_name,
                # The newline at the end is required by Firehose,
                # otherwise all records will be on a single line and
                # unsearchable in Athena.
                Records=[{'Data': json.dumps(self.sanitize_keys(record),
                                             separators=(",", ":")) + '\n'}
                         for record
                         in record_batch])
        except ClientError as firehose_err:
            LOGGER.error(firehose_err)
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    record_batch_size)
            return

        # Error handle if failures occurred in PutRecordBatch
        # TODO(jack) implement backoff here for additional message reliability
        if resp.get('FailedPutCount') > 0:
            failed_records = [failed
                              for failed
                              in resp['RequestResponses']
                              if failed.get('ErrorCode')]
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    resp['FailedPutCount'])
            # Only print the first 100 failed records to Cloudwatch logs
            LOGGER.error('The following records failed to Put to the '
                         'Delivery stream %s: %s',
                         stream_name,
                         json.dumps(failed_records[:100], indent=2))
        else:
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_RECORDS_SENT,
                                    record_batch_size)
            LOGGER.info('Successfully sent %d messages to Firehose:%s',
                        record_batch_size,
                        stream_name)
Example #21
    def _log_metrics(self):
        """Perform some metric logging before exiting"""
        MetricLogger.log_metric(
            FUNCTION_NAME,
            MetricLogger.TOTAL_RECORDS,
            sum(len(payload.parsed_records) for payload in self._payloads)
        )
        MetricLogger.log_metric(
            FUNCTION_NAME,
            MetricLogger.NORMALIZED_RECORDS,
            sum(
                1 for payload in self._payloads
                for log in payload.parsed_records if log.get(Normalizer.NORMALIZATION_KEY)
            )
        )
        MetricLogger.log_metric(
            FUNCTION_NAME, MetricLogger.TOTAL_PROCESSED_SIZE, self._processed_size
        )

        LOGGER.debug('Invalid record count: %d', self._failed_record_count)
        MetricLogger.log_metric(
            FUNCTION_NAME, MetricLogger.FAILED_PARSES, self._failed_record_count
        )
Example #22
    def _firehose_request_helper(self, stream_name, record_batch):
        """Send record batches to Firehose

        Args:
            stream_name (str): The name of the Delivery Stream to send to
            record_batch (list): The records to send
        """
        resp = {}
        record_batch_size = len(record_batch)
        exceptions_to_backoff = (ClientError, ConnectionError)

        @backoff.on_predicate(backoff.fibo,
                              lambda resp: resp['FailedPutCount'] > 0,
                              max_tries=self.MAX_BACKOFF_ATTEMPTS,
                              max_value=self.MAX_BACKOFF_FIBO_VALUE,
                              jitter=backoff.full_jitter,
                              on_backoff=backoff_handler,
                              on_success=success_handler,
                              on_giveup=giveup_handler)
        @backoff.on_exception(backoff.fibo,
                              exceptions_to_backoff,
                              max_tries=self.MAX_BACKOFF_ATTEMPTS,
                              jitter=backoff.full_jitter,
                              on_backoff=backoff_handler,
                              on_success=success_handler,
                              on_giveup=giveup_handler)
        def firehose_request_wrapper(data):
            """Firehose request wrapper to use with backoff"""
            LOGGER.info('[Firehose] Sending %d records to %s',
                        record_batch_size,
                        stream_name)
            return self._firehose_client.put_record_batch(
                DeliveryStreamName=stream_name,
                Records=data)

        # The newline at the end is required by Firehose,
        # otherwise all records will be on a single line and
        # unsearchable in Athena.
        records_data = [
            {'Data': json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'}
            for record in record_batch
        ]

        # The try/except here is to catch the raised error at the
        # end of the backoff.
        try:
            resp = firehose_request_wrapper(records_data)
        except exceptions_to_backoff as firehose_err:
            LOGGER.error(firehose_err)
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    record_batch_size)
            return

        # Error handle if failures occurred in PutRecordBatch after
        # several backoff attempts
        if resp.get('FailedPutCount') > 0:
            failed_records = [failed
                              for failed
                              in resp['RequestResponses']
                              if failed.get('ErrorCode')]
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    resp['FailedPutCount'])
            # Only print the first 100 failed records to Cloudwatch logs
            LOGGER.error('[Firehose] The following records failed to put to '
                         'the Delivery Stream %s: %s',
                         stream_name,
                         json.dumps(failed_records[:100], indent=2))
        else:
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_RECORDS_SENT,
                                    record_batch_size)
            LOGGER.info('[Firehose] Successfully sent %d messages to %s with RequestId [%s]',
                        record_batch_size,
                        stream_name,
                        resp.get('ResponseMetadata', {}).get('RequestId', ''))
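The two stacked decorators above come from the third-party backoff library: on_predicate retries while the predicate applied to the return value is truthy (here, while FailedPutCount is non-zero), and on_exception retries when one of the listed exceptions is raised. A tiny self-contained sketch of the same pattern, with a made-up flaky function standing in for put_record_batch:

import random

import backoff


@backoff.on_predicate(backoff.fibo, lambda resp: resp['FailedPutCount'] > 0, max_tries=5)
@backoff.on_exception(backoff.fibo, ConnectionError, max_tries=5)
def flaky_put_batch():
    """Made-up stand-in for put_record_batch used only to illustrate the decorators"""
    if random.random() < 0.2:
        raise ConnectionError('simulated transient network error')
    # Most of the time everything succeeds; occasionally report two failed records
    return {'FailedPutCount': random.choice([0, 0, 0, 2])}


print(flaky_put_batch())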
Example #23
    def run(self, event):
        """StreamAlert Lambda function handler.

        Loads the configuration for the StreamAlert function which contains
        available data sources, log schemas, normalized types, and outputs.
        Classifies logs sent into a parsed type.
        Matches records against rules.

        Args:
            event (dict): An AWS event mapped to a specific source/entity
                containing data read by Lambda.

        Returns:
            bool: True if all logs being parsed match a schema
        """
        records = event.get('Records', [])
        LOGGER.debug('Number of Records: %d', len(records))
        if not records:
            return False

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS, len(records))

        firehose_config = self.config['global'].get(
            'infrastructure', {}).get('firehose', {})
        if firehose_config.get('enabled'):
            self.firehose_client = boto3.client('firehose',
                                                region_name=self.env['lambda_region'])

        for raw_record in records:
            # Get the service and entity from the payload. If the service/entity
            # is not in our config, log an error and go on to the next record
            service, entity = self.classifier.extract_service_and_entity(raw_record)
            if not service:
                LOGGER.error('No valid service found in payload\'s raw record. Skipping '
                             'record: %s', raw_record)
                continue

            if not entity:
                LOGGER.error(
                    'Unable to extract entity from payload\'s raw record for service %s. '
                    'Skipping record: %s', service, raw_record)
                continue

            # Cache the log sources for this service and entity on the classifier
            if not self.classifier.load_sources(service, entity):
                continue

            # Create the StreamPayload to use for encapsulating parsed info
            payload = load_stream_payload(service, entity, raw_record)
            if not payload:
                continue

            self._process_alerts(payload)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.TOTAL_PROCESSED_SIZE,
                                self._processed_size)

        LOGGER.debug('Invalid record count: %d', self._failed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FAILED_PARSES,
                                self._failed_record_count)

        LOGGER.debug('%s alerts triggered', len(self._alerts))

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                                len(self._alerts))

        # Check if debugging logging is on before json dumping alerts since
        # this can be time consuming if there are a lot of alerts
        if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
            LOGGER.debug('Alerts:\n%s', json.dumps(self._alerts, indent=2))

        if self.firehose_client:
            self._send_to_firehose()

        return self._failed_record_count == 0
Example #24
    def _firehose_request_helper(self, stream_name, record_batch):
        """Send record batches to Firehose

        Args:
            stream_name (str): The name of the Delivery Stream to send to
            record_batch (list): The records to send
        """
        exceptions_to_backoff = (ClientError, ConnectionError, Timeout)

        @backoff.on_predicate(backoff.fibo,
                              lambda resp: resp['FailedPutCount'] > 0,
                              max_tries=self.MAX_BACKOFF_ATTEMPTS,
                              max_value=self.MAX_BACKOFF_FIBO_VALUE,
                              jitter=backoff.full_jitter,
                              on_backoff=backoff_handler(debug_only=False),
                              on_success=success_handler(),
                              on_giveup=giveup_handler())
        @backoff.on_exception(backoff.fibo,
                              exceptions_to_backoff,
                              max_tries=self.MAX_BACKOFF_ATTEMPTS,
                              jitter=backoff.full_jitter,
                              on_backoff=backoff_handler(debug_only=False),
                              on_success=success_handler(),
                              on_giveup=giveup_handler())
        def firehose_request_wrapper(data):
            """Firehose request wrapper to use with backoff"""
            # Use the current length of data here so we can track failed records that are retried
            LOGGER.info('[Firehose] Sending %d records to %s', len(data), stream_name)

            response = self._client.put_record_batch(DeliveryStreamName=stream_name, Records=data)

            # Log this as an error for now so it can be picked up in logs
            if response['FailedPutCount'] > 0:
                LOGGER.error('Received non-zero FailedPutCount: %d', response['FailedPutCount'])
                # Strip out the successful records so only the failed ones are retried. This happens
                # to the list of dictionary objects, so the called function sees the updated list
                self._strip_successful_records(data, response)

            return response

        original_batch_size = len(record_batch)

        # The newline at the end is required by Firehose,
        # otherwise all records will be on a single line and
        # unsearchable in Athena.
        records_data = [
            {'Data': json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'}
            for record in record_batch
        ]

        # The try/except here is to catch the raised error at the end of the backoff
        try:
            resp = firehose_request_wrapper(records_data)
        except exceptions_to_backoff as firehose_err:
            LOGGER.error(firehose_err)
            # Use the current length of the records_data in case some records were
            # successful but others were not
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    len(records_data))
            return

        # Error handle if failures occurred in PutRecordBatch after
        # several backoff attempts
        if resp.get('FailedPutCount') > 0:
            failed_records = [failed
                              for failed
                              in resp['RequestResponses']
                              if failed.get('ErrorCode')]
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    resp['FailedPutCount'])
            # Only print the first 100 failed records to Cloudwatch logs
            LOGGER.error('[Firehose] The following records failed to put to '
                         'the Delivery Stream %s: %s',
                         stream_name,
                         json.dumps(failed_records[:100], indent=2))
        else:
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_RECORDS_SENT,
                                    original_batch_size)
            LOGGER.info('[Firehose] Successfully sent %d messages to %s with RequestId [%s]',
                        original_batch_size,
                        stream_name,
                        resp.get('ResponseMetadata', {}).get('RequestId', ''))
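The _strip_successful_records helper called above is not included in this example. Based only on the comment above and on the shape of the PutRecordBatch response (one entry per input record in RequestResponses, with ErrorCode set on failures), one plausible in-place implementation could look like the sketch below; the body is an assumption, only the name matches the call above:

def _strip_successful_records(records, response):
    """Hypothetical sketch: keep only failed records, mutating the list in place"""
    failed_indices = {
        index for index, result in enumerate(response['RequestResponses'])
        if result.get('ErrorCode')
    }
    # Slice assignment mutates the original list object, so the backoff-wrapped
    # caller retries only the records that actually failed
    records[:] = [record for index, record in enumerate(records)
                  if index in failed_indices]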
Example #25
    def run(self, event):
        """StreamAlert Lambda function handler.

        Loads the configuration for the StreamAlert function which contains:
        available data sources, log formats, parser modes, and sinks.  Classifies
        logs sent into the stream into a parsed type.  Matches records against
        rules.

        Args:
            event: An AWS event mapped to a specific source/entity (kinesis stream or
                an s3 bucket event) containing data emitted to the stream.

        Returns:
            bool: True if all logs being parsed match a schema
        """
        records = event.get('Records', [])
        LOGGER.debug('Number of Records: %d', len(records))
        if not records:
            return False

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS,
                                len(records))

        for raw_record in records:
            # Get the service and entity from the payload. If the service/entity
            # is not in our config, log an error and go on to the next record
            service, entity = self.classifier.extract_service_and_entity(
                raw_record)
            if not service:
                LOGGER.error(
                    'No valid service found in payload\'s raw record. Skipping '
                    'record: %s', raw_record)
                continue

            if not entity:
                LOGGER.error(
                    'Unable to extract entity from payload\'s raw record for service %s. '
                    'Skipping record: %s', service, raw_record)
                continue

            # Cache the log sources for this service and entity on the classifier
            if not self.classifier.load_sources(service, entity):
                continue

            # Create the StreamPayload to use for encapsulating parsed info
            payload = load_stream_payload(service, entity, raw_record)
            if not payload:
                continue

            self._process_alerts(payload)

        LOGGER.debug('Invalid record count: %d', self._failed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                                self._failed_record_count)

        LOGGER.debug('%s alerts triggered', len(self._alerts))

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                                len(self._alerts))

        # Check if debugging logging is on before json dumping alerts since
        # this can be time consuming if there are a lot of alerts
        if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
            LOGGER.debug('Alerts:\n%s', json.dumps(self._alerts, indent=2))

        return self._failed_record_count == 0
Example #26
    def run(self, event):
        """StreamAlert Lambda function handler.

        Loads the configuration for the StreamAlert function which contains
        available data sources, log schemas, normalized types, and outputs.
        Classifies logs sent into a parsed type.
        Matches records against rules.

        Args:
            event (dict): An AWS event mapped to a specific source/entity
                containing data read by Lambda.

        Returns:
            bool: True if all logs being parsed match a schema
        """
        records = event.get('Records', [])
        LOGGER.debug('Number of incoming records: %d', len(records))
        if not records:
            return False

        firehose_config = self.config['global'].get('infrastructure',
                                                    {}).get('firehose', {})
        if firehose_config.get('enabled'):
            self._firehose_client = StreamAlertFirehose(
                self.env['lambda_region'], firehose_config,
                self.config['logs'])

        payload_with_normalized_records = []
        for raw_record in records:
            # Get the service and entity from the payload. If the service/entity
            # is not in our config, log an error and go on to the next record
            service, entity = self.classifier.extract_service_and_entity(
                raw_record)
            if not service:
                LOGGER.error(
                    'No valid service found in payload\'s raw record. Skipping '
                    'record: %s', raw_record)
                continue

            if not entity:
                LOGGER.error(
                    'Unable to extract entity from payload\'s raw record for service %s. '
                    'Skipping record: %s', service, raw_record)
                continue

            # Cache the log sources for this service and entity on the classifier
            if not self.classifier.load_sources(service, entity):
                continue

            # Create the StreamPayload to use for encapsulating parsed info
            payload = load_stream_payload(service, entity, raw_record)
            if not payload:
                continue

            payload_with_normalized_records.extend(
                self._process_alerts(payload))

        # Log normalized records metric
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.NORMALIZED_RECORDS,
                                len(payload_with_normalized_records))

        # Apply Threat Intel to normalized records in the end of Rule Processor invocation
        record_alerts = self._rules_engine.threat_intel_match(
            payload_with_normalized_records)
        self._alerts.extend(record_alerts)
        if record_alerts:
            self.alert_forwarder.send_alerts(record_alerts)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS,
                                self._processed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.TOTAL_PROCESSED_SIZE,
                                self._processed_size)

        LOGGER.debug('Invalid record count: %d', self._failed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                                self._failed_record_count)

        LOGGER.debug('%s alerts triggered', len(self._alerts))

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                                len(self._alerts))

        # Check if debugging logging is on before json dumping alerts since
        # this can be time consuming if there are a lot of alerts
        if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
            LOGGER.debug(
                'Alerts:\n%s',
                json.dumps([alert.output_dict() for alert in self._alerts],
                           indent=2,
                           sort_keys=True))

        if self._firehose_client:
            self._firehose_client.send()

        # Only log rule info here if this is not running tests
        # During testing, this gets logged at the end and printing here could be confusing
        # since stress testing calls this method multiple times
        if self.env['lambda_alias'] != 'development':
            stats.print_rule_stats(True)

        return self._failed_record_count == 0
Example #27
    def _send_to_firehose(self):
        """Send all classified records to a respective Firehose Delivery Stream"""
        def _chunk(record_list, chunk_size):
            """Helper function to chunk payloads"""
            for item in range(0, len(record_list), chunk_size):
                yield record_list[item:item + chunk_size]

        def _check_record_batch(batch):
            """Helper function to verify record size"""
            # Iterate in reverse so that popping an oversized record does not
            # shift the indices of the records that have not been checked yet
            for index, record in reversed(list(enumerate(batch))):
                if len(str(record)) > MAX_RECORD_SIZE:
                    # Show the first 1k bytes in order to not overload
                    # CloudWatch logs
                    LOGGER.error('The following record is too large to '
                                 'be sent to Firehose: %s', str(record)[:1000])
                    MetricLogger.log_metric(FUNCTION_NAME,
                                            MetricLogger.FIREHOSE_FAILED_RECORDS,
                                            1)
                    batch.pop(index)

        delivery_stream_name_pattern = 'streamalert_data_{}'

        # Iterate through each payload type
        for log_type, records in self.categorized_payloads.items():
            # This same method is used when naming the Delivery Streams
            formatted_log_type = log_type.replace(':', '_')

            for record_batch in _chunk(records, MAX_BATCH_SIZE):
                stream_name = delivery_stream_name_pattern.format(formatted_log_type)
                _check_record_batch(record_batch)

                resp = self.firehose_client.put_record_batch(
                    DeliveryStreamName=stream_name,
                    # The newline at the end is required by Firehose,
                    # otherwise all records will be on a single line and
                    # unsearchable in Athena.
                    Records=[{'Data': json.dumps(record, separators=(",", ":")) + '\n'}
                             for record
                             in record_batch])

                # Error handle if failures occurred
                # TODO(jack) implement backoff here once the rule processor is split
                if resp.get('FailedPutCount') > 0:
                    failed_records = [failed
                                      for failed
                                      in resp['RequestResponses']
                                      if failed.get('ErrorCode')]
                    MetricLogger.log_metric(FUNCTION_NAME,
                                            MetricLogger.FIREHOSE_FAILED_RECORDS,
                                            resp['FailedPutCount'])
                    # Only print the first 100 failed records
                    LOGGER.error('The following records failed to Put to the '
                                 'Delivery stream %s: %s',
                                 stream_name,
                                 json.dumps(failed_records[:100], indent=2))
                else:
                    MetricLogger.log_metric(FUNCTION_NAME,
                                            MetricLogger.FIREHOSE_RECORDS_SENT,
                                            len(record_batch))
                    LOGGER.info('Successfully sent %d messages to Firehose:%s',
                                len(record_batch),
                                stream_name)