Example #1
    def _finalize(cls, response, stream_name, size):
        """Perform any final operations for this response, such as metric logging, etc

        Args:
            response (dict): boto3 client response object
            stream_name (str): The name of the Delivery Stream to send to
            size (int): The original size of the batch being sent
        """
        if not response:
            return  # Could happen in the case of backoff failing entirely

        # Check for failures that occurred in PutRecordBatch after several backoff attempts
        if response.get('FailedPutCount'):
            failed_records = [
                failed for failed in response['RequestResponses']
                if failed.get('ErrorCode')
            ]
            cls._log_failed(response['FailedPutCount'])

            # Only log the first 1024 failed records to CloudWatch Logs
            LOGGER.error(
                'Failed to put the following records to firehose %s: %s',
                stream_name, json.dumps(failed_records[:1024], indent=2))
            return

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_RECORDS_SENT, size)
        LOGGER.info(
            'Successfully sent %d message(s) to firehose %s with RequestId \'%s\'',
            size, stream_name,
            response.get('ResponseMetadata', {}).get('RequestId', ''))
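
For reference, a hedged sketch of the shape of the boto3 put_record_batch response that _finalize inspects; the field values are illustrative and the class name in the commented call is an assumption, not taken from the project:

# Illustrative Firehose PutRecordBatch response (values made up for this sketch).
# _finalize() keys off 'FailedPutCount', the per-record 'ErrorCode' entries in
# 'RequestResponses', and 'ResponseMetadata.RequestId'.
sample_response = {
    'FailedPutCount': 1,
    'RequestResponses': [
        {'RecordId': '49598000000000000000000000000000000000000000000001'},
        {'ErrorCode': 'ServiceUnavailableException', 'ErrorMessage': 'Slow down.'},
    ],
    'ResponseMetadata': {'RequestId': 'e9b13a81-example-request-id'},
}

# Hypothetical invocation (class name assumed):
# FirehoseClient._finalize(sample_response, 'example_delivery_stream', 2)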
Example #2
File: s3.py Project: webvul/streamalert
    def _read_file(self):
        """Download and read the contents of the S3 file

        Yields:
            tuple: line number, contents of the line being read
        """
        bucket = self._unquote(self.bucket)
        key = self._unquote(self.key)

        # Use tempfile.TemporaryFile to do the download
        # This will automatically close/get garbage collected upon completion
        with tempfile.TemporaryFile() as download:
            client = boto3.resource('s3', region_name=self.region).Bucket(bucket)
            start_time = time.time()
            LOGGER.info('[S3Payload] Starting download from S3: %s/%s [%s]', bucket, key, self.size)

            try:
                client.download_fileobj(key, download)
            except (IOError, ClientError):
                LOGGER.exception('Failed to download object from S3')
                raise

            total_time = time.time() - start_time
            LOGGER.info('Completed download in %s seconds', round(total_time, 2))

            # Log a metric on how long this object took to download
            MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.S3_DOWNLOAD_TIME, total_time)

            for line_num, line in self._read_downloaded_object(download):
                yield line_num, line

            # Reading was a success, so truncate the file contents and return
            download.seek(0)
            download.truncate()
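
The _read_downloaded_object helper used above is not part of this listing; below is a minimal sketch, assuming a plain newline-delimited object, of how such a helper could yield numbered lines from the temp file:

    # Hedged sketch only -- the project's actual _read_downloaded_object() is
    # not shown here; this assumes a plain newline-delimited S3 object.
    def _read_downloaded_object(self, file_handle):
        """Yield (line number, line) tuples from the downloaded temp file"""
        file_handle.seek(0)  # rewind; download_fileobj leaves the cursor at EOF
        for line_num, line in enumerate(file_handle, start=1):
            yield line_num, line.rstrip(b'\r\n')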
Example #3
    def run(self, categorized_records):
        """Run extract artifacts logic and send artifacts to the Firehose for retention

        Args:
            categorized_records (dict): A dictionary mapping log source types to lists of
                records, in the following format:
                {
                    'log_type_01_sub_type_01': [{'key': 'value'}],
                    'log_type_01_sub_type_02': [{'key': 'value'}],
                    'log_type_02_sub_type_01': [{'key': 'value'}],
                    'log_type_02_sub_type_02': [{'key': 'value'}]
                }
        """

        for source_type, records in categorized_records.items():
            LOGGER.debug('Extracting artifacts from %d %s logs', len(records),
                         source_type)
            for artifact in self._extract_artifacts(source_type, records):
                self._artifacts.append(artifact.artifact)

        LOGGER.debug('Extracted %d artifact(s)', len(self._artifacts))

        MetricLogger.log_metric(CLASSIFIER_FUNCTION_NAME,
                                MetricLogger.EXTRACTED_ARTIFACTS,
                                len(self._artifacts))

        self.firehose.send_artifacts(self._artifacts,
                                     self._dst_firehose_stream_name)
Example #4
    def _log_failed(cls, count, function_name):
        """Helper to log the failed Firehose records metric

        Args:
            count (int): Number of failed records
            function_name (str): Name of the Lambda function logging this metric
        """
        MetricLogger.log_metric(function_name, MetricLogger.FIREHOSE_FAILED_RECORDS, count)
Example #5
    def _dispatch_alert(self, alert):
        """Dispatch a single alert to the alert processor."""
        alert.attempts += 1
        LOGGER.info('Dispatching %s to %s (attempt %d)', alert,
                    self.alert_proc, alert.attempts)
        MetricLogger.log_metric(ALERT_MERGER_NAME, MetricLogger.ALERT_ATTEMPTS,
                                alert.attempts)

        record_payload = json.dumps(alert.dynamo_record(),
                                    default=list,
                                    separators=(',', ':'))

        if len(record_payload) <= self.MAX_LAMBDA_PAYLOAD_SIZE:
            # The entire alert fits in the Lambda payload - send it all
            payload = record_payload
        else:
            # The alert is too big - the alert processor will have to pull it from Dynamo
            payload = json.dumps(alert.dynamo_key)

        self.lambda_client.invoke(FunctionName=self.alert_proc,
                                  InvocationType='Event',
                                  Payload=payload,
                                  Qualifier='production')

        alert.dispatched = datetime.utcnow()
        self.table.mark_as_dispatched(alert)
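
A note on the size check above: MAX_LAMBDA_PAYLOAD_SIZE is presumably kept below the 256 KB limit that AWS Lambda enforces on asynchronous ('Event') invocation payloads. The definition below is an assumed, conservative value, not the project's actual constant:

    # Assumption: kept safely under Lambda's 256 KB asynchronous payload limit,
    # with some headroom left for request overhead
    MAX_LAMBDA_PAYLOAD_SIZE = 256 * 1024 - 4096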
Example #6
    def _log_failed(cls, count):
        """Helper to log the failed Firehose records metric

        Args:
            count (int): Number of failed records
        """
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS, count)
Example #7
    def run(self, records):
        """Run rules against the records sent from the Classifier function

        Args:
            records (list): Dictionaries of records sent from the classifier function
                Record Format:
                    {
                        'cluster': 'prod',
                        'log_schema_type': 'cloudwatch:cloudtrail',
                        'record': {
                            'key': 'value'
                        },
                        'service': 'kinesis',
                        'resource': 'kinesis_stream_name',
                        'data_type': 'json'
                    }

        Returns:
            list: Alerts that have been triggered by this data
        """
        LOGGER.info('Processing %d records', len(records))

        # Extract any threat intelligence matches from the records
        self._extract_threat_intel(records)

        alerts = []
        for payload in records:
            rules = Rule.rules_for_log_type(payload['log_schema_type'])
            if not rules:
                LOGGER.debug('No rules to process for %s', payload)
                continue

            for rule in rules:
                # subkey check
                if not self._process_subkeys(payload['record'], rule):
                    continue

                # matcher check
                if not rule.check_matchers(payload['record']):
                    continue

                alert = self._rule_analysis(payload, rule)
                if alert:
                    alerts.append(alert)

        self._alert_forwarder.send_alerts(alerts)

        # Only log rule info here if this is deployed in Lambda or explicitly enabled
        # During testing, this gets logged at the very end
        if self._rule_stat_tracker.enabled:
            LOGGER.info(RuleStatisticTracker.statistics_info())

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS, len(alerts))

        return alerts
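
For context, a rough sketch of a StreamAlert-style rule that Rule.rules_for_log_type and check_matchers would evaluate against each record; the import path and decorator arguments are approximations rather than verified against this version of the project:

# Hedged sketch -- the decorator signature and import path are assumptions
from streamalert.shared.rule import rule


@rule(logs=['cloudwatch:cloudtrail'], outputs=['aws-sns:example-topic'])
def root_console_login(record):
    """Alert when the AWS root account signs in to the console"""
    return (
        record.get('eventName') == 'ConsoleLogin'
        and record.get('userIdentity', {}).get('type') == 'Root'
    )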
Example #8
    def _pre_parse(self):
        """Pre-parsing method for incoming app records

        This iterates over all the incoming logs in the 'logs' list.

        Yields:
            Instances of PayloadRecord back to the caller containing the current log data
        """
        for data in self.raw_record['logs']:
            yield PayloadRecord(data)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.TOTAL_STREAMALERT_APP_RECORDS,
                                len(self.raw_record['logs']))
Example #9
File: s3.py Project: webvul/streamalert
    def _pre_parse(self):
        """Pre-parsing method for S3 objects

        Downloads the S3 object into the system's temp directory for reading. The
        file is kept open as a tempfile.TemporaryFile to ensure proper cleanup
        when reading finishes.

        Yields:
            Instances of PayloadRecord back to the caller containing the current log data
        """
        if not self._check_size():
            return  # _check_size can raise an exception as well

        line_num = 0  # remains 0 for the metric below if the object yields no lines
        for line_num, data in self._read_file():
            yield PayloadRecord(data)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_S3_RECORDS, line_num)
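
The _check_size helper referenced above is also not shown in this listing; a minimal sketch under assumed behavior (skip empty objects, refuse oversized ones) could look like the following, where the size cap and the exception are hypothetical:

    # Hedged sketch -- the real _check_size() is not shown; the 128 MB ceiling
    # and the exception type are hypothetical illustrations of the comment above
    MAX_OBJECT_SIZE = 128 * 1024 * 1024

    def _check_size(self):
        """Return True if the object should be downloaded, raising if it is too large"""
        if self.size > self.MAX_OBJECT_SIZE:
            raise ValueError('S3 object too large to download: {} bytes'.format(self.size))
        return self.size > 0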
Example #10
    def send_alerts(self, alerts):
        """Send alerts to the Dynamo table.

        Args:
            alerts (list): A list of Alert instances to save to Dynamo.
        """
        if not alerts:
            return  # nothing to do

        try:
            self._table.add_alerts(alerts)
        except ClientError:
            # add_alerts() automatically retries transient errors - any raised ClientError
            # is likely unrecoverable. Log an exception and metric
            LOGGER.exception(
                'An error occurred when sending alerts to DynamoDB')
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FAILED_DYNAMO_WRITES, 1)
            return

        LOGGER.info('Successfully sent %d alert(s) to dynamo:%s', len(alerts),
                    self._table.name)
Example #11
    def _log_metrics(self):
        """Perform some metric logging before exiting"""
        MetricLogger.log_metric(
            FUNCTION_NAME, MetricLogger.TOTAL_RECORDS,
            sum(len(payload.parsed_records) for payload in self._payloads))
        MetricLogger.log_metric(
            FUNCTION_NAME, MetricLogger.NORMALIZED_RECORDS,
            sum(1 for payload in self._payloads
                for log in payload.parsed_records
                if log.get(Normalizer.NORMALIZATION_KEY)))
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.TOTAL_PROCESSED_SIZE,
                                self._processed_size)

        LOGGER.debug('Invalid record count: %d', self._failed_record_count)
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                                self._failed_record_count)
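
Every example above funnels through MetricLogger.log_metric. As a closing illustration, here is a minimal sketch of how such a helper could work, assuming metrics are emitted as structured log lines that CloudWatch Logs metric filters later turn into custom metrics; the constant names and the log format are assumptions, not the project's actual implementation:

import json
import logging

METRIC_LOGGER = logging.getLogger('metrics')


class MetricLogger:
    """Hedged sketch only -- not the project's actual MetricLogger"""
    # Metric name constants (assumed values, mirroring the names used above)
    FIREHOSE_RECORDS_SENT = 'FirehoseRecordsSent'
    FIREHOSE_FAILED_RECORDS = 'FirehoseFailedRecords'
    TRIGGERED_ALERTS = 'TriggeredAlerts'

    @classmethod
    def log_metric(cls, function_name, metric_name, value):
        # Emit one structured line per metric; a CloudWatch Logs metric filter
        # (provisioned out of band) can extract metric_name/metric_value from it
        METRIC_LOGGER.info(json.dumps({
            'function': function_name,
            'metric_name': metric_name,
            'metric_value': value,
        }))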