def _finalize(cls, response, stream_name, size):
    """Perform any final operations for this response, such as metric logging, etc

    Args:
        response (dict): boto3 client response object
        stream_name (str): The name of the Delivery Stream to send to
        size (int): The original size of the batch being sent
    """
    if not response:
        return  # Could happen in the case of backoff failing entirely

    # Check for failures that occurred in PutRecordBatch after several backoff attempts
    if response.get('FailedPutCount'):
        failed_records = [
            failed
            for failed in response['RequestResponses']
            if failed.get('ErrorCode')
        ]
        cls._log_failed(response['FailedPutCount'])

        # Only print the first 1024 failed records to CloudWatch logs to avoid
        # flooding the log stream with an unbounded payload
        # (comment previously said 100, which did not match the slice below)
        LOGGER.error(
            'Failed to put the following records to firehose %s: %s',
            stream_name,
            json.dumps(failed_records[:1024], indent=2))
        return

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FIREHOSE_RECORDS_SENT, size)
    LOGGER.info(
        'Successfully sent %d message(s) to firehose %s with RequestId \'%s\'',
        size,
        stream_name,
        response.get('ResponseMetadata', {}).get('RequestId', ''))
def _read_file(self):
    """Download the S3 object to a local temp file and stream back its lines

    Yields:
        tuple: line number, contents of the line being read
    """
    s3_bucket = self._unquote(self.bucket)
    s3_key = self._unquote(self.key)

    # A tempfile.TemporaryFile backs the download; it is closed and garbage
    # collected automatically once the context manager exits
    with tempfile.TemporaryFile() as handle:
        bucket_resource = boto3.resource('s3', region_name=self.region).Bucket(s3_bucket)

        started_at = time.time()

        LOGGER.info('[S3Payload] Starting download from S3: %s/%s [%s]',
                    s3_bucket, s3_key, self.size)

        try:
            bucket_resource.download_fileobj(s3_key, handle)
        except (IOError, ClientError):
            LOGGER.exception('Failed to download object from S3')
            raise

        elapsed = time.time() - started_at

        LOGGER.info('Completed download in %s seconds', round(elapsed, 2))

        # Record a metric for how long this object took to download
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.S3_DOWNLOAD_TIME, elapsed)

        # Hand each (line number, line) pair straight through to the caller
        yield from self._read_downloaded_object(handle)

        # Reading finished successfully, so empty out the file contents
        handle.seek(0)
        handle.truncate()
def run(self, categorized_records):
    """Run extract artifacts logic and send artifacts to the Firehose for retention

    Args:
        categorized_records (dict): A dictionary contains log source type and records
            with following format
            {
                'log_type_01_sub_type_01': [{'key': 'value'}],
                'log_type_01_sub_type_02': [{'key': 'value'}],
                'log_type_02_sub_type_01': [{'key': 'value'}],
                'log_type_02_sub_type_02': [{'key': 'value'}]
            }
    """
    for log_type, log_records in categorized_records.items():
        LOGGER.debug('Extracting artifacts from %d %s logs', len(log_records), log_type)
        # Collect the underlying artifact payload from each extracted artifact
        self._artifacts.extend(
            extracted.artifact
            for extracted in self._extract_artifacts(log_type, log_records)
        )

    LOGGER.debug('Extracted %d artifact(s)', len(self._artifacts))

    MetricLogger.log_metric(CLASSIFIER_FUNCTION_NAME,
                            MetricLogger.EXTRACTED_ARTIFACTS,
                            len(self._artifacts))

    self.firehose.send_artifacts(self._artifacts, self._dst_firehose_stream_name)
def _log_failed(cls, count, function_name):
    """Helper to log the failed Firehose records metric

    Args:
        count (int): Number of failed records
        function_name (str): Name of the function to log this metric against
    """
    MetricLogger.log_metric(function_name, MetricLogger.FIREHOSE_FAILED_RECORDS, count)
def _dispatch_alert(self, alert):
    """Dispatch a single alert to the alert processor."""
    alert.attempts += 1
    LOGGER.info('Dispatching %s to %s (attempt %d)', alert, self.alert_proc, alert.attempts)
    MetricLogger.log_metric(ALERT_MERGER_NAME, MetricLogger.ALERT_ATTEMPTS, alert.attempts)

    full_record = json.dumps(alert.dynamo_record(), default=list, separators=(',', ':'))

    # If the serialized alert fits within the Lambda payload limit, send it all;
    # otherwise send only the Dynamo key and let the alert processor pull the
    # full record from Dynamo itself
    payload = (
        full_record
        if len(full_record) <= self.MAX_LAMBDA_PAYLOAD_SIZE
        else json.dumps(alert.dynamo_key)
    )

    self.lambda_client.invoke(FunctionName=self.alert_proc,
                              InvocationType='Event',
                              Payload=payload,
                              Qualifier='production')

    alert.dispatched = datetime.utcnow()
    self.table.mark_as_dispatched(alert)
def _log_failed(cls, count):
    """Record a metric tracking how many Firehose records failed to send

    Args:
        count (int): Number of failed records
    """
    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FIREHOSE_FAILED_RECORDS, count)
def run(self, records):
    """Run rules against the records sent from the Classifier function

    Args:
        records (list): Dictionaries of records sent from the classifier function
            Record Format:
                {
                    'cluster': 'prod',
                    'log_schema_type': 'cloudwatch:cloudtrail',
                    'record': {
                        'key': 'value'
                    },
                    'service': 'kinesis',
                    'resource': 'kinesis_stream_name'
                    'data_type': 'json'
                }

    Returns:
        list: Alerts that have been triggered by this data
    """
    LOGGER.info('Processing %d records', len(records))

    # Extract any threat intelligence matches from the records
    self._extract_threat_intel(records)

    triggered = []
    for payload in records:
        applicable_rules = Rule.rules_for_log_type(payload['log_schema_type'])
        if not applicable_rules:
            LOGGER.debug('No rules to process for %s', payload)
            continue

        record = payload['record']
        for rule in applicable_rules:
            # The record must pass both the subkey check and the matcher
            # check before the full rule analysis runs
            if not self._process_subkeys(record, rule):
                continue

            if not rule.check_matchers(record):
                continue

            alert = self._rule_analysis(payload, rule)
            if alert:
                triggered.append(alert)

    self._alert_forwarder.send_alerts(triggered)

    # Only log rule info here if this is deployed in Lambda or explicitly enabled
    # During testing, this gets logged at the very end
    if self._rule_stat_tracker.enabled:
        LOGGER.info(RuleStatisticTracker.statistics_info())

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS, len(triggered))

    return triggered
def _pre_parse(self):
    """Pre-parsing method for incoming app records

    Walks the 'logs' list on the raw record, wrapping each entry in a
    PayloadRecord for the caller.

    Yields:
        Instances of PayloadRecord back to the caller containing the current log data
    """
    app_logs = self.raw_record['logs']

    for entry in app_logs:
        yield PayloadRecord(entry)

    MetricLogger.log_metric(FUNCTION_NAME,
                            MetricLogger.TOTAL_STREAMALERT_APP_RECORDS,
                            len(app_logs))
def _pre_parse(self):
    """Pre-parsing method for S3 objects

    Downloads the s3 object into the system's temp directory for reading. The
    file is kept open as a tempfile.TemporaryFile to ensure proper cleanup
    when reading finishes.

    Yields:
        Instances of PayloadRecord back to the caller containing the current log data
    """
    # Note: _check_size can raise an exception as well
    if not self._check_size():
        return

    last_line = 0
    for last_line, data in self._read_file():
        yield PayloadRecord(data)

    # last_line holds the final line number read (0 if the object was empty)
    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_S3_RECORDS, last_line)
def send_alerts(self, alerts):
    """Send alerts to the Dynamo table.

    Args:
        alerts (list): A list of Alert instances to save to Dynamo.
    """
    if not alerts:
        return  # nothing to do

    try:
        self._table.add_alerts(alerts)
    except ClientError:
        # add_alerts() automatically retries transient errors - any raised ClientError
        # is likely unrecoverable. Log an exception and metric
        LOGGER.exception('An error occurred when sending alerts to DynamoDB')
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_DYNAMO_WRITES, 1)
    else:
        LOGGER.info('Successfully sent %d alert(s) to dynamo:%s',
                    len(alerts), self._table.name)
def _log_metrics(self):
    """Perform some metric logging before exiting"""
    total_records = sum(len(payload.parsed_records) for payload in self._payloads)

    # Count only logs that carry the normalization key
    normalized_count = sum(
        1
        for payload in self._payloads
        for log in payload.parsed_records
        if log.get(Normalizer.NORMALIZATION_KEY)
    )

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS, total_records)
    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.NORMALIZED_RECORDS, normalized_count)
    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_PROCESSED_SIZE,
                            self._processed_size)

    LOGGER.debug('Invalid record count: %d', self._failed_record_count)
    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                            self._failed_record_count)