def _load_rule_table(cls, config): """Load and return a RuleTable class for communicating with the DynamoDB rule table Args: config (dict): Loaded configuration from 'conf/' directory Returns: rule_table.RuleTable: Loaded frontend for DynamoDB rules table """ # Ensure the rules table is enabled rt_config = config['global']['infrastructure']['rules_table'] if not rt_config.get('enabled', False): return now = datetime.utcnow() refresh_delta = timedelta( minutes=rt_config.get('cache_refresh_minutes', 10)) # The rule table will need 'refreshed' if the refresh interval has been surpassed needs_refresh = cls._RULE_TABLE_LAST_REFRESH + refresh_delta < now if not needs_refresh: LOGGER.debug( 'Rule table does not need refreshed (last refresh time: %s; ' 'current time: %s)', cls._RULE_TABLE_LAST_REFRESH, now) return LOGGER.info( 'Refreshing rule table (last refresh time: %s; current time: %s)', cls._RULE_TABLE_LAST_REFRESH, now) table_name = '{}_streamalert_rules'.format( config['global']['account']['prefix']) cls._RULE_TABLE = RuleTable(table_name) cls._RULE_TABLE_LAST_REFRESH = now
def process(cls, input_payload):
    """Run every applicable rule against each record of a payload.

    Rules are selected by the payload's log source. A record produces an alert
    for a rule only if it passes the subkey check, the matcher check and the
    rule function itself, in that order.

    Args:
        input_payload: The payload to evaluate; a shallow copy of it is used

    Returns:
        list: alerts, each a dictionary with the keys 'record', 'rule_name',
            'rule_description', 'log_source', 'log_type', 'outputs',
            'source_service' and 'source_entity'
    """
    payload = copy(input_payload)
    alerts = []

    # Only rules registered for this payload's log source are considered
    applicable_rules = []
    for rule_attrs in cls.__rules.values():
        if payload.log_source in rule_attrs.logs:
            applicable_rules.append(rule_attrs)

    if not applicable_rules:
        LOGGER.debug('No rules to process for %s', payload)
        return alerts

    for record in payload.records:
        for rule in applicable_rules:
            # subkey check
            if not cls.process_subkeys(record, payload.type, rule):
                continue

            # matcher check
            if not cls.match_event(record, rule):
                continue

            # rule analysis
            if not cls.process_rule(record, rule):
                continue

            LOGGER.info(
                "Rule [%s] triggered an alert on log type [%s] from entity '%s' "
                "in service '%s'",
                rule.rule_name, payload.log_source, payload.entity, payload.service())

            alerts.append({
                'record': record,
                'rule_name': rule.rule_name,
                'rule_description': rule.rule_function.__doc__ or DEFAULT_RULE_DESCRIPTION,
                'log_source': str(payload.log_source),
                'log_type': payload.type,
                'outputs': rule.outputs,
                'source_service': payload.service(),
                'source_entity': payload.entity
            })

    return alerts
def _validate_type_mapping(mapping_str): """Static method to extract normalized type and IOC type from qualified str Args: mapping_str (str): A qualified string has pattern 'normalized_type:ioc_type' Returns: A tuple(bool, str, str) bool: First return indicate if the string a qualifited string contains both normalized CEF type and IOC type. str: Second return is normalized type. str: Last return is IOC type. """ normalized_type = None ioc_type = None splitted_str = mapping_str.split(':') if len(splitted_str) == 1: normalized_type = splitted_str[0] elif len(splitted_str) == 2: normalized_type = splitted_str[0] ioc_type = splitted_str[1].split('_')[-1] else: LOGGER.info('Key %s in conf/types.json is incorrect', mapping_str) return False, None, None if normalized_type and ioc_type: return True, normalized_type, ioc_type return False, normalized_type, None
def rule_analysis(record, rule, payload, alerts):
    """Evaluate one rule against one record, appending an alert on a match

    Nothing is returned; a new alert dictionary is appended to `alerts` when the
    rule matches and the alert is not reported as a duplicate.

    Args:
        record (dict): A parsed log with data.
        rule: Rule attributes.
        payload: The StreamPayload object.
        alerts (list): A list of alerts which will be sent to alert processor.
    """
    if not StreamRules.process_rule(record, rule):
        return

    # Skip alerts that are already present in the list
    if StreamRules.check_alerts_duplication(record, rule, alerts):
        return

    LOGGER.info(
        "Rule [%s] triggered an alert on log type [%s] from entity '%s' "
        "in service '%s'",
        rule.rule_name, payload.log_source, payload.entity, payload.service())

    alerts.append({
        'record': record,
        'rule_name': rule.rule_name,
        'rule_description': rule.rule_function.__doc__ or DEFAULT_RULE_DESCRIPTION,
        'log_source': str(payload.log_source),
        'log_type': payload.type,
        'outputs': rule.outputs,
        'source_service': payload.service(),
        'source_entity': payload.entity,
        'context': rule.context
    })
def firehose_request_wrapper(data):
    """Firehose request wrapper to use with backoff

    `self`, `stream_name` and `record_batch_size` are not defined here and must
    come from the enclosing scope.
    """
    LOGGER.info('[Firehose] Sending %d records to %s', record_batch_size, stream_name)

    request = {'DeliveryStreamName': stream_name, 'Records': data}
    return self._firehose_client.put_record_batch(**request)
def _send_to_dynamo(self, alerts):
    """Write every alert to the Dynamo table through a batch writer.

    Args:
        alerts (list): Alerts to convert with self.dynamo_record() and store
    """
    # The batch_writer() automatically handles buffering, batching, and retrying failed items
    with self.table.batch_writer() as writer:
        for alert in alerts:
            item = self.dynamo_record(alert)
            writer.put_item(Item=item)

    alert_count = len(alerts)
    LOGGER.info('Successfully sent %d alert(s) to dynamo:%s',
                alert_count, self.table.table_name)
def _download_object(self, region, bucket, key): """Download an object from S3. Verifies the S3 object is less than or equal to 128MB, and downloads it into a temp file. Lambda can only execute for a maximum of 300 seconds, and the file to download greatly impacts that time. Args: region (str): AWS region to use for boto client instance. bucket (str): S3 bucket to download object from. key (str): Key of s3 object. Returns: str: The downloaded path of the S3 object. """ size_kb = self.s3_object_size / 1024.0 size_mb = size_kb / 1024.0 display_size = '{}MB'.format(size_mb) if size_mb else '{}KB'.format( size_kb) # File size checks before downloading if size_kb == 0: return elif size_mb > 128: raise S3ObjectSizeError( '[S3Payload] The S3 object {}/{} is too large [{}] to download ' 'from S3'.format(bucket, key, display_size)) # Bandit warns about using a shell process, ignore with #nosec LOGGER.debug(os.popen('df -h /tmp | tail -1').read().strip()) # nosec LOGGER.info('[S3Payload] Starting download from S3: %s/%s [%s]', bucket, key, display_size) # Convert the S3 object name to store as a file in the Lambda container suffix = key.replace('/', '-') file_descriptor, downloaded_s3_object = tempfile.mkstemp(suffix=suffix) with open(downloaded_s3_object, 'wb') as data: client = boto3.client('s3', region_name=region) start_time = time.time() client.download_fileobj(bucket, key, data) # Explicitly call os.close on the underlying open file descriptor # Addresses https://github.com/airbnb/streamalert/issues/587 os.close(file_descriptor) total_time = time.time() - start_time LOGGER.info('Completed download in %s seconds', round(total_time, 2)) # Log a metric on how long this object took to download MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.S3_DOWNLOAD_TIME, total_time) return downloaded_s3_object
def sink(self, alerts):
    """Sink triggered alerts from the StreamRules engine.

    Each alert is serialized to JSON and sent as the payload of an asynchronous
    ('Event') invocation of the Lambda function self.function at the
    'production' qualifier. An alert that cannot be serialized or sent is
    logged and skipped, and the remaining alerts are still processed.

    Args:
        alerts (list): a list of dictionaries representing json alerts
    """
    for alert in alerts:
        try:
            # Objects json cannot handle natively are dumped via their __dict__
            data = json.dumps(alert, default=lambda o: o.__dict__)
        except (AttributeError, TypeError, ValueError) as err:
            # Log the exception itself: the `.message` attribute does not exist
            # on Python 3 exceptions and would raise inside this handler.
            # TypeError/ValueError are also raised by json.dumps (e.g. invalid
            # keys, circular references) and should not abort the whole batch.
            LOGGER.error(
                'An error occurred while dumping alert to JSON: %s '
                'Alert: %s', err, alert)
            continue

        try:
            response = self.client_lambda.invoke(
                FunctionName=self.function,
                InvocationType='Event',
                Payload=data,
                Qualifier='production')
        except ClientError as err:
            LOGGER.exception(
                'An error occurred while sending alert to '
                '\'%s:production\'. Error is: %s. Alert: %s',
                self.function, err.response, data)
            continue

        # Anything other than HTTP 202 is treated as a failed send
        if response['ResponseMetadata']['HTTPStatusCode'] != 202:
            LOGGER.error('Failed to send alert to \'%s\': %s', self.function, data)
            continue

        if self.env['lambda_alias'] != 'development':
            LOGGER.info(
                'Sent alert to \'%s\' with Lambda request ID \'%s\'',
                self.function, response['ResponseMetadata']['RequestId'])
def _firehose_request_helper(self, stream_name, record_batch):
    """Send record batches to Firehose

    A ClientError raised by the request is logged and counted as a failure of
    the whole batch. Otherwise the response's 'FailedPutCount' decides whether
    the failed-records or the records-sent metric is logged.

    Args:
        stream_name (str): The name of the Delivery Stream to send to
        record_batch (list): The records to send
    """
    record_batch_size = len(record_batch)

    try:
        LOGGER.debug('Sending %d records to Firehose:%s', record_batch_size, stream_name)
        resp = self.firehose_client.put_record_batch(
            DeliveryStreamName=stream_name,
            # The newline at the end is required by Firehose,
            # otherwise all records will be on a single line and
            # unsearchable in Athena.
            Records=[{'Data': json.dumps(self.sanitize_keys(record),
                                         separators=(",", ":")) + '\n'}
                     for record in record_batch])
    except ClientError as firehose_err:
        LOGGER.error(firehose_err)
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                record_batch_size)
        return

    # Error handle if failures occurred in PutRecordBatch
    # TODO(jack) implement backoff here for additional message reliability
    # Default to 0 so a response without the key is never compared as None > 0
    failed_put_count = resp.get('FailedPutCount', 0)
    if failed_put_count > 0:
        failed_records = [failed for failed in resp['RequestResponses']
                          if failed.get('ErrorCode')]
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                failed_put_count)
        # Only print the first 100 failed records to Cloudwatch logs
        LOGGER.error('The following records failed to Put to the '
                     'Delivery stream %s: %s',
                     stream_name,
                     json.dumps(failed_records[:100], indent=2))
    else:
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_RECORDS_SENT,
                                record_batch_size)
        LOGGER.info('Successfully sent %d messages to Firehose:%s',
                    record_batch_size, stream_name)
def firehose_request_wrapper():
    """Firehose request wrapper to use with backoff

    `self`, `stream_name`, `record_batch` and `record_batch_size` are not
    defined here and must come from the enclosing scope.
    """
    LOGGER.info('[Firehose] Sending %d records to %s', record_batch_size, stream_name)

    # The newline at the end is required by Firehose,
    # otherwise all records will be on a single line and
    # unsearchable in Athena.
    serialized_records = []
    for record in record_batch:
        line = json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'
        serialized_records.append({'Data': line})

    return self._firehose_client.put_record_batch(
        DeliveryStreamName=stream_name,
        Records=serialized_records)
def firehose_request_wrapper(data):
    """Firehose request wrapper to use with backoff

    `self` and `stream_name` are not defined here and must come from the
    enclosing scope.
    """
    # Report the current size of data, which shrinks as failed records are retried
    current_batch_size = len(data)
    LOGGER.info('[Firehose] Sending %d records to %s', current_batch_size, stream_name)

    response = self._client.put_record_batch(DeliveryStreamName=stream_name, Records=data)

    failed_put_count = response['FailedPutCount']
    if failed_put_count > 0:
        # Log this as an error for now so it can be picked up in logs
        LOGGER.error('Received non-zero FailedPutCount: %d', failed_put_count)
        # Strip out the successful records so only the failed ones are retried. This
        # happens to the list of dictionary objects, so the called function sees the
        # updated list
        self._strip_successful_records(data, response)

    return response
def send_alerts(self, alerts):
    """Save alerts to the Dynamo table, logging a metric if the write fails.

    Args:
        alerts (list): A list of Alert instances to save to Dynamo.
    """
    try:
        self._table.add_alerts(alerts)
    except ClientError:
        # add_alerts() automatically retries transient errors - any raised ClientError
        # is likely unrecoverable. Log an exception and metric
        LOGGER.exception('Error saving alerts to Dynamo')
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_DYNAMO_WRITES, 1)
    else:
        LOGGER.info('Successfully sent %d alert(s) to dynamo:%s',
                    len(alerts), self._table.name)
def rule_analysis(self, record, rule, payload, alerts):
    """Analyze a rule against the record, adding a new alert if applicable.

    Args:
        record (dict): A parsed log with data.
        rule (RuleAttributes): Attributes for the rule which triggered the alert.
        payload (StreamPayload): Payload with information about the source of the record.
        alerts (list): The current list of Alert instances.
            If the rule returns True on the record, a new Alert instance is added to this list.
    """
    if not rule.process(record):
        return

    # when threat intel enabled, normalized records will be re-analyzed by
    # all rules. Thus we need to check duplication.
    if self._threat_intel and self.check_alerts_duplication(record, rule, alerts):
        return

    # Evaluate the staged state exactly once so the outputs chosen below and the
    # 'staged' flag stored on the alert can never disagree
    staged = rule.is_staged(self._RULE_TABLE)

    if staged:
        # Staged rules only use the required alert outputs
        all_outputs = self._required_outputs_set
    else:
        # Otherwise, combine the required alert outputs with the ones for this rule
        all_outputs = self._required_outputs_set.union(rule.outputs_set)

    alert = Alert(
        rule.name, record, all_outputs,
        cluster=os.environ['CLUSTER'],
        context=rule.context,
        log_source=str(payload.log_source),
        log_type=payload.type,
        merge_by_keys=rule.merge_by_keys,
        merge_window=timedelta(minutes=rule.merge_window_mins),
        rule_description=rule.description,
        source_entity=payload.entity,
        source_service=payload.service(),
        staged=staged)

    LOGGER.info(
        'Rule [%s] triggered alert [%s] on log type [%s] from entity \'%s\' '
        'in service \'%s\'',
        rule.name, alert.alert_id, payload.log_source, payload.entity, payload.service())

    alerts.append(alert)
def _download_object(self, region, bucket, key):
    """Download an object from S3 into a temporary file.

    Verifies the S3 object is less than or equal to 128MB, and downloads it into
    a temp file. Lambda can only execute for a maximum of 300 seconds, and the
    file to download greatly impacts that time.

    Args:
        region (str): AWS region to use for boto client instance.
        bucket (str): S3 bucket to download object from.
        key (str): Key of s3 object.

    Returns:
        str: The downloaded path of the S3 object.

    Raises:
        S3ObjectSizeError: If the object is larger than 128MB.
    """
    size_kb = self.s3_object_size / 1024.0
    size_mb = size_kb / 1024.0
    if size_mb > 128:
        raise S3ObjectSizeError('S3 object to download is above 128MB')

    # Bandit warns about using a shell process, ignore with #nosec
    LOGGER.debug(os.popen('df -h /tmp | tail -1').read().strip())  # nosec

    # Objects smaller than 1MB are displayed in KB. The previous truthiness check
    # on size_mb was true for every non-empty object, so KB was never used.
    display_size = '{}MB'.format(size_mb) if size_mb >= 1 else '{}KB'.format(size_kb)
    LOGGER.info('Starting download from S3: %s/%s [%s]', bucket, key, display_size)

    suffix = key.replace('/', '-')
    file_descriptor, downloaded_s3_object = tempfile.mkstemp(suffix=suffix)

    # mkstemp returns an already-open descriptor that the caller must close.
    # Wrapping it here (instead of discarding it and re-opening the path) lets the
    # 'with' block close it, so no descriptor is leaked per download.
    with os.fdopen(file_descriptor, 'wb') as data:
        client = boto3.client('s3', region_name=region)
        start_time = time.time()
        client.download_fileobj(bucket, key, data)

    total_time = time.time() - start_time
    LOGGER.info('Completed download in %s seconds', round(total_time, 2))

    # Log a metric on how long this object took to download
    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.S3_DOWNLOAD_TIME, total_time)

    return downloaded_s3_object
def rule_analysis(self, record, rule, payload, alerts):
    """Evaluate one rule against one record, appending an alert on a match

    Nothing is returned; a new alert dictionary with a random unique 'id' is
    appended to `alerts` when the rule matches and the alert is not reported as
    a duplicate.

    Args:
        record (dict): A parsed log with data.
        rule: Rule attributes.
        payload: The StreamPayload object.
        alerts (list): A list of alerts which will be sent to alert processor.
    """
    if not StreamRules.process_rule(record, rule):
        return

    # Skip alerts that are already present in the list
    if StreamRules.check_alerts_duplication(record, rule, alerts):
        return

    alert_id = str(uuid.uuid4())  # Random unique alert ID
    LOGGER.info(
        "Rule [%s] triggered alert [%s] on log type [%s] from entity '%s' "
        "in service '%s'",
        rule.rule_name, alert_id, payload.log_source, payload.entity, payload.service())

    # Combine the required alert outputs with the ones for this rule
    rule_outputs = set(rule.outputs or [])
    all_outputs = self._required_outputs_set.union(rule_outputs)

    alerts.append({
        'id': alert_id,
        'record': record,
        'rule_name': rule.rule_name,
        'rule_description': rule.rule_function.__doc__ or DEFAULT_RULE_DESCRIPTION,
        'log_source': str(payload.log_source),
        'log_type': payload.type,
        'outputs': list(all_outputs),  # TODO: @austinbyers - change this to a set
        'source_service': payload.service(),
        'source_entity': payload.entity,
        'context': rule.context
    })
def _firehose_request_helper(self, stream_name, record_batch):
    """Send one batch of records to a Firehose Delivery Stream, retrying with backoff

    The request is retried while it raises one of the backoff exceptions or
    while the response reports a non-zero 'FailedPutCount'. After the final
    attempt, either the failed-records or the records-sent metric is logged.

    Args:
        stream_name (str): The name of the Delivery Stream to send to
        record_batch (list): The records to send
    """
    record_batch_size = len(record_batch)
    exceptions_to_backoff = (ClientError, ConnectionError)

    @backoff.on_predicate(backoff.fibo,
                          lambda response: response['FailedPutCount'] > 0,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          max_value=self.MAX_BACKOFF_FIBO_VALUE,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler,
                          on_success=success_handler,
                          on_giveup=giveup_handler)
    @backoff.on_exception(backoff.fibo,
                          exceptions_to_backoff,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler,
                          on_success=success_handler,
                          on_giveup=giveup_handler)
    def firehose_request_wrapper(data):
        """Firehose request wrapper to use with backoff"""
        LOGGER.info('[Firehose] Sending %d records to %s',
                    record_batch_size, stream_name)
        return self._firehose_client.put_record_batch(
            DeliveryStreamName=stream_name, Records=data)

    # The newline at the end is required by Firehose,
    # otherwise all records will be on a single line and
    # unsearchable in Athena.
    serialized_records = []
    for record in record_batch:
        line = json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'
        serialized_records.append({'Data': line})

    # The try/except here is to catch the raised error at the
    # end of the backoff.
    try:
        response = firehose_request_wrapper(serialized_records)
    except exceptions_to_backoff as firehose_err:
        LOGGER.error(firehose_err)
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                record_batch_size)
        return

    # Error handle if failures occurred in PutRecordBatch after
    # several backoff attempts
    if response.get('FailedPutCount') > 0:
        failed_records = [entry for entry in response['RequestResponses']
                          if entry.get('ErrorCode')]
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                response['FailedPutCount'])
        # Only print the first 100 failed records to Cloudwatch logs
        LOGGER.error('[Firehose] The following records failed to put to '
                     'the Delivery Stream %s: %s',
                     stream_name,
                     json.dumps(failed_records[:100], indent=2))
        return

    MetricLogger.log_metric(FUNCTION_NAME,
                            MetricLogger.FIREHOSE_RECORDS_SENT,
                            record_batch_size)
    request_id = response.get('ResponseMetadata', {}).get('RequestId', '')
    LOGGER.info('[Firehose] Successfully sent %d messages to %s with RequestId [%s]',
                record_batch_size, stream_name, request_id)
def run(self, event):
    """StreamAlert Lambda function handler.

    Classifies each incoming raw record into a payload, processes the payloads
    into normalized records, runs threat intel matching over those records,
    optionally sinks the resulting alerts, and logs the run's metrics.

    Args:
        event (dict): An AWS event mapped to a specific source/entity
            containing data read by Lambda.

    Returns:
        bool: False if the event contains no records; otherwise True only when
            self._failed_record_count is 0
    """
    incoming_records = event.get('Records', [])
    LOGGER.debug('Number of incoming records: %d', len(incoming_records))
    if not incoming_records:
        return False

    infrastructure = self.config['global'].get('infrastructure', {})
    firehose_config = infrastructure.get('firehose', {})
    if firehose_config.get('enabled'):
        self._firehose_client = StreamAlertFirehose(
            self.env['lambda_region'], firehose_config, self.config['logs'])

    payload_with_normalized_records = []
    for raw_record in incoming_records:
        # Get the service and entity from the payload. If the service/entity
        # is not in our config, log an error and go onto the next record
        service, entity = self.classifier.extract_service_and_entity(raw_record)

        if not service:
            LOGGER.error(
                "No valid service found in payload's raw record. Skipping record: %s",
                raw_record)
            continue

        if not entity:
            LOGGER.error(
                "Unable to extract entity from payload's raw record for service %s. "
                "Skipping record: %s",
                service, raw_record)
            continue

        # Cache the log sources for this service and entity on the classifier
        if not self.classifier.load_sources(service, entity):
            continue

        # Create the StreamPayload to use for encapsulating parsed info
        payload = load_stream_payload(service, entity, raw_record)
        if not payload:
            continue

        normalized_records = self._process_alerts(payload)
        payload_with_normalized_records.extend(normalized_records)

    LOGGER.info('Got %d normalized records', len(payload_with_normalized_records))

    # Apply Threat Intel to normalized records in the end of Rule Processor invocation
    record_alerts = self._rule_engine.threat_intel_match(payload_with_normalized_records)
    self._alerts.extend(record_alerts)
    if record_alerts and self.enable_alert_processor:
        self.sinker.sink(record_alerts)

    MetricLogger.log_metric(FUNCTION_NAME,
                            MetricLogger.TOTAL_RECORDS,
                            self._processed_record_count)

    MetricLogger.log_metric(FUNCTION_NAME,
                            MetricLogger.TOTAL_PROCESSED_SIZE,
                            self._processed_size)

    LOGGER.debug('Invalid record count: %d', self._failed_record_count)

    MetricLogger.log_metric(FUNCTION_NAME,
                            MetricLogger.FAILED_PARSES,
                            self._failed_record_count)

    triggered_alert_count = len(self._alerts)
    LOGGER.debug('%s alerts triggered', triggered_alert_count)

    MetricLogger.log_metric(FUNCTION_NAME,
                            MetricLogger.TRIGGERED_ALERTS,
                            triggered_alert_count)

    # Check if debugging logging is on before json dumping alerts since
    # this can be time consuming if there are a lot of alerts
    if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
        LOGGER.debug('Alerts:\n%s', json.dumps(self._alerts, indent=2))

    if self._firehose_client:
        self._firehose_client.send()

    return self._failed_record_count == 0
def _firehose_request_helper(self, stream_name, record_batch):
    """Send one batch of records to a Firehose Delivery Stream, retrying with backoff

    The request is retried while it raises one of the backoff exceptions or
    while the response reports a non-zero 'FailedPutCount'. Between attempts the
    successfully delivered records are stripped from the outgoing list, so only
    the failed ones are sent again.

    Args:
        stream_name (str): The name of the Delivery Stream to send to
        record_batch (list): The records to send
    """
    exceptions_to_backoff = (ClientError, ConnectionError, Timeout)

    @backoff.on_predicate(backoff.fibo,
                          lambda response: response['FailedPutCount'] > 0,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          max_value=self.MAX_BACKOFF_FIBO_VALUE,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler(debug_only=False),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    @backoff.on_exception(backoff.fibo,
                          exceptions_to_backoff,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler(debug_only=False),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    def firehose_request_wrapper(data):
        """Firehose request wrapper to use with backoff"""
        # Use the current length of data here so we can track failed records that are retried
        LOGGER.info('[Firehose] Sending %d records to %s', len(data), stream_name)

        response = self._client.put_record_batch(DeliveryStreamName=stream_name, Records=data)

        # Log this as an error for now so it can be picked up in logs
        if response['FailedPutCount'] > 0:
            LOGGER.error('Received non-zero FailedPutCount: %d', response['FailedPutCount'])
            # Strip out the successful records so only the failed ones are retried. This
            # happens to the list of dictionary objects, so the called function sees the
            # updated list
            self._strip_successful_records(data, response)

        return response

    original_batch_size = len(record_batch)

    # The newline at the end is required by Firehose,
    # otherwise all records will be on a single line and
    # unsearchable in Athena.
    serialized_records = []
    for record in record_batch:
        line = json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'
        serialized_records.append({'Data': line})

    # The try/except here is to catch the raised error at the end of the backoff
    try:
        result = firehose_request_wrapper(serialized_records)
    except exceptions_to_backoff as firehose_err:
        LOGGER.error(firehose_err)
        # Use the current length of the serialized records in case some records were
        # successful but others were not
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                len(serialized_records))
        return

    # Error handle if failures occurred in PutRecordBatch after
    # several backoff attempts
    if result.get('FailedPutCount') > 0:
        failed_records = [entry for entry in result['RequestResponses']
                          if entry.get('ErrorCode')]
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                result['FailedPutCount'])
        # Only print the first 100 failed records to Cloudwatch logs
        LOGGER.error('[Firehose] The following records failed to put to '
                     'the Delivery Stream %s: %s',
                     stream_name,
                     json.dumps(failed_records[:100], indent=2))
        return

    MetricLogger.log_metric(FUNCTION_NAME,
                            MetricLogger.FIREHOSE_RECORDS_SENT,
                            original_batch_size)
    request_id = result.get('ResponseMetadata', {}).get('RequestId', '')
    LOGGER.info('[Firehose] Successfully sent %d messages to %s with RequestId [%s]',
                original_batch_size, stream_name, request_id)
def _backoff_handler_firehose_reset(self, details):
    """Custom backoff handler that logs the retry and re-instantiates the Firehose Client

    Args:
        details (dict): Backoff details; the 'target', 'wait' and 'tries'
            keys are read for the log message
    """
    target_name = details['target'].__name__
    LOGGER.info(
        "[Backoff]: Calling '%s' again in %f seconds with %d tries so far",
        target_name, details['wait'], details['tries'])

    self._reset_firehose_client()
def _send_to_firehose(self):
    """Send all classified records to a respective Firehose Delivery Stream

    Records in self.categorized_payloads are grouped by log type. Each group is
    split into chunks of at most MAX_BATCH_SIZE records, oversized records are
    dropped from each chunk, and what remains is sent with PutRecordBatch.
    """
    def _chunk(record_list, chunk_size):
        """Helper function to chunk payloads"""
        for start in range(0, len(record_list), chunk_size):
            yield record_list[start:start + chunk_size]

    def _check_record_batch(batch):
        """Helper function to drop (in place) records that are too large"""
        valid_records = []
        for record in batch:
            if len(str(record)) > MAX_RECORD_SIZE:
                # Show the first 1k bytes in order to not overload
                # CloudWatch logs
                LOGGER.error('The following record is too large to '
                             'be sent to Firehose: %s', str(record)[:1000])
                MetricLogger.log_metric(FUNCTION_NAME,
                                        MetricLogger.FIREHOSE_FAILED_RECORDS,
                                        1)
            else:
                valid_records.append(record)

        # Replace the contents in one step. Popping from the list while
        # enumerating it skipped the record following every removed one.
        batch[:] = valid_records

    delivery_stream_name_pattern = 'streamalert_data_{}'

    # Iterate through each payload type
    for log_type, records in self.categorized_payloads.items():
        # This same method is used when naming the Delivery Streams
        formatted_log_type = log_type.replace(':', '_')
        stream_name = delivery_stream_name_pattern.format(formatted_log_type)

        for record_batch in _chunk(records, MAX_BATCH_SIZE):
            _check_record_batch(record_batch)

            # Every record in this chunk was too large; there is nothing to send
            if not record_batch:
                continue

            resp = self.firehose_client.put_record_batch(
                DeliveryStreamName=stream_name,
                # The newline at the end is required by Firehose,
                # otherwise all records will be on a single line and
                # unsearchable in Athena.
                Records=[{'Data': json.dumps(record, separators=(",", ":")) + '\n'}
                         for record in record_batch])

            # Error handle if failures occurred
            # TODO(jack) implement backoff here once the rule processor is split
            # Default to 0 so a response without the key is never compared as None > 0
            failed_put_count = resp.get('FailedPutCount', 0)
            if failed_put_count > 0:
                failed_records = [failed for failed in resp['RequestResponses']
                                  if failed.get('ErrorCode')]
                MetricLogger.log_metric(FUNCTION_NAME,
                                        MetricLogger.FIREHOSE_FAILED_RECORDS,
                                        failed_put_count)
                # Only print the first 100 failed records
                LOGGER.error('The following records failed to Put to the '
                             'Delivery stream %s: %s',
                             stream_name,
                             json.dumps(failed_records[:100], indent=2))
            else:
                MetricLogger.log_metric(FUNCTION_NAME,
                                        MetricLogger.FIREHOSE_RECORDS_SENT,
                                        len(record_batch))
                LOGGER.info('Successfully sent %d messages to Firehose:%s',
                            len(record_batch), stream_name)