def _read_downloaded_s3_object(s3_object):
    """Read lines from a downloaded file from S3

    Supports reading both gzipped files and plaintext files.

    Args:
        s3_object (str): A full path to the downloaded file.

    Yields:
        tuple: Line number (int) and line (str) from the downloaded S3 object.
    """
    _, extension = os.path.splitext(s3_object)

    if extension == '.gz':
        with gzip.open(s3_object, 'r') as s3_file:
            for num, line in enumerate(s3_file, start=1):
                yield num, line.rstrip()
    else:
        with open(s3_object, 'r') as s3_file:
            for num, line in enumerate(s3_file, start=1):
                yield num, line.rstrip()

    # AWS Lambda apparently does not reallocate disk space when files are
    # removed using os.remove(), so we must truncate them before removal
    with open(s3_object, 'w'):
        pass

    os.remove(s3_object)

    if not os.path.exists(s3_object):
        LOGGER.debug('Removed temp S3 file: %s', s3_object)
    else:
        LOGGER.error('Failed to remove temp S3 file: %s', s3_object)
def load_sources(self, service, entity):
    """Load the sources for this payload.

    Args:
        service (str): Source service
        entity (str): Entity within the service

    Returns:
        bool: True if the entity's log sources loaded properly
    """
    # Clear the list from any previous runs
    del self._entity_log_sources[:]

    # Get all logs for the configured service/entity (s3, kinesis, or sns)
    service_entities = self._config['sources'].get(service)
    if not service_entities:
        LOGGER.error('Service [%s] not declared in sources configuration', service)
        return False

    config_entity = service_entities.get(entity)
    if not config_entity:
        LOGGER.error(
            'Entity [%s] not declared in sources configuration for service [%s]',
            entity,
            service)
        return False

    # Get a copy of the logs list by slicing here, not a pointer to the list reference
    self._entity_log_sources = config_entity['logs'][:]

    return bool(self._entity_log_sources)
def _add_optional_keys(self, json_records, schema, optional_keys):
    """Add optional keys to a parsed JSON record.

    Args:
        json_records (list): JSONPath extracted JSON records
        schema (dict): The log type schema
        optional_keys (dict): The optional keys in the schema
    """
    if not optional_keys:
        return

    for key_name in optional_keys:
        # Instead of doing a schema.update() here with a default value type,
        # we should enforce having any optional keys declared within the schema
        # and log an error if that is not the case
        if key_name not in schema:
            LOGGER.error('Optional top level key \'%s\' '
                         'not found in declared log schema', key_name)
            continue

        # If the optional key isn't in our parsed json payload
        for record in json_records:
            if key_name not in record:
                # Set default value
                record[key_name] = self.default_optional_values(schema[key_name])
def match_event(cls, record, rule):
    """Evaluate matchers on a record.

    Given a list of matchers, evaluate a record through each one to find
    a match. If any matcher evaluates to false, evaluation stops and False
    is returned. Otherwise, returns True.

    Args:
        record: Record to be matched
        rule: Rule containing the list of matchers

    Returns:
        bool: result of matcher processing
    """
    # matchers are optional for rules
    if not rule.matchers:
        return True

    for matcher in rule.matchers:
        matcher_function = cls.__matchers.get(matcher)
        if matcher_function:
            try:
                matcher_result = matcher_function(record)
            except Exception as err:  # pylint: disable=broad-except
                matcher_result = False
                LOGGER.error('%s: %s', matcher_function.__name__, err.message)
            if not matcher_result:
                return False
        else:
            LOGGER.error('The matcher [%s] does not exist!', matcher)

    return True
def decorator(rule):
    """Rule decorator logic."""
    rule_name = rule.__name__
    logs = opts.get('logs')
    outputs = opts.get('outputs')
    matchers = opts.get('matchers')
    datatypes = opts.get('datatypes')
    req_subkeys = opts.get('req_subkeys')

    if not (logs or datatypes):
        LOGGER.error(
            'Invalid rule [%s] - rule must have either \'logs\' or \'datatypes\' declared',
            rule_name)
        return

    if not outputs:
        LOGGER.error('Invalid rule [%s] - rule must have \'outputs\' declared', rule_name)
        return

    if rule_name in cls.__rules:
        raise ValueError('rule [{}] already defined'.format(rule_name))

    cls.__rules[rule_name] = RuleAttributes(rule_name,
                                            rule,
                                            matchers,
                                            datatypes,
                                            logs,
                                            outputs,
                                            req_subkeys)
    return rule
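# Hedged usage sketch for the decorator above. It assumes the enclosing
# decorator factory is exposed as `rule(**opts)` on StreamRules; the import
# path, log name, matcher name, and output name below are all assumptions
# made up for illustration, not part of the source.
from stream_alert.rule_processor.rules_engine import StreamRules

rule = StreamRules.rule  # assumed alias for the decorator factory

@rule(logs=['cloudtrail:events'],                 # log schemas this rule applies to
      matchers=['production_account'],            # optional matchers evaluated first
      outputs=['aws-s3:alerts'],                  # 'outputs' is required, per the check above
      req_subkeys={'detail': ['userIdentity']})   # required nested keys
def root_console_login(record):
    """Example rule (hypothetical): alert when the root account signs in to the console."""
    return (record['detail']['userIdentity'].get('type') == 'Root' and
            record['detail'].get('eventName') == 'ConsoleLogin')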
def _load_enabled_log_sources(self, firehose_config, log_sources):
    """Load and expand all declared and enabled Firehose log sources

    Args:
        firehose_config (dict): Loaded Firehose config from global.json
        log_sources (dict): Loaded logs.json file

    Returns:
        set: Enabled logs
    """
    enabled_logs = set()
    for enabled_log in firehose_config.get('enabled_logs', []):
        enabled_log_parts = enabled_log.split(':')

        # Expand to all subtypes
        if len(enabled_log_parts) == 1:
            expanded_logs = [self.firehose_log_name(log_name)
                             for log_name in log_sources
                             if log_name.split(':')[0] == enabled_log_parts[0]]

            # If the list comprehension is empty, no matching logs
            # were found during the expansion
            if not expanded_logs:
                LOGGER.error('Enabled Firehose log %s not declared in logs.json',
                             enabled_log)

            enabled_logs.update(expanded_logs)

        elif len(enabled_log_parts) == 2:
            if enabled_log not in log_sources:
                LOGGER.error('Enabled Firehose log %s not declared in logs.json',
                             enabled_log)

            enabled_logs.add(self.firehose_log_name('_'.join(enabled_log_parts)))

    return enabled_logs
def _process_alerts(self, payload):
    """Process records for alerts and send them to the correct places

    Args:
        payload (StreamPayload): StreamAlert payload object being processed
    """
    for record in payload.pre_parse():
        self.classifier.classify_record(record)
        if not record.valid:
            if self.env['lambda_alias'] != 'development':
                LOGGER.error('Record does not match any defined schemas: %s\n%s',
                             record, record.pre_parsed_record)

            self._failed_record_count += 1
            continue

        LOGGER.debug(
            'Classified and Parsed Payload: <Valid: %s, Log Source: %s, Entity: %s>',
            record.valid, record.log_source, record.entity)

        record_alerts = StreamRules.process(record)

        LOGGER.debug('Processed %d valid record(s) that resulted in %d alert(s).',
                     len(payload.records), len(record_alerts))

        if not record_alerts:
            continue

        # Extend the list of alerts with any new ones so they can be returned
        self._alerts.extend(record_alerts)

        if self.enable_alert_processor:
            self.sinker.sink(record_alerts)
def handler(event, context):
    """Main Lambda handler function"""
    try:
        StreamAlert(context).run(event)
    except Exception:
        LOGGER.error('Invocation event: %s', json.dumps(event))
        raise
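# Hedged local-invocation sketch for the handler above. The event mirrors the
# standard AWS Kinesis -> Lambda event shape; the stream ARN, record data, and
# the fake context object are all made up for illustration and may not match
# what StreamAlert(context) actually reads from the context.
import base64
from collections import namedtuple

FakeContext = namedtuple('FakeContext', ['invoked_function_arn', 'function_name'])

sample_event = {
    'Records': [
        {
            'eventSource': 'aws:kinesis',
            'eventSourceARN': 'arn:aws:kinesis:us-east-1:123456789012:stream/example_stream',
            'kinesis': {
                'data': base64.b64encode('{"field": "value"}')
            }
        }
    ]
}

handler(sample_event,
        FakeContext('arn:aws:lambda:us-east-1:123456789012:function:example:production',
                    'example'))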
def _parse(self, payload):
    """Parse a record into a declared type.

    Args:
        payload: A StreamAlert payload object

    Sets:
        payload.log_source: The detected log name from the data_sources config.
        payload.type: The record's type.
        payload.records: The parsed records as a list.

    Returns:
        bool: the success of the parse.
    """
    schema_matches = self._process_log_schemas(payload)
    if not schema_matches:
        return False

    if LOGGER_DEBUG_ENABLED:
        LOGGER.debug('Schema Matched Records:\n%s',
                     json.dumps([schema_match.parsed_data
                                 for schema_match in schema_matches], indent=2))

    schema_match = self._check_schema_match(schema_matches)

    if LOGGER_DEBUG_ENABLED:
        LOGGER.debug('Log name: %s', schema_match.log_name)
        LOGGER.debug('Parsed data:\n%s', json.dumps(schema_match.parsed_data, indent=2))

    for parsed_data_value in schema_match.parsed_data:
        # Convert data types per the schema
        # Use the root schema for the parser due to updates caused by
        # configuration settings such as envelope_keys and optional_keys
        try:
            if not self._convert_type(parsed_data_value, schema_match.root_schema):
                return False
        except KeyError:
            LOGGER.error('The payload is mis-classified. Payload [%s]', parsed_data_value)
            return False

    normalized_types = StreamThreatIntel.normalized_type_mapping()

    payload.log_source = schema_match.log_name
    payload.type = schema_match.parser.type()
    payload.records = schema_match.parsed_data
    payload.normalized_types = normalized_types.get(payload.log_source.split(':')[0])

    return True
def sink(self, alerts):
    """Sink triggered alerts from the StreamRules engine.

    Args:
        alerts (list): a list of dictionaries representing JSON alerts

    Sends a message to the alert processor with the following JSON format:
        {
            "record": record,
            "metadata": {
                "rule_name": rule.rule_name,
                "rule_description": rule.rule_function.__doc__,
                "log": str(payload.log_source),
                "outputs": rule.outputs,
                "type": payload.type,
                "source": {
                    "service": payload.service,
                    "entity": payload.entity
                }
            }
        }
    """
    for alert in alerts:
        try:
            data = json.dumps(alert, default=lambda o: o.__dict__)
        except AttributeError as err:
            LOGGER.error('An error occurred while dumping alert to JSON: %s '
                         'Alert: %s',
                         err.message,
                         alert)
            continue

        try:
            response = self.client_lambda.invoke(FunctionName=self.function,
                                                 InvocationType='Event',
                                                 Payload=data,
                                                 Qualifier='production')
        except ClientError as err:
            LOGGER.exception('An error occurred while sending alert to '
                             '\'%s:production\'. Error is: %s. Alert: %s',
                             self.function,
                             err.response,
                             data)
            continue

        if response['ResponseMetadata']['HTTPStatusCode'] != 202:
            LOGGER.error('Failed to send alert to \'%s\': %s', self.function, data)
            continue

        if self.env['lambda_alias'] != 'development':
            LOGGER.info('Sent alert to \'%s\' with Lambda request ID \'%s\'',
                        self.function,
                        response['ResponseMetadata']['RequestId'])
def _process_alerts(self, payload):
    """Process records for alerts and send them to the correct places

    Args:
        payload (StreamPayload): StreamAlert payload object being processed

    Returns:
        list: Records containing normalized types, to be matched against Threat Intel
    """
    payload_with_normalized_records = []
    for record in payload.pre_parse():
        # Increment the processed size using the length of this record
        self._processed_size += len(record.pre_parsed_record)
        self.classifier.classify_record(record)
        if not record.valid:
            if self.env['lambda_alias'] != 'development':
                LOGGER.error('Record does not match any defined schemas: %s\n%s',
                             record, record.pre_parsed_record)

            self._failed_record_count += 1
            continue

        # Increment the total processed records to get an accurate assessment of throughput
        self._processed_record_count += len(record.records)

        LOGGER.debug(
            'Classified and Parsed Payload: <Valid: %s, Log Source: %s, Entity: %s>',
            record.valid, record.log_source, record.entity)

        record_alerts, normalized_records = self._rule_engine.process(record)
        payload_with_normalized_records.extend(normalized_records)

        LOGGER.debug('Processed %d valid record(s) that resulted in %d alert(s).',
                     len(payload.records), len(record_alerts))

        # Add all parsed records to the categorized payload dict only if Firehose is enabled
        if self._firehose_client:
            # Only send payloads with enabled log sources
            if self._firehose_client.enabled_log_source(payload.log_source):
                self._firehose_client.categorized_payloads[payload.log_source].extend(
                    payload.records)

        if not record_alerts:
            continue

        # Extend the list of alerts with any new ones so they can be returned
        self._alerts.extend(record_alerts)

        if self.enable_alert_processor:
            self.sinker.sink(record_alerts)

    return payload_with_normalized_records
def _check_record_batch(batch):
    """Helper function to verify record size"""
    # Iterate over indices in reverse so that popping an oversized record
    # does not shift (and skip) the records that follow it
    for index in reversed(range(len(batch))):
        record = batch[index]
        if len(str(record)) > MAX_RECORD_SIZE:
            # Show the first 1k bytes in order to not overload
            # CloudWatch logs
            LOGGER.error('The following record is too large '
                         'to be sent to Firehose: %s', str(record)[:1000])
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    1)
            batch.pop(index)
def _firehose_request_helper(self, stream_name, record_batch):
    """Send record batches to Firehose

    Args:
        stream_name (str): The name of the Delivery Stream to send to
        record_batch (list): The records to send
    """
    record_batch_size = len(record_batch)
    resp = {}

    try:
        LOGGER.debug('Sending %d records to Firehose:%s',
                     record_batch_size,
                     stream_name)
        resp = self.firehose_client.put_record_batch(
            DeliveryStreamName=stream_name,
            # The newline at the end is required by Firehose,
            # otherwise all records will be on a single line and
            # unsearchable in Athena.
            Records=[{'Data': json.dumps(self.sanitize_keys(record),
                                         separators=(",", ":")) + '\n'}
                     for record in record_batch])
    except ClientError as firehose_err:
        LOGGER.error(firehose_err)
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                record_batch_size)
        return

    # Error handle if failures occurred in PutRecordBatch
    # TODO(jack) implement backoff here for additional message reliability
    if resp.get('FailedPutCount') > 0:
        failed_records = [failed
                          for failed in resp['RequestResponses']
                          if failed.get('ErrorCode')]
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                resp['FailedPutCount'])
        # Only print the first 100 failed records to CloudWatch logs
        LOGGER.error('The following records failed to put to the '
                     'Delivery Stream %s: %s',
                     stream_name,
                     json.dumps(failed_records[:100], indent=2))
    else:
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_RECORDS_SENT,
                                record_batch_size)
        LOGGER.info('Successfully sent %d messages to Firehose:%s',
                    record_batch_size,
                    stream_name)
def enabled_log_source(cls, log_source_name):
    """Check that the incoming record is an enabled log source for Firehose

    Args:
        log_source_name (str): The log source of the record

    Returns:
        bool: Whether or not the log source is enabled to send to Firehose
    """
    if not cls._ENABLED_LOGS:
        LOGGER.error('Enabled logs not loaded')
        return False

    return cls.firehose_log_name(log_source_name) in cls._ENABLED_LOGS
def firehose_request_wrapper(data):
    """Firehose request wrapper to use with backoff"""
    # Use the current length of data here so we can track failed records that are retried
    LOGGER.info('[Firehose] Sending %d records to %s', len(data), stream_name)

    response = self._client.put_record_batch(DeliveryStreamName=stream_name,
                                             Records=data)

    # Log this as an error for now so it can be picked up in logs
    if response['FailedPutCount'] > 0:
        LOGGER.error('Received non-zero FailedPutCount: %d', response['FailedPutCount'])
        # Strip out the successful records so only the failed ones are retried. This happens
        # to the list of dictionary objects, so the called function sees the updated list
        self._strip_successful_records(data, response)

    return response
def _limit_record_size(cls, batch):
    """Limits the batch size sent to Firehose by popping large records

    Args:
        batch (list): Record batch to iterate on
    """
    # Iterate over indices in reverse so that popping an oversized record
    # does not shift (and skip) the records that follow it
    for index in reversed(range(len(batch))):
        record = batch[index]
        if len(json.dumps(record, separators=(",", ":"))) > cls.MAX_RECORD_SIZE:
            # Show the first 1k bytes in order to not overload CloudWatch logs
            LOGGER.error('The following record is too large '
                         'to be sent to Firehose: %s', str(record)[:1000])
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    1)
            batch.pop(index)
def load_stream_payload(service, entity, raw_record):
    """Returns the right StreamPayload subclass for this service

    Args:
        service (str): service name to load class for
        entity (str): entity for this service
        raw_record (str): record raw payload data
    """
    payload_map = {'s3': S3Payload,
                   'sns': SnsPayload,
                   'kinesis': KinesisPayload}

    if service not in payload_map:
        LOGGER.error('Service payload not supported: %s', service)
        return

    return payload_map[service](raw_record=raw_record, entity=entity)
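# Hedged usage sketch for the factory above: the entity name, the raw record
# variable, and the `classifier` instance are made up for illustration; a real
# raw record comes straight from the Lambda event's 'Records' list.
payload = load_stream_payload('kinesis', 'example_stream', raw_kinesis_record)
if payload:
    for record in payload.pre_parse():
        classifier.classify_record(record)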
def _process_alerts(self, payload):
    """Process records for alerts and send them to the correct places

    Args:
        payload (StreamPayload): StreamAlert payload object being processed
    """
    for record in payload.pre_parse():
        # Increment the processed size using the length of this record
        self._processed_size += len(record.pre_parsed_record)
        self.classifier.classify_record(record)
        if not record.valid:
            if self.env['lambda_alias'] != 'development':
                LOGGER.error('Record does not match any defined schemas: %s\n%s',
                             record, record.pre_parsed_record)

            self._failed_record_count += 1
            continue

        LOGGER.debug(
            'Classified and Parsed Payload: <Valid: %s, Log Source: %s, Entity: %s>',
            record.valid, record.log_source, record.entity)

        record_alerts = StreamRules.process(record)

        LOGGER.debug('Processed %d valid record(s) that resulted in %d alert(s).',
                     len(payload.records), len(record_alerts))

        # Add all parsed records to the categorized payload dict
        # only if Firehose is enabled
        if self.firehose_client:
            # Only send payloads with enabled types
            disabled_logs = self.config['global']['infrastructure'].get(
                'firehose', {}).get('disabled_logs', [])
            if payload.log_source.split(':')[0] not in disabled_logs:
                self.categorized_payloads[payload.log_source].extend(payload.records)

        if not record_alerts:
            continue

        # Extend the list of alerts with any new ones so they can be returned
        self._alerts.extend(record_alerts)

        if self.enable_alert_processor:
            self.sinker.sink(record_alerts)
def load_enabled_log_sources(cls, firehose_config, log_sources, force_load=False):
    """Load and expand all declared and enabled Firehose log sources

    Args:
        firehose_config (dict): Loaded Firehose config from global.json
        log_sources (dict): Loaded logs.json file
        force_load (bool=False): Set to True if the log sources should be reloaded
            even if there are cached values

    Returns:
        dict: Enabled logs, key: sanitized table name, value: log type value
    """
    # Do not reload the logs if they are already cached
    if cls._ENABLED_LOGS and not force_load:
        return cls._ENABLED_LOGS

    # Nothing to load if no configs passed
    if not (firehose_config and log_sources):
        return cls._ENABLED_LOGS

    # Expand enabled logs into specific subtypes
    for enabled_log in firehose_config.get('enabled_logs', {}):
        enabled_log_parts = enabled_log.split(':')

        # Expand to all subtypes
        if len(enabled_log_parts) == 1:
            expanded_logs = {cls.firehose_log_name(log_name): enabled_log
                             for log_name in log_sources
                             if log_name.split(':')[0] == enabled_log_parts[0]}

            if not expanded_logs:
                LOGGER.error('Enabled Firehose log %s not declared in logs.json',
                             enabled_log)

            cls._ENABLED_LOGS.update(expanded_logs)

        elif len(enabled_log_parts) == 2:
            if enabled_log not in log_sources:
                LOGGER.error('Enabled Firehose log %s not declared in logs.json',
                             enabled_log)
                continue

            cls._ENABLED_LOGS[cls.firehose_log_name('_'.join(enabled_log_parts))] = enabled_log

    return cls._ENABLED_LOGS
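# Illustrative sketch of the expansion performed above. The log names are made
# up, and the sanitized keys assume firehose_log_name() simply replaces ':'
# with '_' -- both are assumptions for this example.
firehose_config = {'enabled_logs': ['cloudwatch', 'osquery:differential']}
log_sources = {
    'cloudwatch:events': {},
    'cloudwatch:flow_logs': {},
    'osquery:differential': {},
}

enabled = StreamAlertFirehose.load_enabled_log_sources(firehose_config, log_sources)
# `enabled` would roughly be:
# {
#     'cloudwatch_events': 'cloudwatch',
#     'cloudwatch_flow_logs': 'cloudwatch',
#     'osquery_differential': 'osquery:differential',
# }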
def parse(self, schema, data):
    """Parse a key value string into a dictionary.

    Args:
        schema (dict): Parsing schema.
        data (str): Data to be parsed.

    Returns:
        list: A list of dictionaries representing parsed records OR
        False if the columns do not match.
    """
    # Get the delimiter (character between key/value pairs) and the
    # separator (the character between keys and values)
    delimiter = self.options.get('delimiter', self.__default_delimiter)
    separator = self.options.get('separator', self.__default_separator)

    kv_payload = {}
    try:
        # remove any blank strings that may exist in our list
        fields = [field for field in data.split(delimiter) if field]
        # first check that the field length matches our number of keys
        if len(fields) != len(schema):
            return False

        regex = re.compile('.+{}.+'.format(separator))
        for index, field in enumerate(fields):
            # verify our fields match the kv regex
            if regex.match(field):
                key, value = field.split(separator)
                # handle duplicate keys
                if key in kv_payload:
                    # load key from our configuration
                    kv_payload[schema.keys()[index]] = value
                else:
                    # load key from data
                    kv_payload[key] = value
            else:
                LOGGER.error('key/value regex failure for %s', field)

        return [kv_payload]
    except UnicodeDecodeError:
        return False
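# Illustrative sketch of the key/value parsing above on an auditd-style string.
# The parser class name, its constructor, and the delimiter/separator options
# are assumptions for this example; only the parse(schema, data) call mirrors
# the function shown above.
from collections import OrderedDict

schema = OrderedDict([('type', 'string'), ('msg', 'string'), ('pid', 'integer')])
data = 'type=SYSCALL msg=audit(1364481363.243:24287) pid=1494'

parser = KVParser(options={'delimiter': ' ', 'separator': '='})  # hypothetical class
records = parser.parse(schema, data)
# `records` would roughly be:
# [{'type': 'SYSCALL', 'msg': 'audit(1364481363.243:24287)', 'pid': '1494'}]
# Values stay as strings here; type conversion happens later in _convert_type()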
def _process_ioc(self, ioc_collections):
    """Check if any info is malicious by querying the DynamoDB IOC table

    Args:
        ioc_collections (list): A list of StreamIoc instances.
    """
    LOGGER.debug('[Threat Intel] Rule Processor queries %d IOCs', len(ioc_collections))
    # Segment data before calling DynamoDB table with batch_get_item.
    for subset in self._segment(ioc_collections):
        query_values = []
        for ioc in subset:
            if ioc.value not in query_values:
                query_values.append(ioc.value)

        query_result = []

        query_error_msg = 'An error occurred while querying the DynamoDB table. Error is: %s'
        try:
            result, unprocessed_keys = self._query(query_values)
            query_result.extend(result)
        except ClientError as err:
            LOGGER.error(query_error_msg, err.response)
            return
        except ParamValidationError as err:
            LOGGER.error(query_error_msg, err)
            return

        # If there are unprocessed keys, we will re-query once with unprocessed
        # keys only
        if unprocessed_keys:
            deserializer = self._deserialize(unprocessed_keys[self._table]['Keys'])
            query_values = [elem[PRIMARY_KEY] for elem in deserializer]
            query_error_msg = ('An error occurred while processing unprocessed keys. '
                               'Error is: %s')
            try:
                result, _ = self._query(query_values)
                query_result.extend(result)
            except ClientError as err:
                LOGGER.error(query_error_msg, err.response)
                return
            except ParamValidationError as err:
                LOGGER.error(query_error_msg, err)
                return

        for value in ioc_collections:
            for ioc in query_result:
                if value.value == ioc[PRIMARY_KEY]:
                    value.sub_type = ioc[SUB_TYPE_KEY]
                    value.is_ioc = True
                    continue
def _add_optional_keys(json_records, schema, optional_keys):
    """Add optional keys to a parsed JSON record.

    Args:
        json_records (list): JSONPath extracted JSON records
        schema (dict): The log type schema
        optional_keys (dict): The optional keys in the schema
    """
    if not optional_keys:
        return

    def _default_optional_values(key):
        """Return a default value for a given schema type"""
        if key == 'string':
            return str()
        elif key == 'integer':
            return int()
        elif key == 'float':
            return float()
        elif key == 'boolean':
            return bool()
        elif key == []:
            return list()
        elif key == OrderedDict():
            return dict()

    for key_name in optional_keys:
        # Instead of doing a schema.update() here with a default value type,
        # we should enforce having any optional keys declared within the schema
        # and log an error if that is not the case
        if key_name not in schema:
            LOGGER.error('Optional top level key \'%s\' '
                         'not found in declared log schema', key_name)
            continue

        # If the optional key isn't in our parsed json payload
        for record in json_records:
            if key_name not in record:
                # Set default value
                record[key_name] = _default_optional_values(schema[key_name])
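# Illustrative sketch of how the helper above fills in defaults for optional
# keys; the schema, record, and optional key names are made up for the example.
from collections import OrderedDict

schema = OrderedDict([('name', 'string'), ('count', 'integer'), ('tags', [])])
json_records = [{'name': 'example'}]
optional_keys = ['count', 'tags']

_add_optional_keys(json_records, schema, optional_keys)
# json_records would now be:
# [{'name': 'example', 'count': 0, 'tags': []}]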
def _check_schema_match(schema_matches):
    """Check to see if the log matches multiple schemas. If so, fall back
    on using log_patterns to look for the proper log. If no log_patterns
    exist, or they do not resolve the problem, fall back on using the
    first matched schema.

    Args:
        schema_matches (list): A list of tuples containing the info for schemas that have
            validly parsed this record. Each tuple is: (log_name, parser, parsed_data)

    Returns:
        tuple: The proper tuple to use for parsing from the list of tuples
    """
    # If there is only one parse or we do not have support for multiple schemas
    # enabled, then just return the first parse that was valid
    if len(schema_matches) == 1 or not SUPPORT_MULTIPLE_SCHEMA_MATCHING:
        return schema_matches[0]

    matches = []
    for i, schema_match in enumerate(schema_matches):
        log_patterns = schema_match.parser.options.get('log_patterns', {})
        LOGGER.debug('Log patterns: %s', log_patterns)
        if all(schema_match.parser.matched_log_pattern(data, log_patterns)
               for data in schema_match.parsed_data):
            matches.append(schema_matches[i])
        else:
            if LOGGER_DEBUG_ENABLED:
                LOGGER.debug('Log pattern matching failed for:\n%s',
                             json.dumps(schema_match.parsed_data, indent=2))

    if matches:
        if len(matches) > 1:
            LOGGER.error('Log patterns matched for multiple schemas: %s',
                         ', '.join(match.log_name for match in matches))
            LOGGER.error('Proceeding with schema for: %s', matches[0].log_name)

        return matches[0]

    LOGGER.error('Log classification matched for multiple schemas: %s',
                 ', '.join(match.log_name for match in schema_matches))
    LOGGER.error('Proceeding with schema for: %s', schema_matches[0].log_name)

    return schema_matches[0]
def run(self, event):
    """StreamAlert Lambda function handler.

    Loads the configuration for the StreamAlert function which contains:
    available data sources, log formats, parser modes, and sinks. Classifies
    logs sent into the stream into a parsed type. Matches records against rules.

    Args:
        event: An AWS event mapped to a specific source/entity (kinesis stream or
            an s3 bucket event) containing data emitted to the stream.

    Returns:
        bool: True if all logs being parsed match a schema
    """
    records = event.get('Records', [])
    LOGGER.debug('Number of Records: %d', len(records))
    if not records:
        return False

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS, len(records))

    for raw_record in records:
        # Get the service and entity from the payload. If the service/entity
        # is not in our config, log an error and move on to the next record
        service, entity = self.classifier.extract_service_and_entity(raw_record)
        if not service:
            LOGGER.error('No valid service found in payload\'s raw record. Skipping '
                         'record: %s', raw_record)
            continue

        if not entity:
            LOGGER.error(
                'Unable to extract entity from payload\'s raw record for service %s. '
                'Skipping record: %s', service, raw_record)
            continue

        # Cache the log sources for this service and entity on the classifier
        if not self.classifier.load_sources(service, entity):
            continue

        # Create the StreamPayload to use for encapsulating parsed info
        payload = load_stream_payload(service, entity, raw_record)
        if not payload:
            continue

        self._process_alerts(payload)

    LOGGER.debug('Invalid record count: %d', self._failed_record_count)

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                            self._failed_record_count)

    LOGGER.debug('%s alerts triggered', len(self._alerts))

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                            len(self._alerts))

    # Check if debug logging is on before JSON-dumping alerts since
    # this can be time consuming if there are a lot of alerts
    if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
        LOGGER.debug('Alerts:\n%s', json.dumps(self._alerts, indent=2))

    return self._failed_record_count == 0
def _firehose_request_helper(self, stream_name, record_batch):
    """Send record batches to Firehose

    Args:
        stream_name (str): The name of the Delivery Stream to send to
        record_batch (list): The records to send
    """
    resp = {}
    record_batch_size = len(record_batch)
    exceptions_to_backoff = (ClientError, ConnectionError)

    @backoff.on_predicate(backoff.fibo,
                          lambda resp: resp['FailedPutCount'] > 0,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          max_value=self.MAX_BACKOFF_FIBO_VALUE,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler,
                          on_success=success_handler,
                          on_giveup=giveup_handler)
    @backoff.on_exception(backoff.fibo,
                          exceptions_to_backoff,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler,
                          on_success=success_handler,
                          on_giveup=giveup_handler)
    def firehose_request_wrapper(data):
        """Firehose request wrapper to use with backoff"""
        LOGGER.info('[Firehose] Sending %d records to %s',
                    record_batch_size,
                    stream_name)
        return self._firehose_client.put_record_batch(DeliveryStreamName=stream_name,
                                                      Records=data)

    # The newline at the end is required by Firehose,
    # otherwise all records will be on a single line and
    # unsearchable in Athena.
    records_data = [
        {'Data': json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'}
        for record in record_batch
    ]

    # The try/except here is to catch the raised error at the
    # end of the backoff.
    try:
        resp = firehose_request_wrapper(records_data)
    except exceptions_to_backoff as firehose_err:
        LOGGER.error(firehose_err)
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                record_batch_size)
        return

    # Error handle if failures occurred in PutRecordBatch after
    # several backoff attempts
    if resp.get('FailedPutCount') > 0:
        failed_records = [failed
                          for failed in resp['RequestResponses']
                          if failed.get('ErrorCode')]
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                resp['FailedPutCount'])
        # Only print the first 100 failed records to CloudWatch logs
        LOGGER.error('[Firehose] The following records failed to put to '
                     'the Delivery Stream %s: %s',
                     stream_name,
                     json.dumps(failed_records[:100], indent=2))
    else:
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_RECORDS_SENT,
                                record_batch_size)
        LOGGER.info('[Firehose] Successfully sent %d messages to %s with RequestId [%s]',
                    record_batch_size,
                    stream_name,
                    resp.get('ResponseMetadata', {}).get('RequestId', ''))
def _send_to_firehose(self):
    """Send all classified records to a respective Firehose Delivery Stream"""
    def _chunk(record_list, chunk_size):
        """Helper function to chunk payloads"""
        for item in range(0, len(record_list), chunk_size):
            yield record_list[item:item + chunk_size]

    def _check_record_batch(batch):
        """Helper function to verify record size"""
        # Iterate over indices in reverse so that popping an oversized record
        # does not shift (and skip) the records that follow it
        for index in reversed(range(len(batch))):
            record = batch[index]
            if len(str(record)) > MAX_RECORD_SIZE:
                # Show the first 1k bytes in order to not overload
                # CloudWatch logs
                LOGGER.error('The following record is too large '
                             'to be sent to Firehose: %s', str(record)[:1000])
                MetricLogger.log_metric(FUNCTION_NAME,
                                        MetricLogger.FIREHOSE_FAILED_RECORDS,
                                        1)
                batch.pop(index)

    delivery_stream_name_pattern = 'streamalert_data_{}'

    # Iterate through each payload type
    for log_type, records in self.categorized_payloads.items():
        # This same method is used when naming the Delivery Streams
        formatted_log_type = log_type.replace(':', '_')

        for record_batch in _chunk(records, MAX_BATCH_SIZE):
            stream_name = delivery_stream_name_pattern.format(formatted_log_type)
            _check_record_batch(record_batch)

            resp = self.firehose_client.put_record_batch(
                DeliveryStreamName=stream_name,
                # The newline at the end is required by Firehose,
                # otherwise all records will be on a single line and
                # unsearchable in Athena.
                Records=[{'Data': json.dumps(record, separators=(",", ":")) + '\n'}
                         for record in record_batch])

            # Error handle if failures occurred
            # TODO(jack) implement backoff here once the rule processor is split
            if resp.get('FailedPutCount') > 0:
                failed_records = [failed
                                  for failed in resp['RequestResponses']
                                  if failed.get('ErrorCode')]
                MetricLogger.log_metric(FUNCTION_NAME,
                                        MetricLogger.FIREHOSE_FAILED_RECORDS,
                                        resp['FailedPutCount'])
                # Only print the first 100 failed records
                LOGGER.error('The following records failed to put to the '
                             'Delivery Stream %s: %s',
                             stream_name,
                             json.dumps(failed_records[:100], indent=2))
            else:
                MetricLogger.log_metric(FUNCTION_NAME,
                                        MetricLogger.FIREHOSE_RECORDS_SENT,
                                        len(record_batch))
                LOGGER.info('Successfully sent %d messages to Firehose:%s',
                            len(record_batch),
                            stream_name)
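# Illustrative sketch of the chunking and stream naming used above. The record
# contents, log type, and MAX_BATCH_SIZE value are assumptions for the example
# (_chunk is a nested helper, so this only mirrors its behavior).
records = [{'id': i} for i in range(1200)]
MAX_BATCH_SIZE = 500  # assumed per-call limit for PutRecordBatch

batches = list(_chunk(records, MAX_BATCH_SIZE))
# len(batches) == 3 -> batches of 500, 500, and 200 records

stream_name = 'streamalert_data_{}'.format('cloudwatch:events'.replace(':', '_'))
# stream_name == 'streamalert_data_cloudwatch_events'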
def run(self, event):
    """StreamAlert Lambda function handler.

    Loads the configuration for the StreamAlert function which contains
    available data sources, log schemas, normalized types, and outputs.
    Classifies logs sent into a parsed type. Matches records against rules.

    Args:
        event (dict): An AWS event mapped to a specific source/entity
            containing data read by Lambda.

    Returns:
        bool: True if all logs being parsed match a schema
    """
    records = event.get('Records', [])
    LOGGER.debug('Number of Records: %d', len(records))
    if not records:
        return False

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS, len(records))

    firehose_config = self.config['global'].get('infrastructure', {}).get('firehose', {})
    if firehose_config.get('enabled'):
        self.firehose_client = boto3.client('firehose',
                                            region_name=self.env['lambda_region'])

    for raw_record in records:
        # Get the service and entity from the payload. If the service/entity
        # is not in our config, log an error and move on to the next record
        service, entity = self.classifier.extract_service_and_entity(raw_record)
        if not service:
            LOGGER.error('No valid service found in payload\'s raw record. Skipping '
                         'record: %s', raw_record)
            continue

        if not entity:
            LOGGER.error(
                'Unable to extract entity from payload\'s raw record for service %s. '
                'Skipping record: %s', service, raw_record)
            continue

        # Cache the log sources for this service and entity on the classifier
        if not self.classifier.load_sources(service, entity):
            continue

        # Create the StreamPayload to use for encapsulating parsed info
        payload = load_stream_payload(service, entity, raw_record)
        if not payload:
            continue

        self._process_alerts(payload)

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_PROCESSED_SIZE,
                            self._processed_size)

    LOGGER.debug('Invalid record count: %d', self._failed_record_count)

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                            self._failed_record_count)

    LOGGER.debug('%s alerts triggered', len(self._alerts))

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                            len(self._alerts))

    # Check if debug logging is on before JSON-dumping alerts since
    # this can be time consuming if there are a lot of alerts
    if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
        LOGGER.debug('Alerts:\n%s', json.dumps(self._alerts, indent=2))

    if self.firehose_client:
        self._send_to_firehose()

    return self._failed_record_count == 0
def run(self, event):
    """StreamAlert Lambda function handler.

    Loads the configuration for the StreamAlert function which contains
    available data sources, log schemas, normalized types, and outputs.
    Classifies logs sent into a parsed type. Matches records against rules.

    Args:
        event (dict): An AWS event mapped to a specific source/entity
            containing data read by Lambda.

    Returns:
        bool: True if all logs being parsed match a schema
    """
    records = event.get('Records', [])
    LOGGER.debug('Number of incoming records: %d', len(records))
    if not records:
        return False

    firehose_config = self.config['global'].get('infrastructure', {}).get('firehose', {})
    if firehose_config.get('enabled'):
        self._firehose_client = StreamAlertFirehose(self.env['lambda_region'],
                                                    firehose_config,
                                                    self.config['logs'])

    payload_with_normalized_records = []
    for raw_record in records:
        # Get the service and entity from the payload. If the service/entity
        # is not in our config, log an error and move on to the next record
        service, entity = self.classifier.extract_service_and_entity(raw_record)
        if not service:
            LOGGER.error('No valid service found in payload\'s raw record. Skipping '
                         'record: %s', raw_record)
            continue

        if not entity:
            LOGGER.error(
                'Unable to extract entity from payload\'s raw record for service %s. '
                'Skipping record: %s', service, raw_record)
            continue

        # Cache the log sources for this service and entity on the classifier
        if not self.classifier.load_sources(service, entity):
            continue

        # Create the StreamPayload to use for encapsulating parsed info
        payload = load_stream_payload(service, entity, raw_record)
        if not payload:
            continue

        payload_with_normalized_records.extend(self._process_alerts(payload))

    # Log normalized records metric
    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.NORMALIZED_RECORDS,
                            len(payload_with_normalized_records))

    # Apply Threat Intel to normalized records at the end of the Rule Processor invocation
    record_alerts = self._rules_engine.threat_intel_match(payload_with_normalized_records)
    self._alerts.extend(record_alerts)
    if record_alerts:
        self.alert_forwarder.send_alerts(record_alerts)

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS,
                            self._processed_record_count)

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_PROCESSED_SIZE,
                            self._processed_size)

    LOGGER.debug('Invalid record count: %d', self._failed_record_count)

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                            self._failed_record_count)

    LOGGER.debug('%s alerts triggered', len(self._alerts))

    MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                            len(self._alerts))

    # Check if debug logging is on before JSON-dumping alerts since
    # this can be time consuming if there are a lot of alerts
    if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
        LOGGER.debug('Alerts:\n%s',
                     json.dumps([alert.output_dict() for alert in self._alerts],
                                indent=2, sort_keys=True))

    if self._firehose_client:
        self._firehose_client.send()

    # Only log rule info here if this is not running tests
    # During testing, this gets logged at the end and printing here could be confusing
    # since stress testing calls this method multiple times
    if self.env['lambda_alias'] != 'development':
        stats.print_rule_stats(True)

    return self._failed_record_count == 0
def _firehose_request_helper(self, stream_name, record_batch):
    """Send record batches to Firehose

    Args:
        stream_name (str): The name of the Delivery Stream to send to
        record_batch (list): The records to send
    """
    exceptions_to_backoff = (ClientError, ConnectionError, Timeout)

    @backoff.on_predicate(backoff.fibo,
                          lambda resp: resp['FailedPutCount'] > 0,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          max_value=self.MAX_BACKOFF_FIBO_VALUE,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler(debug_only=False),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    @backoff.on_exception(backoff.fibo,
                          exceptions_to_backoff,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler(debug_only=False),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    def firehose_request_wrapper(data):
        """Firehose request wrapper to use with backoff"""
        # Use the current length of data here so we can track failed records that are retried
        LOGGER.info('[Firehose] Sending %d records to %s', len(data), stream_name)

        response = self._client.put_record_batch(DeliveryStreamName=stream_name,
                                                 Records=data)

        # Log this as an error for now so it can be picked up in logs
        if response['FailedPutCount'] > 0:
            LOGGER.error('Received non-zero FailedPutCount: %d',
                         response['FailedPutCount'])
            # Strip out the successful records so only the failed ones are retried. This
            # happens to the list of dictionary objects, so the called function sees the
            # updated list
            self._strip_successful_records(data, response)

        return response

    original_batch_size = len(record_batch)

    # The newline at the end is required by Firehose,
    # otherwise all records will be on a single line and
    # unsearchable in Athena.
    records_data = [
        {'Data': json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'}
        for record in record_batch
    ]

    # The try/except here is to catch the raised error at the end of the backoff
    try:
        resp = firehose_request_wrapper(records_data)
    except exceptions_to_backoff as firehose_err:
        LOGGER.error(firehose_err)
        # Use the current length of the records_data in case some records were
        # successful but others were not
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                len(records_data))
        return

    # Error handle if failures occurred in PutRecordBatch after
    # several backoff attempts
    if resp.get('FailedPutCount') > 0:
        failed_records = [failed
                          for failed in resp['RequestResponses']
                          if failed.get('ErrorCode')]
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_FAILED_RECORDS,
                                resp['FailedPutCount'])
        # Only print the first 100 failed records to CloudWatch logs
        LOGGER.error('[Firehose] The following records failed to put to '
                     'the Delivery Stream %s: %s',
                     stream_name,
                     json.dumps(failed_records[:100], indent=2))
    else:
        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.FIREHOSE_RECORDS_SENT,
                                original_batch_size)
        LOGGER.info('[Firehose] Successfully sent %d messages to %s with RequestId [%s]',
                    original_batch_size,
                    stream_name,
                    resp.get('ResponseMetadata', {}).get('RequestId', ''))
def _convert_type(cls, payload, schema):
    """Convert a parsed payload's values into their declared types.

    If the schema is incorrectly defined for a particular field,
    this function will return False which will make the payload invalid.

    Args:
        payload (dict): Parsed payload dict
        schema (dict): data schema for a specific log source

    Returns:
        bool: True if conversion succeeded; the payload dict is modified in place
    """
    for key, value in schema.iteritems():
        key = str(key)
        # if the schema value is declared as string
        if value == 'string':
            try:
                payload[key] = str(payload[key])
            except UnicodeEncodeError:
                payload[key] = unicode(payload[key])
        # if the schema value is declared as integer
        elif value == 'integer':
            try:
                payload[key] = int(payload[key])
            except ValueError:
                LOGGER.error('Invalid schema. Value for key [%s] is not an int: %s',
                             key, payload[key])
                return False
        elif value == 'float':
            try:
                payload[key] = float(payload[key])
            except ValueError:
                LOGGER.error('Invalid schema. Value for key [%s] is not a float: %s',
                             key, payload[key])
                return False
        elif value == 'boolean':
            payload[key] = str(payload[key]).lower() == 'true'
        elif isinstance(value, dict):
            if not value:
                continue  # allow empty maps (dict)

            # Skip the values for the 'streamalert:envelope_keys' key that we've
            # added during parsing if they do not conform to being a dict
            if key == 'streamalert:envelope_keys' and not isinstance(payload[key], dict):
                continue

            cls._convert_type(payload[key], schema[key])
        elif isinstance(value, list):
            pass
        else:
            LOGGER.error('Unsupported schema type: %s', value)

    return True
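# Illustrative sketch of the schema-driven conversion above. The schema and
# payload contents are made up, and the owning class name (StreamClassifier)
# is an assumption for this example.
from collections import OrderedDict

schema = OrderedDict([
    ('port', 'integer'),
    ('success', 'boolean'),
    ('detail', OrderedDict([('score', 'float')]))
])
payload = {'port': '443', 'success': 'True', 'detail': {'score': '0.87'}}

if StreamClassifier._convert_type(payload, schema):
    # payload is mutated in place and would now be:
    # {'port': 443, 'success': True, 'detail': {'score': 0.87}}
    pass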