def _send_batch(self, stream_name, record_batch):
    """Send record batches to Firehose

    Args:
        stream_name (str): The name of the Delivery Stream to send to
        record_batch (list): The records to send
    """
    @backoff.on_predicate(backoff.fibo,
                          lambda resp: resp['FailedPutCount'] > 0,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          max_value=self.MAX_BACKOFF_FIBO_VALUE,
                          on_backoff=backoff_handler(debug_only=False),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    @backoff.on_exception(backoff.fibo,
                          self.EXCEPTIONS_TO_BACKOFF,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          on_backoff=backoff_handler(debug_only=False),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    def _firehose_request_helper(data):
        """Firehose request wrapper to use with backoff"""
        # Use the current length of data here so we can track failed records that are retried
        LOGGER.debug('Sending %d records to firehose %s', len(data), stream_name)

        response = self._client.put_record_batch(DeliveryStreamName=stream_name, Records=data)

        # Log this as a warning so it can be picked up in logs
        if response['FailedPutCount'] > 0:
            LOGGER.warning('Received non-zero FailedPutCount: %d', response['FailedPutCount'])
            # Strip out the successful records so only the failed ones are retried. This
            # mutates the list of dictionary objects, so the retried call sees the updated list
            self._strip_successful_records(data, response)

        return response

    # The record here already contains a newline, so do not append one
    records_data = [{'Data': record} for record in record_batch]

    # The try/except here is to catch the error raised at the end of the backoff
    try:
        return _firehose_request_helper(records_data)
    except self.EXCEPTIONS_TO_BACKOFF:
        LOGGER.exception('Firehose request failed')
        # Use the current length of records_data in case some records were
        # successful and others were not
        self._log_failed(len(records_data))
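# A minimal sketch of the in-place stripping that the retry logic above relies on. This is
# an assumption for illustration, not the class's actual _strip_successful_records: the AWS
# PutRecordBatch response carries one entry in 'RequestResponses' per input record, with an
# 'ErrorCode' present only for failures, so keeping just those records (via slice assignment,
# which mutates the same list object the decorated helper receives on retry) makes the
# @backoff.on_predicate pass resend only what failed.
def _strip_successful_records_sketch(records, response):
    failed = [
        record for record, result in zip(records, response['RequestResponses'])
        if result.get('ErrorCode')  # only failed records include an ErrorCode
    ]
    records[:] = failed  # slice assignment so callers holding this list see the change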
def _dispatch(self, alert, descriptor):
    """Send alert to a Kinesis Firehose Delivery Stream

    Publishing:
        By default this output sends the current publication in JSON to Kinesis.
        There is no "magic" field to "override" it: simply publish what you want to send!

    Args:
        alert (Alert): Alert instance which triggered a rule
        descriptor (str): Output descriptor

    Returns:
        bool: True if alert was sent successfully, False otherwise
    """
    @backoff.on_exception(backoff.fibo,
                          ClientError,
                          max_tries=self.MAX_BACKOFF_ATTEMPTS,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler(),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    def _firehose_request_wrapper(json_alert, delivery_stream):
        """Make the PutRecord request to Kinesis Firehose with backoff

        Args:
            json_alert (str): The JSON dumped alert body
            delivery_stream (str): The Firehose Delivery Stream to send to

        Returns:
            dict: Firehose response in the format below
                {'RecordId': 'string'}
        """
        return self.__aws_client__.put_record(DeliveryStreamName=delivery_stream,
                                              Record={'Data': json_alert})

    if self.__aws_client__ is None:
        self.__aws_client__ = boto3.client('firehose', region_name=self.region)

    publication = compose_alert(alert, self, descriptor)
    json_alert = json.dumps(publication, separators=(',', ':')) + '\n'
    if len(json_alert) > self.MAX_RECORD_SIZE:
        LOGGER.error('Alert too large to send to Firehose:\n%s...', json_alert[0:1000])
        return False

    delivery_stream = self.config[self.__service__][descriptor]
    LOGGER.info('Sending %s to aws-firehose:%s', alert, delivery_stream)

    _firehose_request_wrapper(json_alert, delivery_stream)
    LOGGER.info('%s successfully sent to aws-firehose:%s', alert, delivery_stream)

    return True
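# For reference, a quick illustration (with made-up publication data) of the serialization
# above: compact separators drop the whitespace json.dumps inserts by default, and the
# trailing newline keeps records line-delimited once Firehose concatenates them at the
# destination (e.g. objects in S3).
import json

publication = {'rule_name': 'example_rule', 'record': {'host': 'web-1'}}
json_alert = json.dumps(publication, separators=(',', ':')) + '\n'
assert json_alert == '{"rule_name":"example_rule","record":{"host":"web-1"}}\n'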
def real_decorator(func):
    """Actual decorator to retry on exceptions"""
    @backoff.on_exception(backoff.expo,
                          exceptions,  # This is a tuple of exceptions
                          max_tries=OutputDispatcher.MAX_RETRY_ATTEMPTS,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler(),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper
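# A hedged usage sketch: real_decorator reads like the inner function of a decorator
# factory that closes over `exceptions`, so applying it would look roughly like this.
# The retry_on_exceptions factory name and send_request function are illustrative
# assumptions, not names from this codebase.
import backoff

def retry_on_exceptions(exceptions):
    def real_decorator(func):
        @backoff.on_exception(backoff.expo, exceptions, max_tries=3)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)
        return wrapper
    return real_decorator

@retry_on_exceptions((ConnectionError, TimeoutError))
def send_request():
    pass  # any call that may raise one of the exceptions above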
def check_query_status(self, execution_id):
    """Check in on the running query, backing off while the job is running or queued

    Args:
        execution_id (str): Athena query execution ID

    Returns:
        None: Returns only when the query state is SUCCEEDED; any other terminal state
            raises. See https://bit.ly/2uuRtda for the possible query states.

    Raises:
        AthenaQueryExecutionError: If any failure occurs while checking the status of the
            query, this exception will be raised
    """
    LOGGER.debug('Checking status of query with execution ID: %s', execution_id)

    states_to_backoff = {'QUEUED', 'RUNNING'}

    @backoff.on_predicate(backoff.fibo,
                          lambda resp: (
                              resp['QueryExecution']['Status']['State'] in states_to_backoff
                          ),
                          max_value=10,
                          jitter=backoff.full_jitter,
                          on_backoff=backoff_handler(),
                          on_success=success_handler(True))
    def _check_status(query_execution_id):
        return self._client.get_query_execution(
            QueryExecutionId=query_execution_id
        )

    execution_result = _check_status(execution_id)
    state = execution_result['QueryExecution']['Status']['State']
    if state == 'SUCCEEDED':
        return

    # When the state is not SUCCEEDED, something bad must have occurred, so raise an exception
    reason = execution_result['QueryExecution']['Status']['StateChangeReason']
    raise AthenaQueryExecutionError(
        'Query \'{}\' {} with reason \'{}\', exiting'.format(execution_id, state, reason)
    )
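# A hedged call-site sketch: @backoff.on_predicate retries _check_status for as long as the
# lambda returns True, i.e. while the query is still QUEUED or RUNNING, sleeping
# fibonacci-spaced intervals capped at max_value=10 seconds. Starting the query uses the
# standard boto3 Athena API, but the `athena`/`client` instances, the query text, and the
# bucket are assumptions for illustration.
execution_id = athena.start_query_execution(
    QueryString='SELECT 1',
    ResultConfiguration={'OutputLocation': 's3://example-bucket/athena-results/'}
)['QueryExecutionId']

client.check_query_status(execution_id)  # returns on SUCCEEDED, raises otherwise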
def _query(self, values):
    """Instance method to query the DynamoDB table

    Args:
        values (list): A list of strings containing IOC values

    Returns:
        list: A list of dicts returned from the DynamoDB table query, in the format of
            [
                {'sub_type': 'c2_domain', 'ioc_value': 'evil.com'},
                {'sub_type': 'mal_ip', 'ioc_value': '1.1.1.2'},
            ]
        Unprocessed keys are retried internally rather than returned to the caller.
    """
    @backoff.on_predicate(backoff.fibo,
                          lambda resp: bool(resp['UnprocessedKeys']),  # retry if this is true
                          max_tries=2,  # only retry unprocessed keys 2 times max
                          on_backoff=backoff_handler(),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    @backoff.on_exception(backoff.expo,
                          self.EXCEPTIONS_TO_BACKOFF,
                          max_tries=self.BACKOFF_MAX_RETRIES,
                          giveup=self._exceptions_to_giveup,
                          on_backoff=backoff_handler(),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    def _run_query(query_values, results):
        query_keys = [{self.PRIMARY_KEY: {'S': ioc}} for ioc in query_values if ioc]

        response = self._dynamodb.batch_get_item(
            RequestItems={
                self._table: {
                    'Keys': query_keys,
                    'ProjectionExpression': self.PROJECTION_EXPRESSION
                }
            }
        )

        results.extend(self._deserialize(response['Responses'].get(self._table)))

        # Log this as an error for now so it can be picked up in logs
        if response['UnprocessedKeys']:
            LOGGER.error('Retrying unprocessed keys in response: %s',
                         response['UnprocessedKeys'])
            # Strip out the successful keys so only the unprocessed ones are retried.
            # This changes the list in place, so the retried call sees the updated list
            self._remove_processed_keys(query_values,
                                        response['UnprocessedKeys'][self._table]['Keys'])

        return response

    results = []
    _run_query(values, results)

    return results
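# A minimal sketch (an assumption, not the class's actual helper) of what
# _remove_processed_keys does: batch_get_item returns at most 100 items / 16 MB per call
# and defers the rest to 'UnprocessedKeys', so the query values are rewritten in place to
# just the IOCs that still need fetching before @backoff.on_predicate retries _run_query.
# The 'ioc_value' attribute name stands in for self.PRIMARY_KEY here.
def _remove_processed_keys_sketch(query_values, unprocessed_keys):
    remaining = {key['ioc_value']['S'] for key in unprocessed_keys}
    query_values[:] = [ioc for ioc in query_values if ioc in remaining]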
class AlertProcessor:
    """Orchestrates delivery of alerts to the appropriate dispatchers."""
    ALERT_PROCESSOR = None  # AlertProcessor instance which can be re-used across Lambda invocations
    BACKOFF_MAX_TRIES = 5

    @classmethod
    def get_instance(cls):
        """Get an instance of the AlertProcessor, using a cached version if possible."""
        if not cls.ALERT_PROCESSOR:
            cls.ALERT_PROCESSOR = AlertProcessor()
        return cls.ALERT_PROCESSOR

    def __init__(self):
        """Initialization logic that can be cached across invocations"""
        # Merge user-specified output configuration with the required output configuration
        output_config = load_config(include={'outputs.json'})['outputs']
        self.config = resources.merge_required_outputs(output_config, env['STREAMALERT_PREFIX'])

        self.alerts_table = AlertTable(env['ALERTS_TABLE'])

    def _create_dispatcher(self, output):
        """Create a dispatcher for the given output.

        Args:
            output (str): Alert output, e.g. "aws-sns:topic-name"

        Returns:
            OutputDispatcher: Based on the output type.
                Returns None if the output is invalid or not defined in the config.
        """
        try:
            service, descriptor = output.split(':')
        except ValueError:
            LOGGER.error(
                'Improperly formatted output [%s]. Outputs for rules must '
                'be declared with both a service and a descriptor for the '
                'integration (ie: \'slack:my_channel\')', output)
            return None

        if service not in self.config or descriptor not in self.config[service]:
            LOGGER.error('The output \'%s\' does not exist!', output)
            return None

        return StreamAlertOutput.create_dispatcher(service, self.config)

    def _send_to_outputs(self, alert):
        """Send an alert to each remaining output.

        Args:
            alert (Alert): Alert to send

        Returns:
            dict: Maps output (str) to whether it sent successfully (bool)
        """
        result = {}

        for output in alert.remaining_outputs:
            dispatcher = self._create_dispatcher(output)
            result[output] = dispatcher.dispatch(alert, output) if dispatcher else False

        alert.outputs_sent = set(output for output, success in result.items() if success)
        return result

    @backoff.on_exception(backoff.expo, ClientError,
                          max_tries=BACKOFF_MAX_TRIES, jitter=backoff.full_jitter,
                          on_backoff=backoff_handlers.backoff_handler(),
                          on_success=backoff_handlers.success_handler(),
                          on_giveup=backoff_handlers.giveup_handler())
    def _update_table(self, alert, output_results):
        """Update the alerts table based on the results of the outputs.

        Args:
            alert (Alert): Alert instance which was sent
            output_results (dict): Maps output (str) to whether it sent successfully (bool)
        """
        if not output_results:
            return

        if all(output_results.values()) and not alert.merge_enabled:
            # All outputs sent successfully and the alert will not be merged later - delete it now
            self.alerts_table.delete_alerts([(alert.rule_name, alert.alert_id)])
        elif any(output_results.values()):
            # At least one output succeeded - update the table accordingly
            self.alerts_table.update_sent_outputs(alert)
        # else: If all outputs failed, no table updates are necessary

    def run(self, event):
        """Run the alert processor!

        Args:
            event (dict): Lambda invocation event containing at least the rule name and alert ID.

        Returns:
            dict: Maps output (str) to whether it sent successfully (bool).
                An empty dict is returned if the Alert was improperly formatted.
        """
        # Grab the alert record from Dynamo (if needed)
        if set(event) == {'AlertID', 'RuleName'}:
            LOGGER.info('Retrieving %s from alerts table', event)
            alert_record = self.alerts_table.get_alert_record(event['RuleName'], event['AlertID'])
            if not alert_record:
                LOGGER.error('%s does not exist in the alerts table', event)
                return {}
        else:
            alert_record = event

        # Convert the record to an Alert instance.
        try:
            alert = Alert.create_from_dynamo_record(alert_record)
        except AlertCreationError:
            LOGGER.exception('Invalid alert %s', event)
            return {}

        # Remove the normalization key from the record.
        # TODO: Consider including this in at least some outputs, e.g. default Athena firehose
        if Normalizer.NORMALIZATION_KEY in alert.record:
            del alert.record[Normalizer.NORMALIZATION_KEY]

        result = self._send_to_outputs(alert)
        self._update_table(alert, result)
        return result
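# A hedged sketch of how AlertProcessor would be wired into a Lambda entry point, reusing
# the cached instance across warm invocations; the handler name is an assumption.
def handler(event, _context):
    """event carries either a full alert record or just {'AlertID': ..., 'RuleName': ...}"""
    return AlertProcessor.get_instance().run(event)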
class ThreatStream:
    """Class to retrieve IOCs from ThreatStream.com and store them in DynamoDB"""
    _API_URL = 'https://api.threatstream.com'
    _API_RESOURCE = 'intelligence'
    _IOC_STATUS = 'active'
    # Max IOC objects received from one API call; the API default is 0 (equal to 1000)
    _API_MAX_LIMIT = 1000
    _API_MAX_INDEX = 500000
    # Remaining time in seconds before lambda termination
    _END_TIME_BUFFER = 5
    CRED_PARAMETER_NAME = 'threat_intel_downloader_api_creds'

    EXCEPTIONS_TO_BACKOFF = (requests.exceptions.Timeout,
                             requests.exceptions.ConnectionError,
                             requests.exceptions.ChunkedEncodingError,
                             ThreatStreamRequestsError)
    BACKOFF_MAX_RETRIES = 3

    def __init__(self, function_arn, timing_func):
        self._config = self._load_config(function_arn)
        self.timing_func = timing_func
        self.api_user = None
        self.api_key = None

    @staticmethod
    def _load_config(function_arn):
        """Load the Threat Intel Downloader configuration from the conf/lambda.json file

        Returns:
            (dict): Configuration for the Threat Intel Downloader

        Raises:
            ConfigError: For invalid or missing configuration files.
        """
        base_config = parse_lambda_arn(function_arn)
        config = load_config(include={'lambda.json'})['lambda']
        base_config.update(config.get('threat_intel_downloader_config', {}))
        return base_config

    def _load_api_creds(self):
        """Retrieve ThreatStream API credentials from Parameter Store"""
        if self.api_user and self.api_key:
            return  # credentials already loaded from SSM

        try:
            ssm = boto3.client('ssm', self.region)
            response = ssm.get_parameter(Name=self.CRED_PARAMETER_NAME, WithDecryption=True)
        except ClientError:
            LOGGER.exception('Failed to get SSM parameters')
            raise

        if not response:
            raise ThreatStreamCredsError('Invalid response')

        try:
            decoded_creds = json.loads(response['Parameter']['Value'])
        except ValueError:
            raise ThreatStreamCredsError(
                'Cannot load value for parameter with name \'{}\'. The value is '
                'not valid json: \'{}\''.format(response['Parameter']['Name'],
                                                response['Parameter']['Value']))

        self.api_user = decoded_creds['api_user']
        self.api_key = decoded_creds['api_key']

        if not (self.api_user and self.api_key):
            raise ThreatStreamCredsError('API Creds Error')

    @backoff.on_exception(backoff.constant,
                          EXCEPTIONS_TO_BACKOFF,
                          max_tries=BACKOFF_MAX_RETRIES,
                          on_backoff=backoff_handler(),
                          on_success=success_handler(),
                          on_giveup=giveup_handler())
    def _connect(self, next_url):
        """Send an API call to ThreatStream with the next token and process the parsed IOCs

        The API call is retried up to 3 times.

        Args:
            next_url (str): URL containing the next token used to retrieve more objects
                from ThreatStream
        """
        intelligence = list()

        https_req = requests.get('{}{}'.format(self._API_URL, next_url), timeout=10)
        next_url = None
        if https_req.status_code == 200:
            data = https_req.json()
            if data.get('objects'):
                intelligence.extend(self._process_data(data['objects']))

            LOGGER.info('IOC Offset: %d', data['meta']['offset'])
            if not (data['meta']['next'] and data['meta']['offset'] < self.threshold):
                LOGGER.debug('Either the next token is empty or the IOC offset has reached '
                             'the threshold %d. Stopping IOC retrieval.', self.threshold)
            else:
                next_url = data['meta']['next']
        elif https_req.status_code == 401:
            raise ThreatStreamRequestsError('Response status code 401, unauthorized.')
        elif https_req.status_code == 500:
            raise ThreatStreamRequestsError('Response status code 500, retry now.')
        else:
            raise ThreatStreamRequestsError(
                'Unknown status code {}, do not retry.'.format(https_req.status_code))

        self._finalize(intelligence, next_url)

    def _finalize(self, intel, next_url):
        """Finalize the execution

        Send data to DynamoDB and continue the invocation if necessary.

        Args:
            intel (list): List of intelligence to send to DynamoDB
            next_url (str): Next token used to retrieve more IOCs; None if the next token
                is empty or the threshold on the number of IOCs has been reached
        """
        if intel:
            LOGGER.info('Writing %d IOCs to the DynamoDB table', len(intel))
            self._write_to_dynamodb_table(intel)

        if next_url and self.timing_func() > self._END_TIME_BUFFER * 1000:
            self._invoke_lambda_function(next_url)

        LOGGER.debug('Time remaining (MS): %s', self.timing_func())

    def _invoke_lambda_function(self, next_url):
        """Invoke the Lambda function itself with the next token to continue retrieving IOCs"""
        LOGGER.debug('Lambda function will invoke itself to continue retrieving IOCs.')
        lambda_client = boto3.client('lambda', region_name=self.region)
        try:
            lambda_client.invoke(FunctionName=self._config['function_name'],
                                 InvocationType='Event',
                                 Payload=json.dumps({'next_url': next_url}),
                                 Qualifier=self._config['qualifier'])
        except ClientError as err:
            raise ThreatStreamLambdaInvokeError('Error invoking function: {}'.format(err))

    @staticmethod
    def _epoch_time(time_str, days=90):
        """Convert an expiration time (in UTC) to epoch time

        Args:
            time_str (str): Expiration time in string format,
                e.g. '2017-12-19T04:45:18.412Z'
            days (int): Default expiration period in days from now

        Returns:
            (int): Epoch time. If no expiration time is present, defaults to the
                current time plus `days` days.
        """
        if not time_str:
            return int((datetime.utcnow() + timedelta(days)
                        - datetime.utcfromtimestamp(0)).total_seconds())

        try:
            utc_time = datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%S.%fZ')
            return int((utc_time - datetime.utcfromtimestamp(0)).total_seconds())
        except ValueError:
            LOGGER.error('Cannot convert expiration date \'%s\' to epoch time', time_str)
            raise

    def _process_data(self, data):
        """Process and filter data by sources and keys

        Args:
            data (list): A list of dicts containing IOC information
                Example:
                    [
                        {
                            'value': 'malicious_domain.com',
                            'itype': 'c2_domain',
                            'source': 'crowdstrike',
                            'type': 'domain',
                            'expiration_ts': '2017-12-19T04:45:18.412Z',
                            'key1': 'value1',
                            'key2': 'value2',
                            ...
                        },
                        {
                            'value': 'malicious_domain2.com',
                            'itype': 'c2_domain',
                            'source': 'ioc_source2',
                            'type': 'domain',
                            'expiration_ts': '2017-12-31T04:45:18.412Z',
                            'key1': 'value1',
                            'key2': 'value2',
                            ...
                        }
                    ]

        Returns:
            (list): A list of dicts containing the useful IOC information
                Example:
                    [
                        {
                            'value': 'malicious_domain.com',
                            'itype': 'c2_domain',
                            'source': 'crowdstrike',
                            'type': 'domain',
                            'expiration_ts': 1513658718,
                        }
                    ]
        """
        results = list()
        for obj in data:
            for source in self.ioc_sources:
                if source in obj['source'].lower():
                    filtered_obj = {key: value for key, value in obj.items()
                                    if key in self.ioc_keys}
                    filtered_obj['expiration_ts'] = self._epoch_time(
                        filtered_obj['expiration_ts'])
                    results.append(filtered_obj)
        return results

    def _write_to_dynamodb_table(self, intelligence):
        """Store IOCs in the DynamoDB table"""
        try:
            dynamodb = boto3.resource('dynamodb', region_name=self.region)
            table = dynamodb.Table(self.table_name)
            with table.batch_writer() as batch:
                for ioc in intelligence:
                    batch.put_item(
                        Item={
                            'ioc_value': ioc['value'],
                            'ioc_type': ioc['type'],
                            'sub_type': ioc['itype'],
                            'source': ioc['source'],
                            'expiration_ts': ioc['expiration_ts']
                        }
                    )
        except ClientError as err:
            LOGGER.debug('DynamoDB client error: %s', err)
            raise

    def runner(self, event):
        """Build the query URL and make the API call

        Args:
            event (dict): Contains Lambda function invocation information. The Threat
                Intel Downloader Lambda function is initially invoked by a CloudWatch
                event; a 'next_url' key is inserted into the event when the Lambda
                function invokes itself to retrieve more IOCs.
        """
        event = event or {}
        self._load_api_creds()

        query = '(status="{}")+AND+({})+AND+NOT+({})'.format(
            self._IOC_STATUS,
            '+OR+'.join(['type="{}"'.format(ioc) for ioc in self.ioc_types]),
            '+OR+'.join(['itype="{}"'.format(itype) for itype in self.excluded_sub_types])
        )
        next_url = event.get(
            'next_url',
            '/api/v2/{}/?username={}&api_key={}&limit={}&q={}'.format(
                self._API_RESOURCE, self.api_user, self.api_key, self._API_MAX_LIMIT, query
            )
        )

        self._connect(next_url)

    @property
    def excluded_sub_types(self):
        return self._config['excluded_sub_types']

    @property
    def ioc_keys(self):
        return self._config['ioc_keys']

    @property
    def ioc_sources(self):
        return self._config['ioc_filters']

    @property
    def ioc_types(self):
        return self._config['ioc_types']

    @property
    def region(self):
        return self._config['region']

    @property
    def table_name(self):
        return self._config['function_name']

    @property
    def threshold(self):
        return self._API_MAX_INDEX - self._API_MAX_LIMIT
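# Worked example of the _epoch_time conversion, using the sample values from the
# _process_data docstring above:
#
#   ThreatStream._epoch_time('2017-12-19T04:45:18.412Z')  # -> 1513658718
#   ThreatStream._epoch_time(None)                        # -> now + 90 days, as epoch
#
# Storing expiration_ts as a numeric epoch means the DynamoDB table can age out expired
# IOCs with a TTL configured on that attribute (an assumption about the table setup).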
def test_success_handler_info(log_mock):
    """Backoff Handlers - Success Handler, Info"""
    on_success = success_handler()
    on_success(_get_details())
    log_mock.assert_called()
def test_success_handler_debug(log_mock):
    """Backoff Handlers - Success Handler, Debug"""
    on_success = success_handler(True)
    on_success(_get_details())
    log_mock.assert_called()