def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Scan a local file with yara-python and yextend, after trying a UPX unpack.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.

    Returns:
        List of YaraMatch tuples: yara-python matches followed by yextend matches.
    """
    # Try "upx -q -d" on the target; a non-zero exit status means it was not a packed file.
    upx_command = ['./upx', '-q', '-d', target_file]
    try:
        # stderr is merged into stdout and the captured output is discarded.
        subprocess.check_output(upx_command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        pass  # Not a packed file
    else:
        LOGGER.info('Unpacked UPX-compressed file %s', target_file)

    # Raw YARA matches (yara-python)
    yara_python_matches = []
    for raw_match in self._rules.match(
            target_file, externals=self._yara_variables(original_target_path)):
        # The second element of each entry in .strings is collected into a set.
        string_names = {entry[1] for entry in raw_match.strings}
        yara_python_matches.append(
            YaraMatch(raw_match.rule, raw_match.namespace, raw_match.meta, string_names))

    return yara_python_matches + self._yextend_matches(target_file)
def _s3_objects(
        s3_records: List[Dict[str, Any]]) -> Generator[Tuple[str, str], None, None]:
    """Yield the (bucket, key) pair described by each S3 event record.

    Records which do not have the expected structure are logged and skipped.

    Args:
        s3_records: List of S3 event records: [
            {
                's3': {
                    'object': {'key': (str)},
                    'bucket': {'name': (str)}
                }
            },
            ...
        ]

    Yields:
        (bucket_name, object_key) string tuple; the key is URL-decoded with unquote_plus.
    """
    for s3_record in s3_records:
        try:
            s3_info = s3_record['s3']
            bucket = s3_info['bucket']['name']
            decoded_key = urllib.parse.unquote_plus(s3_info['object']['key'])
            yield bucket, decoded_key
        except (KeyError, TypeError):
            LOGGER.exception('Skipping invalid S3 record %s', s3_record)
def put_metric_data(num_yara_rules: int, binaries: List[BinaryInfo]) -> None:
    """Send analyzer statistics to CloudWatch under the 'BinaryAlert' namespace.

    One call publishes four metrics: the number of analyzed binaries, the number of binaries
    with YARA matches, the number of YARA rules and the S3 download latency statistics.

    Args:
        num_yara_rules: Number of YARA rules in the analyzer.
        binaries: List of analyzed BinaryInfo()s.
    """
    LOGGER.debug('Sending metric data')

    def count_metric(metric_name, value):
        """Build a single metric datum measured in 'Count' units."""
        return {'MetricName': metric_name, 'Value': value, 'Unit': 'Count'}

    matched_count = len([binary for binary in binaries if binary.yara_matches])
    download_times = [binary.download_time_ms for binary in binaries]
    latency_metric = {
        'MetricName': 'S3DownloadLatency',
        'StatisticValues': _compute_statistics(download_times),
        'Unit': 'Milliseconds'
    }

    CLOUDWATCH.put_metric_data(
        Namespace='BinaryAlert',
        MetricData=[
            count_metric('AnalyzedBinaries', len(binaries)),
            count_metric('MatchedBinaries', matched_count),
            count_metric('YaraRules', num_yara_rules),
            latency_metric
        ]
    )
def _objects_to_analyze(
        event: Dict[str, Any]) -> Generator[Tuple[str, str], None, None]:
    """Parse the invocation event into a list of objects to analyze.

    Two event shapes are understood:
        - Direct invocation: {'BucketName': (str), 'ObjectKeys': [(str), ...]}
        - SQS invocation: {'Records': [{'body': (str) JSON-encoded S3 event notification}]}

    Invalid SQS messages and invalid S3 records are logged and skipped, so one malformed
    entry does not prevent the rest of the batch from being analyzed.

    Args:
        event: Invocation event (SQS message whose message body is an S3 event notification)

    Yields:
        (bucket_name, object_key) string tuples to analyze; keys are URL-decoded.
    """
    if 'BucketName' in event and 'ObjectKeys' in event:
        # Direct (simple) invocation
        bucket_name = event['BucketName']
        for key in event['ObjectKeys']:
            yield bucket_name, urllib.parse.unquote_plus(key)
        return

    # SQS message invocation
    for sqs_message in event['Records']:
        try:
            s3_records = json.loads(sqs_message['body'])['Records']
        except (KeyError, TypeError, json.JSONDecodeError):
            LOGGER.exception('Skipping invalid SQS message %s', json.dumps(sqs_message))
            continue

        for s3_record in s3_records:
            # Extract both fields before yielding so a malformed record is skipped whole.
            try:
                bucket_name = s3_record['s3']['bucket']['name']
                object_key = urllib.parse.unquote_plus(s3_record['s3']['object']['key'])
            except (KeyError, TypeError):
                LOGGER.exception('Skipping invalid S3 record %s', s3_record)
                continue
            yield bucket_name, object_key
def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Scan a local file with yara-python and then with yextend.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.

    Returns:
        List of YaraMatch tuples. If yextend fails, only the yara-python matches are returned.
    """
    # Raw YARA matches (yara-python)
    # TODO: Once yextend is more robust, we may eventually not need yara-python anymore.
    yara_python_matches = []
    for raw_match in self._rules.match(
            target_file, externals=self._yara_variables(original_target_path)):
        string_names = {entry[1] for entry in raw_match.strings}
        yara_python_matches.append(
            YaraMatch(raw_match.rule, raw_match.namespace, raw_match.meta, string_names))

    # Yextend matches
    os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']
    yextend_output = None
    try:
        yextend_command = [
            './yextend', '-r', self._compiled_rules_file, '-t', target_file, '-j']
        yextend_output = subprocess.check_output(yextend_command)
        parsed_output = json.loads(yextend_output.decode('utf-8'))
        yextend_matches = _convert_yextend_to_yara_match(parsed_output[0])
        return yara_python_matches + yextend_matches
    except Exception:  # pylint: disable=broad-except
        # If yextend fails for any reason, still return the yara-python match results.
        LOGGER.exception('Error running yextend or parsing its output')
        if yextend_output:
            LOGGER.error('yextend output: <%s>', yextend_output)
        return yara_python_matches
def safe_alert_only(self, sns_topic_arn: str) -> None:
    """Publish this binary to an SNS topic via analyzer_aws_lib.publish_safe_to_sns.

    Args:
        sns_topic_arn: ARN of the SNS topic which receives the message.
    """
    LOGGER.info('Publishing an SNS alert')
    analyzer_aws_lib.publish_safe_to_sns(self, sns_topic_arn)
def _download_from_s3(self) -> None:
    """Download binary from S3 and measure elapsed time.

    Sets self.s3_last_modified and self.s3_metadata from the download result, and
    self.download_time_ms to the elapsed download time in milliseconds.
    """
    LOGGER.debug('Downloading %s to %s', self.object_key, self.download_path)

    # perf_counter() is a monotonic clock, so the measured duration cannot be skewed (or go
    # negative) when the system wall clock is adjusted during the download.
    start_time = time.perf_counter()
    self.s3_last_modified, self.s3_metadata = analyzer_aws_lib.download_from_s3(
        self.bucket_name, self.object_key, self.download_path)
    self.download_time_ms = (time.perf_counter() - start_time) * 1000
def _add_s3_key(self, binary: BinaryInfo, analyzer_version: int) -> None:
    """Append the binary's S3 identifier to the S3Objects set of an existing entry.

    The update uses an ADD expression on a string set, so adding an identifier which is
    already present is a no-op.
    """
    LOGGER.info('Adding %s to existing entry (SHA256: %s, AnalyzerVersion: %d)',
                binary.s3_identifier, binary.computed_sha, analyzer_version)

    # The entry is addressed by (SHA256, AnalyzerVersion).
    entry_key = {'SHA256': binary.computed_sha, 'AnalyzerVersion': analyzer_version}
    added_values = {':s3_string_set': {binary.s3_identifier}}
    self._table.update_item(
        Key=entry_key,
        UpdateExpression='ADD S3Objects :s3_string_set',
        ExpressionAttributeValues=added_values
    )
def publish_negative_match_result(self, sns_topic_arn: str) -> None:
    """Notify an SNS topic that this binary did not match any YARA rules.

    The subject names the binary by its filepath, falling back to its SHA when the
    filepath is empty.

    Args:
        sns_topic_arn: Target topic ARN for negative match alerts.
    """
    LOGGER.info('Publishing a negative match result to %s', sns_topic_arn)
    display_name = self.filepath or self.computed_sha
    subject = '[BiAlert] {} did not match any YARA rules'.format(display_name)
    analyzer_aws_lib.publish_to_sns(self, sns_topic_arn, subject)
def __enter__(self):
    """Download the binary, hash it and scan it with YARA when the context is entered.

    Returns:
        This instance, with computed_sha, computed_md5 and yara_matches populated.
    """
    self._download_from_s3()

    hashes = file_hash.compute_hashes(self.download_path)
    self.computed_sha, self.computed_md5 = hashes

    LOGGER.debug('Running YARA analysis')
    analyzer = self.yara_analyzer
    self.yara_matches = analyzer.analyze(
        self.download_path, original_target_path=self.filepath)

    return self
def __enter__(self) -> Any:  # mypy/typing doesn't support recursive type yet
    """Download the binary, hash it and scan it with YARA when the context is entered.

    Returns:
        This instance, with computed_sha, computed_md5 and yara_matches populated.
    """
    self._download_from_s3()

    hashes = file_hash.compute_hashes(self.download_path)
    self.computed_sha, self.computed_md5 = hashes

    LOGGER.debug('Running YARA analysis')
    analyzer = self.yara_analyzer
    self.yara_matches = analyzer.analyze(
        self.download_path, original_target_path=self.filepath)

    return self
def delete_sqs_messages(queue_url: str, receipts: List[str]) -> None:
    """Mark a batch of SQS receipts as completed (removing them from the queue).

    Nothing is sent when there are no receipts, because SQS rejects an empty batch request.
    Receipts are deleted in groups of at most 10, the SQS limit for one batch delete.

    Args:
        queue_url: The URL of the SQS queue containing the messages.
        receipts: List of SQS receipt handles.
    """
    if not receipts:
        return

    LOGGER.info('Deleting %d SQS receipt(s) from %s', len(receipts), queue_url)
    queue = SQS.Queue(queue_url)

    max_batch_size = 10  # Maximum number of entries SQS accepts per batch delete.
    for start in range(0, len(receipts), max_batch_size):
        batch = receipts[start:start + max_batch_size]
        # Entry IDs only need to be unique within a request; the overall index is used.
        queue.delete_messages(Entries=[
            {'Id': str(index), 'ReceiptHandle': receipt}
            for index, receipt in enumerate(batch, start)
        ])
def _create_new_entry(self, binary: BinaryInfo, analyzer_version: int) -> None:
    """Write a new Dynamo item describing the binary and the YARA rules it matched.

    The item is identified by the binary's SHA256 and the analyzer version, and starts with
    a single-element S3Objects set holding the binary's S3 identifier.
    """
    LOGGER.info('Creating new entry (SHA256: %s, AnalyzerVersion: %d)',
                binary.computed_sha, analyzer_version)

    new_entry = {
        'SHA256': binary.computed_sha,
        'AnalyzerVersion': analyzer_version,
        'MatchedRules': binary.matched_rule_ids,
        'MD5': binary.computed_md5,
        'S3LastModified': binary.s3_last_modified,
        'S3Metadata': binary.s3_metadata,
        'S3Objects': {binary.s3_identifier},
    }
    self._table.put_item(Item=new_entry)
def save_matches(self, binary: BinaryInfo, analyzer_version: int) -> bool:
    """Record YARA match results in the Dynamo table and decide whether to alert.

    Args:
        binary: Instance containing information about the binary.
        analyzer_version: Version of the currently executing Lambda function.

    Returns:
        Whether an alert should be fired. Returns True if:
            The current Lambda version is >= the most recent analysis version AND
            (a) Any YARA rule is matched now that was not matched in the previous version, OR
            (b) A new S3 object appears which is identical to an already matched binary.
    """
    # Grab the most recent match results for the given SHA.
    latest = self._most_recent_item(binary.computed_sha)

    if latest is None:
        # This binary has never been matched before.
        self._create_new_entry(binary, analyzer_version)
        return True

    # An entry already exists for this SHA.
    latest_version, latest_rules, latest_s3_objects, older_s3_objects = latest

    # Step 1: update the DB appropriately.
    if analyzer_version != latest_version:
        # This binary has never been matched by this Lambda version.
        self._create_new_entry(binary, analyzer_version)
    elif binary.s3_identifier not in latest_s3_objects:
        # A new S3 object is identical to a previously-matched binary.
        self._add_s3_key(binary, analyzer_version)

    # Step 2: decide whether we need to alert.
    if analyzer_version < latest_version:
        LOGGER.warning(
            'Current Lambda version %d is < version %d from previous analysis',
            analyzer_version, latest_version)
        return False

    if binary.matched_rule_ids - latest_rules:
        # A new YARA rule matched this binary.
        return True

    # Alert only for an S3 object which did not match in either recorded set of objects.
    known_objects = latest_s3_objects.union(older_s3_objects)
    return binary.s3_identifier not in known_objects
def save_matches_and_alert(
        self, analyzer_version: int, dynamo_table_name: str, sns_topic_arn: str) -> None:
    """Store this binary's match results in Dynamo, then alert via SNS when required.

    Args:
        analyzer_version: The currently executing version of the Lambda function.
        dynamo_table_name: Save YARA match results to this Dynamo table.
        sns_topic_arn: Publish match alerts to this SNS topic ARN.
    """
    match_table = analyzer_aws_lib.DynamoMatchTable(dynamo_table_name)

    # save_matches() reports whether the stored results warrant an alert.
    if not match_table.save_matches(self, analyzer_version):
        return

    LOGGER.info('Publishing an SNS alert')
    analyzer_aws_lib.publish_alert_to_sns(self, sns_topic_arn)
def _yextend_matches(self, target_file: str) -> List[YaraMatch]:
    """Use yextend to check for YARA matches against archive contents.

    Every failure mode (yextend cannot be started, exits non-zero, or emits output which
    cannot be decoded, parsed or converted) is logged and results in an empty list, so a
    yextend problem never discards matches found by other means.

    Args:
        target_file: Local path to target file to be analyzed.

    Returns:
        List of YaraMatch tuples, or an empty list if yextend didn't work correctly.
    """
    command = ['./yextend', '-r', self._compiled_rules_file, '-t', target_file, '-j']
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT)
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or non-executable yextend binary.
        LOGGER.exception('Yextend invocation failed')
        return []

    try:
        decoded_output = output.decode('utf-8')
    except UnicodeDecodeError:
        LOGGER.error('Yextend output could not be decoded to utf-8:\n%s', output)
        return []

    try:
        yextend_list = json.loads(decoded_output)
    except json.JSONDecodeError:
        # There may be an error message on the first line and then the JSON result.
        _, _, remaining_lines = decoded_output.partition('\n')
        try:
            yextend_list = json.loads(remaining_lines)
        except json.JSONDecodeError:
            # Still can't parse as JSON
            LOGGER.error('Cannot parse yextend output as JSON:\n%s', decoded_output)
            return []

    # Yextend worked!
    try:
        return _convert_yextend_to_yara_match(yextend_list[0])
    except (KeyError, IndexError, TypeError):
        # TypeError covers valid JSON which is not indexable (e.g. null or a number).
        LOGGER.exception('Unexpected yextend output format')
        LOGGER.error('Yextend output: %s', decoded_output)
        return []
def _create_new_entry(self, binary: BinaryInfo, analyzer_version: int) -> None:
    """Write a new Dynamo item describing the binary and the YARA rules it matched.

    The S3 metadata is passed through self._replace_empty_strings() before being stored.
    If the write fails with a ClientError, the item is logged and the error is re-raised.
    """
    LOGGER.info('Creating new entry (SHA256: %s, AnalyzerVersion: %d)',
                binary.computed_sha, analyzer_version)

    new_entry = {
        'SHA256': binary.computed_sha,
        'AnalyzerVersion': analyzer_version,
        'MatchedRules': binary.matched_rule_ids,
        'MD5': binary.computed_md5,
        'S3LastModified': binary.s3_last_modified,
        'S3Metadata': self._replace_empty_strings(binary.s3_metadata),
        'S3Objects': {binary.s3_identifier},
    }

    try:
        self._table.put_item(Item=new_entry)
    except ClientError:
        # Record which item could not be saved, then let the caller see the failure.
        LOGGER.error('Error saving item %s', new_entry)
        raise
def save_matches_and_alert(
        self, analyzer_version: int, dynamo_table_name: str, sns_topic_arn: str,
        sns_enabled: bool = True) -> None:
    """Store this binary's match results in Dynamo, then alert via SNS when required.

    Args:
        analyzer_version: The currently executing version of the Lambda function.
        dynamo_table_name: Save YARA match results to this Dynamo table.
        sns_topic_arn: Publish match alerts to this SNS topic ARN.
        sns_enabled: If True, match alerts are sent to SNS when applicable.
    """
    match_table = analyzer_aws_lib.DynamoMatchTable(dynamo_table_name)

    # Results are always saved; the alert is sent only if it is both needed and enabled.
    if not (match_table.save_matches(self, analyzer_version) and sns_enabled):
        return

    LOGGER.info('Publishing a YARA match alert to %s', sns_topic_arn)
    display_name = self.filepath or self.computed_sha
    subject = '[BiAlert] {} matches a YARA rule'.format(display_name)
    analyzer_aws_lib.publish_to_sns(self, sns_topic_arn, subject)
def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Scan a local file with yara-python and yextend, after trying a UPX unpack.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.

    Returns:
        List of YaraMatch tuples: yara-python matches followed by yextend matches.
    """
    # Try "upx -q -d" on the target; a non-zero exit status means it was not a packed file.
    upx_command = ['./upx', '-q', '-d', target_file]
    try:
        # stderr is merged into stdout and the captured output is discarded.
        subprocess.check_output(upx_command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        pass  # Not a packed file
    else:
        LOGGER.info('Unpacked UPX-compressed file %s', target_file)

    # Raw YARA matches (yara-python)
    raw_yara_matches = self._rules.match(
        target_file, externals=self._yara_variables(original_target_path))

    yara_python_matches = []
    for raw_match in raw_yara_matches:
        names, values = set(), set()
        for _, string_name, matched_bytes in raw_match.strings:
            names.add(string_name)
            try:
                readable = matched_bytes.decode('utf-8')
            except UnicodeDecodeError:
                # Bytes string is not unicode - print its hex values instead
                readable = matched_bytes.hex()
            values.add(readable)

        yara_python_matches.append(YaraMatch(
            raw_match.rule, raw_match.namespace, raw_match.meta, names, values))

    return yara_python_matches + self._yextend_matches(target_file)
def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Run YARA analysis on a file.

    The file is scanned with yara-python and then with yextend. Any yextend failure (it
    cannot be started, exits non-zero, or produces output which cannot be decoded, parsed
    or converted) is logged, and the yara-python matches are still returned.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.

    Returns:
        List of YaraMatch tuples.
    """
    # Raw YARA matches (yara-python)
    # TODO: Once yextend is more robust, we may eventually not need yara-python anymore.
    raw_yara_matches = self._rules.match(
        target_file, externals=self._yara_variables(original_target_path))
    yara_python_matches = [
        YaraMatch(m.rule, m.namespace, m.meta, {t[1] for t in m.strings})
        for m in raw_yara_matches
    ]

    # Yextend matches
    os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']
    try:
        yextend_output = subprocess.check_output([
            './yextend', '-r', self._compiled_rules_file, '-t', target_file, '-j'
        ])
        yextend_list = json.loads(yextend_output.decode('utf-8'))
        yextend_matches = _convert_yextend_to_yara_match(yextend_list[0])
    except (OSError, subprocess.CalledProcessError, ValueError,
            IndexError, KeyError, TypeError):
        # OSError: yextend missing or not executable. ValueError: covers both
        # UnicodeDecodeError and json.JSONDecodeError. The rest: unexpected output shape.
        LOGGER.exception('Fatal error when running yextend')
        return yara_python_matches

    return yara_python_matches + yextend_matches
def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Run YARA analysis on a file.

    The file is UPX-unpacked if possible and then uploaded to the THOR HTTP endpoint at
    http://127.0.0.1:8080/api/check. Every entry under "matches" in the returned log
    messages is converted into a YaraMatch.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.
            Not used by this implementation.

    Returns:
        List of YaraMatch tuples. Empty if THOR reported no matches or did not answer with
        HTTP 200 (the latter is logged as an error).
    """
    # UPX-unpack the file if possible
    try:
        # Ignore all UPX output
        subprocess.check_output(['./upx', '-q', '-d', target_file], stderr=subprocess.STDOUT)
        LOGGER.info('Unpacked UPX-compressed file %s', target_file)
    except subprocess.CalledProcessError:
        pass  # Not a packed file

    thor_matches = []

    # THOR matches. The file handle is closed as soon as the upload has completed.
    with open(target_file, 'rb') as target:
        response = requests.post('http://127.0.0.1:8080/api/check', files={'file': target})

    try:
        if response.status_code != 200:
            # Make the failure visible: otherwise the file looks like it matched nothing.
            LOGGER.error('THOR check of %s failed with HTTP status %s',
                         target_file, response.status_code)
            return thor_matches

        for message in response.json():
            LOGGER.info("Received THOR log message: %s", str(message))
            if "matches" not in message:
                continue

            for match in message["matches"]:
                try:
                    metadata = {
                        "description": match["reason"],
                        "reference": match["ref"],
                        "date": match["ruledate"],
                        "tags": ", ".join(match["tags"]),
                        "score": match["subscore"],
                    }

                    namespace = "THOR"
                    if "sigtype" in match and match["sigtype"] in (1, "custom"):
                        namespace = "custom"

                    string_matches = match["matched"]
                    if string_matches is None:
                        string_matches = ["None"]

                    thor_matches.append(
                        YaraMatch(match["rulename"], namespace, metadata,
                                  {"Unknown"}, set(string_matches)))
                except (IndexError, KeyError, TypeError):
                    # THOR match with unexpected syntax (missing or wrongly typed field)
                    LOGGER.info("Could not parse THOR match: %s", str(match))
    finally:
        # Release the connection even if the response body could not be processed.
        response.close()

    return thor_matches
def __init__(self) -> None:
    """Initialize the analyzer by starting the THOR server and waiting until it is ready.

    The server is launched as a child process (self.proc) and its stdout is read line by
    line until a line containing "service started" is seen. While waiting, the rule count
    is captured into self._rule_count from any line matching RULE_COUNT_REGEX.

    Raises:
        RuntimeError: If the process exits before reporting that the service started.
    """
    LOGGER.info('Starting THOR server')
    # NOTE(review): stdout stays piped after startup and is not read again in this block;
    # confirm that something drains it so the server cannot block on a full pipe.
    self.proc = subprocess.Popen(
        ['./thor-linux-64', '--thunderstorm', '--pure-yara'],
        stdout=subprocess.PIPE, universal_newlines=True)
    self._rule_count = 0

    startup_successful = False
    while not startup_successful and self.proc.poll() is None:
        line = self.proc.stdout.readline()
        if "service started" in line:
            startup_successful = True

        rule_count_match = RULE_COUNT_REGEX.search(line)
        if rule_count_match is not None:
            self._rule_count = int(rule_count_match.group(1))

        if line:
            # Skip the empty reads returned at end-of-output; drop the trailing newline.
            LOGGER.info('%s', line.rstrip('\n'))

    if not startup_successful:
        # Whatever output is left is the best available explanation for the failure.
        LOGGER.error('%s', self.proc.stdout.read())
        raise RuntimeError("THOR startup was not successful")

    LOGGER.info('Started THOR server')
def _objects_to_analyze(
        event: Dict[str, Any]) -> Generator[Tuple[str, str], None, None]:
    """Yield every S3 object referenced by the invocation event.

    An event whose keys are exactly {'messages', 'queue_url'} is treated as a dispatcher
    batch of SQS messages, each with a JSON-encoded S3 event as its body. Any other event
    is treated as an S3 event dictionary.

    Args:
        event: Invocation event, from either the dispatcher or an S3 bucket

    Yields:
        (bucket_name, object_key) string tuples to analyze
    """
    if set(event) != {'messages', 'queue_url'}:
        LOGGER.info('Invoked with dictionary (S3 Event)')
        yield from _s3_objects(event['Records'])
        return

    LOGGER.info('Invoked from dispatcher with %d messages', len(event['messages']))
    for sqs_record in event['messages']:
        try:
            body = json.loads(sqs_record['body'])
            s3_records = body['Records']
        except (json.JSONDecodeError, KeyError, TypeError):
            LOGGER.exception('Skipping invalid SQS message %s', sqs_record)
        else:
            yield from _s3_objects(s3_records)
def analyze_lambda_handler(event_data: Dict[str, Any],
                           lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Each listed S3 object is downloaded from the bucket named by S3_BUCKET_NAME and
    analyzed. Matches are saved and alerted on, the SQS receipts are deleted, and metrics
    are published.

    Args:
        event_data: [dict] of the form: {
            'S3Objects': [...],  # S3 object keys.
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3objects, but no more than 10 SQS receipts.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    summaries = {}
    analyzed = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    LOGGER.info('Processing %d record(s)', len(event_data['S3Objects']))
    for encoded_key in event_data['S3Objects']:
        # S3 keys in event notifications are url-encoded.
        s3_key = urllib.parse.unquote_plus(encoded_key)
        LOGGER.info('Analyzing "%s"', s3_key)

        with binary_info.BinaryInfo(os.environ['S3_BUCKET_NAME'], s3_key, ANALYZER) as binary:
            summaries[binary.s3_identifier] = binary.summary()
            analyzed.append(binary)

            if not binary.yara_matches:
                LOGGER.info('%s did not match any YARA rules', binary)
            else:
                LOGGER.warning('%s matched YARA rules: %s', binary, binary.matched_rule_ids)
                binary.save_matches_and_alert(
                    lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                    os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Delete all of the SQS receipts (mark them as completed).
    analyzer_aws_lib.delete_sqs_messages(
        os.environ['SQS_QUEUE_URL'], event_data['SQSReceipts'])

    # Publish metrics.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, analyzed)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return summaries
def analyze_lambda_handler(event_data: Dict[str, Any],
                           lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Args:
        event_data: [dict] of the form: {
            'Records': [
                {
                    "s3": {
                        "object": {
                            "key": "FileName.txt"
                        },
                        "bucket": {
                            "name": "mybucket"
                        }
                    }
                }
            ],
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3objects, but no more than 10 SQS receipts.
            The Records are the same format as the S3 Put event, which means the analyzer
            could be directly linked to an S3 bucket notification if needed. In that case
            'SQSReceipts' is absent and no SQS call is made.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    LOGGER.info('Processing %d record(s)', len(event_data['Records']))
    for record in event_data['Records']:
        bucket_name = record['s3']['bucket']['name']
        s3_key = urllib.parse.unquote_plus(record['s3']['object']['key'])
        LOGGER.info('Analyzing "%s:%s"', bucket_name, s3_key)

        with binary_info.BinaryInfo(bucket_name, s3_key, ANALYZER) as binary:
            result[binary.s3_identifier] = binary.summary()
            binaries.append(binary)

            if binary.yara_matches:
                LOGGER.warning('%s matched YARA rules: %s', binary, binary.matched_rule_ids)
                binary.save_matches_and_alert(
                    lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                    os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
            else:
                LOGGER.info('%s did not match any YARA rules', binary)

    # Delete all of the SQS receipts (mark them as completed). A direct S3 invocation has
    # no receipts: skip the call so no empty batch is sent and SQS_QUEUE_URL is not needed.
    receipts = event_data.get('SQSReceipts', [])
    if receipts:
        analyzer_aws_lib.delete_sqs_messages(os.environ['SQS_QUEUE_URL'], receipts)

    # Publish metrics (there is nothing to report if no binary was analyzed).
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except BotoError:
            LOGGER.exception('Error saving metric data')

    return result
def analyze_lambda_handler(event: Dict[str, Any],
                           lambda_context: Any) -> Dict[str, Dict[str, Any]]:
    """Analyzer Lambda function entry point.

    Args:
        event: SQS message batch sent by the dispatcher: {
            'messages': [
                {
                    'body': (str) JSON-encoded S3 put event: {
                        'Records': [
                            {
                                's3': {
                                    'object': {'key': (str)},
                                    'bucket': {'name': (str)}
                                }
                            },
                            ...
                        ]
                    },
                    'receipt': (str) SQS message receipt handle,
                    'receive_count': (int) Approx. # of times this has been received
                },
                ...
            ],
            'queue_url': (str) SQS queue url from which the message originated
        }
            Alternatively, the event can be an S3 Put Event dictionary (with no sqs
            information). This allows the analyzer to be linked directly to an S3 bucket
            notification if needed.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to
    # PATH. The process environment persists while a Lambda container is reused, so the
    # directory is appended only once instead of growing PATH on every invocation.
    task_root = os.environ['LAMBDA_TASK_ROOT']
    if task_root not in os.environ['PATH'].split(':'):
        os.environ['PATH'] = '{}:{}'.format(os.environ['PATH'], task_root)
    os.environ['LD_LIBRARY_PATH'] = task_root

    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key, ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            LOGGER.exception('Unable to download %s from %s', object_key, bucket_name)
            continue

        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary, binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Delete all of the SQS receipts (mark them as completed).
    receipts_to_delete = [msg['receipt'] for msg in event.get('messages', [])]
    if receipts_to_delete:
        analyzer_aws_lib.delete_sqs_messages(event['queue_url'], receipts_to_delete)

    # Publish metrics.
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result
def analyze_lambda_handler(event: Dict[str, Any], lambda_context: Any) -> Dict[str, Any]:
    """Analyzer Lambda function entry point.

    Args:
        event: SQS message batch - each message body is a JSON-encoded S3 notification - {
            'Records': [
                {
                    'body': json.dumps({
                        'Records': [
                            {
                                's3': {
                                    'bucket': {'name': '...'},
                                    'object': {'key': '...'}
                                }
                            }
                        ]
                    }),
                    'messageId': '...'
                }
            ]
        }
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to
    # PATH. The process environment persists while a Lambda container is reused, so the
    # directory is appended only once instead of growing PATH on every invocation.
    task_root = os.environ['LAMBDA_TASK_ROOT']
    if task_root not in os.environ['PATH'].split(':'):
        os.environ['PATH'] = '{}:{}'.format(os.environ['PATH'], task_root)
    os.environ['LD_LIBRARY_PATH'] = task_root

    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key, ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            LOGGER.exception('Unable to download %s from %s', object_key, bucket_name)
            continue

        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary, binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
        else:
            LOGGER.info('%s did not match any YARA rules', binary)
            # The "safe" topic is optional: an unset or empty variable disables the
            # notification instead of raising KeyError.
            safe_topic_arn = os.environ.get('SAFE_SNS_TOPIC_ARN')
            if safe_topic_arn:
                binary.safe_alert_only(safe_topic_arn)

    # Publish metrics.
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result