Пример #1
0
 def _add_s3_key(self, binary: BinaryInfo, analyzer_version: int) -> None:
     """Attach one more S3 identifier to an entry that is already stored.

     The identifier is merged into the entry's ``S3Objects`` string set with a
     Dynamo ``ADD`` update, so adding an identifier that is already present
     leaves the entry unchanged.

     Args:
         binary: Supplies the SHA256 (entry key) and the S3 identifier to add.
         analyzer_version: Analyzer version, the second part of the entry key.
     """
     sha256 = binary.computed_sha
     s3_identifier = binary.s3_identifier
     LOGGER.info(
         'Adding %s to existing entry (SHA256: %s, AnalyzerVersion: %d)',
         s3_identifier, sha256, analyzer_version)

     # The entry is addressed by its (SHA256, AnalyzerVersion) pair.
     entry_key = {'SHA256': sha256, 'AnalyzerVersion': analyzer_version}
     self._table.update_item(
         Key=entry_key,
         UpdateExpression='ADD S3Objects :s3_string_set',
         ExpressionAttributeValues={':s3_string_set': {s3_identifier}})
Пример #2
0
 def _create_new_entry(self, binary: BinaryInfo,
                       analyzer_version: int) -> None:
     """Write a brand-new Dynamo item describing the YARA matches of a binary.

     Args:
         binary: Analyzed binary whose hashes, matched rule IDs and S3 details
             are stored in the item.
         analyzer_version: Analyzer version recorded alongside the SHA256.
     """
     sha256 = binary.computed_sha
     LOGGER.info('Creating new entry (SHA256: %s, AnalyzerVersion: %d)',
                 sha256, analyzer_version)

     new_entry = {
         'SHA256': sha256,
         'AnalyzerVersion': analyzer_version,
         'MatchedRules': binary.matched_rule_ids,
         'MD5': binary.computed_md5,
         'S3LastModified': binary.s3_last_modified,
         'S3Metadata': binary.s3_metadata,
         # Stored as a set; the first S3 identifier is its only member.
         'S3Objects': {binary.s3_identifier},
     }
     self._table.put_item(Item=new_entry)
Пример #3
0
    def save_matches_and_alert(
            self, analyzer_version: int, dynamo_table_name: str, sns_topic_arn: str) -> None:
        """Persist this binary's match results and raise an SNS alert when required.

        Args:
            analyzer_version: The currently executing version of the Lambda function.
            dynamo_table_name: Name of the Dynamo table that receives the match results.
            sns_topic_arn: ARN of the SNS topic that receives match alerts.
        """
        match_table = analyzer_aws_lib.DynamoMatchTable(dynamo_table_name)

        # save_matches() tells us whether this save warrants an alert.
        if not match_table.save_matches(self, analyzer_version):
            return

        LOGGER.info('Publishing an SNS alert')
        analyzer_aws_lib.publish_alert_to_sns(self, sns_topic_arn)
Пример #4
0
    def analyze(self,
                target_file: str,
                original_target_path: str = '') -> List[YaraMatch]:
        """Scan a local file with yara-python and yextend.

        The file is first handed to ``upx -d``; a failing UPX exit status is
        ignored. yara-python results are always returned; yextend results are
        appended only when yextend runs and its JSON output parses.

        Args:
            target_file: Local path to target file to be analyzed.
            original_target_path: Path where the target file was originally discovered.

        Returns:
            List of YaraMatch tuples.
        """
        # Try to UPX-unpack the file; a non-zero exit means it was not packed.
        try:
            subprocess.check_call(['./upx', '-d', target_file])
        except subprocess.CalledProcessError:
            pass
        else:
            LOGGER.info('Unpacked UPX-compressed file %s', target_file)

        # Raw YARA matches (yara-python)
        # TODO: Once yextend is more robust, we may eventually not need yara-python anymore.
        yara_python_matches = []
        for hit in self._rules.match(
                target_file,
                externals=self._yara_variables(original_target_path)):
            # Index 1 of every matched-string tuple is collected into a set.
            yara_python_matches.append(
                YaraMatch(hit.rule, hit.namespace, hit.meta,
                          {entry[1] for entry in hit.strings}))

        # Yextend matches
        # NOTE(review): presumably lets yextend find shared libraries bundled
        # in the task root - confirm.
        os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']
        yextend_output = None
        try:
            yextend_command = [
                './yextend', '-r', self._compiled_rules_file, '-t',
                target_file, '-j'
            ]
            yextend_output = subprocess.check_output(yextend_command)
            parsed_output = json.loads(yextend_output.decode('utf-8'))
            # Only the first element of the yextend JSON list is converted.
            return yara_python_matches + _convert_yextend_to_yara_match(
                parsed_output[0])
        except Exception:  # pylint: disable=broad-except
            # Whatever went wrong with yextend, the yara-python results are still returned.
            LOGGER.exception('Error running yextend or parsing its output')
            if yextend_output:
                LOGGER.error('yextend output: <%s>', yextend_output)
            return yara_python_matches
Пример #5
0
 def _create_new_entry(self, binary: BinaryInfo, analyzer_version: int) -> None:
     """Write a brand-new Dynamo item describing the YARA matches of a binary.

     Args:
         binary: Analyzed binary whose hashes, matched rule IDs and S3 details
             are stored in the item.
         analyzer_version: Analyzer version recorded alongside the SHA256.

     Raises:
         ClientError: Re-raised after the rejected item has been logged.
     """
     sha256 = binary.computed_sha
     LOGGER.info('Creating new entry (SHA256: %s, AnalyzerVersion: %d)',
                 sha256, analyzer_version)

     new_entry = {
         'SHA256': sha256,
         'AnalyzerVersion': analyzer_version,
         'MatchedRules': binary.matched_rule_ids,
         'MD5': binary.computed_md5,
         'S3LastModified': binary.s3_last_modified,
         # The metadata goes through _replace_empty_strings() before storage.
         'S3Metadata': self._replace_empty_strings(binary.s3_metadata),
         'S3Objects': {binary.s3_identifier},
     }

     try:
         self._table.put_item(Item=new_entry)
     except ClientError:
         # Log the offending item for debugging, then let the error propagate.
         LOGGER.error('Error saving item %s', new_entry)
         raise
Пример #6
0
    def save_matches_and_alert(
            self, analyzer_version: int, dynamo_table_name: str, sns_topic_arn: str,
            sns_enabled: bool = True) -> None:
        """Persist this binary's match results and raise an SNS alert when required.

        The results are always saved; the alert is published only when the
        save reports that one is needed and ``sns_enabled`` is true.

        Args:
            analyzer_version: The currently executing version of the Lambda function.
            dynamo_table_name: Name of the Dynamo table that receives the match results.
            sns_topic_arn: ARN of the SNS topic that receives match alerts.
            sns_enabled: If True, match alerts are sent to SNS when applicable.
        """
        match_table = analyzer_aws_lib.DynamoMatchTable(dynamo_table_name)
        alert_required = match_table.save_matches(self, analyzer_version)

        if not (alert_required and sns_enabled):
            return

        LOGGER.info('Publishing a YARA match alert to %s', sns_topic_arn)
        # The subject names the file path, falling back to the SHA256 when the path is empty.
        display_name = self.filepath or self.computed_sha
        subject = '[BiAlert] {} matches a YARA rule'.format(display_name)
        analyzer_aws_lib.publish_to_sns(self, sns_topic_arn, subject)
Пример #7
0
    def analyze(self,
                target_file: str,
                original_target_path: str = '') -> List[YaraMatch]:
        """Scan a local file with yara-python and yextend.

        The file is first handed to ``upx -q -d``; a failing UPX exit status
        is ignored and UPX output is discarded.

        Args:
            target_file: Local path to target file to be analyzed.
            original_target_path: Path where the target file was originally discovered.

        Returns:
            List of YaraMatch tuples: yara-python matches followed by the
            matches returned by ``self._yextend_matches``.
        """
        # Try to UPX-unpack the file; a non-zero exit means it was not packed.
        try:
            # stderr is folded into the captured (and discarded) stdout.
            subprocess.check_output(['./upx', '-q', '-d', target_file],
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            pass
        else:
            LOGGER.info('Unpacked UPX-compressed file %s', target_file)

        # Raw YARA matches (yara-python)
        yara_python_matches = []
        for hit in self._rules.match(
                target_file,
                externals=self._yara_variables(original_target_path)):
            identifiers = set()
            rendered_data = set()
            for _, identifier, raw_bytes in hit.strings:
                identifiers.add(identifier)
                try:
                    text = raw_bytes.decode('utf-8')
                except UnicodeDecodeError:
                    # Not valid UTF-8: keep the hex representation instead.
                    text = raw_bytes.hex()
                rendered_data.add(text)

            yara_python_matches.append(
                YaraMatch(hit.rule, hit.namespace, hit.meta, identifiers,
                          rendered_data))

        combined_matches = yara_python_matches + self._yextend_matches(
            target_file)
        return combined_matches
Пример #8
0
def _objects_to_analyze(
        event: Dict[str, Any]) -> Generator[Tuple[str, str], None, None]:
    """Lazily yield the objects referenced by an invocation event.

    An event whose keys are exactly ``messages`` and ``queue_url`` is treated
    as a dispatcher batch: each message body is JSON-decoded and its
    ``Records`` are expanded. Any other event is treated as an S3 event and
    its top-level ``Records`` are expanded directly.

    Args:
        event: Invocation event, from either the dispatcher or an S3 bucket

    Yields:
        (bucket_name, object_key) string tuples to analyze
    """
    if set(event) != {'messages', 'queue_url'}:
        LOGGER.info('Invoked with dictionary (S3 Event)')
        yield from _s3_objects(event['Records'])
        return

    sqs_messages = event['messages']
    LOGGER.info('Invoked from dispatcher with %d messages',
                len(sqs_messages))
    for sqs_record in sqs_messages:
        try:
            decoded_body = json.loads(sqs_record['body'])
            s3_records = decoded_body['Records']
        except (json.JSONDecodeError, KeyError, TypeError):
            # One malformed message must not abort the rest of the batch.
            LOGGER.exception('Skipping invalid SQS message %s', sqs_record)
            continue
        yield from _s3_objects(s3_records)
Пример #9
0
    def analyze(self,
                target_file: str,
                original_target_path: str = '') -> List[YaraMatch]:
        """Run YARA analysis on a file by submitting it to the local THOR service.

        The file is first handed to ``upx -q -d``; a failing UPX exit status
        is ignored. It is then POSTed to ``http://127.0.0.1:8080/api/check``
        and every entry under a ``matches`` key of the JSON reply is turned
        into a YaraMatch. A non-200 reply yields an empty list.

        Args:
            target_file: Local path to target file to be analyzed.
            original_target_path: Path where the target file was originally discovered.
                Not used by this implementation.

        Returns:
            List of YaraMatch tuples.
        """
        # UPX-unpack the file if possible
        try:
            # Ignore all UPX output
            subprocess.check_output(['./upx', '-q', '-d', target_file],
                                    stderr=subprocess.STDOUT)
            LOGGER.info('Unpacked UPX-compressed file %s', target_file)
        except subprocess.CalledProcessError:
            pass  # Not a packed file

        thor_matches = []
        # THOR matches
        # The context manager releases the file handle even if the POST raises.
        with open(target_file, 'rb') as sample:
            response = requests.post('http://127.0.0.1:8080/api/check',
                                     files=dict(file=sample))

        # try/finally closes the response even if JSON decoding or match
        # parsing raises an exception that is not handled below.
        try:
            if response.status_code != 200:
                return thor_matches

            for message in response.json():
                LOGGER.info("Received THOR log message: %s", str(message))
                if "matches" not in message:
                    continue
                for match in message["matches"]:
                    try:
                        metadata = {
                            "description": match["reason"],
                            "reference": match["ref"],
                            "date": match["ruledate"],
                            "tags": ", ".join(match["tags"]),
                            "score": match["subscore"],
                        }
                        # A sigtype of 1 or "custom" moves the match to the
                        # "custom" namespace; everything else stays "THOR".
                        namespace = "THOR"
                        if "sigtype" in match and (match["sigtype"] == 1
                                                   or match["sigtype"]
                                                   == "custom"):
                            namespace = "custom"
                        string_matches = match["matched"]
                        if string_matches is None:
                            string_matches = ["None"]
                        thor_matches.append(
                            YaraMatch(match["rulename"], namespace,
                                      metadata, {"Unknown"},
                                      set(string_matches)))
                    except (IndexError,
                            KeyError):  # THOR match with unexpected syntax
                        LOGGER.info("Could not parse THOR match: %s",
                                    str(match))
        finally:
            response.close()
        return thor_matches
Пример #10
0
 def __init__(self) -> None:
     """Launch the THOR server subprocess and wait until it reports readiness.

     Lines of the subprocess's stdout are read (and logged) one at a time
     until one contains "service started" or the process has exited. While
     reading, the most recent RULE_COUNT_REGEX match is stored in
     ``self._rule_count``.

     Raises:
         Exception: If the process exited before a "service started" line
             was read.
     """
     LOGGER.info('Starting THOR server')
     self.proc = subprocess.Popen(
         ['./thor-linux-64', '--thunderstorm', '--pure-yara'],
         stdout=subprocess.PIPE,
         universal_newlines=True)
     self._rule_count = 0

     server_ready = False
     while self.proc.poll() is None:
         output_line = self.proc.stdout.readline()

         count_match = RULE_COUNT_REGEX.search(output_line)
         if count_match is not None:
             self._rule_count = int(count_match.group(1))
         LOGGER.info(output_line)

         if "service started" in output_line:
             server_ready = True
             break

     if not server_ready:
         # Log whatever output is left to help diagnose the failed startup.
         LOGGER.info(self.proc.stdout.read())
         raise Exception("THOR startup was not successful")
     LOGGER.info('Started THOR server')
Пример #11
0
def analyze_lambda_handler(event_data: Dict[str, Any],
                           lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Analyzes every listed S3 object from the bucket named by the
    S3_BUCKET_NAME environment variable, saves and alerts on YARA matches,
    deletes the SQS receipts and publishes metrics.

    Args:
        event_data: [dict] of the form: {
            'S3Objects': [...],  # S3 object keys.
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3objects, but no more than 10 SQS receipts.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # A function version that is not an integer is recorded as -1.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    result = {}
    binaries = []  # Every BinaryInfo analyzed, kept for the metrics call.

    encoded_keys = event_data['S3Objects']
    LOGGER.info('Processing %d record(s)', len(encoded_keys))
    for encoded_key in encoded_keys:
        # S3 keys in event notifications are url-encoded.
        s3_key = urllib.parse.unquote_plus(encoded_key)
        LOGGER.info('Analyzing "%s"', s3_key)

        with binary_info.BinaryInfo(os.environ['S3_BUCKET_NAME'], s3_key,
                                    ANALYZER) as binary:
            summary = binary.summary()
            result[binary.s3_identifier] = summary
            binaries.append(binary)

            if not binary.yara_matches:
                LOGGER.info('%s did not match any YARA rules', binary)
                continue

            LOGGER.warning('%s matched YARA rules: %s', binary,
                           binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version,
                os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Mark the SQS messages as completed by deleting their receipts.
    queue_url = os.environ['SQS_QUEUE_URL']
    analyzer_aws_lib.delete_sqs_messages(queue_url,
                                         event_data['SQSReceipts'])

    # Metrics are best-effort: a failure is logged, not raised.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
Пример #12
0
def analyze_lambda_handler(event: Dict[str, Any],
                           lambda_context: Any) -> Dict[str, Dict[str, Any]]:
    """Analyzer Lambda function entry point.

    Analyzes every object yielded by ``_objects_to_analyze(event)``, saves and
    alerts on YARA matches, deletes the processed SQS receipts (if any) and
    publishes metrics. Objects that cannot be downloaded are logged and
    skipped.

    Args:
        event: SQS message batch sent by the dispatcher: {
            'messages': [
                {
                    'body': (str) JSON-encoded S3 put event: {
                        'Records': [
                            {
                                's3': {
                                    'object': {
                                        'key': (str)
                                    },
                                    'bucket': {
                                        'name': (str)
                                    }
                                }
                            },
                            ...
                        ]
                    },
                    'receipt': (str) SQS message receipt handle,
                    'receive_count': (int) Approx. # of times this has been received
                },
                ...
            ],
            'queue_url': (str) SQS queue url from which the message originated
        }
            Alternatively, the event can be an S3 Put Event dictionary (with no sqs information).
            This allows the analyzer to be linked directly to an S3 bucket notification if needed.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to PATH.
    # The handler can run many times in one process, so only append the task root when it is
    # not already listed; otherwise PATH would grow on every invocation.
    current_path = os.environ['PATH']
    task_root = os.environ['LAMBDA_TASK_ROOT']
    if task_root not in current_path.split(':'):
        os.environ['PATH'] = '{}:{}'.format(current_path, task_root)
    os.environ['LD_LIBRARY_PATH'] = task_root

    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key,
                                        ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            # A failed download skips this object only; the rest of the batch continues.
            LOGGER.exception('Unable to download %s from %s', object_key,
                             bucket_name)
            continue

        # `binary` stays bound after the with block, so its results are still readable here.
        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary,
                           binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Delete all of the SQS receipts (mark them as completed).
    # A plain S3 event has no 'messages' key, so there is nothing to delete.
    receipts_to_delete = [msg['receipt'] for msg in event.get('messages', [])]
    if receipts_to_delete:
        analyzer_aws_lib.delete_sqs_messages(event['queue_url'],
                                             receipts_to_delete)

    # Publish metrics (best-effort: a failure is logged, not raised).
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result
Пример #13
0
def analyze_lambda_handler(event_data: Dict[str, Any],
                           lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Analyzes the S3 object named by every record, saves and alerts on YARA
    matches, deletes the SQS receipts and publishes metrics.

    Args:
        event_data: [dict] of the form: {
            'Records': [
                {
                    "s3": {
                        "object": {
                            "key": "FileName.txt"
                        },
                        "bucket": {
                            "name": "mybucket"
                        }
                    }
                }
            ],
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3objects, but no more than 10 SQS receipts.
            The Records are the same format as the S3 Put event, which means the analyzer could be
            directly linked to an S3 bucket notification if needed.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # A function version that is not an integer is recorded as -1.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    result = {}
    binaries = []  # Every BinaryInfo analyzed, kept for the metrics call.

    records = event_data['Records']
    LOGGER.info('Processing %d record(s)', len(records))
    for record in records:
        s3_info = record['s3']
        bucket_name = s3_info['bucket']['name']
        # The object key is url-decoded ('+' becomes a space) before use.
        s3_key = urllib.parse.unquote_plus(s3_info['object']['key'])
        LOGGER.info('Analyzing "%s:%s"', bucket_name, s3_key)

        with binary_info.BinaryInfo(bucket_name, s3_key, ANALYZER) as binary:
            summary = binary.summary()
            result[binary.s3_identifier] = summary
            binaries.append(binary)

            if not binary.yara_matches:
                LOGGER.info('%s did not match any YARA rules', binary)
                continue

            LOGGER.warning('%s matched YARA rules: %s', binary,
                           binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version,
                os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Mark the SQS messages as completed by deleting their receipts.
    # A missing 'SQSReceipts' key is treated as an empty list.
    queue_url = os.environ['SQS_QUEUE_URL']
    analyzer_aws_lib.delete_sqs_messages(queue_url,
                                         event_data.get('SQSReceipts', []))

    # Metrics are best-effort: a failure is logged, not raised.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
Пример #14
0
def analyze_lambda_handler(event: Dict[str, Any],
                           lambda_context: Any) -> Dict[str, Any]:
    """Analyzer Lambda function entry point.

    Analyzes every object yielded by ``_objects_to_analyze(event)``. Binaries
    with YARA matches are saved and alerted on; binaries without matches
    trigger a "safe" notification when SAFE_SNS_TOPIC_ARN is configured.
    Objects that cannot be downloaded are logged and skipped.

    Args:
        event: SQS message batch - each message body is a JSON-encoded S3 notification - {
            'Records': [
                {
                    'body': json.dumps({
                        'Records': [
                            {
                                's3': {
                                    'bucket': {
                                        'name': '...'
                                    },
                                    'object': {
                                        'key': '...'
                                    }
                                }
                            }
                        ]
                    }),
                    'messageId': '...'
                }
            ]
        }
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to PATH.
    # The handler can run many times in one process, so only append the task root when it is
    # not already listed; otherwise PATH would grow on every invocation.
    current_path = os.environ['PATH']
    task_root = os.environ['LAMBDA_TASK_ROOT']
    if task_root not in current_path.split(':'):
        os.environ['PATH'] = '{}:{}'.format(current_path, task_root)
    os.environ['LD_LIBRARY_PATH'] = task_root

    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key,
                                        ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            # A failed download skips this object only; the rest of the batch continues.
            LOGGER.exception('Unable to download %s from %s', object_key,
                             bucket_name)
            continue

        # `binary` stays bound after the with block, so its results are still readable here.
        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary,
                           binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
        else:
            LOGGER.info('%s did not match any YARA rules', binary)
            # The safe-alert topic is optional: an unset or empty variable disables the
            # notification instead of raising KeyError.
            safe_topic_arn = os.environ.get('SAFE_SNS_TOPIC_ARN')
            if safe_topic_arn:
                binary.safe_alert_only(safe_topic_arn)

    # Publish metrics (best-effort: a failure is logged, not raised).
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result