def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Run YARA analysis on a file.

    The file is first scanned with yara-python and then with the external yextend binary.
    The yextend step is best-effort: if anything in it fails (including its environment
    setup), the error is logged and only the yara-python matches are returned.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.

    Returns:
        List of YaraMatch tuples.
    """
    # Raw YARA matches (yara-python)
    # TODO: Once yextend is more robust, we may eventually not need yara-python anymore.
    raw_yara_matches = self._rules.match(
        target_file, externals=self._yara_variables(original_target_path)
    )
    yara_python_matches = [
        # t[1] is the second element of each entry in m.strings; duplicates are collapsed.
        YaraMatch(m.rule, m.namespace, m.meta, {t[1] for t in m.strings})
        for m in raw_yara_matches
    ]

    # Yextend matches
    yextend_output = None
    try:
        # Kept inside the try block: a missing LAMBDA_TASK_ROOT must not raise KeyError and
        # throw away the yara-python matches computed above.
        # NOTE(review): presumably this lets yextend locate its bundled shared libraries.
        os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']
        yextend_output = subprocess.check_output(
            ['./yextend', '-r', self._compiled_rules_file, '-t', target_file, '-j'])
        yextend_list = json.loads(yextend_output.decode('utf-8'))
        return yara_python_matches + _convert_yextend_to_yara_match(yextend_list[0])
    except Exception:  # pylint: disable=broad-except
        # If yextend fails for any reason, still return the yara-python match results.
        LOGGER.exception('Error running yextend or parsing its output')
        if yextend_output:
            # Only available when the subprocess succeeded but its output was unusable.
            LOGGER.error('yextend output: <%s>', yextend_output)
        return yara_python_matches
def _create_new_entry(self, binary: BinaryInfo, analyzer_version: int) -> None:
    """Insert a fresh Dynamo item holding the YARA match information for a binary.

    Args:
        binary: Analyzed binary whose hashes, matched rule IDs and S3 details are stored.
        analyzer_version: Analyzer version recorded alongside the match results.

    Raises:
        ClientError: Re-raised after the failed item has been logged.
    """
    sha256 = binary.computed_sha
    LOGGER.info(
        'Creating new entry (SHA256: %s, AnalyzerVersion: %d)', sha256, analyzer_version
    )

    # The whole record is logged if the write fails, so key order is kept stable.
    new_record = {
        'SHA256': sha256,
        'AnalyzerVersion': analyzer_version,
        'MatchedRules': binary.matched_rule_ids,
        'MD5': binary.computed_md5,
        'S3LastModified': binary.s3_last_modified,
        'S3Metadata': self._replace_empty_strings(binary.s3_metadata),
        # Stored as a single-element set containing this binary's S3 identifier.
        'S3Objects': {binary.s3_identifier}
    }

    try:
        self._table.put_item(Item=new_record)
    except ClientError:
        LOGGER.error('Error saving item %s', new_record)
        raise
def _yextend_matches(self, target_file: str) -> List[YaraMatch]:
    """Use yextend to check for YARA matches against archive contents.

    Failures are logged and reported as an empty list rather than raised. This covers
    yextend failing to start, exiting non-zero, emitting output that cannot be decoded
    or parsed as JSON, and JSON that does not have the expected shape.

    Args:
        target_file: Local path to target file to be analyzed.

    Returns:
        List of YaraMatch tuples, or an empty list if yextend didn't work correctly.
    """
    try:
        output = subprocess.check_output(
            ['./yextend', '-r', self._compiled_rules_file, '-t', target_file, '-j'],
            stderr=subprocess.STDOUT  # Capture stderr together with stdout
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError: the yextend binary is missing or cannot be executed.
        # CalledProcessError: yextend ran but exited with a non-zero status.
        LOGGER.exception('Yextend invocation failed')
        return []

    try:
        decoded_output = output.decode('utf-8')
    except UnicodeDecodeError:
        LOGGER.error('Yextend output could not be decoded to utf-8:\n%s', output)
        return []

    try:
        yextend_list = json.loads(decoded_output)
    except json.JSONDecodeError:
        # There may be an error message on the first line and then the JSON result,
        # so retry with everything after the first newline.
        remainder = decoded_output.partition('\n')[2]
        try:
            yextend_list = json.loads(remainder)
        except json.JSONDecodeError:
            # Still can't parse as JSON
            LOGGER.error('Cannot parse yextend output as JSON:\n%s', decoded_output)
            return []

    # Yextend worked!
    try:
        return _convert_yextend_to_yara_match(yextend_list[0])
    except (KeyError, IndexError, TypeError):
        # TypeError covers valid JSON that is not subscriptable (e.g. null or a number).
        LOGGER.exception('Unexpected yextend output format')
        LOGGER.error('Yextend output: %s', decoded_output)
        return []