def get_printable_key_value(self, key, value):
    output = u""
    printkey = key

    if isinstance(value, (str, bytes)):
        output += u"{:20} {}\n".format(printkey, convert_to_unicode(value))
    else:
        for item in value:
            if isinstance(item, (str, bytes)):
                output += u"{:20} {}\n".format(printkey, convert_to_unicode(item))
            else:
                output += u"{:20} {}\n".format(printkey, self.format_list(item, key=key))
            # Only print the key for the first item; subsequent items are indented under it.
            printkey = u""

    return output

def gen_results(self, parser_name, input_file_path):
    """
    Generate JSON results for the given file using the given parser name.
    """
    self.reporter.run_parser(parser_name, input_file_path)
    self.reporter.metadata[INPUT_FILE_PATH] = convert_to_unicode(input_file_path)
    return self.reporter.metadata

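# Hedged usage sketch for gen_results(). Assumes `runner` is an instance of the
# enclosing class (with a configured mwcp.Reporter); the parser name and sample
# path below are hypothetical.
#
#   results = runner.gen_results("foo_parser", "/samples/abc123.bin")
#   print(results[INPUT_FILE_PATH])  # The input file path is recorded in the metadata.
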
def add_metadata(self, key, value):
    """
    Report a metadata item.

    Primary method to report metadata as a result of parsing.

    Args:
        key: string specifying the key of the metadata. Should be one of
            the values specified in fields.json.
        value: string specifying the value of the metadata. Should be a
            utf-8 encoded string or a unicode object.
    """
    keyu = convert_to_unicode(key)
    if value is None or all(not _value for _value in value):
        logger.warning("no values provided for {}, skipping".format(keyu))
        return
    if keyu not in self.fields:
        raise KeyError('Invalid field name: {}'.format(keyu))
    fieldtype = self.fields[keyu]['type']
    try:
        if fieldtype == "listofstrings":
            self._add_metadata_listofstrings(keyu, value)
        elif fieldtype == "listofstringtuples":
            self._add_metadata_listofstringtuples(keyu, value)
        elif fieldtype == "dictofstrings":
            self._add_metadata_dictofstrings(keyu, value)
    except Exception:
        logger.exception("Error adding metadata for key: {}".format(keyu))

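# Hedged usage sketch for add_metadata(). Assumes `reporter` is an instance of the
# enclosing Reporter class; the values are made up for illustration. The field names
# appear elsewhere in this codebase, and the field-type comments are inferred from
# the dispatch above and from the [domain, port, "tcp"] values reported below.
reporter.add_metadata("filepath", "C:\\Windows\\Temp\\evil.exe")    # a listofstrings field
reporter.add_metadata("socketaddress", ["10.0.0.1", "443", "tcp"])  # a listofstringtuples field
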
def _write_csv(input_files, results, csv_path=None):
    """
    Writes out results as a csv.

    :param input_files: List of filenames for each respective metadata.
    :param results: List of metadata dictionaries.
    :param csv_path: Path to write out csv file, defaults to stdout.

    :raises IOError: If csv could not be written out.
    """
    scan_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Add/Tweak metadata.
    for inputfilename, metadata in zip(input_files, results):
        # Add scan date.
        metadata[u'scan_date'] = scan_date
        if u'inputfilename' not in metadata:
            metadata[u'inputfilename'] = inputfilename

        # Flatten 'other' entry so nested values get their own columns,
        # are more readable, and easier to individually analyze.
        #
        # Example:
        #   {'other': {"unique_entry": "value", "unique_key": "value2"}}
        #   Results in columns: other.unique_entry, other.unique_key
        if u'other' in metadata:
            for sub_key, sub_value in metadata[u'other'].items():
                metadata[u'other.{}'.format(convert_to_unicode(sub_key))] = sub_value
            del metadata[u'other']

        # Split outputfile into multiple fields.
        if u'outputfile' in metadata:
            value = list(zip(*metadata[u'outputfile']))
            metadata[u'outputfile.name'] = value[0]
            metadata[u'outputfile.description'] = value[1]
            metadata[u'outputfile.md5'] = value[2]
            del metadata[u'outputfile']

    # Sort columns, but with _STD_CSV_COLUMNS showing up first, in their defined order.
    # (A tuple key avoids the lexicographic-index bug of comparing str(index) values,
    # where "10" would sort before "2".)
    column_names = set(itertools.chain(*(metadata.keys() for metadata in results)))
    column_names = sorted(
        column_names,
        key=lambda x: (_STD_CSV_COLUMNS.index(x), x) if x in _STD_CSV_COLUMNS
        else (len(_STD_CSV_COLUMNS), x))

    # Reformat metadata and write to CSV.
    if csv_path is None:
        csvfile = sys.stdout
    else:
        csvfile = open(csv_path, 'wb' if six.PY2 else 'w')
    try:
        dw = csv.DictWriter(csvfile, fieldnames=column_names, lineterminator='\n')
        dw.writeheader()
        for metadata in results:
            dw.writerow({k: _format_metadata_value(v) for k, v in metadata.items()})
    finally:
        if csv_path:
            csvfile.close()

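# Hedged usage sketch for _write_csv(), with made-up metadata. Each 'outputfile'
# entry is a (name, description, md5) tuple and gets split into its own columns
# by the code above.
_write_csv(
    input_files=["sample1.bin"],
    results=[{
        u'c2_url': [u'http://bad.example.com/gate.php'],
        u'outputfile': [(u'dropped.dll', u'dropped payload', u'd41d8cd98f00b204e9800998ecf8427e')],
    }],
    csv_path=None,  # None writes the csv to stdout.
)
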
def _format_metadata_value(v):
    """Formats metadata value to a human readable unicode string."""
    if isinstance(v, (list, tuple)):
        result = u''
        for j in v:
            if not isinstance(j, (bytes, str)):
                # Nested list/tuple entries are joined into one comma-separated line.
                result += u'{}\n'.format(u', '.join(map(convert_to_unicode, j)))
            else:
                result += u'{}\n'.format(convert_to_unicode(j))
        return result.rstrip()
    elif isinstance(v, dict):
        result = u''
        for field, value in iteritems(v):
            if isinstance(value, (list, tuple)):
                value = u'[{}]'.format(u', '.join(value))
            result += u'{}: {}\n'.format(field, value)
        return result.rstrip()
    else:
        return convert_to_unicode(v)

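# Illustrative inputs/outputs for _format_metadata_value(), derived from the branches above:
#   _format_metadata_value([u'a', u'b'])            -> u'a\nb'
#   _format_metadata_value([(u'host', u'80')])      -> u'host, 80'
#   _format_metadata_value({u'key': [u'a', u'b']})  -> u'key: [a, b]'
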
def _write_csv(input_files, results, csv_path, base64_outputfiles=False):
    """
    Writes out results as a csv.

    :param input_files: List of filenames for each respective metadata.
    :param results: List of metadata dictionaries.
    :param csv_path: Path to write out csv file.
    :param base64_outputfiles: Whether to include base64 outputfiles.

    :raises IOError: If csv could not be written out.
    """
    scan_date = time.ctime()

    # Add/Tweak metadata.
    for inputfilename, metadata in zip(input_files, results):
        # Add scan date.
        metadata['scan_date'] = scan_date
        if 'inputfilename' not in metadata:
            metadata['inputfilename'] = inputfilename

        # Flatten 'other' entry so nested values get their own columns,
        # are more readable, and easier to individually analyze.
        #
        # Example:
        #   {'other': {"unique_entry": "value", "unique_key": "value2"}}
        #   Results in columns: other.unique_entry, other.unique_key
        if 'other' in metadata:
            for sub_key, sub_value in metadata['other'].items():
                metadata['other.{}'.format(convert_to_unicode(sub_key))] = sub_value
            del metadata['other']

        # Split outputfile into multiple fields.
        if 'outputfile' in metadata:
            value = list(zip(*metadata['outputfile']))
            metadata['outputfile.name'] = value[0]
            metadata['outputfile.description'] = value[1]
            metadata['outputfile.md5'] = value[2]
            if len(value) > 3 and base64_outputfiles:
                metadata['outputfile.base64'] = value[3]
            del metadata['outputfile']

    # Sort columns, but with _STD_CSV_COLUMNS showing up first, in their defined order.
    # (A tuple key avoids the lexicographic-index bug of comparing str(index) values.)
    column_names = set(itertools.chain(*(metadata.keys() for metadata in results)))
    column_names = sorted(
        column_names,
        key=lambda x: (_STD_CSV_COLUMNS.index(x), x) if x in _STD_CSV_COLUMNS
        else (len(_STD_CSV_COLUMNS), x))

    # Reformat metadata and write to CSV.
    with open(csv_path, 'wb' if sys.version_info.major < 3 else 'w') as csvfile:
        dw = csv.DictWriter(csvfile, fieldnames=column_names, lineterminator='\n')
        dw.writeheader()
        for metadata in results:
            dw.writerow({k: _format_metadata_value(v) for k, v in metadata.items()})

def _add_metadata_dictofstrings(self, key, value):
    # check for type of other?
    for subkey, subvalue in value.items():
        if isinstance(subvalue, (bytes, str)):
            subkey = convert_to_unicode(subkey)
            subvalue = convert_to_unicode(subvalue)
            obj = self.metadata.setdefault(key, {})
            if subkey in obj:
                # This key already exists; we don't want to clobber it,
                # so we turn the value into a list.
                existing_value = obj[subkey]
                if isinstance(existing_value, list):
                    if subvalue not in obj[subkey]:
                        obj[subkey].append(subvalue)
                elif subvalue != existing_value:
                    obj[subkey] = [existing_value, subvalue]
            else:
                # Normal insert of single value.
                obj[subkey] = subvalue
        else:
            # TODO: support inserts of lists (assuming members are strings)?
            logger.warning("Could not add object of %s to metadata under other using key %s" % (
                str(type(subvalue)), subkey))

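# Worked example of the clobber-avoidance above: repeated subkeys are merged into a
# list rather than overwritten. Assumes `reporter` is an instance of the enclosing
# class; the mutex values are made up.
reporter.add_metadata("other", {"mutex": "abc"})
reporter.add_metadata("other", {"mutex": "def"})
# reporter.metadata["other"]["mutex"] is now ["abc", "def"]
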
def get_report(self, json=False, tabs=1):
    """
    If json parameter is False, get report as a unicode string.
    If json parameter is True, get report as a dictionary.
    """
    if json:
        return self.__dict__
    else:
        tab = tabs * "\t"
        tab_1 = tab + "\t"
        tab_2 = tab_1 + "\t"
        report = tab + "{}:\n".format(self.field)
        report += tab_1 + "Passed: {}\n".format(self.passed)
        if self.missing:
            report += tab_1 + "Missing From New Results:\n"
            for item in self.missing:
                report += tab_2 + "{}\n".format(convert_to_unicode(item))
        if self.unexpected:
            report += tab_1 + "Unexpected New Results:\n"
            for item in self.unexpected:
                report += tab_2 + "{}\n".format(convert_to_unicode(item))
        return report

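# Illustrative text output from get_report(json=False, tabs=1) for an 'address' field
# comparer that failed with one missing and one unexpected value (tabs shown as spaces):
#
#     address:
#         Passed: False
#         Missing From New Results:
#             10.0.0.1
#         Unexpected New Results:
#             10.0.0.2
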
def file_path(self):
    """
    Returns a full file path to the file object.
    This is useful when you want to use this file with libraries that require
    a file path instead of data or a file-like object (e.g. cabinet).

    Always creates a temporary file. This avoids issues where the identify function
    requires the file_path and the file would be output before a description is set.
    """
    if not self._file_path:
        safe_file_name = convert_to_unicode(self.md5)
        file_path = os.path.join(self.reporter.managed_tempdir, safe_file_name)
        with open(file_path, 'wb') as file_object:
            file_object.write(self.file_data)
        self._file_path = file_path
    return self._file_path

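# Hedged usage sketch for file_path, assuming it is exposed as a property on the
# FileObject class (the bare `def` above omits the decorator). `cabinet_lib.parse`
# is hypothetical, standing in for any library that requires a path rather than a
# file-like object.
#
#   cab = cabinet_lib.parse(file_object.file_path)
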
def __init__(
        self, file_data, reporter, pe=None, file_name=None, def_stub=None,
        description=None, output_file=True, use_supplied_fname=True, use_arch=False,
        ext='.bin'):
    """
    Initializes the FileObject.

    :param bytes file_data: Data for the file.
    :param mwcp.Reporter reporter: MWCP reporter.
    :param pefile.PE pe: PE object for the file.
    :param str file_name: File name to use if file is not a PE or use_supplied_fname was specified.
    :param str def_stub: def_stub argument to pass to obtain_original_filename().
    :param str description: Description of the file object.
    :param bool output_file: Whether the file should be output when the dispatcher
        processes the file.
    :param bool use_supplied_fname: Whether the file_name should be used even if
        the file is a PE.
    :param bool use_arch: use_arch argument to pass to obtain_original_filename().
    :param str ext: Default extension to use if not determined from the PE file.
    """
    self._file_path = None
    self._md5 = None
    self._sha1 = None
    self._sha256 = None
    self._stack_strings = None
    self._resources = None
    self._elf = None
    self._elf_attempt = False
    self.output_file = output_file
    self._outputted_file = False
    self._kordesii_cache = {}
    self.parent = None  # Parent FileObject from which this FileObject was extracted (set externally).
    self.parser = None  # This will be set by the dispatcher.
    self.file_data = file_data
    self.reporter = reporter
    self.description = description
    self.knowledge_base = {}
    self.pe = pe or pefileutils.obtain_pe(file_data)

    use_supplied_fname = use_supplied_fname or not self.pe
    if file_name and use_supplied_fname:
        self._file_name = file_name
    else:
        self._file_name = pefileutils.obtain_original_filename(
            def_stub or self.md5, pe=self.pe, use_arch=use_arch, ext=ext)
    self._file_name = convert_to_unicode(self._file_name)

def _compare_results_field(self, results_a, results_b, field_name):
    """
    Compare the values for a single results field in the two passed in results.

    Args:
        results_a (dict): MWCP generated result for a given file using a given parser.
        results_b (dict): MWCP generated result for a given file using a given parser.
        field_name (str): Name of the field to compare. Must be a standardized
            field defined in fields.json.
    """
    # Check if provided field_name is a valid key (based on fields.json).
    try:
        field_name_u = convert_to_unicode(field_name)
    except Exception:
        raise Exception(
            "Failed to convert field name '{}' to unicode.".format(field_name))

    try:
        field_type = self._reporter.fields[field_name_u]['type']
    except KeyError:
        raise Exception(
            "Key error. Field name '{}' was not identified as a standardized field.".format(field_name))

    # Establish values to send for comparison.
    value_a = results_a.get(field_name_u)
    value_b = results_b.get(field_name_u)

    # Now compare results based on field type (see "fields.json" for more details).
    if field_type == "listofstrings":
        comparer = ListOfStringsComparer(field_name_u)
        comparer.compare(value_a, value_b)
    elif field_type == "listofstringtuples":
        comparer = ListOfStringTuplesComparer(field_name_u)
        comparer.compare(value_a, value_b)
    elif field_type == "dictofstrings":
        comparer = DictOfStringsComparer(field_name_u)
        comparer.compare(value_a, value_b)
    else:
        raise Exception("Unhandled field type '{}' found for field name '{}'.".format(
            field_type, field_name))
    return comparer

def run(self):
    """Run test case."""
    start_time = default_timer()

    self._reporter.run_parser(self.parser, self.input_file_path)
    self._reporter.metadata[INPUT_FILE_PATH] = convert_to_unicode(self.input_file_path)
    results = self._reporter.metadata

    comparer_results = self._compare_results(self.expected_results, results)
    passed = all(comparer.passed for comparer in comparer_results)

    done_time = default_timer()
    run_time = done_time - start_time

    return TestResult(
        parser=self.parser,
        input_file_path=self.input_file_path,
        passed=passed,
        errors=self._reporter.errors,
        debug=self._reporter.metadata.get('debug', None),
        results=comparer_results,
        run_time=run_time,
    )

def _add_metadata_listofstrings(self, key, value):
    if not value:
        logger.info("no values provided for {}, skipping".format(key))
        return
    value = convert_to_unicode(value)
    obj = self.metadata.setdefault(key, [])
    # Debug messages are never deduplicated.
    if key == 'debug' or value not in obj:
        obj.append(value)

    if key == "filepath":
        # Use ntpath instead of os.path so we are consistent across platforms. ntpath
        # should work for both windows and unix paths. os.path works for the platform
        # you are running on, not necessarily what the malware was written for.
        # Ex. when running mwcp on linux to process windows malware, os.path will fail
        # due to not handling backslashes correctly.
        self.add_metadata("filename", ntpath.basename(value))
        self.add_metadata("directory", ntpath.dirname(value))

    if key == "c2_url":
        self.add_metadata("url", value)

    if key in ("c2_address", "proxy_address"):
        self.add_metadata("address", value)

    if key == "serviceimage":
        # We use the tactic of looking for the first '.exe' in the value. This is
        # not guaranteed to be reliable.
        if '.exe' in value:
            self.add_metadata("filepath", value[0:value.find('.exe') + 4])

    if key == "servicedll":
        self.add_metadata("filepath", value)

    if key == "ssl_cer_sha1":
        if not self.SHA1_RE.match(value):
            logger.error("Invalid SHA1 hash found: {!r}".format(value))

    if key in ("url", "c2_url"):
        # Examples:
        #   http://[fe80::20c:1234:5678:9abc]:80/badness
        #   http://bad.com:80
        #   ftp://127.0.0.1/really/bad?hostname=pwned
        match = self.URL_RE.search(value)
        if not match:
            logger.error("Error parsing as url: {}".format(value))
            return
        if match.group("path"):
            self.add_metadata("urlpath", match.group("path"))
        if match.group("address"):
            address = match.group("address").rstrip(': ')
            if address.startswith("["):
                # ipv6--something like [fe80::20c:1234:5678:9abc]:80
                domain, found, port = address[1:].partition(']:')
            else:
                domain, found, port = address.partition(":")
            if found:
                if port:
                    if key == "c2_url":
                        self.add_metadata("c2_socketaddress", [domain, port, "tcp"])
                    else:
                        self.add_metadata("socketaddress", [domain, port, "tcp"])
                else:
                    logger.error("Invalid URL {!r} found ':' at end without a port.".format(address))
            else:
                if key == "c2_url":
                    self.add_metadata("c2_address", address)
                else:
                    self.add_metadata("address", address)

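# Worked example of the subfield derivation above: reporting a 'c2_url' cascades into
# several derived fields. Assumes `reporter` is an instance of the enclosing class;
# the URL is made up.
reporter.add_metadata("c2_url", "http://bad.example.com:8080/gate.php")
# Derived entries (per the logic above):
#   url              -> http://bad.example.com:8080/gate.php
#   urlpath          -> /gate.php
#   c2_socketaddress -> [bad.example.com, 8080, tcp]
#   socketaddress    -> [bad.example.com, 8080, tcp]  (via the nested 'url' report)
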
def _add_metadata_listofstrings(self, key, value):
    if not value:
        logger.error("no values provided for {}, skipping".format(key))
        return
    value = convert_to_unicode(value)
    obj = self.metadata.setdefault(key, [])
    # Debug messages are never deduplicated.
    if self._disable_value_dedup or key == 'debug' or value not in obj:
        obj.append(value)

    if self._disable_auto_subfield_parsing:
        return

    if key == "filepath":
        # Use ntpath instead of os.path so we are consistent across platforms. ntpath
        # should work for both windows and unix paths. os.path works for the platform
        # you are running on, not necessarily what the malware was written for.
        # Ex. when running mwcp on linux to process windows malware, os.path will fail
        # due to not handling backslashes correctly.
        self.add_metadata("filename", ntpath.basename(value))
        self.add_metadata("directory", ntpath.dirname(value))

    if key == "c2_url":
        self.add_metadata("url", value)

    if key in ("c2_address", "proxy_address"):
        self.add_metadata("address", value)

    if key == "serviceimage":
        # We use the tactic of looking for the first '.exe' in the value. This is
        # not guaranteed to be reliable.
        if '.exe' in value:
            self.add_metadata("filepath", value[0:value.find('.exe') + 4])

    if key == "servicedll":
        self.add_metadata("filepath", value)

    if key == "ssl_cer_sha1":
        if not self.SHA1_RE.match(value):
            logger.error("Invalid SHA1 hash found: {!r}".format(value))

    if key in ("url", "c2_url"):
        # Examples:
        #   http://[fe80::20c:1234:5678:9abc]:80/badness
        #   http://bad.com:80
        #   ftp://127.0.0.1/really/bad?hostname=pwned
        match = self.URL_RE.search(value)
        if not match:
            logger.error("Error parsing as url: {}".format(value))
            return
        if match.group("path"):
            self.add_metadata("urlpath", match.group("path"))
        if match.group("address"):
            address = match.group("address").rstrip(': ')
            if address.startswith("["):
                # ipv6--something like [fe80::20c:1234:5678:9abc]:80
                domain, found, port = address[1:].partition(']:')
            else:
                domain, found, port = address.partition(":")
            if found:
                if port:
                    if key == "c2_url":
                        self.add_metadata("c2_socketaddress", [domain, port, "tcp"])
                    else:
                        self.add_metadata("socketaddress", [domain, port, "tcp"])
                else:
                    logger.error("Invalid URL {!r} found ':' at end without a port.".format(address))
            else:
                if key == "c2_url":
                    self.add_metadata("c2_address", address)
                else:
                    self.add_metadata("address", address)

def file_name(self, value):
    # If someone changes the name, record the rename.
    value = convert_to_unicode(value)
    if self._file_name != value:
        self.reporter.debug('[*] Renamed {} to {}'.format(self._file_name, value))
    self._file_name = value

def file_name(self, value):
    # If someone changes the name, record the rename.
    value = convert_to_unicode(value)
    if self._file_name != value:
        logger.info('Renamed {} to {}'.format(self._file_name, value))
    self._file_name = value

def __init__(
        self,
        file_data: bytes,
        reporter,
        pe: pefile.PE = None,
        file_name=None,
        def_stub=None,
        description=None,
        output_file=True,
        use_supplied_fname=True,
        use_arch=False,
        ext=".bin",
):
    """
    Initializes the FileObject.

    :param bytes file_data: Data for the file.
    :param mwcp.Reporter reporter: MWCP reporter.
    :param pefile.PE pe: PE object for the file.
    :param str file_name: File name to use if file is not a PE or use_supplied_fname was specified.
    :param str def_stub: def_stub argument to pass to obtain_original_filename().
    :param str description: Description of the file object.
    :param bool output_file: Whether the file should be output when the dispatcher
        processes the file.
    :param bool use_supplied_fname: Whether the file_name should be used even if
        the file is a PE.
    :param bool use_arch: use_arch argument to pass to obtain_original_filename().
    :param str ext: Default extension to use if not determined from the PE file.
    """
    # Ensure we are getting a bytes string. Libraries like pefile depend on this.
    if not isinstance(file_data, bytes):
        raise TypeError("file_data must be a bytes string.")

    self._file_path = None
    self._md5 = None
    self._sha1 = None
    self._sha256 = None
    self._stack_strings = None
    self._static_strings = None
    self._resources = None
    self._elf = None
    self._elf_attempt = False
    self.output_file = output_file
    self._outputted_file = False
    self._kordesii_cache = {}
    self.parent = None  # Parent FileObject from which this FileObject was extracted (set externally).
    self.parser = None  # This will be set by the dispatcher.
    self.children = []  # List of residual FileObjects extracted from this file.
    self.file_data = file_data
    self.reporter = reporter
    self.description = description
    self.knowledge_base = {}
    self.pe = pe or pefileutils.obtain_pe(file_data)

    use_supplied_fname = use_supplied_fname or not self.pe
    if file_name and use_supplied_fname:
        self._file_name = file_name
    else:
        self._file_name = pefileutils.obtain_original_filename(
            def_stub or self.md5, pe=self.pe, use_arch=use_arch, ext=ext
        )
    self._file_name = convert_to_unicode(self._file_name)

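# Hedged construction sketch for FileObject, grounded in the signature above.
# Assumes `reporter` is an mwcp.Reporter and `data` holds the extracted file's
# bytes; the description is illustrative.
child = FileObject(
    file_data=data,
    reporter=reporter,
    description="Decrypted payload",
    output_file=True,  # Output the file when the dispatcher processes it.
)
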