def hash_search(self, args, file, opts):
    document = db.file_collection.select(file.sha256_digest)
    proc = subprocess.run(['grep', document['md5_digest'], NSRL_PATH],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # grep exits 1 when no line matches and 2 on error
    if proc.returncode == 1:
        raise error.CommandWarning('file not found in the NSRL database')
    elif proc.returncode == 2:
        raise error.CommandWarning('nist-nsrl module returned a status 2 error: ' + str(proc.stderr))
    # Store the SHA1, MD5 and File Name hits in a list of tuples
    # We specifically drop the first and last char to remove quotes from the NIST DB file
    hits = []
    for line in str(proc.stdout, encoding='utf-8').splitlines():
        hits.append((str(line).split(',')[0][1:-1],
                     str(line).split(',')[1][1:-1],
                     str(line).split(',')[3][1:-1]))
    # Deduplicate the list
    deduphits = list(set(hits))
    output = []
    for line in deduphits:
        output += [{
            'sha1': str(line[0]),
            'md5': str(line[1]),
            'file': str(line[2])
        }]
    return {'hits': output}
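# Illustrative note (not part of the scale): the index/slice handling above assumes the
# NSRL RDS data is a quoted CSV whose fields 0, 1 and 3 are SHA-1, MD5 and FileName, e.g.
#   "<sha1>","<md5>","<crc32>","<filename>","<filesize>","<productcode>","<opsystemcode>","<specialcode>"
# A hedged sketch using the csv module would avoid the manual quote-stripping
# (field layout assumed as above):
#
#     import csv
#     import io
#
#     def parse_nsrl_lines(text):
#         hits = set()
#         for row in csv.reader(io.StringIO(text)):
#             if len(row) >= 4:
#                 hits.add((row[0], row[1], row[3]))
#         return hits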
def olevba_streams(self, args, file, opts):
    output = []
    try:
        vbaparser = olevba3.VBA_Parser(file.file_path)
    except Exception:
        raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')
    try:
        vbaparser.detect_vba_macros()
    except Exception:
        vbaparser.close()
        raise error.CommandWarning('no macro was detected in this file')
    try:
        macros = vbaparser.extract_all_macros()
    except Exception:
        raise error.CommandWarning('vbaparser.extract_all_macros() failed to extract macros')
    i = 1
    for m in macros:  # pylint: disable=invalid-name
        try:
            output += [{
                'stream': str(i),
                'stream_path': str(m[1]),
                'vba_filename': str(m[2]),
                'code': str(m[3].decode('utf-8'))
            }]
        except Exception:
            output += [{
                'stream': str(i),
                'stream_path': str(m[1]),
                'vba_filename': str(m[2]),
                'code': str(m[3])
            }]
        i += 1
    return output
def sections(self, args, file, opts):
    try:
        pe = pefile.PE(file.file_path, fast_load=True)  # pylint: disable=invalid-name, no-member
    except Exception as err:
        raise error.CommandWarning('unable to parse with pefile: %s' % err)
    output = []
    try:
        for section in pe.sections:
            if isinstance(section.Name, bytes):
                section_name = section.Name.decode()
            else:
                section_name = section.Name
            output += [{
                'name': str(section_name),
                'virtual_address': str(hex(section.VirtualAddress)),
                'virtual_size': str(hex(section.Misc_VirtualSize)),
                'physical_address': str(section.PointerToRawData),
                'physical_size': str(section.SizeOfRawData),
                'entropy': str(section.get_entropy())
            }]
    except Exception as err:
        raise error.CommandWarning('an error occurred: %s' % err)
    return output
def imports(self, args, file, opts):
    try:
        pe = pefile.PE(file.file_path)  # pylint: disable=invalid-name, no-member
    except Exception as err:
        raise error.CommandWarning('unable to parse with pefile: %s' % err)
    output = {}
    try:
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                dll = []
                name = None
                for imp in entry.imports:
                    if not name:
                        name = entry.dll.decode('utf-8')
                    dll += [{
                        'name': imp.name.decode('utf-8') if imp.name else str(imp.name),
                        'address': hex(imp.address)
                    }]
                if name:
                    output[name] = dll
    except Exception as err:
        raise error.CommandWarning('an error occurred: %s' % err)
    return output
def pdfid(self, args, file, opts):
    try:
        proc = subprocess.run(["python2", PDFID_PATH, file.file_path],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        raise error.CommandWarning("an unknown error occurred when running pdfid: %s" % err)
    output = str(proc.stdout, encoding='utf-8')
    if "Not a PDF document" in output:
        raise error.CommandWarning('file is not a pdf document')
    return {'data': output}
def scan(self, args, file, opts):
    try:
        clamd = pyclamd.ClamdAgnostic()
        res = clamd.scan_file(file.file_path)
    except Exception:
        raise error.CommandWarning('scanning file with clamav failed. check apparmor permissions?')
    if res is None:
        return {'hits': 'no hits with clamav'}
    elif res[file.file_path][0] == 'FOUND':
        return {'hits': str(res[file.file_path][1])}
    else:
        raise error.CommandWarning('an unknown error occurred while scanning the file with clamav')
def mraptor(self, args, file, opts):
    # Monkeypatch 1 - This is to force the script argument to the appropriate file location for analysis
    def temp_args(_a, _b, _c, _d):
        return [file.file_path]

    # Deploy Monkeypatch 1
    import optparse
    get_args = optparse.OptionParser._get_args
    optparse.OptionParser._get_args = temp_args
    # Monkeypatch - This redirects stdout to a stream that can be collected later for results
    sys.stdout = io.StringIO()
    result = []
    try:
        vbaparser = olevba3.VBA_Parser(file.file_path)
    except Exception:
        raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')
    filetype = olevba3.TYPE2TAG[vbaparser.type]
    if not vbaparser.detect_vba_macros():
        vbaparser.close()
        raise error.CommandWarning('file does not have macros')
    try:
        vba_code_all_modules = ''
        for (subfilename, stream_path, vba_filename, vba_code) in vbaparser.extract_all_macros():
            vba_code_all_modules += vba_code + '\n'
        m = mraptor3.MacroRaptor(vba_code_all_modules)
        m.scan()
        if m.suspicious:
            result += [{
                'Result': 'SUSPICIOUS',
                'Flags': str(m.get_flags()),
                'Match_on': str(m.matches)
            }]
        else:
            result += [{
                'Result': 'Macro seems fine'
            }]
    except Exception:
        # Revert patched functions to originals
        optparse.OptionParser._get_args = get_args
        sys.stdout = sys.__stdout__
        raise error.CommandWarning('failed to parse macros')
    # Revert patched functions to originals
    optparse.OptionParser._get_args = get_args
    sys.stdout = sys.__stdout__
    reload(oledir)
    return result
def peepdf(self, args, file, opts):
    # TODO: peepdf needs additional dependencies for further functionality
    if not str(file.magic).startswith('PDF'):
        raise error.CommandWarning('file is not a pdf document')
    try:
        proc = subprocess.run(["python2", PEEPDF_PATH, file.file_path],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        raise error.CommandWarning("an unknown error occurred when running peepdf: %s" % err)
    return {'data': str(proc.stdout, encoding='utf-8')}
def pdfparser(self, args, file, opts):
    if not str(file.magic).startswith('PDF'):
        raise error.CommandWarning('file is not a pdf document')
    try:
        proc = subprocess.run(["python2", PDF_PARSER_PATH, file.file_path],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        raise error.CommandWarning("an unknown error occurred when running pdfparser: %s" % err)
    return {'data': str(proc.stdout, encoding='utf-8')}
def stack_strings(self, args, file, opts):
    try:
        proc = subprocess.run([self.lifer_path, file.file_path, '--no-static-strings', '--no-decoded-strings'],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except TimeoutError:
        raise error.CommandWarning("timeout when running lifer")
    if proc.stderr:
        raise error.CommandWarning("an error occurred with the lifer module:\n%s" % proc.stderr.decode('utf-8'))
    if not proc.stdout:
        raise error.CommandWarning("lifer stack strings returned no output")
    return {'stack_strings': proc.stdout.decode('utf-8')}
def all_strings(self, args, file, opts):
    try:
        proc = subprocess.run([self.floss_path, file.file_path],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except TimeoutError:
        raise error.CommandWarning("timeout when running floss")
    if proc.stderr:
        raise error.CommandWarning("an error occurred with the floss module:\n%s" % proc.stderr.decode('utf-8'))
    if not proc.stdout:
        raise error.CommandWarning("floss all strings returned no output")
    return {'all_strings': proc.stdout.decode('utf-8')}
def olemeta(self, args, file, opts):
    # Monkeypatch 1 - This is to force the script argument to the appropriate file location for analysis
    def temp_args(_a, _b):
        return [file.file_path]

    # Deploy Monkeypatch 1
    import optparse
    get_args = optparse.OptionParser._get_args
    optparse.OptionParser._get_args = temp_args
    # Monkeypatch - This redirects stdout to a stream that can be collected later for results
    sys.stdout = io.StringIO()
    try:
        olemeta.main()
    except Exception:
        # Revert patched functions to originals
        optparse.OptionParser._get_args = get_args
        sys.stdout = sys.__stdout__
        raise error.CommandWarning('olemeta dump error')
    result = {
        'Result': sys.stdout.getvalue()  # To be displayed properly !!!
    }
    # Revert patched functions to originals
    optparse.OptionParser._get_args = get_args
    sys.stdout = sys.__stdout__
    reload(oledir)
    return result
def info(self, args, file, opts):
    try:
        pe = pefile.PE(file.file_path)  # pylint: disable=invalid-name, no-member
        machine = pe.FILE_HEADER.Machine
    except Exception as err:
        raise error.CommandWarning('unable to parse with pefile: %s' % err)
    if machine == 332:  # 0x014c IMAGE_FILE_MACHINE_I386
        arch = 'i386'
    elif machine == 512:  # 0x0200 IMAGE_FILE_MACHINE_IA64
        arch = 'ia64'
    elif machine == 34404:  # 0x8664 IMAGE_FILE_MACHINE_AMD64
        arch = 'amd64'
    else:
        arch = 'unknown'
    output = {
        'compile_time': str(datetime.utcfromtimestamp(pe.FILE_HEADER.TimeDateStamp)),
        'language': str(pefunctions.code_language(pe, file.file_path)),
        'architecture': arch,
        'certificate': pefunctions.get_certificate(pe)
    }
    return output
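# Illustrative note (not part of the scale): pefile ships a two-way MACHINE_TYPE lookup,
# so the hard-coded constants above could instead be resolved by name. Hedged sketch,
# assuming the standard pefile API:
#
#     machine_name = pefile.MACHINE_TYPE.get(pe.FILE_HEADER.Machine, 'unknown')
#     # e.g. 0x8664 -> 'IMAGE_FILE_MACHINE_AMD64', 0x14c -> 'IMAGE_FILE_MACHINE_I386'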
def all_info(self, args, file, opts):
    try:
        pe = pefile.PE(file.file_path)  # pylint: disable=invalid-name, no-member
    except Exception as err:
        raise error.CommandWarning('unable to parse with pefile: %s' % err)
    return {'info': pe.dump_info()}
def imphash(self, args, file, opts):
    try:
        pe = pefile.PE(file.file_path)  # pylint: disable=invalid-name, no-member
    except Exception as err:
        raise error.CommandWarning('unable to parse with pefile: %s' % err)
    document = db.file_collection.select(file.sha256_digest)
    if 'imphash' not in document:
        imphash = str(pe.get_imphash())
        if imphash == "":
            return {"imphash": "none"}
        data = {'imphash': imphash}
        if not db.file_collection.update(file.sha256_digest, data):
            raise error.CommandWarning('error adding imphash into file document %s' % file.sha256_digest)
        document = db.file_collection.select(file.sha256_digest)
    return {'imphash': document['imphash']}
def pehash(self, args, file, opts):
    try:
        pe = pefile.PE(file.file_path)  # pylint: disable=invalid-name, no-member
    except Exception as err:
        raise error.CommandWarning('unable to parse with pefile: %s' % err)
    document = db.file_collection.select(file.sha256_digest)
    if 'pehash' not in document:
        pehash = pefunctions.calculate_pehash(pe)
        if 'An error occured' in pehash:
            raise error.CommandWarning('unable to calculate pehash with pefile')
        data = {'pehash': pehash}
        if not db.file_collection.update(file.sha256_digest, data):
            raise error.CommandWarning('error adding pehash into file document %s' % file.sha256_digest)
        document = db.file_collection.select(file.sha256_digest)
    return {'pehash': document['pehash']}
def oleobj_markdown(self, json):
    output = "Ole obj from oletools\n"
    if not json:
        raise error.CommandWarning('No ole object was found')
    for i in json:
        if not i:
            raise error.CommandWarning('No ole object was found')
        output += "**Found embedded file =** " + str(i['Saved_Filename']) + "\n"
        output += "**Source path =** " + str(i['Source_path']) + "\n"
        output += "**Temp path =** " + str(i['Temp_path']) + "\n"
        for j in i['samples']:
            # XXX - Fix me hardcoded URL
            output += "**Extracted_file.URL:** " + md.url("SNAKE_URL", "/sample/" + str(j['sha256_digest'])) + '\n'
            output += "**Extracted_file.DESCRIPTION:** " + str(j['description']) + '\n'
            output += "**Extracted_file.MIME:** " + str(j['mime']) + '\n'
            output += "**Extracted_file.SIZE:** " + str(j['size']) + '\n'
            output += "**Extracted_file.MAGIC:** " + str(j['magic']) + '\n'
            output += "**Extracted_file.SHA256:** " + str(j['sha256_digest']) + '\n'
    return output
def trid(self, args, file, opts):
    try:
        return {
            'trid': str(subprocess.check_output([
                self.trid_path, file.file_path, '-d:{}'.format(self.triddefs_path)
            ]), encoding="utf-8").lstrip('\r\n')
        }
    except Exception:
        raise error.CommandWarning("an error occurred with the trid module")
def peid(self, args, file, opts):
    try:
        pe = pefile.PE(file.file_path)  # pylint: disable=invalid-name, no-member
        userdb_path = path.join(path.dirname(__file__), 'userdb.txt')
        sigs = peutils.SignatureDatabase(userdb_path)
    except Exception as err:
        raise error.CommandWarning('unable to parse with pefile: %s' % err)
    try:
        matches = sigs.match_all(pe, ep_only=True)
    except Exception:
        raise error.CommandWarning('error matching peid signatures')
    # match_all returns a list of lists, hence the extra indexing
    output = []
    if not matches:
        pass
    elif len(matches) == 1:
        output += [str(matches[0][0])]
    else:
        for match in matches:
            output += [match[0]]
    return output
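# Illustrative note (not part of the scale): peutils.SignatureDatabase expects a PEiD-style
# userdb.txt alongside this module. Hedged sketch of the entry format it parses
# (the rule name and byte pattern below are placeholders, not real signatures):
#
#     [Example Packer v1.0]
#     signature = 60 BE ?? ?? ?? ?? 8D BE ?? ?? ?? ?? 57 83 CD FF
#     ep_only = true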
def oleid(self, args, file, opts):
    try:
        oid = oleid.OleID(file.file_path)
    except Exception:
        raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')
    indicators = oid.check()
    output = []
    for i in indicators:
        output += [{
            'name': str(i.name),
            'value': str(i.value.decode('utf-8')) if isinstance(i.value, bytes) else str(i.value),
            'description': str(i.description)
        }]
    return output
def exports(self, args, file, opts):
    try:
        pe = pefile.PE(file.file_path)  # pylint: disable=invalid-name, no-member
    except Exception as err:
        raise error.CommandWarning('unable to parse with pefile: %s' % err)
    output = []
    if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
        for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
            output += [{
                'name': exp.name.decode('utf-8') if exp.name else str(exp.name),
                'address': hex(pe.OPTIONAL_HEADER.ImageBase + exp.address)
            }]
    return output
def olevba_keywords(self, args, file, opts):
    try:
        vbaparser = olevba3.VBA_Parser(file.file_path)
    except Exception:
        raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')
    output = []
    if not vbaparser.detect_vba_macros():
        vbaparser.close()
        return output
    results = vbaparser.analyze_macros()
    for kw_type, keyword, description in results:
        output += [{
            'type': kw_type,
            'keyword': str(str(keyword).encode('utf-8'))[2:-1],
            'description': description
        }]
    vbaparser.close()
    return output
def metadata(self, args, file, opts):
    try:
        meta = olefile.OleFileIO(file.file_path).get_metadata()
    except Exception:
        raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')
    attribs = [
        'codepage', 'title', 'subject', 'author', 'keywords', 'comments', 'template',
        'last_saved_by', 'revision_number', 'total_edit_time', 'last_printed', 'create_time',
        'last_saved_time', 'num_pages', 'num_words', 'num_chars', 'thumbnail',
        'creating_application', 'security', 'codepage_doc', 'category', 'presentation_target',
        'bytes', 'lines', 'paragraphs', 'slides', 'notes', 'hidden_slides', 'mm_clips',
        'scale_crop', 'heading_pairs', 'titles_of_parts', 'manager', 'company', 'links_dirty',
        'chars_with_spaces', 'unused', 'shared_doc', 'link_base', 'hlinks', 'hlinks_changed',
        'version', 'dig_sig', 'content_type', 'content_status', 'language', 'doc_version'
    ]
    output = {}
    for attrib in attribs:
        if isinstance(getattr(meta, attrib), bytes):
            output[attrib] = str(getattr(meta, attrib).decode("utf-8"))
        else:
            output[attrib] = str(getattr(meta, attrib))
    return output
def check(self):
    strings = shutil.which('strings')
    if not strings:
        raise error.CommandWarning("Binary 'strings' not found")
    return
def oledir(self, args, file, opts):
    # Monkeypatch 1 - This is to force the script argument to the appropriate file location for analysis
    def temp_args(_a, _b):
        return [file.file_path]

    # Monkeypatch 2 - This customises the write function of tablewriter so that the output can be collected by iostream
    def custom_write(self, a):
        a = a.replace(u"\uFFFD", '\n')
        print(a, end="")

    # Deploy Monkeypatch 1
    import optparse
    get_args = optparse.OptionParser._get_args
    optparse.OptionParser._get_args = temp_args
    res = []
    result = []
    try:
        ole = olefile.OleFileIO(file.file_path)
    except Exception:
        raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')
    # Deploy Monkeypatch 2
    for id in range(len(ole.direntries)):
        res_dict = dict()
        res_dict['id'] = id
        d = ole.direntries[id]
        if d is None:
            # this direntry is not part of the tree: either unused or an orphan
            d = ole._load_direntry(id)  # ole.direntries[id]
            # print('%03d: %s *** ORPHAN ***' % (id, d.name))
            if d.entry_type == olefile.STGTY_EMPTY:
                res_dict['Status'] = 'unused'
            else:
                res_dict['Status'] = 'ORPHAN'
        else:
            # print('%03d: %s' % (id, d.name))
            res_dict['Status'] = '<Used>'
        if d.name.startswith('\x00'):
            # this may happen with unused entries, the name may be filled with zeroes
            res_dict['Name'] = ''
        else:
            # handle non-printable chars using repr(), remove quotes:
            res_dict['Name'] = repr(d.name)[1:-1]
        res_dict['Left'] = oledir.sid_display(d.sid_left)
        res_dict['Right'] = oledir.sid_display(d.sid_right)
        res_dict['Child'] = oledir.sid_display(d.sid_child)
        res_dict['Type'] = oledir.STORAGE_NAMES.get(d.entry_type, 'Unknown')
        res_dict['1st_Sect'] = d.isectStart
        res_dict['Size'] = d.size
        result.append(res_dict)
    # Retrieve the CLSID, making the second table
    result2 = []
    rootname = ole.get_rootentry_name()
    entry_id = 0
    clsid = ole.root.clsid
    clsid_text, clsid_color = oledir.clsid_display(clsid)
    res_dict = dict()
    res_dict['id2'] = entry_id
    res_dict['obj_tree'] = '-'
    res_dict['Name'] = rootname
    res_dict['Size'] = '-'
    res_dict['CLSID'] = clsid_text
    result2.append(res_dict)
    # Create the macro tree; as it is sorted, obj_tree allows us to reconstruct the tree of the macros
    for entry in sorted(ole.listdir(storages=True)):
        res_dict = dict()
        obj_tree = 0
        name = entry[-1]
        # handle non-printable chars using repr(), remove quotes:
        name = repr(name)[1:-1]
        name_color = None
        obj_tree = len(entry) - 1
        indented_name = "WS" * (len(entry) - 1) + name
        entry_id = ole._find(entry)
        try:
            size = ole.get_size(entry)
        except Exception:
            size = '-'
        clsid = ole.getclsid(entry)
        clsid_text, clsid_color = oledir.clsid_display(clsid)
        res_dict['id2'] = entry_id
        res_dict['obj_tree'] = obj_tree
        res_dict['Name'] = indented_name
        res_dict['Size'] = size
        res_dict['CLSID'] = clsid_text
        result2.append(res_dict)
    res.append(result)
    res.append(result2)
    # Revert patched function to originals
    ole.close()
    optparse.OptionParser._get_args = get_args
    reload(oledir)
    return res
def scan(self, args, file, opts):  # pylint: disable=too-many-locals, too-many-branches
    compiled_rule_files = []
    output = []
    if 'rule' in args and args['rule'] != '':
        rule = args['rule']
        rule = rule.strip('.yar').strip('.yara')
        rule += '.yarac'
        path = os.path.join(RULES_PATH, rule)
        if os.path.exists(path):
            compiled_rule_files.append(path)
        else:
            raise error.CommandError('rule file does not exist')
    else:
        for _root, _dirs, files in os.walk(RULES_PATH):
            for f in files:
                if f.endswith('.yarac'):
                    compiled_rule_files.append(os.path.join(RULES_PATH, f))
    # Load each of the compiled yara files
    for compiled_rule_file in compiled_rule_files:
        try:
            rules = yara.load(compiled_rule_file)
            matches = rules.match(file.file_path)
        except Exception:  # noqa pylint: disable=broad-except
            continue
        # Skip if no rule matches
        if not matches:
            continue
        # If the rule index doesn't exist we are likely using the yara plugin and not yara-python
        if matches[0].rule is None:
            raise error.CommandWarning('incorrect yara python plugin installed')
        # Loop through each match and append to output
        for match in matches:
            try:
                if match.rule in config.scale_configs['yara']['blacklisted_rules']:
                    continue
            except Exception:  # noqa pylint: disable=broad-except
                continue
            # String matches are stored as byte arrays, and whilst they can be converted to utf-8 strings,
            # in the case of hex values these are converted to ASCII which is not the desired output, e.g.:
            #   b'This program cannot be run in DOS mo' = 'This program cannot be run in DOS mo'
            #   b'\x40\x410x42' = @A0x42
            output += [{
                'file': str(os.path.basename(compiled_rule_file)),
                'rule': str(match.rule),
                'hits': [{'hit': str(x[2])[2:-1], 'offset': str(x[0])} for x in match.strings],
                'description': str(match.meta['description']) if 'description' in match.meta else '',
                'author': str(match.meta['author']) if 'author' in match.meta else '',
            }]
    return output
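# Illustrative note (not part of the scale): the loop above only loads pre-compiled '.yarac'
# files from RULES_PATH. A hedged sketch of producing them with yara-python, assuming the
# plain-text '.yar'/'.yara' sources also live under RULES_PATH:
#
#     import os
#     import yara
#
#     for name in os.listdir(RULES_PATH):
#         if name.endswith(('.yar', '.yara')):
#             src = os.path.join(RULES_PATH, name)
#             yara.compile(filepath=src).save(os.path.splitext(src)[0] + '.yarac')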
def __init__(self, *args, **kwargs):  # pylint: disable=unused-argument
    raise error.CommandWarning('error')
def check(self):
    radare2 = shutil.which('radare2')
    if not radare2:
        raise error.CommandWarning("binary 'radare2' not found")
    return
def oleobj(self, args, file, opts):
    # Monkeypatch 1 - This is to force the script argument to the appropriate file location for analysis
    def temp_args(_a, _b, _c):
        return [file.file_path]

    # Deploy Monkeypatch 1
    import optparse
    get_args = optparse.OptionParser._get_args
    optparse.OptionParser._get_args = temp_args
    output = []
    fname_prefix = oleobj.sanitize_filename(file.file_path)
    index = 1
    DUMP_CHUNK_SIZE = 4096
    for ole in oleobj.find_ole(file.file_path, None):
        if ole is None:
            # no ole file found
            continue
        for path_parts in ole.listdir():
            stream_path = '/'.join(path_parts)
            if path_parts[-1] == '\x01Ole10Native':
                result = dict()
                stream = None
                try:
                    stream = ole.openstream(path_parts)
                    # print('extract file embedded in OLE object from stream %r:' % stream_path)
                    # print('Parsing OLE Package')
                    opkg = oleobj.OleNativeStream(stream)
                    # leave stream open until dumping is finished
                except Exception:
                    raise error.CommandWarning("*** Not an OLE 1.0 Object")
                    if stream is not None:
                        stream.close()
                    continue
                if opkg.is_link:
                    raise error.CommandWarning('Object is not embedded but only linked to - skip')
                    continue
                result['SHA256_AnalyzedFile'] = fname_prefix
                result['Extracted_file.NAME'] = opkg.filename
                result['Source_path'] = opkg.src_path
                result['Temp_path'] = opkg.temp_path
                if opkg.filename:
                    fname = '%s_%s' % (fname_prefix, oleobj.sanitize_filename(opkg.filename))
                else:
                    fname = '%s_object_%03d.noname' % (fname_prefix, index)
                try:
                    result['Saved_Filename'] = fname
                    samples = []
                    with tempfile.TemporaryDirectory(dir=path.abspath(path.expanduser(config.snake_config['cache_dir']))) as temp_dir:
                        file_path = path.join(temp_dir, fname)
                        with open(file_path, 'wb') as writer:
                            n_dumped = 0
                            next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size)
                            while next_size:
                                data = stream.read(next_size)
                                writer.write(data)
                                n_dumped += len(data)
                                if len(data) != next_size:
                                    raise error.CommandWarning('Wanted to read {0}, got {1}'.format(next_size, len(data)))
                                    break
                                next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size - n_dumped)
                        file_schema = schema.FileSchema().load({
                            'name': fname,
                            'description': 'extracted with oleobj from ' + fname
                        })
                        new_file = fs.FileStorage()
                        new_file.create(file_path)
                        new_document = submitter.submit(file_schema, enums.FileType.FILE, new_file, file, NAME)
                        new_document = schema.FileSchema().dump(schema.FileSchema().load(new_document))
                        samples += [new_document]
                        for i in samples:
                            i['name'] = fname
                        result['samples'] = samples
                except Exception as exc:
                    raise error.CommandWarning('error dumping to {0} ({1})'.format(fname, exc))
                finally:
                    stream.close()
                index += 1
                output.append(result)
    if not output:
        raise error.CommandWarning("No ole object was found")
    return output
# Table generated is good as is, why change it?
def mraptor_markdown(self, json):
    output = ""
    if not json:
        output = "No json\n"
        return output
    for i in json:
        output += "**Macro is:** " + str(i['Result']) + "\n"
    return output