def main():
    # Sleep loop: the program runs indefinitely until killed (Ctrl-C)
    while True:
        # Recursively walk the target path and build a filename relative to the program
        for root, dirs, files in os.walk(pathtofiles):
            for name in files:
                filename = os.path.join(root, name)
                # Run exiftool to extract all metadata from the file
                with exiftool.ExifTool() as et:
                    metadata = et.get_metadata(filename)
                # Call the md5sum function to get a hash, then use it as the OID to prevent duplicates in the database
                md5 = md5sum(filename)
                metadata[u'_id'] = md5
                # Create a timestamp reflecting when the file is submitted to the database
                now = datetime.datetime.now()
                timestamp = now.strftime("%Y:%m:%d %H:%M:%S")
                metadata[u'File:DateTimeRecieved'] = timestamp
                # If the -S flag is set, compute an ssdeep hash and insert it into the JSON
                if args['SSDeep']:
                    metadata[u'SSDeep'] = ssdeep().hash_file(filename)
                # Remove unwanted keys that were present in the exiftool JSON
                del metadata[u'SourceFile']
                del metadata[u'File:FilePermissions']
                del metadata[u'File:Directory']
                del metadata[u'ExifTool:ExifToolVersion']
                # Convert the metadata dictionary to a string and run it through Yara
                matches = rules.match(data=str(metadata))
                # Scan the full file with Yara if the -f flag is set; this can be slow
                if args['fullyara']:
                    fullmatches = rules.match(filename)
                    matches.extend(fullmatches)
                # Record Yara hits (or the absence of any) in the metadata and the log
                if matches:
                    metadata[u'YaraAlerts'] = str(matches)
                    logging.warning(timestamp + ": Yara Matches for " + name + ": " + str(matches) + " MD5: " + md5)
                else:
                    metadata[u'YaraAlerts'] = "None"
                    logging.debug(timestamp + ": No Yara Matches for " + name + " MD5: " + md5)
                # Insert into the Mongo collection
                metadatacollection.insert(metadata)
                # Confirm successful database submission (duplicate MD5s will be ignored by Mongo; no message for them here)
                logging.info(timestamp + ": Metadata for " + name + " MD5: " + md5 + " added to database")
                # If the -m switch is on, move each file to the destination dir, removing it from the scanning path
                if args['move']:
                    # Create the destination dir from the argument if it does not exist
                    if not os.path.exists(args['move']):
                        os.makedirs(args['move'])
                    # os.path.join avoids a missing separator between the destination dir and the name
                    shutil.move(filename, os.path.join(args['move'], name))
                    # Verify the move for the logs
                    logging.info(timestamp + ":" + filename + " has been moved to " + os.path.join(args['move'], name))
                # If the -d switch is on, delete each file after scanning. !!BE CAREFUL WITH THIS!!
                if args['delete']:
                    os.remove(filename)
                    # Confirm the delete for the logs
                    logging.info(timestamp + ":" + filename + " has been deleted.")
        # Sleep interval comes from the 'sleep' argument; default 15 seconds
        time.sleep(sleeptime)
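main() leans on a md5sum() helper that this excerpt never defines. A minimal sketch of what it presumably does (the chunked read is an assumption; all the caller needs is the hex digest):

import hashlib

def md5sum(filename, blocksize=65536):
    # Hypothetical helper: hash the file in fixed-size chunks so large
    # samples never have to fit in memory at once.
    digest = hashlib.md5()
    with open(filename, 'rb') as f:
        for block in iter(lambda: f.read(blocksize), b''):
            digest.update(block)
    return digest.hexdigest()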
def _get_ssdeep(self):
    # Return the ssdeep fuzzy hash of the file, or None if unavailable.
    if not IS_SSDEEP:
        return None
    try:
        return ssdeep.ssdeep().hash_file(self.file_path)
    except Exception:
        return None
def get_ssdeep(self):
    """Get the ssdeep fuzzy hash of the file.
    @return: ssdeep hash, or None if the binding is unavailable.
    """
    if not HAVE_SSDEEP:
        return None
    try:
        return ssdeep.ssdeep().hash_file(self.file_path)
    except Exception:
        return None
def get_ssdeep(file_name):
    # Try Jose's pyssdeep binding first, then fall back to the pypi binding.
    try:
        from ssdeep import ssdeep
        ss = ssdeep()
        return ss.hash_file(file_name)
    except ImportError:
        try:
            import ssdeep
            return ssdeep.hash_from_file(file_name)
        except ImportError:
            print('error: no library `ssdeep` available for import! this feature will not be available.')
            return None
def get_ssdeep(self):
    # try to return the ssdeep hash of file
    try:
        from ssdeep import ssdeep
        ss = ssdeep()
        return ss.hash_file(self.file_name)
    except ImportError:
        try:
            import ssdeep
            return ssdeep.hash_from_file(self.file_name)
        except ImportError:
            print('[error] no library `ssdeep` available for import! this feature will not be available.')
def _get_ssdeep(self): """ Generates the ssdeep fuzzy hash of the file. @return: ssdeep fuzzy hash of the file """ if not IS_SSDEEP: return None try: return ssdeep.ssdeep().hash_file(self.file_path) except: return None
def get_ssdeep(filename):
    """There are two Python bindings for ssdeep, each with a different
    interface, so we try Jose's pyssdeep first and, if it fails, the one
    from pypi. Just install one or the other:

    http://code.google.com/p/pyssdeep/
    http://pypi.python.org/packages/source/s/ssdeep/ssdeep-2.5.tar.gz#md5=fd9e5271c01ca389cc621ae306327ab6
    """
    try:
        from ssdeep import ssdeep
        s = ssdeep()
        return s.hash_file(filename)
    except Exception:
        try:
            import ssdeep
            return ssdeep.hash_from_file(filename)
        except Exception:
            pass
def get_ssdeep(filename):
    """There are two Python bindings for ssdeep, each with a different
    interface, so we try Jose's pyssdeep first and, if it fails, the one
    from pypi. Just install one or the other:

    http://code.google.com/p/pyssdeep/
    http://pypi.python.org/packages/source/s/ssdeep/ssdeep-2.5.tar.gz#md5=fd9e5271c01ca389cc621ae306327ab6
    """
    try:
        from ssdeep import ssdeep
        s = ssdeep()
        return s.hash_file(filename)
    except Exception:
        try:
            import ssdeep
            return ssdeep.hash_from_file(filename)
        except Exception:
            pass
    return ''
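The variants above only produce hashes; scoring two hashes against each other follows the same two-binding split. A sketch, assuming pyssdeep's compare() method (which ssdeepsearch() below also uses) and, as a labeled assumption, a module-level compare() in the pypi binding:

def compare_ssdeep(hash_a, hash_b):
    # Returns a 0-100 similarity score, or None if neither binding is installed.
    try:
        from ssdeep import ssdeep  # Jose's pyssdeep
        return ssdeep().compare(hash_a, hash_b)
    except ImportError:
        try:
            import ssdeep          # pypi binding (assumed to expose compare())
            return ssdeep.compare(hash_a, hash_b)
        except ImportError:
            return None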
def ssdeepsearch(args):
    """
    ssdeep support is based on pyssdeep (http://code.google.com/p/pyssdeep/)
    args should contain an ssdeep hash and a minimal score
    """
    s = ssdeep.ssdeep()
    h = unicode(args[0].decode("utf-8"))
    mscore = int(args[1])
    if mscore < 10:
        fritutils.termout.printWarning('"%d" is too low to use as a score.' % mscore)
    else:
        fritutils.termout.printMessage("Starting to search for ssdeep hashes.")
        for f in fritModel.File.query.all():
            if f.ssdeep:
                score = s.compare(f.ssdeep.ssdeep, h)
                if score >= mscore:
                    fp = os.path.join(f.fullpath.fullpath, f.filename)
                    fritutils.termout.printNormal("Score: %d, %s " % (score, f.fullFileSpec()))
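For reference, a hypothetical invocation of ssdeepsearch() as frit's command dispatcher would make it; the hash below is a made-up example value:

# First arg: the ssdeep hash to hunt for (made-up example).
# Second arg: the minimum match score worth reporting.
ssdeepsearch(["3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C", "60"])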
import os

inputs = os.listdir('.')

from ssdeep import ssdeep
s = ssdeep()
hashes = {}
for i in inputs:
    if i.startswith('.'):
        continue
    # Skip directories before hashing; hash_file expects a regular file
    if os.path.isdir(i):
        continue
    h = s.hash_file(i)
    print('FILE: %s %20s' % (i, h))
    hashes[i] = h
    f = open(i, 'rb')
    print('BYTES: %s %20s' % (i, s.hash_bytes(f.read())))
    f.close()

print('doing comparison')
for k, v in hashes.items():
    for k2, v2 in hashes.items():
        print('%s <-> %s %d' % (k, k2, s.compare(v, v2)))
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

# import necessary libraries
from ssdeep import ssdeep  # for fuzzy hashing, https://code.google.com/p/pyssdeep/
import pefile              # used to break up the PE file
import sys                 # print-to-screen support
import sqlite3             # sqlite3 logging support
import hashlib             # MD5 support

# get to coding
debug = True
ssd = ssdeep()
try:
    filename = open(sys.argv[1], 'rb')  # make a file object
except IndexError:
    print("What file do you want me to analyze?")
    sys.exit()
# compute the MD5 of the entire binary; used to tie PE sections back to the binary they came from
md5v = hashlib.md5(filename.read()).hexdigest()
unanalyzed = True
try:
    pe = pefile.PE(sys.argv[1])  # make the file to be examined an object of the pefile library
    connection = sqlite3.connect('fuzzyhash.db')  # connect to the database
    try:
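The excerpt cuts off here, but the MD5 comment implies the script goes on to fuzzy-hash each PE section so that sections can be matched across binaries. A sketch of that step under the same pyssdeep binding (this is not the script's actual code):

# Sketch only: hash each section's raw bytes, keyed by the parent
# binary's MD5 (md5v) so matching sections can be correlated later.
for section in pe.sections:
    name = section.Name.rstrip('\x00')          # trim fixed-width padding
    fuzzy = ssd.hash_bytes(section.get_data())  # hash_bytes as used above
    print('%s %s %s' % (md5v, name, fuzzy))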
import math
import Levenshtein
import ngram
import IPy
from datetime import datetime

try:
    from ssdeep import ssdeep
    fuzzyhash = ssdeep()
except ImportError:
    import ssdeep
    fuzzyhash = ssdeep

DATE_PATTERN = '%Y-%m-%d'
N = ngram.NGram()
Ngram_compare = N.compare

'''
Geolocation similarity
Input node shall have
1. id
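For orientation, a sketch of what the helpers bound above return (the hostnames and dates are illustrative values, not from the source):

# Both similarity helpers score in [0.0, 1.0]; higher means more alike.
a, b = 'malware.example.com', 'ma1ware.example.com'  # made-up hostnames
edit_similarity = Levenshtein.ratio(a, b)    # character-level edit similarity
ngram_similarity = Ngram_compare(a, b)       # shared n-gram overlap
# DATE_PATTERN parses ISO-style dates, e.g. for time-gap features.
gap_days = (datetime.strptime('2014-05-02', DATE_PATTERN)
            - datetime.strptime('2014-05-01', DATE_PATTERN)).days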
def collect(self):
    data = self.data
    out = []
    if data is None or len(data) == 0:
        out.append("Cannot read %s (maybe empty?)" % file)
        out.append("")
        return out
    try:
        pe = pefile.PE(data=data, fast_load=True)
        pe.parse_data_directories(directories=[
            pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
            pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_TLS'],
            pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']])
    except Exception:
        out.append("Cannot parse %s (maybe not PE?)" % file)
        out.append("")
        return out

    # Signature checks
    if 'yara' in sys.modules:
        yarahits = self.check_yara(data)
    else:
        yarahits = []
    if len(yarahits):
        out.append(self.header("Signature scans"))
        out.append(yarahits)

    # Meta-data
    out.append(self.header("Meta-data"))
    out.append("Size: %d bytes" % len(data))
    out.append("Date: %s" % self.get_timestamp(pe))
    exportdll = self.check_exportdll(pe)
    if len(exportdll):
        out.append("ExportDll: %s" % exportdll)
    (ep, name) = self.check_ep_section(pe)
    s = "EP: %s (%s)" % (hex(ep + pe.OPTIONAL_HEADER.ImageBase), name)
    if name not in good_ep_sections:
        s += " [SUSPICIOUS]"
    out.append(s)
    if 'magic' in sys.modules:
        if sys.version_info <= (2, 6):
            out.append("Type: %s" % self.ms.buffer(data))
        else:
            out.append("Type: %s" % magic.from_buffer(data))
    out.append("MD5: %s" % hashlib.md5(data).hexdigest())
    out.append("SHA1: %s" % hashlib.sha1(data).hexdigest())
    out.append("SHA256: %s" % hashlib.sha256(data).hexdigest())
    if 'ssdeep' in sys.modules:
        s = ssdeep()
        out.append("ssdeep: %s" % s.hash_file(file))
    packers = self.check_packers(pe)
    if len(packers):
        out.append("Packers: %s" % ','.join(packers))

    # Version info
    verinfo = self.check_verinfo(pe)
    if len(verinfo):
        out.append(self.header("Version info"))
        out.append(verinfo)

    # Sections
    out.append(self.header("Sections"))
    out.append("%-10s %-12s %-12s %-12s %-12s" % ("Name", "VirtAddr", "VirtSize", "RawSize", "Entropy"))
    out.append("-" * 60)
    for sec in pe.sections:
        s = "%-10s %-12s %-12s %-12s %-12f" % (
            ''.join([c for c in sec.Name if c in string.printable]),
            hex(sec.VirtualAddress),
            hex(sec.Misc_VirtualSize),
            hex(sec.SizeOfRawData),
            sec.get_entropy())
        # Empty raw data or extreme entropy (packed/encrypted) is suspicious
        if sec.SizeOfRawData == 0 or \
           (sec.get_entropy() > 0 and sec.get_entropy() < 1) or \
           sec.get_entropy() > 7:
            s += "[SUSPICIOUS]"
        out.append(s)

    # Resources
    resources = self.check_rsrc(pe)
    if len(resources):
        out.append(self.header("Resource entries"))
        out.append("%-18s %-12s %-12s Type" % ("Name", "RVA", "Size"))
        out.append("-" * 60)
        for rsrc in resources.keys():
            (name, rva, size, type) = resources[rsrc]
            out.append("%-18s %-12s %-12s %s" % (name, hex(rva), hex(size), type))

    # TLS callbacks
    callbacks = self.check_tls(pe)
    if len(callbacks):
        out.append(self.header("TLS callbacks"))
        for cb in callbacks:
            out.append(" 0x%x" % cb)

    # Exports
    exports = self.check_exports(pe)
    if len(exports):
        out.append(self.header("Exported Functions"))
        out.append("%-10s %-30s%s" % ("Ordinal", "Name", "Forwarder"))
        out.append("-" * 60)
        for exp in exports:
            out.append(exp)

    # Libraries
    libs = self.check_libs(pe)
    if len(libs):
        out.append(self.header("Import Libs"))
        for lib in libs:
            out.append(lib)

    # Imports
    imports = self.check_imports(pe)
    if len(imports):
        out.append(self.header("Imported Functions"))
        for imp in imports:
            out.append(imp)

    # Strings: printable runs that look like paths, filenames, or IPs
    results = []
    patterns = ["[ -~]{2,}[\\\/][ -~]{2,}",
                "[ -~]{2,}\.[ -~]{2,}",
                "\\\[ -~]{5,}",
                "^[ -~]{5,}[\\\/]$",
                "[ -~]+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}[ -~]+"]
    for pattern in patterns:
        regex = re.compile(pattern)
        results += regex.findall(data)
    if len(results):
        out.append(self.header("Interesting Strings"))
        out += list(set(results))
    out.append("")
    return out
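The string sweep at the end of collect() is self-contained enough to lift out. A minimal standalone sketch along the same lines (two of the five patterns shown; the function name is hypothetical):

import re

def interesting_strings(data):
    # Pull printable runs that look like paths or embedded IPv4
    # addresses, deduplicated, mirroring the sweep in collect().
    patterns = [
        r"[ -~]{2,}[\\/][ -~]{2,}",                         # path-like
        r"[ -~]+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}[ -~]+",  # embedded IPv4
    ]
    results = []
    for pattern in patterns:
        results += re.findall(pattern, data)
    return sorted(set(results))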