def _analyze_compressed_file(parent, node, path, nesting_level):
    m_type = mime.from_file(path)
    size = os.path.getsize(path)
    m = md5()
    s = sha1()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            m.update(chunk)
            s.update(chunk)
    str_md5 = m.hexdigest()
    str_sha1 = s.hexdigest()
    str_fuzzy = ssdeep.hash_from_file(path)
    node['filename'] = os.path.basename(path)
    node['mime_type'] = m_type
    node['size'] = size
    node['md5'] = str_md5
    node['sha1'] = str_sha1
    node['fuzzy'] = str_fuzzy
    node['nesting_level'] = nesting_level + 1
    if parent is None:
        node['parent_hash'] = None
    else:
        node['parent_hash'] = parent.get('sha1')
    node['compressed_children'] = []

    # If this is a compressed file, analyze it recursively: create a new
    # directory, uncompress the files there and calculate hashes, then delete
    # the extracted files when done.
    # zip, x-tar, x-7z-compressed, x-rar, vnd.ms-cab-compressed, gzip, x-bzip2
    tmpdir = tempfile.mkdtemp()
    try:
        # Brute-force approach: we don't even check the mime type; we try to
        # unpack every file as an archive.
        patoolib.extract_archive(path, outdir=tmpdir)

        # Analyze each extracted file
        files = [
            os.path.join(tmpdir, f)
            for f in os.listdir(tmpdir)
            if os.path.isfile(os.path.join(tmpdir, f))
        ]
        for f in files:
            child = dict()
            _analyze_compressed_file(parent=node, node=child, path=f,
                                     nesting_level=nesting_level + 1)
            node['compressed_children'].append(child)
    except Exception:
        # Not an archive (or extraction failed): leave children empty
        pass
    finally:
        # Remove the temporary extraction directory
        shutil.rmtree(tmpdir)
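A minimal driver sketch for the recursive analyzer above, assuming the module-level `mime` helper is a python-magic instance and that `ssdeep` and `patool` are installed; the sample path is illustrative:

import json
import os
import shutil
import tempfile
from hashlib import md5, sha1

import magic
import patoolib
import ssdeep

mime = magic.Magic(mime=True)  # module-level helper assumed by the function above

if __name__ == "__main__":
    root = {}
    # Start at nesting_level=-1 so the root node itself records level 0;
    # "sample.zip" is a hypothetical input archive
    _analyze_compressed_file(parent=None, node=root, path="sample.zip",
                             nesting_level=-1)
    print(json.dumps(root, indent=2))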
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("originalFile", help="File to antifuzz")
    parser.add_argument("--newFile", help="Name of the antifuzzed file")
    parser.add_argument(
        "-m", action='store_true', default=False,
        help="Change the metadata of the file instead; will still change the ssdeep hash")
    args = parser.parse_args()

    if args.newFile is None:
        args.newFile = args.originalFile
    if not args.originalFile.endswith('.mp3'):
        print("Please use a file with the .mp3 extension for your original file")
        return 1
    if not args.newFile.endswith('.mp3'):
        print("Please use a file with the .mp3 extension for your new file")
        return 1

    # Take in file
    ogFile = args.originalFile
    # Make copy of file
    nFile = args.newFile
    # Hash original file
    ogHash = ssdeep.hash_from_file(ogFile)
    # Make changes to given file
    mp3(ogFile, nFile, args)
    # Hash new file
    newHash = ssdeep.hash_from_file(nFile)
    # Compare the hashes
    diff = str(ssdeep.compare(ogHash, newHash))
    print("The files are " + diff + "% similar")
    return 0
def test_hash_from_file(self):
    with pytest.raises(IOError):
        ssdeep.hash_from_file("tests/files/")
    with pytest.raises(IOError):
        ssdeep.hash_from_file("tests/files/file-does-not-exist.txt")
    res = ssdeep.hash_from_file("tests/files/file.txt")
    assert res == "3:AXGBicFlgVNhBGcL6wCrFQE3:AXGHsNhxLsr2s"
def _on_diff_type_diff(self, sender, ctx):
    if sender.a_path is None or sender.new_file:
        operation = 'A'  # Added
    elif sender.b_path is None or sender.deleted_file:
        operation = 'D'  # Deleted
    elif not sender.diff:
        operation = 'R'  # Renamed
    else:
        operation = 'M'  # Modified

    data = {
        'operation': operation,
        # Relative paths to the repository
        'a_rel_path': sender.a_path,
        'b_rel_path': sender.b_path,
    }

    if sender.a_path is not None and operation != 'A':
        if ctx['a_path'].is_file():
            a_fs_path = ctx['a_path']
        else:
            a_fs_path = ctx['a_path'] / sender.a_path
        data['a_ref'] = os.fspath(sender.a_path)
        data['a_md5'] = utils.md5(a_fs_path)
        data['a_mime'] = magic.from_file(os.fspath(a_fs_path), mime=True)
        data['a_ssdeep'] = ssdeep.hash_from_file(os.fspath(a_fs_path))
        data['a_size'] = a_fs_path.stat().st_size
    else:
        data['a_size'] = 0

    if sender.b_path is not None and operation != 'D':
        if ctx['b_path'].is_file():
            b_fs_path = ctx['b_path']
        else:
            b_fs_path = ctx['b_path'] / sender.b_path
        # FIXME: parent $ ref when unpacking
        data['b_ref'] = utils.construct_path(sender.b_path, parent=ctx.get('b_ref'))
        data['b_ref'] = os.fspath(b_fs_path)
        data['b_md5'] = utils.md5(b_fs_path)
        data['b_mime'] = magic.from_file(os.fspath(b_fs_path), mime=True)
        data['b_ssdeep'] = ssdeep.hash_from_file(os.fspath(b_fs_path))
        data['b_size'] = b_fs_path.stat().st_size
    else:
        data['b_size'] = 0

    if data.get('a_ssdeep') and data.get('b_ssdeep'):
        data['diff'] = sender.diff.decode()
        data['similarity'] = ssdeep.compare(data['a_ssdeep'], data['b_ssdeep'])
    else:
        data['similarity'] = 0.0

    self.diffs.append(data)
def main(known_file, comparison, output_type):
    """
    The main function handles the main operations of the script
    :param known_file: path to known file
    :param comparison: path to look for similar files
    :param output_type: type of output to provide
    :return: None
    """
    # Check output formats
    if output_type not in OUTPUT_OPTS:
        logger.error("Unsupported output format '{}' selected. Please "
                     "use one of {}".format(output_type, ", ".join(OUTPUT_OPTS)))
        sys.exit(2)
    elif output_type == 'csv':
        # Special handling for CSV headers
        print('"similarity","known_file","known_hash",'
              '"comp_file","comp_hash"')

    # Check provided file paths
    known_file = os.path.abspath(known_file)
    comparison = os.path.abspath(comparison)

    # Generate ssdeep signature for known file
    if not os.path.exists(known_file):
        logger.error("Error - path {} not found".format(known_file))
        sys.exit(1)
    known_hash = ssdeep.hash_from_file(known_file)

    # Generate and test ssdeep signature for comparison file(s)
    if os.path.isdir(comparison):
        # Process files in folders
        for root, _, files in os.walk(comparison):
            for f in files:
                file_entry = os.path.join(root, f)
                comp_hash = ssdeep.hash_from_file(file_entry)
                comp_val = ssdeep.compare(known_hash, comp_hash)
                output(known_file, known_hash, file_entry, comp_hash,
                       comp_val, output_type)
    elif os.path.isfile(comparison):
        # Process a single file
        comp_hash = ssdeep.hash_from_file(comparison)
        comp_val = ssdeep.compare(known_hash, comp_hash)
        output(known_file, known_hash, comparison, comp_hash,
               comp_val, output_type)
    else:
        logger.error("Error - path {} not found".format(comparison))
        sys.exit(1)
def comparetrees(dir1, dir2, diffs):
    """
    Compare all subdirectories and files in two directory trees
    Same files have a matching score of 100
    Symlinks have a matching score of 100
    Different files have a matching score calculated using ssdeep (0 to 100)
    """
    names1 = os.listdir(dir1)
    names2 = os.listdir(dir2)
    comparedirs(dir1, dir2, diffs, names1, names2)
    common = intersect(names1, names2)
    missed = common[:]

    # compare contents of files in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isfile(path1) and os.path.isfile(path2):
            missed.remove(name)
            file1 = open(path1, 'rb')
            file2 = open(path2, 'rb')
            while True:
                bytes1 = file1.read(blocksize)
                bytes2 = file2.read(blocksize)
                if (not bytes1) and (not bytes2):
                    # same file
                    print('  100 matches', '/'.join(path1.split('/')[1:]))
                    diffs.append(100)
                    break
                if bytes1 != bytes2:
                    # different content
                    score = ssdeep.compare(ssdeep.hash_from_file(path1),
                                           ssdeep.hash_from_file(path2))
                    print(str(score).rjust(5), 'differs',
                          '/'.join(path1.split('/')[1:]))
                    diffs.append(score)
                    break

    # recur to compare directories in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isdir(path1) and os.path.isdir(path2):
            missed.remove(name)
            comparetrees(path1, path2, diffs)

    # same name but not both files or dirs (symlinks)
    for name in missed:
        diffs.append(100)
        print(' - ignored ' + name + ' (symlink)')
def ssdeep_hash(filename):
    try:
        return ssdeep.hash_from_file(filename)
    except Exception as e:
        logger.error("Error while computing ssdeep hash of file {} - {}".format(filename, e),
                     exc_info=True)
def upload(request):
    if request.method == "GET":
        upload_form = UploadForm()
    elif request.method == "POST":
        up_file = request.FILES['upload_file']
        up_file_md5 = get_hash_str(up_file)
        UploadFile_obj = UploadFile(id=up_file_md5, upload_file=up_file)
        UploadFile_obj.save()
        up_file_url = os.path.join(settings.MEDIA_ROOT, UploadFile_obj.upload_file.name)
        #ops_file_url = make_ops(up_file_url)
        up_file_ssdeep = ssdeep.hash_from_file(up_file_url)
        sys.stderr.write(up_file_ssdeep)
        UploadFileMeta_obj = UploadFileMeta(id=up_file_md5, ssdeep=up_file_ssdeep)
        UploadFileMeta_obj.save()
        response = {'status': 200, 'pk': up_file_md5}
        return HttpResponse(json.dumps(response), content_type='application/json')
    ctx = {
        'upload_form': upload_form,
    }
    return render(request, 'upload.html', ctx)
def get_hash(pe, filename):
    # Import hash (imphash)
    ih = pe.get_imphash()
    m = hashlib.md5()
    s = hashlib.sha1()
    s2 = hashlib.sha256()
    s5 = hashlib.sha512()
    with open(filename, 'rb') as fh:
        while True:
            data = fh.read(8192)
            if not data:
                break
            m.update(data)
            s.update(data)
            s2.update(data)
            s5.update(data)
    md5 = m.hexdigest()
    sha1 = s.hexdigest()
    sha2 = s2.hexdigest()
    sha5 = s5.hexdigest()
    hashdeep = ssdeep.hash_from_file(filename)
    return md5, sha1, ih, hashdeep, sha2, sha5
def hash_calc(malware_path):
    print(malware_path)
    pe = pefile.PE(malware_path)
    imp_hash = pe.get_imphash()
    ssdeep_hash = ssdeep.hash_from_file(malware_path)
    with open(malware_path, 'rb') as f:
        sha = sha1(f.read()).hexdigest()
    return imp_hash, ssdeep_hash, sha
def malwaresignature(input_malware):
    malware_file = input_malware
    malware = os.path.basename(malware_file)
    with open(malware_file, 'rb') as f:
        header = f.read(32)
    for call in filetypes:
        if call in header:
            print("Loading...")
            ts = os.path.getctime(malware_file)
            dt = datetime.fromtimestamp(ts, timezone.utc)
            ISO8601 = dt.astimezone().isoformat()
            record = pefile.PE(malware_file)
            fuzzyhash = ssdeep.hash_from_file(malware_file)
            importeddlls = []
            for access in record.DIRECTORY_ENTRY_IMPORT:
                dlls = access.dll.decode('utf-8')
                importeddlls.append(dlls)
            arch = record.FILE_HEADER.Machine
            hashmethod = hashlib.sha256()
            with open(malware_file, 'rb') as malwarefile:
                reader = malwarefile.read()
                hashmethod.update(reader)
            hashvalue = hashmethod.hexdigest()
            imphash = record.get_imphash()
            warnings = record.get_warnings()
            nameandSHA = {"Name of sample: ": malware, "Hash Value: ": hashvalue}
            with open('nameandsha.json', 'w') as k:
                data = json.dumps(nameandSHA)
                k.write(data)
            ent = entropy(input_malware)
            print("Done")
    return malware_file, ISO8601, hashvalue, arch, importeddlls, imphash, fuzzyhash, warnings, ent
def hashFileCreator():
    screenshot = Image.open('/root/Desktop/vnchash/ubuntu/ubuntu200.17.220.25%3A02.jpg')
    screenshot = screenshot.resize((100, 100))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=10)
    screenshot.save('/root/Desktop/vnchash/ubuntu/compressed.gif')
    hash = ssdeep.hash_from_file('/root/Desktop/vnchash/ubuntu/compressed.gif')
    print(hash)
def scan(filelist):
    results = []
    for fname in filelist:
        goodtogo = False
        i = 0
        # Ran into a weird issue with file locking; this fixes it
        while not goodtogo and i < 5:
            try:
                ssdeep_hash = ssdeep.hash_from_file(fname)
                chunksize, chunk, double_chunk = ssdeep_hash.split(':')
                chunksize = int(chunksize)
                doc = {
                    'ssdeep_hash': ssdeep_hash,
                    'chunksize': chunksize,
                    'chunk': chunk,
                    'double_chunk': double_chunk,
                    'analyzed': 'false',
                    'matches': {},
                }
                results.append((fname, doc))
                goodtogo = True
            except Exception as e:
                print('ssdeeper:', e)
                time.sleep(3)
                i += 1

    metadata = {}
    metadata["Name"] = NAME
    metadata["Type"] = TYPE
    metadata["Include"] = False
    return (results, metadata)
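The `chunksize:chunk:double_chunk` split above enables a standard pruning trick: an ssdeep comparison can only score above zero when the two signatures' chunksizes are equal or differ by a factor of two, so bucketing documents by chunksize avoids comparing every pair. A minimal sketch of that idea, assuming docs shaped like the ones `scan` produces (the `candidate_pairs` helper name is illustrative):

from collections import defaultdict

import ssdeep

def candidate_pairs(docs):
    """Yield (name_a, name_b, score) only for hashes whose chunksizes
    are identical or differ by a factor of two."""
    by_chunksize = defaultdict(list)
    for fname, doc in docs:
        by_chunksize[doc['chunksize']].append((fname, doc['ssdeep_hash']))
    for size, entries in by_chunksize.items():
        # same-chunksize bucket, plus the double-chunksize bucket
        pool = entries + by_chunksize.get(size * 2, [])
        for i, (name_a, hash_a) in enumerate(entries):
            for name_b, hash_b in pool[i + 1:]:
                score = ssdeep.compare(hash_a, hash_b)
                if score > 0:
                    yield name_a, name_b, score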
def gen_ssdeep_hash(self, filepath, exclude=False):
    files = os.listdir(filepath)
    for file in files:
        if not os.path.isfile(filepath + file):
            print("[+] WARNING: %s is not a file and will not be analyzed." % (filepath + file))
            continue
        tmp_ssdeep_hash = ''
        if self.cluster_type == 'strings_ssdeep':
            data = os.popen('strings %s' % (filepath + file)).read()
            tmp_ssdeep_hash = ssdeep.hash(data)
        elif self.cluster_type == 'file_ssdeep':
            tmp_ssdeep_hash = ssdeep.hash_from_file(filepath + file)
        elif self.cluster_type == 'imp_exp_ssdeep':
            imp_exp_str = imp_exp_functions(filepath + file)
            if imp_exp_str:
                tmp_ssdeep_hash = ssdeep.hash(imp_exp_str)
        if tmp_ssdeep_hash:
            dst_file = self.tmpdir + str(self.count)
            f = open(dst_file, 'w')
            f.write(tmp_ssdeep_hash)
            f.close()
            self.count += 1
            if tmp_ssdeep_hash not in self.ssdeep_stats.keys():
                self.ssdeep_stats[tmp_ssdeep_hash] = []
            tmp_file_ssdeep = {}
            tmp_file_ssdeep['file_path'] = filepath + file
            tmp_file_ssdeep['file_md5'] = file_md5(filepath + file)
            tmp_file_ssdeep['cluster_type'] = self.cluster_type
            tmp_file_ssdeep['exclude'] = 1 if exclude else 0
            self.ssdeep_stats[tmp_ssdeep_hash].append(tmp_file_ssdeep)
def getmalwaresignature(input_malware):
    malwares_files = os.listdir(input_malware)
    for malware in malwares_files:
        malware_file = os.path.join(input_malware, malware)
        with open(malware_file, 'rb') as f:
            header = f.read(32)
        for call in filetypes:
            if call in header:
                record = pefile.PE(malware_file)
                fuzzyhash = ssdeep.hash_from_file(malware_file)
                nameandfuzzy = {malware: fuzzyhash}
                print("Fuzzy hash of file: " + malware, fuzzyhash)
                with open('fuzzyhashlist.csv', 'a') as e:
                    for key in nameandfuzzy.keys():
                        e.write("%s,%s\n" % (key, nameandfuzzy[key]))
                access = None
                importeddlls = []
                for access in record.DIRECTORY_ENTRY_IMPORT:
                    dlls = access.dll.decode('utf-8')
                    print(dlls + "\n")
                    importeddlls.append(dlls)
                if hex(record.FILE_HEADER.Machine) == '0x14c':
                    print("This is a 32-bit binary")
                else:
                    print("This is a 64-bit binary")
                timestamp = (record.FILE_HEADER.dump_dict()['TimeDateStamp']['Value'].split('[')[1][:-1])
                print("Timestamp " + timestamp)
                for optional in record.OPTIONAL_HEADER.DATA_DIRECTORY:
                    print(optional.name, str(optional.Size), str(optional.VirtualAddress) + '\n')
                hashmethod = hashlib.sha256()
                with open(malware_file, 'rb') as malwarefile:
                    reader = malwarefile.read()
                    hashmethod.update(reader)
                print("The file name is: ", malware + " The sha256 hash of the file is:", hashmethod.hexdigest())
                hashes = {"Name of malware: ": malware,
                          "Imphash:": record.get_imphash(),
                          "SHA256:": hashmethod.hexdigest()}
                warnings = record.get_warnings()
                warnstring = {"Name of file :": malware,
                              " Warning :": (str(warnings).strip('[]'))}
                basicfiledata = None
                if access:
                    basicfiledata = {"Name of Malware:": malware,
                                     "Imported DLLs": importeddlls,
                                     "Arch": record.FILE_HEADER.Machine,
                                     "Timestamp": timestamp}
                print(basicfiledata)
                cprint(figlet_format("Check the CSV and Json files.", font='digital'),
                       color='green')
                with open('hashes.json', 'a') as f:
                    hashdata = json.dumps(hashes)
                    f.write(hashdata + "\n")
                with open('warnings.json', 'a') as f:
                    warndata = json.dumps(warnstring)
                    f.write(warndata + "\n")
                with open('info.json', 'a') as outfile:
                    if basicfiledata:
                        jdata = json.dumps(basicfiledata)
                        outfile.write(jdata + "\n")
def get_ssdeep(file_path):
    """
    :param file_path: absolute path of desired file
    :return: ssdeep
    """
    return ssdeep.hash_from_file(file_path)
def hash(fileSystemPosition):
    try:
        ssdeepValue = ssdeep.hash_from_file(fileSystemPosition)
        return ssdeepValue
    except Exception as e:
        print(str(e))
        ssdeepValue = "(None)"
        return ssdeepValue
def get_info(filepath):
    result = {}
    result['size'] = get_filesize(filepath)
    result['md5'] = hashlib.md5(open(filepath, 'rb').read()).hexdigest()
    result['sha1'] = hashlib.sha1(open(filepath, 'rb').read()).hexdigest()
    result['ssdeep'] = ssdeep.hash_from_file(filepath)
    result['type'] = getoutput('file %s' % filepath).split('%s: ' % filepath)[1]
    return result
def directoryController(known, comparison):
    """
    The directoryController function processes a directory and hands each file
    to the fileController
    :param known: str path to the known file
    :param comparison: str path to the comparison directory
    :return: list of dictionaries containing comparison results
    """
    logging.info('Processing Directory')
    known_hash = ssdeep.hash_from_file(known)

    # Prepare progress bar
    files_to_process = list()
    for root, directories, files in os.walk(comparison):
        for file_entry in files:
            file_entry_path = os.path.abspath(os.path.join(root, file_entry))
            files_to_process.append(file_entry_path)
    pb_widgets = [progressbar.Bar(), ' ', progressbar.SimpleProgress(), ' ',
                  progressbar.ETA()]
    pbar = progressbar.ProgressBar(widgets=pb_widgets,
                                   maxval=len(files_to_process))
    pbar.start()

    compared_hashes = []
    for count, file_path in enumerate(files_to_process):
        try:
            comparison_hash = ssdeep.hash_from_file(file_path)
        except IOError as e:
            logging.error('Could not open ' + file_path + ' | ' + str(e))
            pbar.update(count)
            continue
        hash_comparison = ssdeep.compare(known_hash, comparison_hash)
        compared_hashes.append({
            'file_path': file_path,
            'similarity': hash_comparison
        })
        pbar.update(count)
    pbar.finish()
    return compared_hashes
def hashgen(path, fileName):
    screenshot = Image.open(path + fileName)
    screenshot = screenshot.resize((10, 10))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=3)
    screenshot.save(path + 'compressed.gif')
    hash = ssdeep.hash_from_file(path + 'compressed.gif')
    os.remove(path + 'compressed.gif')
    return hash
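The resize-and-quantize step above normalizes screenshots before fuzzy hashing, so visually similar captures produce comparable ssdeep signatures. A small usage sketch under that assumption (the paths are illustrative, not from the original project):

import ssdeep

# Hash two screenshot captures of the same host taken minutes apart;
# after normalization, similar images should score well above 0
h1 = hashgen('/tmp/caps/', 'host1-t0.jpg')  # hypothetical capture paths
h2 = hashgen('/tmp/caps/', 'host1-t1.jpg')
print(ssdeep.compare(h1, h2))  # similarity score, 0-100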
def compute_fuzzy_hash(self, file):
    file_size_in_bytes = os.path.getsize(file.absolute_path)
    file_size_in_kb = file_size_in_bytes / 1024
    # Files smaller than 4 KB are not fuzzy-hashed; any existing
    # fuzzy-hash entries for them are removed instead
    if file_size_in_kb < 4:
        file.delete_fuzzy_hash_entries(file)
    else:
        file.add_fuzzy_hash(file, ssdeep.hash_from_file(file.absolute_path))
def display_hashes(self, filepath, data, pe):
    """Display md5, sha1 and sha256 of the data given"""
    for algo in ["md5", "sha1", "sha256"]:
        m = getattr(hashlib, algo)()
        m.update(data)
        print("%-14s %s" % (algo.upper() + ":", m.hexdigest()))
    print("%-14s %s" % ("IMPHASH:", pe.get_imphash()))
    print("%-14s %s" % ("SSDEEP:", ssdeep.hash_from_file(filepath)))
def fileinfo(self, path: str) -> Dict:
    path = safe_str(path)
    data = get_digests_for_file(path, on_first_block=self.ident)
    data["ssdeep"] = ssdeep.hash_from_file(path)

    # Check if the file is empty
    if not int(data.get("size", -1)):
        data["type"] = "empty"

    # Further identify zip files based on their content
    elif data["type"] in ["archive/zip", "java/jar", "document/office/unknown"]:
        data["type"] = zip_ident(path, data["type"])

    # Further check CaRT files; they may have an explicit type set
    elif data["type"] == "archive/cart":
        data["type"] = cart_ident(path)

    # Further identify DOS executables, as this may be a PE that has been misidentified
    elif data["type"] == "executable/windows/dos":
        data["type"] = dos_ident(path)

    # If we have so far failed to identify the file, run the yara rules
    elif "unknown" in data["type"] or data["type"] == "text/plain":
        data["type"] = self.yara_ident(path, data, fallback=data["type"])

    # Extra checks for office documents
    # - Check for encryption
    if data["type"] in [
        "document/office/word",
        "document/office/excel",
        "document/office/powerpoint",
        "document/office/unknown",
    ]:
        try:
            msoffcrypto_obj = msoffcrypto.OfficeFile(open(path, "rb"))
            if msoffcrypto_obj and msoffcrypto_obj.is_encrypted():
                data["type"] = "document/office/passwordprotected"
        except Exception:
            # If msoffcrypto can't handle the file to confirm whether it is
            # password protected, then it's not meant to be. Moving on!
            pass

    # Extra checks for PDF documents
    # - Check for encryption
    # - Check for PDF collection (portfolio)
    if data["type"] == "document/pdf":
        # Password-protected documents typically contain '/Encrypt'
        pdf_content = open(path, "rb").read()
        if re.search(b"/Encrypt", pdf_content):
            data["type"] = "document/pdf/passwordprotected"
        # Portfolios typically contain '/Type/Catalog/Collection'
        elif re.search(b"/Type/Catalog/Collection", pdf_content):
            data["type"] = "document/pdf/portfolio"

    return data
def fileController(known, comparison):
    """
    The fileController function fuzzy hashes and compares a file
    :param known: path to known file to use for comparison
    :param comparison: path to the comparison file
    :return: dictionary of file_path and similarity for output
    """
    logging.info('Processing File')
    known_hash = ssdeep.hash_from_file(known)
    comparison_hash = ssdeep.hash_from_file(comparison)
    hash_comparison = ssdeep.compare(known_hash, comparison_hash)
    return {
        'file_path': os.path.abspath(comparison),
        'similarity': hash_comparison
    }
def getHashes(filename):
    # Read the file once: repeated file.read() calls after the first would
    # return empty strings, so all but the first hash would be of b""
    with open(filename, "rb") as file:
        data = file.read()
    print("File Name:\t", filename)
    print("MD5:\t\t", hashlib.md5(data).hexdigest())
    print("SHA1:\t\t", hashlib.sha1(data).hexdigest())
    print("SHA256:\t\t", hashlib.sha256(data).hexdigest())
    print("SHA512:\t\t", hashlib.sha512(data).hexdigest())
    print("SSDeep:\t\t", str(ssdeep.hash_from_file(filename)))
    print("File Size:\t", os.path.getsize(filename), "bytes")
def add_db_record(cursor, filename, quiet):
    """ docstring """
    skip_hash = False
    absolute = os.path.abspath(filename)
    try:
        tempstat = os.stat(absolute)
        perms = oct(tempstat.st_mode)
        owner = tempstat.st_uid
        group = tempstat.st_gid
        size = tempstat.st_size
    except OSError as err:
        print("[-] Couldn't open %s: %s" % (absolute, err))
        return False

    # Skip hashing if the file is a FIFO, because the script will
    # just hang forever trying to read data to calculate a hash.
    if stat.S_ISFIFO(os.stat(absolute).st_mode):
        skip_hash = True
        fuzzy_hash = "FIFO"
        md5digest = "FIFO"
        sha1digest = "FIFO"

    # Determine file type with libmagic
    filetype = magic.detect_from_filename(absolute).name
    if quiet is False:
        print("[+] Adding %s -- %s" % (filename, filetype))

    if skip_hash is False:
        # Calculate ssdeep hash
        try:
            fuzzy_hash = ssdeep.hash_from_file(absolute)
        except IOError:
            fuzzy_hash = "PERMISSION DENIED"
        except UnicodeDecodeError:
            fuzzy_hash = "UNICODE DECODE ERROR"

        # Calculate MD5 hash
        md5hash = hashlib.md5()
        md5hash.update(open(absolute, 'rb').read())
        md5digest = md5hash.hexdigest()

        # Calculate SHA1 hash
        sha1hash = hashlib.sha1()
        sha1hash.update(open(absolute, 'rb').read())
        sha1digest = sha1hash.hexdigest()

    cursor.execute(
        "INSERT INTO hashes VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME())",
        (HOSTNAME, absolute, size, perms, owner, group, fuzzy_hash,
         md5digest, sha1digest, filetype))
    return True
def get_ssdeep(self):
    if args.ssdeep and 'ssdeep' in sys.modules:
        try:
            result = {"ssdeep": ssdeep.hash_from_file(self.file)}
            return result
        except IOError as e:
            self.errors.append(f"SSDeepIOError[{e.strerror}]")
        except ssdeep.InternalError as e:
            self.errors.append(f"SSDeepError[{e}]")
    return {}
def bulk_ssdeep(lst=[]):
    for path in lst:
        try:
            fileSsdeep = ssdeep.hash_from_file(path)
        except IOError as ioe:
            print("Error:\t" + str(ioe))
            continue  # skip paths that could not be hashed
        ssdeepList.append(fileSsdeep)
def file_info(filename):
    info = []
    with open(filename, 'rb') as f:
        file = f.read()
    info.append("File: {}".format(filename))
    info.append("Size: {} bytes".format(os.path.getsize(filename)))
    info.append("Type: {}".format(magic.from_file(filename, mime=True)))
    info.append("MD5: {}".format(hashlib.md5(file).hexdigest()))
    info.append("SHA1: {}".format(hashlib.sha1(file).hexdigest()))
    if ssdeep_r:
        info.append("ssdeep: {}".format(ssdeep.hash_from_file(filename)))
    return info
def file(inFile):
    # read file as binary via 'rb' to prevent corruption
    with open(inFile, 'rb') as afile:
        buffer = afile.read(BLOCKSIZE)
        while len(buffer) > 0:
            h.update(buffer)
            buffer = afile.read(BLOCKSIZE)
    print(os.path.abspath(inFile) + '\t' + '(' + hash_name + ')\t' + h.hexdigest())
    if args.s == 'true':
        print(os.path.abspath(inFile) + '\t' + '(ssdeep)\t' + ssdeep.hash_from_file(inFile))
def run(self, args, data):
    """Display md5, sha1 and sha256 of the data given"""
    for algo in ["md5", "sha1", "sha256"]:
        m = getattr(hashlib, algo)()
        m.update(data)
        print("%-14s %s" % (algo.upper() + ":", m.hexdigest()))
    print("%-14s %s" % ("IMPHASH:", "(unavailable)"))
    print("%-14s %s" % ("SSDEEP:", ssdeep.hash_from_file(args.PEFILE)))
    print("Size: %d bytes" % len(data))
    print("Type: %s" % magic.from_buffer(data))
    print("Compile Time: %s" % "(unavailable)")
    print("Observed Path: %s" % os.path.abspath(args.PEFILE))
    print("Observed Filename: %s" % ntpath.basename(args.PEFILE))
def mountCrawler(self, fileName):
    if fileName != "":
        try:
            # Open the file and record its ssdeep hash
            self.logger.info("Hashing {}...".format(fileName))
            self.settings.setHashListComp("{},{}".format(
                ssdeep.hash_from_file(fileName), fileName))
        except Exception as err:
            # Log any error that happened during file operations
            self.logger.error("Reading the file failed with error: {}".format(err))
    else:
        self.logger.error("Reading Worker received an empty filename")
    return
def get_page_data(response):
    page = WebPage()
    page['uri'] = response.url
    page['status_code'] = response.status
    if 'screenshot' in response.meta:
        page['screenshot'] = response.meta['screenshot']
    page['ssdeep_pagesource'] = str(ssdeep.hash(response.body))
    try:
        screenshot_hash = ssdeep.hash_from_file(response.meta['screenshot'])
        page['ssdeep_screenshot'] = screenshot_hash
    except Exception:
        log.msg("Could not create hash from screenshot: " +
                response.meta['screenshot'], level=log.DEBUG)
    return page
def calculatehashes(directory, oldhashes={}):
    ourhashes = {}
    # get list of all files in the directory
    dirlist = os.listdir(directory)
    # iterate through each file
    for f in dirlist:
        # skip files already in hash DB
        if f in oldhashes:
            # use previously-calculated hash
            ourhashes[f] = oldhashes[f]
        else:
            # calculate hash and store
            ourhashes[f] = ssdeep.hash_from_file(os.path.join(directory, f))
    return ourhashes
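A usage sketch for the caching pattern above, assuming hashes are persisted between runs; the cache filename and sample directory are illustrative, not from the original project:

import json
import os

CACHE = "hashes.json"  # hypothetical cache location

def load_cached_hashes():
    if os.path.exists(CACHE):
        with open(CACHE) as fh:
            return json.load(fh)
    return {}

# The first run hashes everything; later runs only hash names not yet cached
old = load_cached_hashes()
new = calculatehashes("/var/samples", oldhashes=old)
with open(CACHE, "w") as fh:
    json.dump(new, fh)

Note the cache is keyed by filename only, so a file modified in place keeps its stale hash; invalidate on mtime change if that matters.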
def get_page_info(self):
    page = WebPage()
    page['uri'] = self.response.url
    page['status_code'] = self.response.status
    page['useragent'] = self.response.meta.get('User-Agent')
    page['referer'] = self.response.meta.get('Referer')
    if 'screenshot' in self.response.meta:
        page['screenshot'] = self.response.meta['screenshot']
    page['ssdeep_pagesource'] = str(ssdeep.hash(self.response.body))
    try:
        screenshot_hash = ssdeep.hash_from_file(self.response.meta['screenshot'])
        page['ssdeep_screenshot'] = screenshot_hash
    except Exception:
        log.msg("Could not create hash from screenshot: " +
                self.response.meta['screenshot'], level=log.DEBUG)
    return page
def get_detailes(self, data, _path):
    '''
    get general details of a file
    '''
    data["Details"] = deepcopy(self.datastruct)
    temp_f = open(_path, "rb").read()
    data["Details"]["Properties"] = {"Name": path.basename(_path).lower(),
                                     "md5": md5(temp_f).hexdigest(),
                                     "sha1": sha1(temp_f).hexdigest(),
                                     "sha256": sha256(temp_f).hexdigest(),
                                     "ssdeep": hash_from_file(_path),
                                     "size": convert_size(path.getsize(_path)),
                                     "bytes": path.getsize(_path),
                                     "mime": from_file(_path, mime=True),
                                     "extension": guess_type(_path)[0],
                                     "Entropy": get_entropy(temp_f)}
def ProcessFile(path):
    if not os.path.isfile(path):
        print('{0} not a file!'.format(path))
        return 2
    output = ""
    try:
        if ssdeep_python:
            hash = ssdeep.hash_from_file(path)
            print(hash)
        else:
            # Fall back to the ssdeep command-line tool
            p = Popen(["ssdeep", "-b", path], stdout=PIPE, stderr=PIPE)
            output, err = p.communicate()
            rc = p.returncode
            print(ParseOutput(output))
    except Exception as ex:
        return 1
    return 0
def scan(filelist):
    results = []
    for fname in filelist:
        goodtogo = False
        i = 0
        # Ran into a weird issue with file locking; this fixes it
        while not goodtogo and i < 5:
            try:
                results.append((fname, ssdeep.hash_from_file(fname)))
                goodtogo = True
            except Exception as e:
                print('ssdeeper:', e)
                time.sleep(3)
                i += 1

    metadata = {}
    metadata["Name"] = NAME
    metadata["Type"] = TYPE
    metadata["Include"] = False
    return (results, metadata)
        sys.exit(1)
    else:
        return (dir1, dir2)


if __name__ == '__main__':
    dir1, dir2 = getargs()
    diffs = []
    totalscore = 0
    # command line arguments are both dirs
    if os.path.isdir(dir1) and os.path.isdir(dir2):
        print('\nSCORE RESULT PATH')
        comparetrees(dir1, dir2, diffs)
        if not diffs:
            print('No diffs found\n')
        else:
            for score in diffs:
                totalscore += score
            print('\nTotal files compared:', len(diffs))
            print('Overall match score: ', str(totalscore // len(diffs)) + '%\n')
    else:
        try:
            # command line arguments are both files
            score = ssdeep.compare(ssdeep.hash_from_file(dir1),
                                   ssdeep.hash_from_file(dir2))
            print('Overall match score: ', str(score) + '%\n')
        except Exception:
            print('Invalid Files/Folders: Aborting...')
            sys.exit(1)
def testComputeHashFromFile(self):
    self.assertEqual(
        ssdeep.hash_from_file("test-file.txt"),
        "3:AXGBicFlgVNhBGcL6wCrFQE3:AXGHsNhxLsr2s"
    )
def get_ssdeep(self):
    try:
        return ssdeep.hash_from_file(self.filepath)
    except Exception as e:
        self.logger.exception('%s: %s' % (Exception, e))
screenshotPath = '/root/Desktop/vnchash/arena/'
for file in os.listdir(screenshotPath):
    screenshot = Image.open(screenshotPath + file)
    screenshot = screenshot.resize((10, 10))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=20)
    hash = imagehash.dhash(screenshot)
    print(str(hash) + ' ' + file)


def hashFileCreator():
    screenshot = Image.open('/root/Desktop/vnchash/ubuntu/ubuntu200.17.220.25%3A02.jpg')
    screenshot = screenshot.resize((100, 100))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=10)
    screenshot.save('/root/Desktop/vnchash/ubuntu/compressed.gif')
    hash = ssdeep.hash_from_file('/root/Desktop/vnchash/ubuntu/compressed.gif')
    print(hash)


def compareHashes():
    hashone = ssdeep.hash_from_file('/root/Desktop/vnchash/win7/win7hash.jpg')
    hashtwo = ssdeep.hash_from_file('/root/Desktop/vnchash/win7/win7hash.jpg-temp.jpg')
    print(hashone)
    print(hashtwo)
    print(ssdeep.compare(hashone, hashtwo))


compareHashes()