def test_compare(self):
    """ssdeep.compare yields the same score (22) for str and bytes hash pairs."""
    text_pair = (
        "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C",
        "3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C",
    )
    assert ssdeep.compare(*text_pair) == 22
    byte_pair = (
        b"3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C",
        b"3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C",
    )
    assert ssdeep.compare(*byte_pair) == 22
def _calc_score(self, lt_new):
    """Score *lt_new* against every template in self._lttable.

    Returns a list of (ltid, score) pairs.  With self._mem_hash set,
    ssdeep hashes are cached in self._d_hash keyed by ltid; otherwise
    hash_score() recomputes the comparison from the raw strings.

    Fix: the cached branch previously paired scores with the enumerate()
    index instead of lt_temp.ltid, inconsistent with the non-cached
    branch (which uses the real ltid).  Both branches now return ltids.
    """
    try:
        import ssdeep
    except ImportError:
        raise ImportError(
            "ltgroup algorithm <ssdeep> needs python package ssdeep")
    ret = []
    h1 = ssdeep.hash(str(lt_new))
    if self._mem_hash:
        if len(self._d_hash) == 0:
            # Lazily populate the hash cache on first use.
            for lt in self._lttable:
                self._d_hash[lt.ltid] = ssdeep.hash(str(lt))
        for lt_temp in self._lttable:
            h2 = self._d_hash[lt_temp.ltid]
            score = ssdeep.compare(h1, h2)
            ret.append((lt_temp.ltid, score))
        # Remember the new template's hash for future comparisons.
        self._d_hash[lt_new.ltid] = h1
    else:
        for lt_temp in self._lttable:
            score = hash_score(str(lt_new), str(lt_temp))
            ret.append((lt_temp.ltid, score))
    return ret
def fuzz_search_fast(id, p, fuzz):
    """Find stored samples whose fuzzy hash (field *p*) is similar to *fuzz*.

    Only candidates whose ssdeep chunk size is within +/-500 of the query's
    are compared, which avoids pointless comparisons across very different
    block sizes.  Matches (score > 0) are collected in a dict keyed by
    file_id.  NOTE(review): the dict is built but never returned here —
    presumably a later revision adds `return dic`; confirm against callers.

    Fix: Py2-only `except Exception, e` / `print str(e)` replaced with
    syntax valid on both Python 2.6+ and Python 3.
    """
    # print("searching fuzz")
    block = int(fuzz.split(':')[0])
    lap = 500  # chunk-size tolerance window
    client = MongoClient(env["metadata"]["host"], env["metadata"]["port"])
    db = client[env["db_metadata_name"]]
    coll_meta = db[env["db_metadata_collection"]]
    f1 = coll_meta.find({}, {"file_id": 1, p: 1})
    l = []
    for f in f1:
        l.append(f)
    # print("comparando")
    dic = {}
    for a in l:
        res = -1
        try:
            f_comp = a[p]
            block_comp = int(f_comp.split(':')[0])
            if block_comp <= block + lap and block_comp >= block - lap:
                res = ssdeep.compare(f_comp, fuzz)
                if res > 0:
                    dic[a["file_id"]] = res
        except Exception as e:
            print(str(e))
            # print(str(res)+"------"+str(a[p])+"-----"+str(a["file_id"]))
            continue
def testCompareHash(self):
    """A known pair of ssdeep hashes must score exactly 22."""
    first = "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C"
    second = "3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C"
    self.assertEqual(ssdeep.compare(first, second), 22)
def _check_duplicated(self, hash_md5, hash_ssdeep, doc_size):
    """Raise if the document already exists, exactly or near-exactly.

    An exact md5 hit raises DocumentDuplicatedExact.  Otherwise, documents
    whose size is within 50% of *doc_size* (window at least 100 KiB wide)
    are fuzzy-compared; any ssdeep score at or above SSDEEP_THRESHOLD
    raises DocumentDuplicatedSimilar.
    """
    if self._database.get_doc(hash_md5):
        raise error.DocumentDuplicatedExact()
    # Candidate window: half the size either way, never narrower than 100 KiB.
    eps = max(0.5 * doc_size, 102400)
    lower_size = max(0, doc_size - eps)
    upper_size = doc_size + eps
    candidates = self._database.get_similar_docs(lower_size, upper_size)
    for candidate in candidates:
        if ssdeep.compare(hash_ssdeep, candidate.hash_ssdeep) >= self.SSDEEP_THRESHOLD:
            raise error.DocumentDuplicatedSimilar()
def vectorize_with_sparse_features(sparse_feature_set, feature_count, c2_data):
    """Build a 1 x feature_count sparse similarity vector.

    *sparse_feature_set* is an iterable of (index, (offset, code, ssdeep_hash))
    pairs.  For each feature whose offset exists in *c2_data* and whose
    response code matches, the cell at *index* is set to the ssdeep
    similarity (0..100) scaled to 0..1.  Unmatched features stay 0.

    Fix: dtype was `np.float`, an alias removed in NumPy 1.24 — the plain
    builtin `float` is the documented replacement.
    """
    vector = lil_matrix((1, feature_count), dtype=float)
    for index, (offset, code, ssdeep_hash) in sparse_feature_set:
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            # Normalize the 0..100 ssdeep score into 0..1.
            vector[0, index] = float(d) / 100.0
    return vector
def vectorize(feature_set, c2_data):
    """Build a dense similarity vector, one cell per feature.

    *feature_set* is a sequence of (offset, code, ssdeep_hash) triples.
    For each feature whose offset exists in *c2_data* with a matching
    response code, the cell holds the ssdeep similarity (0..100) scaled
    to 0..1; everything else stays 0.

    Fix: dtype was `np.float`, an alias removed in NumPy 1.24 — the plain
    builtin `float` is the documented replacement.
    """
    vector = np.zeros((len(feature_set),), dtype=float)
    for index, (offset, code, ssdeep_hash) in enumerate(feature_set):
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            # Normalize the 0..100 ssdeep score into 0..1.
            vector[index] = float(d) / 100.0
    return vector
def process_sigs(cursor, sig_list, bin_list):
    """ Process the function signatures
    Go through every function and compare it to functions in every binary
    Get the highest score per function and add it to a score_list
    cursor - the database cursor
    sig_list - the list of function signatures for analysis
    bin_list - the list of binaries in the current database to compare to"""
    score_list = []
    # maxval = total number of ssdeep comparisons = (hashes per binary summed) * sigs,
    # used only to size the progress bar / progress printout.
    maxval = 0
    for row in bin_list:
        cursor.execute("SELECT count(hash) FROM functions WHERE binaryid=?", (row[0],))
        maxval += int(cursor.fetchone()[0])
    maxval = maxval*len(sig_list)
    if progressbar:
        # progressbar is an optional module; fall back to periodic prints below.
        widgets = [" ", progressbar.Bar(marker="#"), " ", progressbar.Percentage(), " ", progressbar.ETA()]
        pbar = progressbar.ProgressBar(widgets=widgets, maxval=maxval).start()
    else:
        pbar = None
    i = 0  # running count of comparisons done, drives the progress display
    for row in bin_list:
        function_score_list = []
        for sig in sig_list:
            highest_score = 0
            cursor.execute("SELECT hash FROM functions WHERE binaryid=?", (row[0], ))
            # h means hash; "hash" is a Python builtin, so we avoid shadowing it
            for h in cursor.fetchall():
                strength = ssdeep.compare(sig, h[0])
                if strength > highest_score:
                    highest_score = strength
                i += 1
                if pbar:
                    pbar.update(i)
                elif i % 10000 == 0 or i == maxval:
                    print("%d / %d Done" % (i, maxval))
            # Best match of this signature within the current binary.
            function_score_list.append(highest_score)
        score_list.append(function_score_list)
    if pbar:
        pbar.finish()
    return score_list
def main(mailFields, key, msgMailRequest):
    """Decides if a spam is new or old.
    Takes following parameters:
        a. mailFields - parsed spam fields,
        b. key - spam file name,
        c. msgMailRequest - original spam that is to be relayed.

    Passes spam to shivaaddnewrecord module if spam is new or list is empty.
    Else, passes spam to shivaprocessold module.

    Fix: s_id values were compared with `is` (object identity), which is
    unreliable for equal strings; replaced with `==`.
    """
    logging.info("[+]Inside shivadecide module.")
    records = server.QueueReceiver.records
    # Checking if we have any item in our global list.
    # If no item: then we will directly push spam details into the list
    # Else: Do the processing.
    if not records:
        shivaaddnewrecord.main(mailFields, key, msgMailRequest)
    else:
        # Text spam tolerates a lower similarity threshold than non-text.
        if mailFields['text']:
            threshold = 75
        else:
            threshold = 85
        oriLen = int(mailFields['len'])
        # Only compare against records within +/-10% of the spam's length.
        minLen, maxLen = int(oriLen * 0.90), int(oriLen * 1.10)
        count = 0
        for record in records:
            if record['len'] >= minLen and record['len'] <= maxLen:
                if mailFields['s_id'] == record['s_id']:
                    shivaprocessold.main(mailFields, record['s_id'], key, msgMailRequest)
                else:
                    ratio = ssdeep.compare(mailFields['ssdeep'], record['ssdeep'])
                    if ratio >= threshold:
                        shivaprocessold.main(mailFields, record['s_id'], key, msgMailRequest)
                        break
            count += 1
        # No record matched at all: treat as brand-new spam.
        if count == len(records):
            shivaaddnewrecord.main(mailFields, key, msgMailRequest)
def execute(self, input_data):
    ''' Execute method: rank every other PE sample by ssdeep similarity. '''
    meta = input_data['meta_deep']
    my_ssdeep = meta['ssdeep']
    my_md5 = meta['md5']
    # For every PE sample in the database compute my ssdeep fuzzy match
    results = self.c.batch_work_request('meta_deep', {'type_tag':'pe','subkeys':['md5','ssdeep']})
    sim_list = [
        {'md5': result['md5'], 'sim': ssd.compare(my_ssdeep, result['ssdeep'])}
        for result in results
        if result['md5'] != my_md5
    ]
    # Sort and return the sim_list (with some logic for threshold)
    sim_list.sort(key=itemgetter('sim'), reverse=True)
    output_list = [entry for entry in sim_list if entry['sim'] > 0]
    return {'md5': my_md5, 'sim_list':output_list}
def creategraph(fuzzyhashes, threshold=50):
    """Build an undirected similarity graph over fuzzy hashes.

    Each pair of distinct keys is compared once (keys already processed
    are skipped as the inner end of a pair); an edge weighted by the
    ssdeep score is added whenever the score reaches *threshold*.
    """
    G = nx.Graph()
    finished = set()
    for outer in fuzzyhashes.iterkeys():
        for inner in fuzzyhashes.iterkeys():
            if outer == inner or inner in finished:
                continue
            sim = ssdeep.compare(fuzzyhashes[outer], fuzzyhashes[inner])
            if sim >= threshold:
                G.add_edge(outer, inner, weight=sim)
        # Every pair involving `outer` has now been considered.
        finished.add(outer)
    return G
def comparetrees(dir1, dir2, diffs):
    """
    Compare all subdirectories and files in two directory trees
    Same files have a matching score of 100
    Symlinks have a matching score of 100
    Different files have a matching score calculated using ssdeep (0 to 100)

    Scores are appended to *diffs* (one entry per compared item); progress
    is printed as it goes.  NOTE(review): `comparedirs`, `intersect` and
    `blocksize` are module-level names defined elsewhere in this file.
    """
    names1 = os.listdir(dir1)
    names2 = os.listdir(dir2)
    # Report names present in only one tree.
    comparedirs(dir1, dir2, diffs, names1, names2)
    common = intersect(names1, names2)
    # `missed` collects common names that are neither plain files nor dirs
    # on both sides (e.g. symlinks); entries are removed as they match.
    missed = common[:]
    # compare contents of files in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isfile(path1) and os.path.isfile(path2):
            missed.remove(name)
            file1 = open(path1, 'rb')
            file2 = open(path2, 'rb')
            # Chunked byte-for-byte compare; fall back to ssdeep on first mismatch.
            while True:
                bytes1 = file1.read(blocksize)
                bytes2 = file2.read(blocksize)
                if (not bytes1) and (not bytes2):
                    # same file: both streams exhausted with no mismatch
                    print ' 100 matches','/'.join(path1.split('/')[1:])
                    diffs.append(100)
                    break
                if bytes1 != bytes2:
                    # different content: score the whole files with ssdeep
                    score = ssdeep.compare(ssdeep.hash_from_file(path1),ssdeep.hash_from_file(path2))
                    print str(score).rjust(5),'differs','/'.join(path1.split('/')[1:])
                    diffs.append(score)
                    break
    # recur to compare directories in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isdir(path1) and os.path.isdir(path2):
            missed.remove(name)
            comparetrees(path1, path2, diffs)
    # same name but not both files or dirs (symlinks) — counted as full matches
    for name in missed:
        diffs.append(100)
        print(' - ignored '+name+' (symlink)')
def main():
    """Bucket APK ssdeep self-similarity scores into one-hot feature vectors.

    Reads benign/malicious ssdeep CSVs, estimates each app's average
    similarity to 1000 random apps of the same category, one-hot encodes
    that mean into six bucket limits, and writes the result to
    app_hash_vectors.json.

    Fix: the final summary printed num_zero['benign'] where the benign
    *total* (num_each['benign']) was intended.
    """
    all_hashes = {'malicious': [], 'benign': []}
    app_malicious_map = {}  # mapping from android app names to 1 or 0 for malware or goodware
    similarity_buckets = ['similarity_limit_0', 'similarity_limit_0.2', 'similarity_limit_0.4',
                          'similarity_limit_0.6', 'similarity_limit_0.8', 'similarity_limit_1.0']
    root_dir = os.getcwd()
    for i, directory in enumerate(['benign_apk', 'malicious_apk']):
        os.chdir(directory)
        with open(directory.split('_')[0] + '_apk_ssdeep.csv') as hashes:
            for j, line in enumerate(hashes):
                if j == 0:
                    continue  # skip CSV header
                b64hash = line.split(',')[0]
                app_name = line.split(',')[-1].split('/')[-1][:-2]
                app_malicious_map[app_name] = [1,0] if i else [0,1]
                all_hashes['malicious' if i else 'benign'].append((app_name, b64hash))
        os.chdir(root_dir)
    all_apps = {}  # mapping from each app to its similarity score and classification
    num_zero = {}
    num_each = {}
    for category in all_hashes:
        num_zero[category] = 0
        num_each[category] = 0
        for app_and_hash in all_hashes[category]:
            # Monte-Carlo estimate: mean similarity to 1000 random same-category apps.
            similarity_scores = []
            this_score = app_and_hash[1]
            for i in range(1000):
                other_score = random.choice(all_hashes[category])[1]
                similarity_scores.append(ssdeep.compare(this_score, other_score))
            score = numpy.mean(similarity_scores)
            num_each[category] += 1
            if score == 0:
                num_zero[category] += 1
            # One-hot encode `score` into the bucket whose (last_limit, limit] range holds it.
            bit_vector = []
            last_limit = -0.01
            for limit in similarity_buckets:
                float_limit = float(limit.split('_')[-1])
                if score <= float_limit and score > last_limit:
                    bit_vector.append(1)
                else:
                    bit_vector.append(0)
                last_limit = float_limit
            if not any(bit_vector):  # score > 1
                bit_vector[-1] = 1
            all_apps[app_and_hash[0]] = {'vector': bit_vector,
                                         'malicious': app_malicious_map[app_and_hash[0]]}
    with open('app_hash_vectors.json', 'w') as outfile:
        json.dump({'features': similarity_buckets, 'apps': all_apps}, outfile)
    print('{} of {} malicious apps and {} of {} benign apps had zero similarity found'.format(
        num_zero['malicious'], num_each['malicious'], num_zero['benign'], num_each['benign']))
    print('Wrote data on ' + str(len(all_apps)) + ' apps to a file.')
def compare_ssdeep(payload1, payload2):
    """
    Compare binary payloads with ssdeep to determine

    :param bytes payload1: Binary content to compare
    :param bytes payload2: Binary content to compare

    :returns: Match score from 0 (no match) to 100
    :type: int or None

    Fix: bare ``except:`` also swallowed SystemExit/KeyboardInterrupt;
    narrowed to ``except Exception``.
    """
    payload1_hash = get_ssdeep(payload1)
    payload2_hash = get_ssdeep(payload2)
    try:
        match = ssdeep.compare(payload1_hash, payload2_hash)
    except Exception:
        # Invalid/empty hashes make ssdeep raise; report "no score" instead.
        match = None
    return match
def fuzz_search_fast(id, p, fuzz):
    """Find stored samples whose fuzzy hash (field *p*) is similar to *fuzz*.

    Candidates are pre-filtered by ssdeep chunk size (within +/-500 of the
    query's) before comparing; positive scores are collected in a dict
    keyed by file_id.  NOTE(review): the dict is built but not returned —
    confirm intended contract against callers.

    Fix: Py2-only `except Exception, e` replaced with `except Exception as e`,
    valid on both Python 2.6+ and Python 3.
    """
    block = int(fuzz.split(':')[0])
    lap = 500  # chunk-size tolerance window
    coll_meta = db[envget("db_metadata_collection")]
    f1 = coll_meta.find({}, {"file_id": 1, p: 1})
    l = []
    for f in f1:
        l.append(f)
    dic = {}
    for a in l:
        res = -1
        try:
            f_comp = a[p]
            block_comp = int(f_comp.split(':')[0])
            if(block_comp <= block + lap and block_comp >= block - lap):
                res = ssdeep.compare(f_comp, fuzz)
            if(res > 0):
                dic[a["file_id"]] = res
        except Exception as e:
            logging.exception(
                "fuzz_search_fast(id=" + str(id) + ",p=" + str(p) + ",fuzz=" + str(fuzz))
            continue
def searchFuzzy(fuzz, limit, thresh):
    """Return {file_id: score} for stored hashes scoring >= thresh vs *fuzz*.

    Pulls up to *limit* fuzzy hashes from the metadata collection and
    compares each with ssdeep; comparisons that raise InternalError are
    logged to stdout and skipped.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    coll_meta = db["db_metadata_collection"]
    cursor = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit)
    candidates = []
    for document in cursor:
        candidates.append(document)
    ret = {}
    for entry in candidates:
        res = -1
        try:
            res = ssdeep.compare(entry["fuzzy_hash"], fuzz)
        except InternalError:
            # Malformed stored hash: report and move on.
            print(str(res) + "------" + str(entry["fuzzy_hash"]) +
                  "-----" + str(entry["file_id"]))
            continue
        if res >= thresh:
            ret[entry["file_id"]] = res
    return ret
def test_compare(self):
    """compare rejects None operands (TypeError) and empty hashes (InternalError)."""
    valid_a = "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C"
    valid_b = "3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C"
    with pytest.raises(TypeError):
        ssdeep.compare(valid_a, None)
    with pytest.raises(TypeError):
        ssdeep.compare(None, valid_b)
    with pytest.raises(ssdeep.InternalError):
        ssdeep.compare(valid_a, "")
def run(self):
    """Worker thread: drain self.jobs of candidate domains, enriching each
    in place with DNS, GeoIP, banner and ssdeep data according to the
    enabled option_* flags.  Exits when the job queue is empty.
    """
    if self.option_extdns:
        # Build one dnspython resolver per thread; custom nameservers skip
        # system config entirely (configure=False).
        if self.nameservers:
            resolv = Resolver(configure=False)
            resolv.nameservers = self.nameservers
        else:
            resolv = Resolver()
            resolv.search = []
        resolv.lifetime = REQUEST_TIMEOUT_DNS * REQUEST_RETRIES_DNS
        resolv.timeout = REQUEST_TIMEOUT_DNS
        # 1232 bytes is the common EDNS payload recommendation (DNS Flag Day 2020).
        EDNS_PAYLOAD = 1232
        resolv.use_edns(edns=True, ednsflags=0, payload=EDNS_PAYLOAD)
        # dnspython >= 2.0 renamed query() to resolve(); support both.
        if hasattr(resolv, 'resolve'):
            resolve = resolv.resolve
        else:
            resolve = resolv.query
    if self.option_geoip:
        geo = geoip()
    while not self.kill_received:
        try:
            domain = self.jobs.get(block=False)
        except queue.Empty:
            # Queue drained: signal shutdown and stop this worker.
            self.kill_received = True
            return
        if self.option_extdns:
            nxdomain = False
            dns_ns = False
            dns_a = False
            dns_aaaa = False
            dns_mx = False
            try:
                domain['dns-ns'] = self.__answer_to_list(
                    resolve(domain['domain-name'], rdtype=dns.rdatatype.NS))
                dns_ns = True
            except NXDOMAIN:
                nxdomain = True
            except NoNameservers:
                domain['dns-ns'] = ['!ServFail']
            except DNSException as e:
                self.__debug(e)
            # A/AAAA only make sense if the name exists at all.
            if nxdomain is False:
                try:
                    domain['dns-a'] = self.__answer_to_list(
                        resolve(domain['domain-name'], rdtype=dns.rdatatype.A))
                    dns_a = True
                except NoNameservers:
                    domain['dns-a'] = ['!ServFail']
                except DNSException as e:
                    self.__debug(e)
                try:
                    domain['dns-aaaa'] = self.__answer_to_list(
                        resolve(domain['domain-name'], rdtype=dns.rdatatype.AAAA))
                    dns_aaaa = True
                except NoNameservers:
                    domain['dns-aaaa'] = ['!ServFail']
                except DNSException as e:
                    self.__debug(e)
            # MX is only queried when NS resolution succeeded.
            if nxdomain is False and dns_ns is True:
                try:
                    domain['dns-mx'] = self.__answer_to_list(
                        resolve(domain['domain-name'], rdtype=dns.rdatatype.MX))
                    dns_mx = True
                except NoNameservers:
                    domain['dns-mx'] = ['!ServFail']
                except DNSException as e:
                    self.__debug(e)
        else:
            # Fallback without dnspython: system resolver via getaddrinfo.
            try:
                ip = socket.getaddrinfo(domain['domain-name'], 80)
            except socket.gaierror as e:
                if e.errno == -3:
                    # EAI_AGAIN: temporary failure in name resolution.
                    domain['dns-a'] = ['!ServFail']
            except Exception as e:
                self.__debug(e)
            else:
                domain['dns-a'] = list()
                domain['dns-aaaa'] = list()
                # Crude family split: dots => IPv4, colons => IPv6.
                for j in ip:
                    if '.' in j[4][0]:
                        domain['dns-a'].append(j[4][0])
                    if ':' in j[4][0]:
                        domain['dns-aaaa'].append(j[4][0])
                domain['dns-a'] = sorted(domain['dns-a'])
                domain['dns-aaaa'] = sorted(domain['dns-aaaa'])
                dns_a = True
                dns_aaaa = True
        if self.option_mxcheck:
            if dns_mx is True:
                if domain['domain-name'] != self.domain_init:
                    if self.__mxcheck(domain['dns-mx'][0], self.domain_init,
                                      domain['domain-name']):
                        domain['mx-spy'] = True
        if self.option_geoip:
            if dns_a is True:
                try:
                    country = geo.country_by_addr(domain['dns-a'][0])
                except Exception as e:
                    self.__debug(e)
                    pass
                else:
                    if country:
                        domain['geoip-country'] = country.split(',')[0]
        if self.option_banners:
            if dns_a is True:
                banner = self.__banner_http(domain['dns-a'][0], domain['domain-name'])
                if banner:
                    domain['banner-http'] = banner
            if dns_mx is True:
                banner = self.__banner_smtp(domain['dns-mx'][0])
                if banner:
                    domain['banner-smtp'] = banner
        if self.option_ssdeep:
            if dns_a is True or dns_aaaa is True:
                try:
                    req = requests.get(self.uri_scheme + '://' + domain['domain-name'] +
                                       self.uri_path + self.uri_query,
                                       timeout=REQUEST_TIMEOUT_HTTP,
                                       headers={'User-Agent': self.useragent},
                                       verify=False)
                except Exception as e:
                    self.__debug(e)
                    pass
                else:
                    # Only score 2xx pages that did not redirect back to the
                    # original site's effective URL.
                    if req.status_code // 100 == 2 and req.url.split(
                            '?')[0] != self.ssdeep_effective_url:
                        # Whitespace-stripped, lowercased body makes the fuzzy
                        # hash robust to trivial formatting differences.
                        ssdeep_curr = ssdeep.hash(''.join(req.text.split()).lower())
                        domain['ssdeep-score'] = ssdeep.compare(
                            self.ssdeep_init, ssdeep_curr)
        self.jobs.task_done()
def run(self):
    """Worker thread: enrich queued domain dicts in place with DNS records,
    whois dates, GeoIP country, service banners and an ssdeep similarity
    score, depending on available modules and CLI args.
    """
    while not self.kill_received:
        domain = self.jobs.get()
        if module_dnspython:
            resolv = dns.resolver.Resolver()
            resolv.lifetime = REQUEST_TIMEOUT_DNS
            resolv.timeout = REQUEST_TIMEOUT_DNS
            try:
                ns = resolv.query(domain['domain'], 'NS')
                # [:-1] strips the trailing dot of the FQDN.
                domain['ns'] = str(sorted(ns)[0])[:-1].lower()
            except Exception:
                pass
            # Only look up A/AAAA/MX when the domain has a nameserver.
            if 'ns' in domain:
                try:
                    ns = resolv.query(domain['domain'], 'A')
                    domain['a'] = str(sorted(ns)[0])
                except Exception:
                    pass
                try:
                    ns = resolv.query(domain['domain'], 'AAAA')
                    domain['aaaa'] = str(sorted(ns)[0])
                except Exception:
                    pass
                try:
                    ns = resolv.query(domain['domain'], 'MX')
                    mx = str(sorted(ns)[0].exchange)[:-1].lower()
                    if mx:
                        domain['mx'] = mx
                except Exception:
                    pass
        else:
            # Fallback without dnspython: system resolver via getaddrinfo.
            try:
                ip = socket.getaddrinfo(domain['domain'], 80)
            except Exception:
                pass
            else:
                # First IPv4 (contains '.') and first IPv6 (contains ':').
                for j in ip:
                    if '.' in j[4][0]:
                        domain['a'] = j[4][0]
                        break
                for j in ip:
                    if ':' in j[4][0]:
                        domain['aaaa'] = j[4][0]
                        break
        if module_whois and args.whois:
            if 'ns' in domain and 'a' in domain:
                try:
                    whoisdb = whois.query(domain['domain'])
                    domain['created'] = str(whoisdb.creation_date).replace(' ', 'T')
                    domain['updated'] = str(whoisdb.last_updated).replace(' ', 'T')
                except Exception:
                    pass
        if module_geoip and geoip_db and args.geoip:
            if 'a' in domain:
                gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
                try:
                    country = gi.country_name_by_addr(domain['a'])
                except Exception:
                    pass
                else:
                    if country:
                        domain['country'] = country.split(',')[0]
        if args.banners:
            if 'a' in domain:
                banner = self.__banner_http(domain['a'], domain['domain'])
                if banner:
                    domain['banner-http'] = banner
            if 'mx' in domain:
                banner = self.__banner_smtp(domain['mx'])
                if banner:
                    domain['banner-smtp'] = banner
        if args.ssdeep and module_requests and module_ssdeep and self.orig_domain_ssdeep:
            if 'a' in domain:
                try:
                    req = requests.get('http://' + domain['domain'],
                                       timeout=REQUEST_TIMEOUT_HTTP)
                    fuzz_domain_ssdeep = ssdeep.hash(req.text)
                except Exception:
                    pass
                else:
                    # Similarity of the candidate page vs the original domain's page.
                    domain['ssdeep'] = ssdeep.compare(self.orig_domain_ssdeep,
                                                      fuzz_domain_ssdeep)
        self.jobs.task_done()
def run(self):
    """Worker thread: enrich queued domain dicts in place with DNS records,
    MX spying check, whois dates, GeoIP country, service banners and an
    ssdeep similarity score, depending on the enabled option_* flags.

    Fix: `domain['domain'] is not self.domain_orig` compared strings by
    identity, which is unreliable for equal strings; replaced with `!=`.
    """
    while not self.kill_received:
        domain = self.jobs.get()
        if self.option_extdns:
            resolv = dns.resolver.Resolver()
            resolv.lifetime = REQUEST_TIMEOUT_DNS
            resolv.timeout = REQUEST_TIMEOUT_DNS
            try:
                ans = resolv.query(domain['domain'], 'SOA')
                # Primary nameserver is the first field of the SOA RDATA;
                # [:-1] strips the trailing dot.
                domain['ns'] = str(
                    sorted(ans)[0]).split(' ')[0][:-1].lower()
            except Exception:
                pass
            # Only look up A/AAAA/MX when the domain resolved an NS.
            if 'ns' in domain:
                try:
                    ans = resolv.query(domain['domain'], 'A')
                    domain['a'] = str(sorted(ans)[0])
                except Exception:
                    pass
                try:
                    ans = resolv.query(domain['domain'], 'AAAA')
                    domain['aaaa'] = str(sorted(ans)[0])
                except Exception:
                    pass
                try:
                    ans = resolv.query(domain['domain'], 'MX')
                    mx = str(sorted(ans)[0].exchange)[:-1].lower()
                    if mx:
                        domain['mx'] = mx
                except Exception:
                    pass
        else:
            # Fallback without dnspython: system resolver via getaddrinfo.
            try:
                ip = socket.getaddrinfo(domain['domain'], 80)
            except Exception:
                pass
            else:
                # First IPv4 (contains '.') and first IPv6 (contains ':').
                for j in ip:
                    if '.' in j[4][0]:
                        domain['a'] = j[4][0]
                        break
                for j in ip:
                    if ':' in j[4][0]:
                        domain['aaaa'] = j[4][0]
                        break
        if self.option_mxcheck:
            if 'mx' in domain:
                if domain['domain'] != self.domain_orig:
                    if self.__mxcheck(domain['mx'], self.domain_orig,
                                      domain['domain']):
                        domain['mx-spy'] = True
        if self.option_whois:
            if 'ns' in domain and 'a' in domain:
                try:
                    whoisdb = whois.query(domain['domain'])
                    domain['created'] = str(whoisdb.creation_date).replace(
                        ' ', 'T')
                    domain['updated'] = str(whoisdb.last_updated).replace(
                        ' ', 'T')
                except Exception:
                    pass
        if self.option_geoip:
            if 'a' in domain:
                gi = GeoIP.open(
                    FILE_GEOIP,
                    GeoIP.GEOIP_INDEX_CACHE | GeoIP.GEOIP_CHECK_CACHE)
                try:
                    country = gi.country_name_by_addr(domain['a'])
                except Exception:
                    pass
                else:
                    if country:
                        domain['country'] = country.split(',')[0]
        if self.option_banners:
            if 'a' in domain:
                banner = self.__banner_http(domain['a'], domain['domain'])
                if banner:
                    domain['banner-http'] = banner
            if 'mx' in domain:
                banner = self.__banner_smtp(domain['mx'])
                if banner:
                    domain['banner-smtp'] = banner
        if self.option_ssdeep:
            if 'a' in domain:
                try:
                    req = requests.get(self.uri_scheme + '://' +
                                       domain['domain'] + self.uri_path +
                                       self.uri_query,
                                       timeout=REQUEST_TIMEOUT_HTTP)
                    ssdeep_fuzz = ssdeep.hash(req.text)
                except Exception:
                    pass
                else:
                    # Similarity of the candidate page vs the original page.
                    domain['ssdeep'] = ssdeep.compare(
                        self.ssdeep_orig, ssdeep_fuzz)
        self.jobs.task_done()
def difference(fbytes1, fbytes2):
    """Return a dissimilarity score: 100 minus the ssdeep match of the inputs."""
    similarity = ssdeep.compare(ssdeep.hash(fbytes1), ssdeep.hash(fbytes2))
    return 100 - similarity
def cmd():
    """Fuzzy-hash the two files named on the command line and print their similarity."""
    first_path = sys.argv[1]   # first file, first arg
    second_path = sys.argv[2]  # second file, second arg
    # Compare the two files' fuzzy hashes.
    result = ssdeep.compare(ssdeep.hash_from_file(first_path),
                            ssdeep.hash_from_file(second_path))
    print("Between {} and {}, there is {:.2f} similiarity".format(
        first_path, second_path, result))
def compare_files(file_path1, file_path2):
    """Score the similarity of two files by their import-table fuzzy hashes."""
    first_hash = get_import_table_hash(file_path1)
    second_hash = get_import_table_hash(file_path2)
    return compare(first_hash, second_hash)
def handle(self): self.cluster_start() # parepare ssdeep_lists ssdeep_sets = set() for path in self.file_lists: with open(path, 'r') as f: for line in f: line = line.strip() if len(line) == 0: continue ssdeep_sets.add(line) ssdeep_lists = list(ssdeep_sets) # print '> ssdeep cluster' for path in ssdeep_lists: if ',' in path: shash, path = path.split(',', 1) path = path.strip('"') else: shash = path self.hashes[path] = shash self.sha256s[path] = hashlib.sha256(path).hexdigest() block_size, chunk, double_chunk = self.process_ssdeep_hash( self.hashes[path]) similar_to = self.insert2db(block_size, chunk, path) | self.insert2db( block_size * 2, double_chunk, path) h = self.hashes[path] self.matches[path] = set() for other in similar_to: score = ssdeep.compare(h, self.hashes[other]) if score > self.similar_score: self.matches[path].add(other) self.matches[other].add(path) if path not in self.scores: self.scores[path] = {} if other not in self.scores[path]: self.scores[path][other] = score if other not in self.scores: self.scores[other] = {} if path not in self.scores[other]: self.scores[other][path] = score # ssdeep groups for path in self.matches.keys(): in_a_group = False for g in xrange(len(self.groups)): if path in self.groups[g]: in_a_group = True continue should_add = True for h in self.groups[g]: if h not in self.matches[path]: should_add = False if should_add: self.groups[g].append(path) in_a_group = True if not in_a_group: self.groups.append([path]) for g in xrange(len(self.groups)): self.groups[g].sort() self.cluster_finish()
def run(self):
    """Worker thread: enrich queued domain dicts in place with DNS records,
    whois dates, GeoIP country, service banners and an ssdeep similarity
    score, gated on available optional modules and CLI args.
    """
    while not self.kill_received:
        domain = self.jobs.get()
        if module_dnspython:
            resolv = dns.resolver.Resolver()
            resolv.lifetime = REQUEST_TIMEOUT_DNS
            resolv.timeout = REQUEST_TIMEOUT_DNS
            try:
                ns = resolv.query(domain['domain'], 'NS')
                # [:-1] strips the trailing dot of the FQDN.
                domain['ns'] = str(sorted(ns)[0])[:-1].lower()
            except Exception:
                pass
            # Only look up A/AAAA/MX when the domain has a nameserver.
            if 'ns' in domain:
                try:
                    ns = resolv.query(domain['domain'], 'A')
                    domain['a'] = str(sorted(ns)[0])
                except Exception:
                    pass
                try:
                    ns = resolv.query(domain['domain'], 'AAAA')
                    domain['aaaa'] = str(sorted(ns)[0])
                except Exception:
                    pass
                try:
                    ns = resolv.query(domain['domain'], 'MX')
                    mx = str(sorted(ns)[0].exchange)[:-1].lower()
                    if mx:
                        domain['mx'] = mx
                except Exception:
                    pass
        else:
            # Fallback without dnspython: system resolver via getaddrinfo.
            try:
                ip = socket.getaddrinfo(domain['domain'], 80)
            except Exception:
                pass
            else:
                # First IPv4 (contains '.') and first IPv6 (contains ':').
                for j in ip:
                    if '.' in j[4][0]:
                        domain['a'] = j[4][0]
                        break
                for j in ip:
                    if ':' in j[4][0]:
                        domain['aaaa'] = j[4][0]
                        break
        if module_whois and args.whois:
            if 'ns' in domain and 'a' in domain:
                try:
                    whoisdb = whois.query(domain['domain'])
                    domain['created'] = str(whoisdb.creation_date).replace(
                        ' ', 'T')
                    domain['updated'] = str(whoisdb.last_updated).replace(
                        ' ', 'T')
                except Exception:
                    pass
        if module_geoip and geoip_db and args.geoip:
            if 'a' in domain:
                gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
                try:
                    country = gi.country_name_by_addr(domain['a'])
                except Exception:
                    pass
                else:
                    if country:
                        domain['country'] = country.split(',')[0]
        if args.banners:
            if 'a' in domain:
                banner = self.__banner_http(domain['a'], domain['domain'])
                if banner:
                    domain['banner-http'] = banner
            if 'mx' in domain:
                banner = self.__banner_smtp(domain['mx'])
                if banner:
                    domain['banner-smtp'] = banner
        if args.ssdeep and module_requests and module_ssdeep and self.orig_domain_ssdeep:
            if 'a' in domain:
                try:
                    req = requests.get('http://' + domain['domain'],
                                       timeout=REQUEST_TIMEOUT_HTTP)
                    fuzz_domain_ssdeep = ssdeep.hash(req.text)
                except Exception:
                    pass
                else:
                    # Similarity of the candidate page vs the original page.
                    domain['ssdeep'] = ssdeep.compare(
                        self.orig_domain_ssdeep, fuzz_domain_ssdeep)
        self.jobs.task_done()
screenshotPath = '/root/Desktop/vnchash/arena/' for file in os.listdir(screenshotPath): screenshot = Image.open(screenshotPath + file) screenshot = screenshot.resize((10, 10)) screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=20) hash = imagehash.dhash(screenshot) print str(hash) + ' ' + file def hashFileCreator(): screenshot = Image.open('/root/Desktop/vnchash/ubuntu/ubuntu200.17.220.25%3A02.jpg') screenshot = screenshot.resize((100, 100)) screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=10) screenshot.save('/root/Desktop/vnchash/ubuntu/compressed.gif') hash = ssdeep.hash_from_file('/root/Desktop/vnchash/ubuntu/compressed.gif') print hash def compareHashes() hashone = ssdeep.hash_from_file('/root/Desktop/vnchash/win7/win7hash.jpg') hashtwo = ssdeep.hash_from_file('/root/Desktop/vnchash/win7/win7hash.jpg-temp.jpg') print hashone print hashtwo print ssdeep.compare(hashone, hashtwo) compareHashes()
def compare_fuzzy_hash(self, file):
    """Compare the two most recent fuzzy hashes recorded for *file*.

    Returns the ssdeep score, or None when fewer than two hashes exist.
    """
    # Validation prior to comparison: need at least two recorded hashes.
    history = file.file_fuzzy_hash
    if len(history) < 2:
        return None
    return ssdeep.compare(history[-2], history[-1])
#! /usr/bin/env python3 # ハッシュを投げて比較 # Usage: ./ssdeep_compare.py HASH1 HASH2 import ssdeep import sys print(ssdeep.compare(sys.argv[1], sys.argv[2]))
def compare_ssdeep(hash1, hash2):
    """Thin wrapper: ssdeep match score (0-100) of two fuzzy-hash strings."""
    score = ssdeep.compare(hash1, hash2)
    return score
else: return (dir1, dir2) if __name__ == '__main__': dir1, dir2 = getargs() diffs = [] totalscore = 0 # command line arguments are both dirs if os.path.isdir(dir1) & os.path.isdir(dir2): print '\nSCORE RESULT PATH' comparetrees(dir1, dir2, diffs) if not diffs: print('No diffs found\n') else: for score in diffs: totalscore += score print '\nTotal files compared:', len(diffs) print 'Overall match score: ', str(totalscore / len(diffs)) + '%\n' else: try: # command line arguments are both files score = ssdeep.compare(ssdeep.hash_from_file(dir1), ssdeep.hash_from_file(dir2)) print 'Overall match score: ', str(score) + '%\n' except: print 'Invalid Files/Folders: Aborting...' sys.exit(1)
# Script (Python 2): load a pickled list of process names, ssdeep-hash each,
# pairwise-compare the hashes, then flag near-identical process names by
# Levenshtein distance (possible masquerading).  `filename1` and
# `levenshtein` are defined elsewhere in this file.
with open(filename1, 'rb') as fp:
    process_list = pickle.load(fp)
print process_list
#print process_list
#process_list.append('Searchindexer.')
#print process_list
#print "Computing hashes... \n"
baseSSdeep = []
for prosesses in process_list:
    baseSSdeep.append(ssdeep.hash(prosesses))
#print baseSSdeep
print "\n ----- Levenshtein distance in process names ----- \n"
#print "Computing score for each item... \n"
scores = []
levScores = []
# Fuzzy-hash similarity for every unordered pair of process names.
for a, b in itertools.combinations(baseSSdeep, 2):
    scores.append(ssdeep.compare(a, b))
#print scores
# Edit distance 1-2 (but not 0) suggests a deliberately similar name.
for a, b in itertools.combinations(process_list, 2):
    score = (levenshtein(a, b))
    if score <= 2 and score != 0:
        print "Close match found"
        print str(a) + " - " + str(b) + "- Score: " + str(score) + "\n"
#print "Scores Levenshtein:"
#print levScores
shivanotifyerrors.notifydeveloper("[-] Error (Module shivamaindb.py) - executing fetchfromdb %s" % e) temprecords = tempDb.fetchall() mainrecords = mainDb.fetchall() for t_record in temprecords: maxlen, minlen = int(t_record[2] * 1.10), int(t_record[2] * 0.90) count = 0 for m_record in mainrecords: if m_record[2] >= minlen and m_record[2] <= maxlen: if t_record[0] == m_record[0]: update(t_record[0], m_record[0]) else: ratio = ssdeep.compare(t_record[1], m_record[1]) # Increase the comparison ratio when length is smaller if (int(t_record[2]) <= 150 and ratio >= 95) or (int(t_record[2]) > 150 and ratio >= 80): update(t_record[0], m_record[0]) else: count += 1 else: count += 1 if count == len(mainrecords): insert(t_record[0]) # At last update whitelist recipients group_concat_max_len = "SET SESSION group_concat_max_len = 20000" #whitelist = "INSERT INTO `whitelist` (`id`, `recipients`) VALUES ('1', (SELECT GROUP_CONCAT(DISTINCT `to`) FROM `spam` WHERE `totalCounter` < 30)) ON DUPLICATE KEY UPDATE `recipients` = (SELECT GROUP_CONCAT(DISTINCT `to`) FROM `spam` WHERE `totalCounter` < 30)"
def searchFiles(self):
    """Sort sample files by fuzzy similarity to a random reference sample.

    A random file from confirmPath becomes the reference; confirmed-similar
    files (score >= confirmFilesPercent) are copied to inputFilesPath and
    their hashes kept, the rest go to probablePath.  Then probablePath is
    re-scanned: anything scoring >= probableFilesPercent against any
    confirmed hash is promoted to inputFilesPath.
    """
    try:
        # Pick a random confirmed sample as the fuzzy-hash reference.
        if os.listdir(self.confirmPath):
            fileName = random.choice(os.listdir(self.confirmPath))
            filePath = os.path.join(self.confirmPath, fileName)
        else:
            raise Exception("Empty confirm virus sample folder.")
        print("Reference File For Fuzzy Hash: {}".format(filePath))
        refHash = ssdeep.hash_from_file(filePath)
        print("Fuzzy Hash Of Reference File: {}\n".format(refHash))
        # Preprocess the total files count (listdir here is a helper that
        # yields full paths — NOTE(review): defined elsewhere in this file).
        fileCounter = 0
        for filePath in listdir(self.confirmPath):
            fileCounter += 1
        if fileCounter == 1:
            # Only the reference itself: accept it directly.
            self.confirmPathFileHash.append(refHash)
            shutil.copy(filePath, self.inputFilesPath)
        else:
            with tqdm(total=fileCounter, unit="files",
                      desc="Fuzzy find in confirm path: ") as pbar:
                for traverseFilePath in listdir(self.confirmPath):
                    pbar.update(1)
                    pbar.set_postfix(file=filePath.split(os.path.sep)[-1:])
                    if filePath == traverseFilePath:
                        # The reference file matches itself by definition.
                        self.confirmPathFileHash.append(refHash)
                        shutil.copy(traverseFilePath, self.inputFilesPath)
                        continue
                    tmpHash = ssdeep.hash_from_file(traverseFilePath)
                    # print("File: ", traverseFilePath, " - ", ssdeep.compare(refHash, tmpHash))
                    if ssdeep.compare(refHash, tmpHash) >= self.confirmFilesPercent:
                        self.confirmPathFileHash.append(tmpHash)
                        shutil.copy(traverseFilePath, self.inputFilesPath)
                    else:
                        # Not similar enough to the reference: park in probablePath.
                        shutil.copy(traverseFilePath, self.probablePath)
        print("\n")
        # Second pass: rescue probables that match ANY confirmed hash.
        fileCounter = 0
        for filePath in listdir(self.probablePath):
            fileCounter += 1
        if fileCounter == 0:
            raise Exception("Empty probable virus sample folder.")
        else:
            with tqdm(total=fileCounter, unit="files",
                      desc="Fuzzy find in probable path: ") as pbar:
                for traverseFilePath in listdir(self.probablePath):
                    pbar.update(1)
                    pbar.set_postfix(file=filePath.split(os.path.sep)[-1:])
                    tmpHash = ssdeep.hash_from_file(traverseFilePath)
                    for fileHash in self.confirmPathFileHash:
                        # print("File: ", traverseFilePath, " - ", ssdeep.compare(refHash, tmpHash))
                        if ssdeep.compare(
                                fileHash, tmpHash) >= self.probableFilesPercent:
                            shutil.copy(traverseFilePath, self.inputFilesPath)
                            break
        print("\n")
    except Exception as error:
        raise Exception(error)
        # NOTE(review): unreachable after raise.
        sys.exit(1)
def run(self):
    """Worker thread: enrich queued domain dicts in place with DNS records,
    whois dates, GeoIP country, service banners and an ssdeep similarity
    score, gated on MODULE_* availability flags and CLI args.
    """
    while not self.kill_received:
        domain = self.jobs.get()
        if MODULE_DNSPYTHON:
            resolv = dns.resolver.Resolver()
            resolv.lifetime = REQUEST_TIMEOUT_DNS
            resolv.timeout = REQUEST_TIMEOUT_DNS
            try:
                ans = resolv.query(domain['domain'], 'SOA')
                # Primary nameserver is the first field of the SOA RDATA;
                # [:-1] strips the trailing dot.
                domain['ns'] = str(sorted(ans)[0]).split(' ')[0][:-1].lower()
            except Exception:
                pass
            # Only look up A/AAAA/MX when the domain resolved an NS.
            if 'ns' in domain:
                try:
                    ans = resolv.query(domain['domain'], 'A')
                    domain['a'] = str(sorted(ans)[0])
                except Exception:
                    pass
                try:
                    ans = resolv.query(domain['domain'], 'AAAA')
                    domain['aaaa'] = str(sorted(ans)[0])
                except Exception:
                    pass
                try:
                    ans = resolv.query(domain['domain'], 'MX')
                    mx = str(sorted(ans)[0].exchange)[:-1].lower()
                    if mx:
                        domain['mx'] = mx
                except Exception:
                    pass
        else:
            # Fallback without dnspython: system resolver via getaddrinfo.
            try:
                ip = socket.getaddrinfo(domain['domain'], 80)
            except Exception:
                pass
            else:
                # First IPv4 (contains '.') and first IPv6 (contains ':').
                for j in ip:
                    if '.' in j[4][0]:
                        domain['a'] = j[4][0]
                        break
                for j in ip:
                    if ':' in j[4][0]:
                        domain['aaaa'] = j[4][0]
                        break
        if MODULE_WHOIS and args.whois:
            if 'ns' in domain and 'a' in domain:
                try:
                    whoisdb = whois.query(domain['domain'])
                    domain['created'] = str(whoisdb.creation_date).replace(' ', 'T')
                    domain['updated'] = str(whoisdb.last_updated).replace(' ', 'T')
                except Exception:
                    pass
        if MODULE_GEOIP and DB_GEOIP and args.geoip:
            if 'a' in domain:
                gi = GeoIP.open(FILE_GEOIP,
                                GeoIP.GEOIP_INDEX_CACHE | GeoIP.GEOIP_CHECK_CACHE)
                try:
                    country = gi.country_name_by_addr(domain['a'])
                except Exception:
                    pass
                else:
                    if country:
                        domain['country'] = country.split(',')[0]
        if args.banners:
            if 'a' in domain:
                banner = self.__banner_http(domain['a'], domain['domain'])
                if banner:
                    domain['banner-http'] = banner
            if 'mx' in domain:
                banner = self.__banner_smtp(domain['mx'])
                if banner:
                    domain['banner-smtp'] = banner
        if args.ssdeep and MODULE_REQUESTS and MODULE_SSDEEP and self.orig_domain_ssdeep:
            if 'a' in domain:
                try:
                    req = requests.get(self.uri_scheme + '://' + domain['domain'] +
                                       self.uri_path + self.uri_query,
                                       timeout=REQUEST_TIMEOUT_HTTP)
                    fuzz_domain_ssdeep = ssdeep.hash(req.text)
                except Exception:
                    pass
                else:
                    # Similarity of the candidate page vs the original page.
                    domain['ssdeep'] = ssdeep.compare(self.orig_domain_ssdeep,
                                                      fuzz_domain_ssdeep)
        self.jobs.task_done()
from pymongo import MongoClient
import ssdeep

# One-off script: compare a fixed fuzzy hash against every stored sample and
# print file_ids scoring >= 50.  NOTE(review): `envget` is expected to be
# provided by the surrounding project; it is not imported here — confirm.
#
# Fix: Py2-only `except Exception, e` / `print str(e)` replaced with syntax
# valid on Python 3 as well.
fuzzy_to_compare = "12288:lTurEUKhROhnCzrwsrsNuRIHZB62atXtjBIuMAI0VpnJJyeVxy5la8AJv:lqrEJhROh8rwKsNrDK9xM3cJyeg0Jv"
client = MongoClient(envget('metadata.host'), envget('metadata.port'))
db = client[envget('db_metadata_name')]
coll_meta = db[envget('db_metadata_collection')]
print("loading")
f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1})
results = []
for f in f1:
    results.append(f)
print("compearing")
count = 0
reset = 0
for a in results:
    try:
        res = ssdeep.compare(a["fuzzy_hash"], fuzzy_to_compare)
    except Exception as e:
        print(str(e))
        continue
    if(res >= 50):
        print("%s - %s" % (res, a["file_id"]))
    # print count
    # reset+=1; count+=1
    # if(reset>=1000):
    #     print(str(count)+" procesados")
    #     reset=0
def hardcode(): hash1 = ssdeep.hash_from_file('{location of first executable}') # be sure to fill in the location of the executable hash2 = ssdeep.hash_from_file('{location of second executable}') print(ssdeep.compare(hash1, hash2))
def run(self):
    """Worker loop: consume fuzzed-domain dicts from self.jobs and enrich each
    in place with DNS records, MX spy check, WHOIS dates, GeoIP country,
    service banners and an ssdeep similarity score against the original page.

    Keys added to each ``domain`` dict (all optional, best-effort):
    'ns', 'a', 'aaaa', 'mx', 'mx-spy', 'created', 'updated', 'country',
    'banner-http', 'banner-smtp', 'ssdeep'.

    Fixes vs. the previous revision:
    * ``req.status_code / 100 == 2`` used true division, which under
      Python 3 is only equal to 2 for status 200 (e.g. 204/100 == 2.04);
      replaced with floor division so any 2xx response qualifies.
    * ``domain['domain'] is not self.domain_orig`` compared string identity,
      not equality; replaced with ``!=``.
    """
    while not self.kill_received:
        domain = self.jobs.get()

        if self.option_extdns:
            resolv = dns.resolver.Resolver()
            resolv.lifetime = REQUEST_TIMEOUT_DNS
            resolv.timeout = REQUEST_TIMEOUT_DNS

            # SOA lookup doubles as an existence probe; all lookups are
            # best-effort and failures simply leave the key unset.
            try:
                ans = resolv.query(domain['domain'], 'SOA')
                domain['ns'] = str(sorted(ans)[0]).split(' ')[0][:-1].lower()
            except Exception:
                pass

            if 'ns' in domain:
                try:
                    ans = resolv.query(domain['domain'], 'A')
                    domain['a'] = str(sorted(ans)[0])
                except Exception:
                    pass

                try:
                    ans = resolv.query(domain['domain'], 'AAAA')
                    domain['aaaa'] = str(sorted(ans)[0])
                except Exception:
                    pass

                try:
                    ans = resolv.query(domain['domain'], 'MX')
                    mx = str(sorted(ans)[0].exchange)[:-1].lower()
                    if mx:
                        domain['mx'] = mx
                except Exception:
                    pass
        else:
            # Fallback without the external DNS module: A/AAAA via getaddrinfo.
            try:
                ip = socket.getaddrinfo(domain['domain'], 80)
            except Exception:
                pass
            else:
                for j in ip:
                    if '.' in j[4][0]:
                        domain['a'] = j[4][0]
                        break
                for j in ip:
                    if ':' in j[4][0]:
                        domain['aaaa'] = j[4][0]
                        break

        if self.option_mxcheck:
            if 'mx' in domain:
                # Fixed: compare string values, not object identity.
                if domain['domain'] != self.domain_orig:
                    if self.__mxcheck(domain['mx'], self.domain_orig, domain['domain']):
                        domain['mx-spy'] = True

        if self.option_whois:
            if 'ns' in domain and 'a' in domain:
                try:
                    whoisdb = whois.query(domain['domain'])
                    domain['created'] = str(whoisdb.creation_date).replace(' ', 'T')
                    domain['updated'] = str(whoisdb.last_updated).replace(' ', 'T')
                except Exception:
                    pass

        if self.option_geoip:
            if 'a' in domain:
                gi = GeoIP.open(FILE_GEOIP, GeoIP.GEOIP_INDEX_CACHE | GeoIP.GEOIP_CHECK_CACHE)
                try:
                    country = gi.country_name_by_addr(domain['a'])
                except Exception:
                    pass
                else:
                    if country:
                        domain['country'] = country.split(',')[0]

        if self.option_banners:
            if 'a' in domain:
                banner = self.__banner_http(domain['a'], domain['domain'])
                if banner:
                    domain['banner-http'] = banner
            if 'mx' in domain:
                banner = self.__banner_smtp(domain['mx'])
                if banner:
                    domain['banner-smtp'] = banner

        if self.option_ssdeep:
            if 'a' in domain:
                try:
                    req = requests.get(self.uri_scheme + '://' + domain['domain'] + self.uri_path + self.uri_query,
                                       timeout=REQUEST_TIMEOUT_HTTP, headers={'User-Agent': 'Mozilla/5.0 (dnstwist)'})
                    ssdeep_fuzz = ssdeep.hash(req.text)
                except Exception:
                    pass
                else:
                    # Fixed: floor division so every 2xx status (200-299)
                    # counts as success, not just 200.
                    if req.status_code // 100 == 2:
                        domain['ssdeep'] = ssdeep.compare(self.ssdeep_orig, ssdeep_fuzz)

        self.jobs.task_done()
for dico in dico_path_set: # Opening dico if dico == filedicopath_today: opened_dico.append([dico, today_dico]) else: with open(dico, 'r') as fp: opened_dico.append([dico, json.load(fp)]) #retrieve hash from paste paste_hash = PST._get_p_hash() # Go throught the Database of the dico (of the month) threshold_dup = 99 for dico_name, dico in opened_dico: for dico_key, dico_hash in dico.items(): percent = ssdeep.compare(dico_hash, paste_hash) if percent > threshold_dup: db = dico_name[-6:] # Go throught the Database of the dico filter (month) r_serv_dico = dico_redis[db] # index of paste index_current = r_serv_dico.get(dico_hash) paste_path = r_serv_dico.get(index_current) if paste_path != None: hash_dico[dico_hash] = (paste_path, percent) #print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent) print ' ' + PST.p_path[44:] + ', ' + paste_path[ 44:] + ', ' + str(percent)
print("%s documentos encontrados"%(f,)) #for a in f: # print(a["file_id"]) f=coll_meta.count({"particular_header.packer_detection":"False"}) print("%s documentos falsos"%(f,)) f=coll_meta.count({"particular_header.packer_detection":"Unknown"}) print("%s documentos desconocidos"%(f,)) """ f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}) l = [] for f in f1: l.append(f) count = 0 for a in l: count += 1 for b in l[count:]: res = ssdeep.compare(a["fuzzy_hash"], b["fuzzy_hash"]) if(res > 0): print("%s - %s - %s" % (res, a["file_id"], b["file_id"])) print("***** %s ******" % (count,)) # raw_input()
def compare(self, hash1, hash2): """Compare hashes.""" return ssdeep.compare(hash1, hash2)
def run_ssdeep_scans(self): with self.data_lock: cid_list = list(self.crits_data['indicators'].keys()) for cid in cid_list: # Ignore processed indicators with self.data_lock: if self.crits_data['indicators'][cid]['completed']: continue log.info('Running ssdeep scan on indicator {}'.format(cid)) find_p = Popen([ 'find', '/mnt/storage', '-maxdepth', '2', '-mindepth', '2', '-type', 'f', '-not', '-name', '"*.*"' ], stdout=subprocess.PIPE) head_p = Popen(['head', '-n', self.scan_count], stdin=find_p.stdout, stdout=subprocess.PIPE) stdout, stderr = head_p.communicate() files = stdout.decode('ascii').splitlines() failed = False for f in files: file_hash = ssdeep.hash_from_file(f) with self.data_lock: indicator_hash = self.crits_data['indicators'][cid][ 'value'] percent_match = ssdeep.compare(file_hash, indicator_hash) if percent_match > self.match_threshold: # This indicator fails FAQueue log.info( 'Indicator {} failed with percentage of {}'.format( cid, percent_match)) failed = True # CarbonBlack returns a json report of the file with details report = self._get_json_report(f) if report: with self.data_lock: self.crits_data['indicators'][cid][ 'results'].append({ 'file_matched': f, 'score': percent_match, 'report': report, 'total_hits': 1 }) self._attach_json_report_observables(cid, report) else: with self.data_lock: self.crits_data['indicators'][cid][ 'results'].append({ 'file_matched': f, 'score': percent_match, 'total_hits': 1 }) if failed: with self.data_lock: self.crits_data['indicators'][cid][ 'status'] = 'In Progress' self.crits_data['indicators'][cid]['completed'] = True else: with self.data_lock: self.crits_data['indicators'][cid]['status'] = 'Analyzed' self.crits_data['indicators'][cid]['completed'] = True
sys.path.insert(0, path) import ssdeep fuzzy_to_compare = "12288:lTurEUKhROhnCzrwsrsNuRIHZB62atXtjBIuMAI0VpnJJyeVxy5la8AJv:lqrEJhROh8rwKsNrDK9xM3cJyeg0Jv" client = MongoClient(env["metadata"]["host"], env["metadata"]["port"]) db = client[env["db_metadata_name"]] coll_meta = db[env["db_metadata_collection"]] print("loading") f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}) l = [] for f in f1: l.append(f) print("compearing") count = 0 reset = 0 for a in l: try: res = ssdeep.compare(a["fuzzy_hash"], fuzzy_to_compare) except Exception, e: print str(e) continue if (res >= 50): print("%s - %s" % (res, a["file_id"])) #print count #reset+=1; count+=1 #if(reset>=1000): # print(str(count)+" procesados") # reset=0
def get_ssdeep_comparison(first, second): diff = ssdeep.compare(first, second) return diff
def run(self):
    """Worker loop: drain fuzzed-domain dicts from self.jobs and enrich each
    in place with DNS records, MX spy check, WHOIS dates, GeoIP country,
    service banners and an ssdeep similarity score against the original page.

    The thread exits (sets self.kill_received) when the queue is empty.
    Keys added (all best-effort): 'dns-ns', 'dns-a', 'dns-aaaa', 'dns-mx',
    'mx-spy', 'whois-created', 'whois-updated', 'geoip-country',
    'banner-http', 'banner-smtp', 'ssdeep-score'.  DNS lookup failures of
    the SERVFAIL kind are recorded as the sentinel list ['!ServFail'].
    """
    while not self.kill_received:
        try:
            domain = self.jobs.get(block=False)
        except queue.Empty:
            # Queue drained: signal ourselves to stop.
            self.kill_received = True
            return

        # Work with the ACE/punycode form of the name; converted back at
        # the bottom of the loop.
        domain['domain-name'] = domain['domain-name'].encode('idna').decode()

        if self.option_extdns:
            if self.nameservers:
                resolv = dns.resolver.Resolver(configure=False)
                resolv.nameservers = self.nameservers
            else:
                resolv = dns.resolver.Resolver()
            resolv.lifetime = REQUEST_TIMEOUT_DNS * REQUEST_RETRIES_DNS
            resolv.timeout = REQUEST_TIMEOUT_DNS

            # Flags gate the later enrichment stages.
            nxdomain = False
            dns_ns = False
            dns_a = False
            dns_aaaa = False
            dns_mx = False

            try:
                domain['dns-ns'] = self.__answer_to_list(resolv.query(domain['domain-name'], rdtype=dns.rdatatype.NS))
                dns_ns = True
            except dns.resolver.NXDOMAIN:
                nxdomain = True
                pass
            except dns.resolver.NoNameservers:
                domain['dns-ns'] = ['!ServFail']
                pass
            except DNSException:
                pass

            # A/AAAA only make sense if the name exists at all.
            if nxdomain is False:
                try:
                    domain['dns-a'] = self.__answer_to_list(resolv.query(domain['domain-name'], rdtype=dns.rdatatype.A))
                    dns_a = True
                except dns.resolver.NoNameservers:
                    domain['dns-a'] = ['!ServFail']
                    pass
                except DNSException:
                    pass

                try:
                    domain['dns-aaaa'] = self.__answer_to_list(resolv.query(domain['domain-name'], rdtype=dns.rdatatype.AAAA))
                    dns_aaaa = True
                except dns.resolver.NoNameservers:
                    domain['dns-aaaa'] = ['!ServFail']
                    pass
                except DNSException:
                    pass

            # MX requires both an existing name and a working NS answer.
            if nxdomain is False and dns_ns is True:
                try:
                    domain['dns-mx'] = self.__answer_to_list(resolv.query(domain['domain-name'], rdtype=dns.rdatatype.MX))
                    dns_mx = True
                except dns.resolver.NoNameservers:
                    domain['dns-mx'] = ['!ServFail']
                    pass
                except DNSException:
                    pass
        else:
            # Fallback without the external DNS module: A/AAAA via getaddrinfo.
            try:
                ip = socket.getaddrinfo(domain['domain-name'], 80)
            except socket.gaierror as e:
                # errno -3 == EAI_AGAIN (temporary resolver failure).
                if e.errno == -3:
                    domain['dns-a'] = ['!ServFail']
                pass
            except Exception:
                pass
            else:
                domain['dns-a'] = list()
                domain['dns-aaaa'] = list()
                for j in ip:
                    if '.' in j[4][0]:
                        domain['dns-a'].append(j[4][0])
                    if ':' in j[4][0]:
                        domain['dns-aaaa'].append(j[4][0])
                domain['dns-a'] = sorted(domain['dns-a'])
                domain['dns-aaaa'] = sorted(domain['dns-aaaa'])
                dns_a = True
                dns_aaaa = True

        if self.option_mxcheck:
            if dns_mx is True:
                # NOTE(review): identity comparison ('is not') on strings —
                # presumably meant as '!='; confirm before relying on it.
                if domain['domain-name'] is not self.domain_orig:
                    if self.__mxcheck(domain['dns-mx'][0], self.domain_orig, domain['domain-name']):
                        domain['mx-spy'] = True

        if self.option_whois:
            if nxdomain is False and dns_ns is True:
                try:
                    whoisdb = whois.query(domain['domain-name'])
                    domain['whois-created'] = str(whoisdb.creation_date).split(' ')[0]
                    domain['whois-updated'] = str(whoisdb.last_updated).split(' ')[0]
                except Exception:
                    pass

        if self.option_geoip:
            if dns_a is True:
                gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
                try:
                    country = gi.country_name_by_addr(domain['dns-a'][0])
                except Exception:
                    pass
                else:
                    if country:
                        domain['geoip-country'] = country.split(',')[0]

        if self.option_banners:
            if dns_a is True:
                banner = self.__banner_http(domain['dns-a'][0], domain['domain-name'])
                if banner:
                    domain['banner-http'] = banner
            if dns_mx is True:
                banner = self.__banner_smtp(domain['dns-mx'][0])
                if banner:
                    domain['banner-smtp'] = banner

        if self.option_ssdeep:
            if dns_a is True or dns_aaaa is True:
                try:
                    req = requests.get(self.uri_scheme + '://' + domain['domain-name'] + self.uri_path + self.uri_query,
                                       timeout=REQUEST_TIMEOUT_HTTP, headers={'User-Agent': self.useragent}, verify=False)
                except Exception:
                    pass
                else:
                    # Only score 2xx responses that did not redirect back to
                    # the original effective URL (ignoring the query string).
                    if req.status_code // 100 == 2 and req.url.split('?')[0] != self.ssdeep_effective_url:
                        # Hash with whitespace stripped and lowercased to
                        # reduce cosmetic noise in the comparison.
                        ssdeep_curr = ssdeep.hash(''.join(req.text.split()).lower())
                        domain['ssdeep-score'] = ssdeep.compare(self.ssdeep_init, ssdeep_curr)

        # Convert back from punycode for display.
        domain['domain-name'] = domain['domain-name'].encode().decode('idna')
        self.jobs.task_done()
def run(self): while not self.kill_received: domain = self.jobs.get() if module_dnspython: resolv = dns.resolver.Resolver() # resolv.lifetime = REQUEST_TIMEOUT_DNS resolv.timeout = REQUEST_TIMEOUT_DNS try: ns = resolv.query(domain["domain"], "NS") domain["ns"] = str(ns[0])[:-1].lower() except Exception: pass if "ns" in domain: try: ns = resolv.query(domain["domain"], "A") domain["a"] = str(ns[0]) except Exception: pass try: ns = resolv.query(domain["domain"], "AAAA") domain["aaaa"] = str(ns[0]) except Exception: pass try: mx = resolv.query(domain["domain"], "MX") domain["mx"] = str(mx[0].exchange)[:-1].lower() except Exception: pass else: try: ip = socket.getaddrinfo(domain["domain"], 80) except Exception: pass else: for j in ip: if "." in j[4][0]: domain["a"] = j[4][0] break for j in ip: if ":" in j[4][0]: domain["aaaa"] = j[4][0] break if module_whois and args.whois: if "ns" in domain and "a" in domain: try: whoisdb = whois.query(domain["domain"]) domain["created"] = str(whoisdb.creation_date).replace(" ", "T") domain["updated"] = str(whoisdb.last_updated).replace(" ", "T") except Exception: pass if module_geoip and args.geoip: if "a" in domain: gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE) try: country = gi.country_name_by_addr(domain["a"]) except Exception: pass else: if country: domain["country"] = country if args.banners: if "a" in domain: banner = self.__banner_http(domain["a"], domain["domain"]) if banner: domain["banner-http"] = banner if "mx" in domain: banner = self.__banner_smtp(domain["mx"]) if banner: domain["banner-smtp"] = banner if args.ssdeep and module_requests and module_ssdeep and self.orig_domain_ssdeep: if "a" in domain: try: req = requests.get("http://" + domain["domain"], timeout=REQUEST_TIMEOUT_HTTP) fuzz_domain_ssdeep = ssdeep.hash(req.text) except Exception: pass else: domain["ssdeep"] = ssdeep.compare(self.orig_domain_ssdeep, fuzz_domain_ssdeep) self.jobs.task_done()
def is_malware_to_focus(elffile): return \ ssdeep.compare(elffile.features['lstrfuzzy'], '12:TKLJUWLLSQzisKFl1oXNt87U9fPG9K1pSzMT:WfzirnYVPG7gT') >= 50 or \ ssdeep.compare(elffile.features['lstrfuzzy'], '12:lQn5o+ZirjsgAk3MRW7ll8+XS+wPKs0fI:OndZirjbp5l3Ex0fI') >= 50 or \ ssdeep.compare(elffile.features['lstrfuzzy'], '12:GXkVn5o+ZirjsgAk3MRW7ll8+XS+wPKs04:GXAndZirjbp5l3Ex04') >= 50
#! /usr/bin/env python3 # ファイルを投げて比較 # Usage: ./file_ssdeep_compare.py FILE1 FILE2 import ssdeep import sys print( ssdeep.compare(ssdeep.hash_from_file(sys.argv[1]), ssdeep.hash_from_file(sys.argv[2])))
def decide(self, data): feature = self.feature.get_feature(data) if not feature: return False return ssdeep.compare(feature, self.fuzzyhash) > self.threshold
exeSql.execute(fetchfromdb) except mdb.Error, e: print e dbrecords = exeSql.fetchall() maxlen, minlen = int(record['len'] * 1.10), int(record['len'] * 0.90) count = 0 for d_record in dbrecords: if d_record[2] >= minlen and d_record[2] <= maxlen: if record['s_id'] == d_record[0]: update(record, d_record[0]) else: ratio = ssdeep.compare(record['ssdeep'], d_record[1]) #if ratio >= 85: if (int(record['len']) <= 150 and ratio >=95) or (int(record['len']) > 150 and ratio >= 80): update(record, d_record[0]) else: count += 1 else: count += 1 if count == len(dbrecords): insert(record) def insert(record): # Inserting data in main db print "Inserting new spam!"
# Open selected dico range opened_dico = [] for dico_name in dico_range_list: opened_dico.append([dico_name, dico_redis[dico_name]]) # retrieve hash from paste paste_hashes = PST._get_p_hash() # Go throught the Database of the dico (of the month) for curr_dico_name, curr_dico_redis in opened_dico: for hash_type, paste_hash in paste_hashes.items(): for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type): try: if hash_type == 'ssdeep': percent = 100-ssdeep.compare(dico_hash, paste_hash) else: percent = tlsh.diffxlen(dico_hash, paste_hash) if percent > 100: percent = 100 threshold_duplicate = threshold_set[hash_type] if percent < threshold_duplicate: percent = 100 - percent if hash_type == 'ssdeep' else percent #recovert the correct percent value for ssdeep # Go throught the Database of the dico filter (month) r_serv_dico = dico_redis[curr_dico_name] # index of paste index_current = r_serv_dico.get(dico_hash) index_current = index_current paste_path = r_serv_dico.get(index_current)
return status, newhash if __name__ == "__main__": args = docopt(__doc__, version="0.1") if args["hash"]: status, newhash = fetchpage(args["<url>"]) print("[ {} ] for {}: {}".format(status, args["<url>"], newhash)) elif args["check"]: catcherror = 0 try: status, newhash = fetchpage(args["<url>"]) verdict = "Verdict Never Set." color = "green" compare = ssdeep.compare(newhash, args["<lasthash>"]) if compare == 0: verdict = "Site Changed Completely." color = "magenta" elif compare == 100: verdict = "Site Unchanged." color = "green" elif compare < 100 and compare >= 80: verdict = "Site Modified Slightly." color = "yellow" elif compare < 80 and compare >= 50: verdict = "Site Modified Significantly." color = "red" elif compare < 50 and compare > 0: verdict = "Site Modified Heavily." color = "magenta"
def compare(self, h1, h2): return ssdeep.compare(str(h1), str(h2))
def compare(self, other): return ssdeep.compare(self.hash_value, other.hash_value)
def main():
    """CLI entry point: generate fuzzed look-alike domains for the given
    domain and enrich/report them (DNS, WHOIS, GeoIP, banners, ssdeep).

    Returns 0 on normal completion; exits early on bad input or no args.
    NOTE(review): Python 2 era code relying on module globals (module_*,
    display, display_csv, FG_*/ST_* color codes, fuzz_domain, etc.).
    """
    parser = argparse.ArgumentParser(
        description='''Find similar-looking domains that adversaries can use to attack you. Can detect fraud, phishing attacks and corporate espionage. Useful as an additional source of targeted threat intelligence.''',
        epilog='''Questions? Complaints? You can reach the author at <*****@*****.**>'''
    )
    parser.add_argument('domain', help='domain name to check')
    parser.add_argument('-c', '--csv', action='store_true', help='print output in CSV format')
    parser.add_argument('-r', '--registered', action='store_true', help='show only registered domain names')
    parser.add_argument('-w', '--whois', action='store_true', help='perform lookup for WHOIS creation/modification date (slow)')
    parser.add_argument('-g', '--geoip', action='store_true', help='perform lookup for GeoIP location')
    parser.add_argument('-b', '--banners', action='store_true', help='determine HTTP and SMTP service banners')
    parser.add_argument('-s', '--ssdeep', action='store_true', help='fetch web pages and compare fuzzy hashes to evaluate similarity')

    # No arguments at all: show help and exit cleanly.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(0)

    # Parsed options are shared with worker code through a module global.
    global args
    args = parser.parse_args()

    # ASCII-art banner.  NOTE(review): interior whitespace reconstructed —
    # the collapsed source lost the string's original line breaks; confirm
    # alignment against the upstream banner.
    display(ST_BRIGHT + FG_MAGENTA + '''
     _           _            _     _
  __| |_ __  ___| |___      _(_)___| |_
 / _` | '_ \/ __| __\ \ /\ / / / __| __|
| (_| | | | \__ \ |_ \ V  V /| \__ \ |_
 \__,_|_| |_|___/\__|  \_/\_/ |_|___/\__| %s

''' % __version__ + FG_RESET)

    if not validate_domain(args.domain):
        sys.stderr.write('ERROR: invalid domain name!\n')
        sys.exit(-1)

    domains = fuzz_domain(args.domain.lower())

    # Warn about missing optional modules that limit functionality.
    if not module_dnspython:
        sys.stderr.write('NOTICE: Missing module: dnspython - DNS features limited!\n')
    if not module_geoip and args.geoip:
        sys.stderr.write('NOTICE: Missing module: GeoIP - geographical location not available!\n')
    if not module_whois and args.whois:
        sys.stderr.write('NOTICE: Missing module: whois - database not accessible!\n')
    if not module_ssdeep and args.ssdeep:
        sys.stderr.write('NOTICE: Missing module: ssdeep - fuzzy hashes not available!\n')
    if not module_requests and args.ssdeep:
        sys.stderr.write('NOTICE: Missing module: Requests - web page downloads not possible!\n')

    # Fetch the original page once and fuzzy-hash it as the reference.
    if args.ssdeep and module_ssdeep and module_requests:
        display('Fetching web page from: http://' + args.domain.lower() + '/ [following redirects] ... ')
        try:
            req = requests.get('http://' + args.domain.lower(), timeout=2)
        except:
            # Download failed: disable ssdeep comparisons entirely.
            display('Failed!\n')
            args.ssdeep = False
            pass
        else:
            display('%d %s (%d bytes)\n' % (req.status_code, req.reason, len(req.text)))
            orig_domain_ssdeep = ssdeep.hash(req.text)

    display('Processing %d domains ' % len(domains))

    signal.signal(signal.SIGINT, sigint_handler)

    total_hits = 0

    # Enrich each fuzzed domain in place (same best-effort pattern as the
    # threaded workers elsewhere in this file).
    for i in range(0, len(domains)):
        if module_dnspython:
            resolv = dns.resolver.Resolver()
            resolv.lifetime = 1
            resolv.timeout = 1

            try:
                ns = resolv.query(domains[i]['domain'], 'NS')
                domains[i]['ns'] = str(ns[0])[:-1].lower()
            except:
                pass

            if 'ns' in domains[i]:
                try:
                    ns = resolv.query(domains[i]['domain'], 'A')
                    domains[i]['a'] = str(ns[0])
                except:
                    pass

                try:
                    ns = resolv.query(domains[i]['domain'], 'AAAA')
                    domains[i]['aaaa'] = str(ns[0])
                except:
                    pass

                try:
                    mx = resolv.query(domains[i]['domain'], 'MX')
                    domains[i]['mx'] = str(mx[0].exchange)[:-1].lower()
                except:
                    pass
        else:
            try:
                ip = socket.getaddrinfo(domains[i]['domain'], 80)
            except:
                pass
            else:
                for j in ip:
                    if '.' in j[4][0]:
                        domains[i]['a'] = j[4][0]
                        break
                for j in ip:
                    if ':' in j[4][0]:
                        domains[i]['aaaa'] = j[4][0]
                        break

        if module_whois and args.whois:
            if 'ns' in domains[i] or 'a' in domains[i]:
                try:
                    whoisdb = whois.query(domains[i]['domain'])
                    domains[i]['created'] = str(whoisdb.creation_date).replace(' ', 'T')
                    domains[i]['updated'] = str(whoisdb.last_updated).replace(' ', 'T')
                except:
                    pass

        if module_geoip and args.geoip:
            if 'a' in domains[i]:
                gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
                try:
                    country = gi.country_name_by_addr(domains[i]['a'])
                except:
                    pass
                else:
                    if country:
                        domains[i]['country'] = country

        if args.banners:
            if 'a' in domains[i]:
                banner = http_banner(domains[i]['a'], domains[i]['domain'])
                if banner:
                    domains[i]['banner-http'] = banner
            if 'mx' in domains[i]:
                banner = smtp_banner(domains[i]['mx'])
                if banner:
                    domains[i]['banner-smtp'] = banner

        if module_ssdeep and module_requests and args.ssdeep:
            if 'a' in domains[i]:
                try:
                    req = requests.get('http://' + domains[i]['domain'], timeout=1)
                    fuzz_domain_ssdeep = ssdeep.hash(req.text)
                except:
                    pass
                else:
                    domains[i]['ssdeep'] = ssdeep.compare(orig_domain_ssdeep, fuzz_domain_ssdeep)

        # Progress indicator: '!' for a registered hit, '.' otherwise.
        if 'a' in domains[i] or 'ns' in domains[i]:
            display(FG_YELLOW + '!' + FG_RESET)
            total_hits += 1
        else:
            display('.')

    display(' %d hit(s)\n\n' % total_hits)

    display_csv('Generator,Domain,A,AAAA,MX,NS,Country,Created,Updated,SSDEEP\n')

    # Render the report, one line per fuzzed domain.
    for i in domains:
        info = ''

        if 'a' in i:
            info += i['a']
            if 'country' in i:
                info += FG_CYAN + '/' + i['country'] + FG_RESET
            if 'banner-http' in i:
                info += ' %sHTTP:%s"%s"%s' % (FG_GREEN, FG_CYAN, i['banner-http'], FG_RESET)
        elif 'ns' in i:
            info += '%sNS:%s%s%s' % (FG_GREEN, FG_CYAN, i['ns'], FG_RESET)

        if 'aaaa' in i:
            info += ' ' + i['aaaa']

        if 'mx' in i:
            info += ' %sMX:%s%s%s' % (FG_GREEN, FG_CYAN, i['mx'], FG_RESET)
            if 'banner-smtp' in i:
                info += ' %sSMTP:%s"%s"%s' % (FG_GREEN, FG_CYAN, i['banner-smtp'], FG_RESET)

        if 'created' in i and 'updated' in i and i['created'] == i['updated']:
            info += ' %sCreated/Updated:%s%s%s' % (FG_GREEN, FG_CYAN, i['created'], FG_RESET)
        else:
            if 'created' in i:
                info += ' %sCreated:%s%s%s' % (FG_GREEN, FG_CYAN, i['created'], FG_RESET)
            if 'updated' in i:
                info += ' %sUpdated:%s%s%s' % (FG_GREEN, FG_CYAN, i['updated'], FG_RESET)

        if 'ssdeep' in i:
            if i['ssdeep'] > 0:
                info += ' %sSSDEEP:%s%d%%%s' % (FG_GREEN, FG_CYAN, i['ssdeep'], FG_RESET)

        if not info:
            info = '-'

        # With --registered, suppress rows that carry no information.
        if (args.registered and info != '-') or not args.registered:
            display('%s%-15s%s %-15s %s\n' % (FG_BLUE, i['type'], FG_RESET, i['domain'], info))
            display_csv(
                '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (i.get('type'), i.get('domain'), i.get('a', ''),
                i.get('aaaa', ''), i.get('mx', ''), i.get('ns', ''), i.get('country', ''),
                i.get('created', ''), i.get('updated', ''), str(i.get('ssdeep', '')))
            )

    display(FG_RESET + ST_RESET)

    return 0
soup = BeautifulSoup(doc2, "lxml") doc2 = soup.get_text() doc2_hash = ssdeep.hash(doc2) except: doc2 = rawdoc2 doc2_hash = ssdeep.hash(doc2) entropy1 = H(doc2.replace(' ', ''), range_printable) entropy2 = Entropy(doc2) doc1TokenList = re.split(r'(\d+|\W+)', doc1) doc2TokenList = re.split(r'(\d+|\W+)', doc2) kldScore = KLD(doc1TokenList, doc2TokenList) kldScore = 0 simScore = compare(doc1, doc2) hashSimScore = ssdeep.compare(doc1_hash, doc2_hash) normalizedSimScore = ((90 - simScore) / 90) * 100 try: print('insert into ' + dbname + ' values (NULL,\'' + url + '\',\'' + citedByNode + '\',\'' + initialDate + '\',\'copy\',' + str(count) + ',\'' + htype + '\',\'' + laststatus + '\',\'' + statuschain + '\',\'' + citeDate + '\',' + size + ',\'' + sim + '\',\'' + hash + '\',' + str(normalizedSimScore) + ',' + str(entropy1) + ',' + str(entropy2) + ',' + str(kldScore) + ');') except: print(' a copy failure occurred ') # try: # print ("\"%s\",\"%s\",\"%s\",%f,%f,%f" % (url, original, copy, simScore, hashSimScore, normalizedSimScore))