def create_enums(self, d):
    """Build enum declarations for groups of names with matching fuzzy hashes.

    Every key of ``d`` is hashed with a ``CKoretFuzzyHashing`` instance
    configured with ``bsize = 1`` and ``output_size = 8``. The first two of
    the three ``;``-separated hash segments form the grouping key; names that
    share a grouping key end up in the same group.

    Groups holding a single name are skipped. For every other group the enum
    name comes from ``self.get_enum_name(group)`` and the members are emitted
    as sorted ``name = value,`` lines between ``enum <name> {`` and ``};``.

    Returns a dict mapping each enum name to its declaration text. The
    ``DEFAULT_ENUM`` entry is initialised to an empty list and is not filled
    in by this method.
    """
    hasher = CKoretFuzzyHashing()
    hasher.bsize = 1
    hasher.output_size = 8

    # Bucket the names by the first two segments of their fuzzy hash.
    groups = {}
    for name in d:
        first, second, _ = hasher.hash_bytes(name).split(";")
        groups.setdefault("%s-%s" % (first, second), []).append(name)

    enums = {DEFAULT_ENUM: []}
    for members in groups.values():
        if len(members) == 1:
            continue

        enum_name = self.get_enum_name(members)
        body = sorted(
            " %s = %s, " % (member, str(d[member])) for member in members
        )
        enums[enum_name] = "\n".join(["enum %s {" % enum_name] + body + ["};"])

    return enums
class TestKFuzzy(unittest.TestCase):
    """Tests for ``CKoretFuzzyHashing`` run against a fixed input buffer."""

    kfd = None  # hasher under test; replaced with a fresh instance in setUp
    buf = ""  # shared input data; rebuilt in setUp

    def setUp(self):
        """Create a fresh hasher and the shared input buffer.

        The buffer contains the characters with codes 0..254, in ascending
        order, each repeated 512 times.
        """
        self.kfd = CKoretFuzzyHashing()
        self.buf = "".join(chr(c) * 512 for c in xrange(0, 255))

    def testDHA(self):
        """Default hashing algorithm (DHA) yields the known hash."""
        key = "AgIEBAQEBgYGBggICAgKCgoKDAwMDA4O;BAQGBggICgoMDA4OEBASEhQUFhYYGBoa;+/v5+ff39fXz8/Hx7+/t7evr6enn5+Xl"
        result = self.kfd.hash_bytes(self.buf)
        self.assertEqual(key, result)

    def testFHA(self):
        """The ``_fast_hash`` algorithm yields the known hash."""
        key = "IAIEBggKDA4QEhQWGBocHg;AgQGCAoMDhASFBYYGhweICIkJigqLC4w;vb/Bw8XHycvNz9HT1dfZ293f4ePl5+nr"
        self.kfd.algorithm = self.kfd._fast_hash
        result = self.kfd.hash_bytes(self.buf)
        self.assertEqual(key, result)

    def testSimplifiedKnownHash(self):
        """The ``simplified`` algorithm yields the known hash for buf * 16.

        This test used to be named ``testSimplified`` as well, so the later
        definition of that name replaced it and it never ran.
        NOTE(review): because it was never executed, confirm that the
        expected key below is still current.
        """
        key = "/v4DA/39Bgb8/AkJ+/sMDPr6Dw/5+RIS;AYB//n/+fv1+/X38ffx8+3z7e/p7+nr5;+3z8ff19/X7+fv5/f4ABgAGBAoECggOC"
        data = self.buf * 16
        self.kfd.algorithm = self.kfd.simplified
        result = self.kfd.hash_bytes(data)
        self.assertEqual(key, result)

    def testOutputSize(self):
        """The hash length equals ``output_size * 3 + 2``."""
        self.kfd.algorithm = None
        hash_length = len(self.kfd.hash_bytes(self.buf))
        # Three segments of output_size characters plus two ";" separators.
        expected = self.kfd.output_size * 3 + 2
        self.assertEqual(hash_length, expected)

    def testNullBlocks(self):
        """With ``reduce_errors`` set, leading NUL blocks leave no "AA" in the hash."""
        data = "\x00" * 8192 + self.buf
        self.kfd.algorithm = None
        self.kfd.reduce_errors = True
        h = self.kfd.hash_bytes(data)
        self.assertTrue("AA" not in h)

    def testSimplified(self):
        """Compare the ``simplified`` and ``_fast_hash`` results for buf + buf.

        Passes when the full hash length minus the edit distance between the
        two hashes is below 16.
        """
        data = self.buf + self.buf
        self.kfd.algorithm = self.kfd.simplified
        h = self.kfd.hash_bytes(data)
        self.kfd.algorithm = self.kfd._fast_hash
        h2 = self.kfd.hash_bytes(data)
        full_length = (self.kfd.output_size * 3) + 2
        self.assertTrue(full_length - self.kfd.edit_distance(h, h2) < 16)
class TestKFuzzy(unittest.TestCase):
    """Tests for ``CKoretFuzzyHashing`` run against a fixed input buffer."""

    kfd = None  # hasher under test; replaced with a fresh instance in setUp
    buf = ""  # shared input data; rebuilt in setUp

    def setUp(self):
        """Create a fresh hasher and the shared input buffer.

        The buffer contains the characters with codes 0..254, in ascending
        order, each repeated 512 times.
        """
        self.kfd = CKoretFuzzyHashing()
        self.buf = "".join(chr(c) * 512 for c in xrange(0, 255))

    def testDHA(self):
        """Default hashing algorithm (DHA) yields the known hash."""
        key = "AgIEBAQEBgYGBggICAgKCgoKDAwMDA4O;BAQGBggICgoMDA4OEBASEhQUFhYYGBoa;+/v5+ff39fXz8/Hx7+/t7evr6enn5+Xl"
        result = self.kfd.hash_bytes(self.buf)
        self.assertEqual(key, result)

    def testFHA(self):
        """The ``_fast_hash`` algorithm yields the known hash."""
        key = "IAIEBggKDA4QEhQWGBocHg;AgQGCAoMDhASFBYYGhweICIkJigqLC4w;vb/Bw8XHycvNz9HT1dfZ293f4ePl5+nr"
        self.kfd.algorithm = self.kfd._fast_hash
        result = self.kfd.hash_bytes(self.buf)
        self.assertEqual(key, result)

    def testSimplifiedKnownHash(self):
        """The ``simplified`` algorithm yields the known hash for buf * 16.

        This test used to be named ``testSimplified`` as well, so the later
        definition of that name replaced it and it never ran.
        NOTE(review): because it was never executed, confirm that the
        expected key below is still current.
        """
        key = "/v4DA/39Bgb8/AkJ+/sMDPr6Dw/5+RIS;AYB//n/+fv1+/X38ffx8+3z7e/p7+nr5;+3z8ff19/X7+fv5/f4ABgAGBAoECggOC"
        data = self.buf * 16
        self.kfd.algorithm = self.kfd.simplified
        result = self.kfd.hash_bytes(data)
        self.assertEqual(key, result)

    def testOutputSize(self):
        """The hash length equals ``output_size * 3 + 2``."""
        self.kfd.algorithm = None
        hash_length = len(self.kfd.hash_bytes(self.buf))
        # Three segments of output_size characters plus two ";" separators.
        expected = self.kfd.output_size * 3 + 2
        self.assertEqual(hash_length, expected)

    def testNullBlocks(self):
        """With ``reduce_errors`` set, leading NUL blocks leave no "AA" in the hash."""
        data = "\x00" * 8192 + self.buf
        self.kfd.algorithm = None
        self.kfd.reduce_errors = True
        h = self.kfd.hash_bytes(data)
        self.assertTrue("AA" not in h)

    def testSimplified(self):
        """Compare the ``simplified`` and ``_fast_hash`` results for buf + buf.

        Passes when the full hash length minus the edit distance between the
        two hashes is below 16.
        """
        data = self.buf + self.buf
        self.kfd.algorithm = self.kfd.simplified
        h = self.kfd.hash_bytes(data)
        self.kfd.algorithm = self.kfd._fast_hash
        h2 = self.kfd.hash_bytes(data)
        full_length = (self.kfd.output_size * 3) + 2
        self.assertTrue(full_length - self.kfd.edit_distance(h, h2) < 16)
def create_enums(self, d):
    """Build enum declarations for groups of names with matching fuzzy hashes.

    Every key of ``d`` is hashed with a ``CKoretFuzzyHashing`` instance
    configured with ``bsize = 1`` and ``output_size = 8``. The first two of
    the three ``;``-separated hash segments form the grouping key; names that
    share a grouping key end up in the same group.

    Groups holding a single name are skipped. For every other group the enum
    name comes from ``self.get_enum_name(group)`` and the members are emitted
    as sorted ``name = value,`` lines between ``enum <name> {`` and ``};``.

    Value formatting:
      * A ``decimal.Decimal`` whose engineering string contains no ``.`` and
        parses as an integer is written as zero-padded hexadecimal
        (``0x%08x``), with any minus sign placed before the ``0x`` prefix.
      * Everything else is written with ``str()``. This includes Decimals
        such as ``1E+3``, ``NaN`` or ``Infinity``, whose engineering string
        has no ``.`` but is not an integer literal.

    Returns a dict mapping each enum name to its declaration text. The
    ``DEFAULT_ENUM`` entry is initialised to an empty list and is not filled
    in by this method.
    """
    kfh = CKoretFuzzyHashing()
    kfh.bsize = 1
    kfh.output_size = 8

    # Bucket the names by the first two segments of their fuzzy hash.
    fuzzy_hashes = {}
    for key in d:
        hash1, hash2, _ = kfh.hash_bytes(key).split(";")
        new_key = "%s-%s" % (hash1, hash2)
        fuzzy_hashes.setdefault(new_key, []).append(key)

    enums = {DEFAULT_ENUM: []}
    for names in fuzzy_hashes.values():
        if len(names) == 1:
            continue

        enum_name = self.get_enum_name(names)
        lines = []
        for element in names:
            raw = d[element]
            value = None
            if isinstance(raw, decimal.Decimal):
                eng_str = raw.to_eng_string()
                if "." not in eng_str:
                    try:
                        number = int(eng_str)
                    except ValueError:
                        # Exponent notation or a special value: not an
                        # integer literal, so use the plain str() form below.
                        number = None
                    if number is not None:
                        # Keep the sign outside the hex digits so negative
                        # values do not come out as "0x-...".
                        sign = "-" if number < 0 else ""
                        value = "%s0x%08x" % (sign, abs(number))
            if value is None:
                value = str(raw)
            lines.append(" %s = %s, " % (element, value))

        lines.sort()
        lines.insert(0, "enum %s {" % enum_name)
        lines.append("};")
        enums[enum_name] = "\n".join(lines)

    return enums