def test_jarcard_index(self):
    """Check the Jaccard index reported for two overlapping sketches.

    One sketch is fed the stringified integers 0..9999 and the other
    2000..9999, so the exact set overlap is 8000 / 10000 = 0.80. The
    value returned by ``MaxHash.get_jaccard_index`` must agree with
    that to two decimal places.
    """
    full_sketch = MaxHash(8192)
    partial_sketch = MaxHash(8192)

    for token in map(str, range(10000)):
        full_sketch.add(token)
    for token in map(str, range(2000, 10000)):
        partial_sketch.add(token)

    estimate = MaxHash.get_jaccard_index([full_sketch, partial_sketch])
    self.assertAlmostEqual(estimate, 0.80, places=2)
def test_union(self):
    """Check that ``union`` of two sketches compares as expected.

    The sketches for 0..9999 and 2000..11999 are combined with
    ``union``; the exact union of those inputs is 0..11999. Compared
    against a sketch of 0..14999 the exact Jaccard index is
    12000 / 15000 = 0.80, and the reported value must match that to
    two decimal places.
    """
    low_sketch = MaxHash(8192)
    high_sketch = MaxHash(8192)
    reference_sketch = MaxHash(8192)

    # Fill each sketch in turn with the string form of its integers.
    fill_plan = (
        (low_sketch, range(10000)),
        (high_sketch, range(2000, 12000)),
        (reference_sketch, range(15000)),
    )
    for sketch, values in fill_plan:
        for value in values:
            sketch.add(str(value))

    combined = low_sketch.union(high_sketch)
    estimate = MaxHash.get_jaccard_index([reference_sketch, combined])
    self.assertAlmostEqual(estimate, 0.80, places=2)
def test_add(self):
    """Add four distinct strings and expect ``uniq()`` to have length 4."""
    sketch = MaxHash(8192)
    for number in (1, 2, 3, 4):
        sketch.add(str(number))

    distinct = sketch.uniq()
    self.assertEqual(len(distinct), 4)
def test_merge(self):
    """Check that ``merge`` folds another sketch into the receiver.

    The sketch for 2000..11999 has the sketch for 0..9999 merged into
    it; the exact union of those inputs is 0..11999. Compared against
    a sketch of 0..14999 the exact Jaccard index is
    12000 / 15000 = 0.80, and the reported value must match that to
    two decimal places.
    """
    low_sketch = MaxHash(8192)
    high_sketch = MaxHash(8192)
    reference_sketch = MaxHash(8192)

    # Fill each sketch in turn with the string form of its integers.
    fill_plan = (
        (low_sketch, range(10000)),
        (high_sketch, range(2000, 12000)),
        (reference_sketch, range(15000)),
    )
    for sketch, values in fill_plan:
        for value in values:
            sketch.add(str(value))

    # merge() is called for its effect on high_sketch; its return
    # value is not used.
    high_sketch.merge(low_sketch)
    estimate = MaxHash.get_jaccard_index([high_sketch, reference_sketch])
    self.assertAlmostEqual(estimate, 0.80, places=2)
def _inner_deault():
    """Return a new ``MaxHash`` constructed with no arguments.

    The import is performed inside the function, so ``maxhash`` is only
    loaded when this factory is actually called.
    """
    from maxhash import MaxHash

    instance = MaxHash()
    return instance