Exemplo n.º 1
0
 def setUp(self):
     """Build ``self.index`` from the class-level ``data`` fixture.

     Each key of ``self.data`` is converted to a string and paired with its
     text; the resulting list is handed to ``SimhashIndexWithMongo`` with
     ``k=10``.
     """
     pairs = []
     for key, text in self.data.items():
         pairs.append((str(key), text))
     self.index = SimhashIndexWithMongo(pairs, k=10)
 def setUp(self):
     # Index every fixture entry as a (string id, text) pair; the index is
     # created with k=10 and stored on the test instance for later queries.
     self.index = SimhashIndexWithMongo(
         [(str(doc_id), text) for doc_id, text in self.data.items()],
         k=10,
     )
Exemplo n.º 3
0
class TestSimhashIndexWithMongo(TestCase):
    """Near-duplicate lookup checks against ``SimhashIndexWithMongo``."""

    # Fixture texts keyed by integer id; ids are stringified in ``setUp``.
    data = {
        1: "How are you? I Am fine. blar blar blar blar blar Thanks.",
        2: "How are you i am fine. blar blar blar blar blar than",
        3: "This is simhash test.",
        4: "How are you i am fine. blar blar blar blar blar thank1",
    }

    def setUp(self):
        """Create ``self.index`` from ``(str(id), text)`` pairs with ``k=10``."""
        pairs = []
        for key, text in self.data.items():
            pairs.append((str(key), text))
        self.index = SimhashIndexWithMongo(pairs, k=10)

    def test_get_near_dup(self):
        """Check the near-duplicate count before and after delete/add calls.

        The probe is expected to match three entries initially. Deleting
        entry "1" is expected to leave two matches, and repeating the delete
        must not change that. Adding entry "1" back is expected to restore
        three matches, and repeating the add must not create a fourth.
        """
        probe = Simhash(u"How are you i am fine.ablar ablar xyz blar blar blar blar blar blar blar thank")

        # (index method to call on entry "1", expected match count afterwards);
        # ``None`` means "query without mutating first".
        steps = (
            (None, 3),
            ("delete", 2),
            ("delete", 2),
            ("add", 3),
            ("add", 3),
        )
        for operation, expected in steps:
            if operation is not None:
                # A fresh Simhash of entry 1 is built for every mutation.
                getattr(self.index, operation)("1", Simhash(self.data[1]))
            matches = self.index.get_near_dups(probe)
            self.assertEqual(len(matches), expected)
class TestSimhashIndexWithMongo(TestCase):
    """Exercise querying, deleting and re-adding entries on the index."""

    # Sample documents; the integer keys become string ids in ``setUp``.
    data = {
        1: 'How are you? I Am fine. blar blar blar blar blar Thanks.',
        2: 'How are you i am fine. blar blar blar blar blar than',
        3: 'This is simhash test.',
        4: 'How are you i am fine. blar blar blar blar blar thank1',
    }

    def setUp(self):
        """Populate ``self.index`` with every fixture entry, using ``k=10``."""
        self.index = SimhashIndexWithMongo(
            [(str(doc_id), text) for doc_id, text in self.data.items()],
            k=10,
        )

    def _expect_dup_count(self, probe, expected):
        """Assert that ``probe`` currently yields ``expected`` near duplicates."""
        found = self.index.get_near_dups(probe)
        self.assertEqual(len(found), expected)

    def test_get_near_dup(self):
        """Verify match counts across repeated delete and add of entry '1'."""
        probe = Simhash(
            u'How are you i am fine.ablar ablar xyz blar blar blar blar blar blar blar thank'
        )
        self._expect_dup_count(probe, 3)

        # Removing entry '1' is expected to drop one match.
        self.index.delete('1', Simhash(self.data[1]))
        self._expect_dup_count(probe, 2)

        # Deleting the same entry again must leave the count unchanged.
        self.index.delete('1', Simhash(self.data[1]))
        self._expect_dup_count(probe, 2)

        # Re-adding entry '1' is expected to restore the original count.
        self.index.add('1', Simhash(self.data[1]))
        self._expect_dup_count(probe, 3)

        # Adding the same entry twice must not produce an extra match.
        self.index.add('1', Simhash(self.data[1]))
        self._expect_dup_count(probe, 3)