Пример #1
0
 def test_wildcard(self):
     """Exercise matching against a prefix node flagged as a wildcard.

     Registers 'arxiv.org/pdf/' with a True flag, sets ``is_wildcard`` on
     that node, and checks that a longer URL under the prefix matches with
     (1, 1). After a second URL under the same prefix is added, the same
     probe must match with (2, 2) while ``urls()`` still has one entry.
     """
     prefix = 'arxiv.org/pdf/'
     probe = 'arxiv.org/pdf/1410.1454v2'
     tree = PrefTree()

     # Insert the bare prefix, then flag its node as a wildcard by hand.
     tree.add_url(prefix, True)
     tree[prefix].is_wildcard = True
     self.assertTrue(tree.check_sanity())
     self.assertTrue(tree.has_wildcard())

     # NOTE(review): the tuple looks like (urls seen, successes) -- confirm
     # against PrefTree.match.
     first_hit = tree.match(probe)
     self.assertEqual(first_hit, (1, 1))

     # A second URL under the wildcard prefix bumps both counters for the
     # probe without adding a new entry to urls().
     tree.add_url('arxiv.org/pdf/1412.8548v1', True)
     second_hit = tree.match(probe)
     self.assertEqual(second_hit, (2, 2))

     tree.print_as_tree()
     stored = tree.urls()
     self.assertEqual(len(stored), 1)
Пример #2
0
    def test_create(self):
        """Build a tree from six short strings and verify what it stores.

        The tree must pass ``check_sanity()`` after every insertion, contain
        no wildcard, report exactly the inserted strings through ``urls()``
        (after ``flatten``), match each inserted string with (1, 0), and
        match an unknown string with (0, 0).
        """
        samples = ['aaba','cadb','abdc','abcd','afgh','abec']
        tree = PrefTree()

        # Validate the structure after each insertion, not just at the end.
        for sample in samples:
            tree.add_url(sample)
            self.assertTrue(tree.check_sanity())

        self.assertFalse(tree.has_wildcard())
        tree.print_as_tree()

        # urls() yields 3-item entries; only the first item is compared here.
        stored = [flatten(key) for key, _, _ in tree.urls()]
        stored.sort()
        self.assertEqual(stored, sorted(samples))

        # add_url was called without a success flag; each URL matches (1, 0).
        expected_hit = (1, 0)
        for sample in samples:
            self.assertEqual(tree.match(sample), expected_hit)

        # A string that was never inserted matches nothing.
        self.assertEqual(tree.match('bac'), (0, 0))
Пример #3
0
    def test_prune(self):
        """Check argument validation and the result of pruning a small tree.

        ``prune(min_urls=0)`` must raise ValueError. After adding five URLs
        under 'arxiv.org/pdf/' (four flagged True, one False) and pruning,
        the returned tree must hold one entry, contain a wildcard, match a
        new '.../1784.1920' URL with (5, 4), match '.../2340.0124' with
        (0, 0), and predict success for the former.
        """
        tree = PrefTree()

        # min_urls=0 is rejected.
        with self.assertRaises(ValueError):
            tree.prune(min_urls=0)

        fixtures = [
            ('arxiv.org/pdf/1410.1234', True),
            ('arxiv.org/pdf/1409.1094', True),
            ('arxiv.org/pdf/1201.5480', True),
            ('arxiv.org/pdf/1601.01234', True),
            ('arxiv.org/pdf/1602.01i34', False),  # the only entry flagged False
        ]
        for address, succeeded in fixtures:
            tree.add_url(address, succeeded)

        # prune returns a pair; only the first item (the tree) is used below.
        tree, _pruned = tree.prune(min_rate=0.75,min_children=2,min_urls=1)

        remaining = tree.urls()
        self.assertEqual(len(remaining), 1)
        self.assertTrue(tree.has_wildcard())

        # NOTE(review): presumably the wildcard sits at the shared prefix
        # 'arxiv.org/pdf/1', since a probe starting with '2' gets (0, 0) --
        # confirm against PrefTree.prune.
        inside = 'arxiv.org/pdf/1784.1920'
        outside = 'arxiv.org/pdf/2340.0124'
        self.assertEqual(tree.match(inside), (5, 4))
        self.assertEqual(tree.match(outside), (0, 0))

        verdict = tree.predict_success(inside, threshold=0.6, min_urls=3)
        self.assertTrue(verdict)
        tree.print_as_tree()