예제 #1
0
파일: tests.py 프로젝트: dissemin/croawl
 def test_prune_with_reverse(self):
     """Smoke-test ``PrefTree.prune(reverse=True)`` on a small URL sample.

     Eight URLs are inserted together with a boolean flag (passed as the
     second argument of ``add_url``).  The tree is printed before and
     after pruning, the pruned tree must pass ``check_sanity()``, and
     every entry yielded by ``urls()`` is printed for manual inspection.

     Only the sanity check is asserted; the shape of the pruned tree is
     not verified here.
     """
     samples = [
         ('researchgate.net/publication/233865122_uriset', False),
         ('researchgate.net/publication/143874230_albtedru', False),
         ('researchgate.net/publication/320748374_kelbcad', False),
         ('researchgate.net/publication/233865122_uriset.pdf', True),
         ('researchgate.net/publication/143874230_albtedru.pdf', True),
         ('researchgate.net/publication/320748374_kelbcad.pdf', True),
         ('onlinelibrary.wiley.com/wol1/doi/10.1002/anie.200800037.abstract', False),
         ('onlinelibrary.wiley.com/wol1/doi/10.1002/anie.200800037.pdf', False),
     ]

     tree = PrefTree()
     for url, success in samples:
         tree.add_url(url, success)
     tree.print_as_tree()

     # prune() returns a pair; only the first element (the tree) is used
     # below, the second one is intentionally ignored.
     tree, _pruned = tree.prune(reverse=True)
     tree.print_as_tree()
     self.assertTrue(tree.check_sanity())

     for u, c, s in tree.urls():
         # Single pre-formatted argument: prints the same space-separated
         # line under the Python 2 print statement and the Python 3
         # print() function.
         print('%s %s %s' % (flatten(u), c, s))
예제 #2
0
파일: tests.py 프로젝트: dissemin/croawl
    def test_create(self):
        """Build a ``PrefTree`` from six plain strings and check basic queries.

        After each ``add_url`` call the tree must pass ``check_sanity()``.
        Once all strings are inserted the test expects that:

        * ``has_wildcard()`` is false,
        * flattening the first element of every ``urls()`` entry gives
          back exactly the inserted strings (order-insensitive),
        * ``match`` returns ``(1, 0)`` for every inserted string and
          ``(0, 0)`` for ``'bac'``, which was never inserted.
        """
        inserted = ['aaba', 'cadb', 'abdc', 'abcd', 'afgh', 'abec']
        tree = PrefTree()
        for candidate in inserted:
            tree.add_url(candidate)
            # Sanity is re-checked after every single insertion.
            self.assertTrue(tree.check_sanity())

        self.assertFalse(tree.has_wildcard())
        tree.print_as_tree()

        stored = sorted(flatten(path) for path, _c, _s in tree.urls())
        self.assertEqual(stored, sorted(inserted))

        expected_hit = (1, 0)
        for candidate in inserted:
            self.assertEqual(tree.match(candidate), expected_hit)
        self.assertEqual(tree.match('bac'), (0, 0))