示例#1
0
 def __init__(self, **specs):
     """Set up link-tracking state and register any seed URLs.

     Keyword arguments are collected into ``specs``; only the
     ``'start_urls'`` entry is read here. When it is present and truthy,
     the whole value is handed to ``LinkAnnotation.load`` and then every
     URL in it is:

     * stored in ``url_to_link`` as ``Link(url)``,
     * added to the ``visited`` set,
     * marked on the link annotation with ``follow=True``.

     The parent initializer is invoked last, with no arguments.
     """
     annotation = LinkAnnotation()
     self.link_annotation = annotation
     self.visited, self.url_to_link = set(), {}

     seed_urls = specs.get('start_urls')
     if seed_urls:
         annotation.load(seed_urls)
         # One pass only: the per-URL order (Link -> visited -> mark) is kept
         # so the state stays in step for each seed.
         for seed in seed_urls:
             self.url_to_link[seed] = Link(seed)
             self.visited.add(seed)
             annotation.mark_link(seed, follow=True)

     super(PaginationExtractor, self).__init__()
示例#2
0
 def __init__(self):
     """Create empty link-tracking state, then run the parent initializer.

     Attributes set:
         link_annotation: a fresh ``LinkAnnotation`` instance.
         visited: an empty set.
         url_to_link: an empty dict.
     """
     annotation = LinkAnnotation()
     self.link_annotation = annotation
     self.visited, self.url_to_link = set(), {}
     super(PaginationExtractor, self).__init__()