def __init__(self, **specs):
    """Create the extractor's tracking state, optionally seeded with URLs.

    Keyword arguments are collected in ``specs``; only the
    ``'start_urls'`` entry is read here. When it is present and truthy,
    the URLs are handed to ``self.link_annotation.load`` and then each
    one is:

    * stored in ``self.url_to_link`` as ``Link(url)``,
    * added to ``self.visited``,
    * passed to ``self.link_annotation.mark_link`` with ``follow=True``.

    The base-class initialiser is invoked last, with no arguments.
    """
    self.link_annotation = LinkAnnotation()
    self.visited = set()
    self.url_to_link = {}

    seed_urls = specs.get('start_urls')
    if seed_urls:
        annotation = self.link_annotation
        # The whole collection is loaded once, before any per-URL marking.
        annotation.load(seed_urls)
        for seed in seed_urls:
            self.url_to_link[seed] = Link(seed)
            self.visited.add(seed)
            annotation.mark_link(seed, follow=True)

    # NOTE(review): the source layout was flattened onto one line; this
    # call is assumed to run unconditionally (method level) -- confirm.
    super(PaginationExtractor, self).__init__()
def __init__(self):
    """Create empty tracking state, then run the base-class initialiser.

    Sets three instance attributes:

    * ``link_annotation`` -- a fresh ``LinkAnnotation`` instance,
    * ``visited`` -- an empty set,
    * ``url_to_link`` -- an empty dict.
    """
    # The annotation object is built first, as in the original ordering.
    self.link_annotation = LinkAnnotation()
    self.visited, self.url_to_link = set(), {}

    super(PaginationExtractor, self).__init__()