def get(self):
    """Fetch this crawler's URL, save the content, and recursively crawl
    the CSS / JS / href links discovered in it.

    Budgets are enforced through module globals: `crawl_count` against
    `DEPTH_LIMIT` (page count) and `total_data` against `CONTENT_LIMIT`
    (cumulative bytes downloaded).

    Returns:
        False when a budget limit stops the crawl; None otherwise.
    """
    global total_data, crawl_count, crawled

    # Stop descending once the page budget is exhausted.
    if crawl_count >= DEPTH_LIMIT:
        return False

    crawled.add(self.url)
    data = self.fetch()

    # A single-space bytearray appears to be the fetcher's "empty"
    # sentinel — NOTE(review): confirm against self.fetch().
    if not data or data == bytearray(b' '):
        return

    # Enforce the cumulative download-size budget.
    if total_data > CONTENT_LIMIT:
        return False
    total_data += len(data)
    crawl_count += 1

    webserver.save(self.url, self.root, self.type, data)
    s = Scraper(data, self.console)

    # Only pages (not JS/CSS assets) are scanned for sub-resources.
    # CSS first, then JS — same order as the original implementation.
    if self.type not in ("JS", "CSS"):
        self._crawl_links(s.get_css(), "CSS")
        js_links = s.get_script()
        self.console.print(js_links)
        self._crawl_links(js_links, "JS")

    # Only HTML pages are followed for hyperlinks.
    if self.type == "HTML":
        self._crawl_links(s.get_links(), "HTML")

def _crawl_links(self, links, link_type):
    """Spawn a child Crawler of `link_type` for each truthy link not
    already in the global `crawled` set."""
    for link in links:
        if not link:
            continue
        child = Crawler(link, self, link_type, self.console)
        if child.url not in crawled:
            child.get()