def crawl(current_url):
    """Fetch *current_url*, enqueue any new http(s) links found on the page,
    mark the URL as crawled, hand the page to the indexer, and persist state.

    URLs containing '.vhd' are skipped (presumably large binary disk
    images — TODO confirm intent). On any fetch/parse error the URL is
    dropped from the queue so the crawler cannot loop on it forever.
    """
    print('Total in Queue', len(Crawler.queue), '| Total Crawled', len(Crawler.crawled))

    if '.vhd' in current_url:
        # Bug fix: the original left a skipped '.vhd' URL sitting in the
        # queue, so it could be re-selected indefinitely. Drop it instead.
        Crawler.queue.discard(current_url)
        Crawler.save_lists()
        return

    try:
        with urllib.request.urlopen(current_url) as response:
            html = response.read()
        soup = BeautifulSoup(html, "html.parser")
        print(" crawling", current_url)
        # find_all is the current BeautifulSoup name; findAll is a legacy alias.
        for link in soup.find_all('a', attrs={'href': re.compile("^http")}):
            href = link.get('href')
            if href not in Crawler.queue and href not in Crawler.crawled:
                Crawler.queue.add(href)
        Crawler.crawled.add(current_url)
        Crawler.queue.discard(current_url)
        Indexer.indexer(current_url, soup)
    except Exception as exc:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; include the exception detail for diagnostics.
        print("ERROR", current_url, exc)
        Crawler.queue.discard(current_url)
    finally:
        # State is persisted on every path (was duplicated in both the
        # success and error branches of the original).
        Crawler.save_lists()