def crawl(self, URL, depth=0):
    """Recursively crawl URL, following links until the configured depth limit."""
    if depth <= P.crawlerProp.depth:
        print("DEPTH >> ", depth)
        print("VISITING URL >> ", URL)
        try:
            # Fetch and parse the page, then collect its anchor tags.
            requester = Requester(URL)
            HTML = requester.getHtml()
            parser = Parser(HTML)
            links = parser.getTag('a')

            # Extract part-of-speech-tagged words from the configured tags.
            words = F.extractWords(
                parser.getTags(list(P.crawlerProp.atlas.keys())),
                P.crawlerProp.pos_tag)
            print(words)

            depth += 1
            for link in links:
                if link is not None and Tag(link).hasKey('href'):
                    href = link['href']
                    # Use the href as-is if it is valid; otherwise try to
                    # resolve it against the current URL, else drop it.
                    if F.urlValid(href):
                        nURL = href
                    else:
                        fixed = F.urlFix(URL, href)
                        nURL = fixed if F.urlValid(fixed) else None

                    if nURL is not None and self.__pass(nURL):
                        self.visited.append(nURL)
                        self.crawl(nURL, depth)
        except Exception as exc:
            # Report the actual error instead of a new, empty Exception().
            print("ERROR WHILE CRAWLING >> ", URL, exc)
    else:
        print("REACHED DEPTH LIMIT FOR >> ", URL)
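
# A minimal usage sketch, assuming this method lives on a Crawler class that
# initialises self.visited and defines __pass (the class name and setup here
# are assumptions; they are outside this excerpt):
#
#     crawler = Crawler()
#     crawler.crawl("https://example.com")  # depth defaults to 0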