def getLinks(self, baseURL, allowURLPattern):
    """Crawl breadth-first from ``baseURL`` and return every URL that was tried.

    A URL is processed only if ``self.isURLMatch(url, allowURLPattern)``
    is truthy. Each processed URL is fetched with ``download`` and the
    result is handed to ``self.parser.getLinks(content, url)``; the links
    it yields are queued for later processing.

    Args:
        baseURL: URL the crawl starts from.
        allowURLPattern: Filter passed unchanged to ``self.isURLMatch``.

    Returns:
        The set of matching URLs that were attempted. A URL is recorded
        before it is downloaded, so URLs whose download or parsing failed
        are included as well.
    """
    # Function-scope import keeps this method self-contained.
    from collections import deque

    visitedLinks = set()
    # Every URL that has ever been queued. Pages commonly link back to one
    # another, so without this the work queue would fill up with duplicates
    # and each duplicate would be re-checked against the pattern.
    queuedLinks = {baseURL}
    pendingLinks = deque([baseURL])

    while pendingLinks:
        url = pendingLinks.popleft()
        if not self.isURLMatch(url, allowURLPattern):
            continue
        try:
            print('zkousim ' + url)
            # Recorded before the download so a failing URL is never retried.
            visitedLinks.add(url)
            content = download(url)
            for link in self.parser.getLinks(content, url):
                if link not in queuedLinks:
                    queuedLinks.add(link)
                    pendingLinks.append(link)
        except Exception as err:
            # Best-effort crawl: report the failure and carry on with the
            # remaining queued URLs.
            print(err)
    return visitedLinks
def downloadWebsite(self, url):
    """Download ``url`` and return its content decoded to text.

    The payload returned by ``download`` is passed to ``getEncoding`` to
    obtain a charset name, which is lower-cased and used to decode the
    payload.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The result of decoding the downloaded payload with the detected
        charset.
    """
    rawContent = download(url)
    # NOTE(review): assumes getEncoding always returns a string naming a
    # codec; a None result would fail on .lower() -- confirm against helper.
    codecName = getEncoding(rawContent).lower()
    return rawContent.decode(codecName)