示例#1
0
    def getLinks(self, baseURL, allowURLPattern):
        """Crawl outward from ``baseURL`` and return the URLs that were visited.

        URLs are processed first-in, first-out, i.e. in the order they were
        first discovered. A URL is only processed when
        ``self.isURLMatch(url, allowURLPattern)`` is truthy, and each such URL
        is attempted at most once.

        Args:
            baseURL: URL the crawl starts from.
            allowURLPattern: Passed unchanged to ``self.isURLMatch`` to decide
                whether a URL may be followed.

        Returns:
            set: Every URL for which a download was attempted. A URL stays in
            the set even if downloading or parsing it raised an exception.
        """
        # Function-scope import so the method does not depend on the
        # file-level import block.
        from collections import deque

        visitedLinks = set()
        pending = deque([baseURL])
        # Links that have already been queued once. Without this, every
        # repeated link found on every page would be queued again.
        queued = set()

        while pending:
            url = pending.popleft()
            if not self.isURLMatch(url, allowURLPattern) or url in visitedLinks:
                continue

            try:
                print('zkousim ' + url)
                # Marked as visited before downloading so that a failing URL
                # is not retried when another page links to it.
                visitedLinks.add(url)
                content = download(url)
                for link in self.parser.getLinks(content, url):
                    if link not in queued:
                        queued.add(link)
                        pending.append(link)
            except Exception as err:
                # Best-effort crawl: report the problem and carry on with the
                # remaining queued URLs.
                print(err)

        return visitedLinks
示例#2
0
	def getLinks(self, baseURL, allowURLPattern):
		"""Crawl outward from ``baseURL`` and return the URLs that were visited.

		URLs are processed first-in, first-out, i.e. in the order they were
		first discovered. A URL is only processed when
		``self.isURLMatch(url, allowURLPattern)`` is truthy, and each such URL
		is attempted at most once.

		Args:
			baseURL: URL the crawl starts from.
			allowURLPattern: Passed unchanged to ``self.isURLMatch`` to decide
				whether a URL may be followed.

		Returns:
			set: Every URL for which a download was attempted. A URL stays in
			the set even if downloading or parsing it raised an exception.
		"""
		# Function-scope import so the method does not depend on the
		# file-level import block.
		from collections import deque

		visitedLinks = set()
		pending = deque([baseURL])
		# Links that have already been queued once. Without this, every
		# repeated link found on every page would be queued again.
		queued = set()

		while pending:
			url = pending.popleft()
			if not self.isURLMatch(url, allowURLPattern) or url in visitedLinks:
				continue

			try:
				print('zkousim ' + url)
				# Marked as visited before downloading so that a failing URL
				# is not retried when another page links to it.
				visitedLinks.add(url)
				content = download(url)
				for link in self.parser.getLinks(content, url):
					if link not in queued:
						queued.add(link)
						pending.append(link)
			except Exception as err:
				# Best-effort crawl: report the problem and carry on with the
				# remaining queued URLs.
				print(err)

		return visitedLinks
示例#3
0
	def downloadWebsite(self, url):
		"""Download ``url`` and return its content decoded to text.

		The payload returned by ``download(url)`` is handed to
		``getEncoding`` to obtain an encoding name; that name is lower-cased
		and used to decode the payload.

		Args:
			url: Address passed to ``download``.

		Returns:
			The result of calling ``.decode(<encoding name>)`` on the
			downloaded payload (presumably bytes in, text out; confirm
			against ``download``).
		"""
		payload = download(url)
		# Normalise the reported encoding name before using it as the codec.
		codec = getEncoding(payload).lower()
		return payload.decode(codec)