def run(self):
    """Enqueue every book URL found on the paginated listing at ``self.url``.

    The bare ``self.url`` is fetched once and handed to
    ``listCrawler.getPageSize`` to obtain the number of pages. Each page
    ``self.url + "<page>"`` (numbered from 1) is then fetched, parsed, and
    every URL returned by ``listCrawler.getBookList`` is put on
    ``runner.booksQueue``.
    """
    # Passed unchanged to every requests.get call; requests reads a
    # 2-tuple as (connect timeout, read timeout) in seconds.
    request_timeout = (10.0, 10.0)

    landing = requests.get(self.url, timeout=request_timeout)
    page_count = listCrawler.getPageSize(
        BeautifulSoup(landing.text, "html.parser")
    )

    for page in range(1, page_count + 1):
        response = requests.get(self.url + str(page), timeout=request_timeout)
        listing = BeautifulSoup(response.text, "html.parser")

        for bookurl in listCrawler.getBookList(listing):
            runner.booksQueue.put(bookurl)

        # NOTE(review): the original indentation was lost; this one-second
        # pause is assumed to run once per page -- confirm against the
        # upstream file.
        time.sleep(1)
__author__ = 'johnnytsai' now_save = 0 now_error = [] for l in urlList.booklist: html = requests.get(l, timeout=(10.0, 10.0)).text soup = BeautifulSoup(html, "html.parser") pageSize = listCrawler.getPageSize(soup) for index in range(pageSize): page = index+1 html2 = requests.get(l + str(page), timeout=(10.0, 10.0)).text soup2 = BeautifulSoup(html2, "html.parser") #print(listCrawler.getBookList(soup2)) for bookurl in listCrawler.getBookList(soup2): book = bookRunner.crawlerBook(bookurl, "/Users/johnnytsai/Desktop/books/image/") """ print("ISBN: " + ("None" if book.isbn == None else book.isbn)) print("Name: " + ("None" if book.name == None else book.name)) print("Name2: " + ("None" if book.name2 == None else book.name2)) print("Author: " + ("None" if book.author == None else book.author)) print("Author2: " + ("None" if book.author2 == None else book.author2)) print("Translator: " + ("None" if book.translator == None else book.translator)) print("Publisher: " + ("None" if book.publisher == None else book.publisher)) print("PublicationDate: " + ("None" if book.publicationDate == None else book.publicationDate)) print("Language: " + ("None" if book.language == None else book.language)) print("Collection: " + ("None" if book.collection == None else book.collection)) print("Specification: " + ("None" if book.specification == None else book.specification)) print("Publication: " + ("None" if book.publication == None else book.publication)) print("Classification: " + ("None" if book.classification == None else book.classification))