Exemplo n.º 1
0
def crawlerBook(url, imagePath):
    """Fetch a book page, parse it into a ``Book`` and save its cover image.

    The page at ``url`` is downloaded with a browser-like User-Agent, parsed
    with BeautifulSoup's ``html.parser``, and every ``Book`` field is filled
    from the matching ``crawler.get*`` extractor. The source is tagged as
    ``"books"`` and ``bookUrl`` is set to ``url``.

    When both the cover image URL and the ISBN are available, the cover is
    stored through ``imageSaver.saveImageFile`` under the name
    ``<isbn>-<fromWhere>.jpg`` and that name is recorded in
    ``book.coverImageId``.

    Args:
        url: Address of the book page to crawl.
        imagePath: Prefix prepended verbatim to the cover file name. It is
            concatenated, not joined, so a directory must include its
            trailing separator.

    Returns:
        The populated ``Book`` instance.

    Note:
        Nothing is caught here: any exception raised by the HTTP request,
        the extractors or the image saver propagates to the caller.
    """
    print("now :" + url)
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36' }
    # requests interprets a timeout tuple as (connect timeout, read timeout).
    response = requests.get(url, timeout=(30.0, 30.0), headers=header)
    soup = BeautifulSoup(response.text, "html.parser")

    book = Book(
        isbn=crawler.getIsbn(soup),
        name=crawler.getName(soup),
        name2=crawler.getName2(soup),
        author=crawler.getAuthor(soup),
        author2=crawler.getAuthor2(soup),
        translator=crawler.getTranslator(soup),
        publisher=crawler.getPublisher(soup),
        publicationDate=crawler.getPublicationDate(soup),
        language=crawler.getLanguage(soup),
        collection=crawler.getCollection(soup),
        specification=crawler.getSpecification(soup),
        publication=crawler.getPublication(soup),
        classification=crawler.getClassification(soup),
        coverImageUrl=crawler.getCoverImageUrl(soup),
        bookIntroduction=crawler.getBookIntroduction(soup),
        authorIntroduction=crawler.getAuthorIntroduction(soup),
        catalog=crawler.getCatalog(soup),
        preface=crawler.getPreface(soup),
        fromWhere="books"
    )
    book.bookUrl = url

    # The cover file name is derived from the ISBN, so the image can only be
    # saved when both the image URL and the ISBN were extracted. Identity
    # checks are used because equality against None can be overridden.
    if book.coverImageUrl is not None and book.isbn is not None:
        book.coverImageId = book.isbn + "-" + book.fromWhere + ".jpg"
        imageSaver.saveImageFile(imagePath + book.coverImageId, book.coverImageUrl)

    return book