def import_CSV(search_dir=None, library='eBook'):
    """Import a list of wanted books from a CSV file found in search_dir.

    The CSV file is located with csv_file(search_dir, library=library). Its
    first row must be a header row containing at least the columns 'Author'
    and 'Title'. For every following non-empty row:

    * the author is looked up in the authors table and, if missing, added
      via addAuthorNameToDB();
    * the book is looked up with finditem(); if found and not already
      'Open', 'Wanted' or 'Have' it is marked "Wanted" (column "Status" for
      library 'eBook', column "AudioStatus" for anything else);
    * otherwise search_for() is queried and the first result is imported
      with import_book() when both its author and title fuzz scores reach
      CONFIG['MATCH_RATIO'].

    When CONFIG['DELETE_CSV'] is set the file is deleted after a run with
    no skipped rows, or renamed to '<file>.fail' when rows were skipped.

    :param search_dir: directory expected to contain the CSV file
    :param library: 'eBook' (default) or another library type such as audio
    :return: a human readable message: either the final summary line or
             the reason the import could not be performed
    """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir, library=library)

        headers = None

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        total = 0
        existing = 0

        if not csvFile:
            msg = "No %s CSV file found in %s" % (library, search_dir)
            logger.warn(msg)
            return msg

        logger.debug('Reading file %s' % csvFile)
        # Use a context manager so the handle is closed before the file is
        # deleted or renamed below (an open handle blocks both on Windows)
        with open(csvFile, 'rU') as csvhandle:
            csvreader = reader(csvhandle)
            for row in csvreader:
                if headers is None:
                    # The first row read provides the column names
                    headers = row
                    if 'Author' not in headers or 'Title' not in headers:
                        msg = 'Invalid CSV file found %s' % csvFile
                        logger.warn(msg)
                        return msg
                    continue

                if not row:
                    # csv.reader yields [] for blank lines; they carry no book
                    continue

                total += 1
                item = dict(list(zip(headers, row)))
                authorname = formatAuthorName(item['Author'])
                title = makeUnicode(item['Title'])

                authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

                if authmatch:
                    logger.debug("CSV: Author %s found in database" % authorname)
                else:
                    logger.debug("CSV: Author %s not found" % authorname)
                    newauthor, _, new = addAuthorNameToDB(author=authorname,
                                                          addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug("Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(item, authorname, library=library)
                result = ''
                # True once import_book() has been called for this row, so a
                # failed import can be told apart from "no acceptable match"
                import_attempted = False
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    if library == 'eBook':
                        bookstatus = bookmatch['Status']
                    else:
                        bookstatus = bookmatch['AudioStatus']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        existing += 1
                        logger.info('Found %s %s by %s, already marked as "%s"' %
                                    (library, bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found %s %s by %s, marking as "Wanted"' % (library, bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        if library == 'eBook':
                            newValueDict = {"Status": "Wanted"}
                        else:
                            newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                            bookmatch = True
                    if not bookmatch:  # no match on full searchterm, try splitting out subtitle
                        newtitle, _ = split_title(authorname, title)
                        if newtitle != title:
                            title = newtitle
                            searchterm = "%s <ll> %s" % (title, authorname)
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]
                                if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                        and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                    bookmatch = True
                    if bookmatch:
                        logger.info("Found (%s%% %s%%) %s: %s for %s: %s" %
                                    (result['author_fuzz'], result['book_fuzz'], result['authorname'],
                                     result['bookname'], authorname, title))
                        import_attempted = True
                        if library == 'eBook':
                            import_book(result['bookid'], ebook="Wanted", wait=True)
                        else:
                            import_book(result['bookid'], audio="Wanted", wait=True)
                        # import_book returns nothing we use; confirm via the database
                        imported = myDB.match('select * from books where BookID=?', (result['bookid'],))
                        if imported:
                            bookcount += 1
                        else:
                            bookmatch = False

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results found'
                        logger.warn(msg)
                    elif import_attempted:
                        msg += ', Failed to import %s' % result['bookid']
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                    result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1

        msg = "Found %i %s%s in csv file, %i already existing or wanted" % (total, library, plural(total), existing)
        logger.info(msg)
        # argument order must follow the "%i %s%s" pattern: count, library, plural
        msg = "Added %i new author%s, marked %i %s%s as 'Wanted', %i %s%s not found" % \
              (authcount, plural(authcount), bookcount, library, plural(bookcount),
               skipcount, library, plural(skipcount))
        logger.info(msg)
        if lazylibrarian.CONFIG['DELETE_CSV']:
            if skipcount == 0:
                logger.info("Deleting %s on successful completion" % csvFile)
                try:
                    os.remove(csvFile)
                except OSError as why:
                    logger.warn('Unable to delete %s: %s' % (csvFile, why.strerror))
            else:
                logger.warn("Not deleting %s as not all books found" % csvFile)
                # A directory with the target name would block the rename
                if os.path.isdir(csvFile + '.fail'):
                    try:
                        shutil.rmtree(csvFile + '.fail')
                    except Exception as why:
                        logger.warn("Unable to remove %s, %s %s" %
                                    (csvFile + '.fail', type(why).__name__, str(why)))
                try:
                    _ = safe_move(csvFile, csvFile + '.fail')
                except Exception as e:
                    logger.error("Unable to rename %s, %s %s" % (csvFile, type(e).__name__, str(e)))
                    # Log why the rename may have failed: file and directory permissions
                    if not os.access(csvFile, os.R_OK):
                        logger.error("%s is not readable" % csvFile)
                    if not os.access(csvFile, os.W_OK):
                        logger.error("%s is not writeable" % csvFile)
                    parent = os.path.dirname(csvFile)
                    try:
                        with open(os.path.join(parent, 'll_temp'), 'w') as f:
                            f.write('test')
                        os.remove(os.path.join(parent, 'll_temp'))
                    except Exception as why:
                        logger.error("Directory %s is not writeable: %s" % (parent, why))
        return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def find_book(self, bookid=None, queue=None):
    """Fetch a single book from GoodReads and store it with status "Wanted".

    The book xml is requested from the GoodReads book/show endpoint using
    self.params as query parameters. The author is resolved through
    GoodReads(authorname).find_author_id(); if that author is not yet in
    the authors table (by id or by name) it is added with status "Ignored".
    The book is then upserted into the books table, after which a cover,
    series information and a work page link are added when available.

    A language that is not in CONFIG['IMP_PREFLANG'] is only logged, the
    book is still added because it was requested explicitly.

    :param bookid: GoodReads book id as a string
    :param queue: not used by this method
    :return: None
    """
    if not bookid:
        # concatenating None into the URL below would raise TypeError
        logger.warn("No bookid supplied, unable to find book")
        return

    myDB = database.DBConnection()

    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, _ = get_xml_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % str(e))
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for unwanted language, just warn
    #
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if bookLanguage not in valid_langs:
        logger.debug('Book %s goodreads language does not match preference, %s' % (bookname, bookLanguage))

    if rootxml.find('./book/publication_year').text is None:
        bookdate = "0000"
    else:
        bookdate = rootxml.find('./book/publication_year').text

    try:
        bookimg = rootxml.find('./book/img_url').text
        if 'assets/nocover' in bookimg:
            bookimg = 'images/nocover.png'
    except (KeyError, AttributeError, TypeError):
        # AttributeError: element missing; TypeError: element present but empty (text is None)
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    bookpages = rootxml.find('./book/num_pages').text

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        # Parameterised queries: author names may contain quote characters
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:  # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    bookname = unaccented(bookname)
    bookname, booksub = split_title(authorname, bookname)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic).strip()
    booksub = replace_all(booksub, dic).strip()
    # Series details are taken from the subtitle when there is one
    if booksub:
        series, seriesNum = bookSeries(booksub)
    else:
        series, seriesNum = bookSeries(bookname)

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": "",
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)

    elif bookimg and bookimg.startswith('http'):
        link, success = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    if lazylibrarian.CONFIG['ADD_SERIES']:
        # prefer series info from librarything
        seriesdict = getWorkSeries(bookid)
        if seriesdict:
            logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
        else:
            if series:
                seriesdict = {cleanName(unaccented(series)): seriesNum}
        setSeries(seriesdict, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def import_CSV(search_dir=None):
    """Import a list of wanted books from a CSV file found in search_dir.

    The CSV file is located with csv_file(search_dir). Its first row must
    be a header row containing at least the columns 'Author' and 'Title'.
    For every following non-empty row:

    * the author is looked up in the authors table and, if missing, added
      via addAuthorNameToDB();
    * the book is looked up with finditem(); if found and not already
      'Open', 'Wanted' or 'Have' its Status is set to "Wanted";
    * otherwise search_for() is queried and the first result is imported
      with import_book() when both its author and title fuzz scores reach
      CONFIG['MATCH_RATIO'].

    When CONFIG['DELETE_CSV'] is set the file is deleted after a run with
    no skipped rows.

    :param search_dir: directory expected to contain the CSV file
    :return: a human readable message: either the final summary line or
             the reason the import could not be performed
    """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        total = 0
        existing = 0

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg

        logger.debug('Reading file %s' % csvFile)
        # Use a context manager so the handle is closed before the file is
        # deleted below (an open handle blocks deletion on Windows)
        with open(csvFile, 'rU') as csvhandle:
            csvreader = reader(csvhandle)
            for row in csvreader:
                if headers is None:
                    # The first row read provides the column names
                    headers = row
                    if 'Author' not in headers or 'Title' not in headers:
                        msg = 'Invalid CSV file found %s' % csvFile
                        logger.warn(msg)
                        return msg
                    continue

                if not row:
                    # csv.reader yields [] for blank lines; they carry no book
                    continue

                total += 1
                item = dict(list(zip(headers, row)))
                authorname = formatAuthorName(item['Author'])
                title = makeUnicode(item['Title'])

                authmatch = myDB.match(
                    'SELECT * FROM authors where AuthorName=?', (authorname, ))

                if authmatch:
                    logger.debug("CSV: Author %s found in database" % authorname)
                else:
                    logger.debug("CSV: Author %s not found" % authorname)
                    newauthor, _, new = addAuthorNameToDB(
                        author=authorname, addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug(
                            "Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(item, authorname)
                result = ''
                # True once import_book() has been called for this row, so a
                # failed import can be told apart from "no acceptable match"
                import_attempted = False
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        existing += 1
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(
                            'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                            bookmatch = True
                    if not bookmatch:  # no match on full searchterm, try splitting out subtitle
                        newtitle, _ = split_title(authorname, title)
                        if newtitle != title:
                            title = newtitle
                            searchterm = "%s <ll> %s" % (title, authorname)
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]
                                if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                        and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                    bookmatch = True
                    if bookmatch:
                        logger.info(
                            "Found (%s%% %s%%) %s: %s for %s: %s" %
                            (result['author_fuzz'], result['book_fuzz'], result['authorname'],
                             result['bookname'], authorname, title))
                        import_attempted = True
                        import_book(result['bookid'], wait=True)
                        # import_book returns nothing we use; confirm via the database
                        imported = myDB.match(
                            'select * from books where BookID=?', (result['bookid'], ))
                        if imported:
                            bookcount += 1
                        else:
                            bookmatch = False

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results found'
                        logger.warn(msg)
                    elif import_attempted:
                        msg += ', Failed to import %s' % result['bookid']
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (
                            result['author_fuzz'], result['book_fuzz'],
                            result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1

        msg = "Found %i book%s in csv file, %i already existing or wanted" % (
            total, plural(total), existing)
        logger.info(msg)
        msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
              (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
        logger.info(msg)
        if lazylibrarian.CONFIG['DELETE_CSV']:
            if skipcount == 0:
                logger.info("Deleting %s on successful completion" % csvFile)
                try:
                    os.remove(csvFile)
                except OSError as why:
                    logger.warn('Unable to delete %s: %s' % (csvFile, why.strerror))
            else:
                logger.warn("Not deleting %s as not all books found" % csvFile)
        return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", refresh=False):
    """Load every book GoodReads lists for an author into the books table.

    The author is marked "Loading", the paged GoodReads author/list xml is
    walked page by page and, for each book:

    * a language is determined unless "All" is in CONFIG['IMP_PREFLANG']:
      first from the ISBN group tables in lazylibrarian, then the local
      languages cache table, then LibraryThing, then the GoodReads book
      page; books whose language is not wanted are skipped;
    * books with a bad leading character, no name, a future publication
      year (when CONFIG['NO_FUTURE'] is set) or duplicating an existing
      title/id are rejected;
    * remaining books are upserted unless they are locked (Manual) or
      "Ignored", then cover, series, status and work page are updated.

    Finally the author is set "Active" with details of the latest book and
    a row of counters is written to the stats table.

    :param authorid: GoodReads author id as a string
    :param authorname: author name, used for log messages and stats
    :param bookstatus: status given to books not yet in the database
    :param refresh: when True bypass the xml cache for the author pages
                    and refresh cached cover images
    :return: None; unexpected errors are logged, not raised
    """
    try:
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("Error fetching author books: %s" % str(e))
            return
        if rootxml is None:
            logger.debug("Error requesting author books")
            return
        if not in_cache:
            api_hits += 1
        resultxml = rootxml.getiterator('book')

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])

        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" % authorname)
            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            # Goodreads sometimes puts extra whitespace in the author names!
            authorNameResult = ' '.join(authorNameResult.split())
            logger.debug(u"GoodReads author name [%s]" % authorNameResult)
            loopCount = 1

            while resultxml:
                for book in resultxml:
                    total_count += 1

                    if book.find('publication_year').text is None:
                        pubyear = "0000"
                    else:
                        pubyear = book.find('publication_year').text

                    try:
                        bookimg = book.find('image_url').text
                        if 'nocover' in bookimg:
                            bookimg = 'images/nocover.png'
                    except (KeyError, AttributeError):
                        bookimg = 'images/nocover.png'

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        if book.find('isbn').text:
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        else:
                            if book.find('isbn13').text:
                                find_field = "isbn13"
                                isbn = book.find('isbn13').text
                                # skip the 3 digit 978/979 prefix
                                isbnhead = isbn[3:6]
                        # Try to use shortcut of ISBN identifier codes described here...
                        # https://en.wikipedia.org/wiki/List_of_ISBN_identifier_groups
                        if isbnhead:
                            if find_field == "isbn13" and isbn.startswith('979'):
                                for item in lazylibrarian.isbn_979_dict:
                                    if isbnhead.startswith(item):
                                        bookLanguage = lazylibrarian.isbn_979_dict[item]
                                        break
                                if bookLanguage != "Unknown":
                                    logger.debug("ISBN979 returned %s for %s" % (bookLanguage, isbnhead))
                            elif (find_field == "isbn") or (find_field == "isbn13" and isbn.startswith('978')):
                                for item in lazylibrarian.isbn_978_dict:
                                    if isbnhead.startswith(item):
                                        bookLanguage = lazylibrarian.isbn_978_dict[item]
                                        break
                                if bookLanguage != "Unknown":
                                    logger.debug("ISBN978 returned %s for %s" % (bookLanguage, isbnhead))

                        if bookLanguage == "Unknown" and isbnhead:
                            # Nothing in the isbn dictionary, try any cached results
                            match = myDB.match('SELECT lang FROM languages where isbn = ?', (isbnhead,))
                            if match:
                                bookLanguage = match['lang']
                                cache_hits += 1
                                logger.debug("Found cached language [%s] for %s [%s]" %
                                             (bookLanguage, find_field, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                try:
                                    librarything_wait()
                                    resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                    lt_lang_hits += 1
                                    logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                    # compare case-insensitively so both "unknown" and "Unknown" are caught
                                    resp_lower = resp.lower()
                                    if 'invalid' in resp_lower or 'unknown' in resp_lower:
                                        bookLanguage = "Unknown"
                                    else:
                                        bookLanguage = resp  # found a language code
                                        # values come from remote services: double any quotes
                                        # before building the statement
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead.replace('"', '""'),
                                                     bookLanguage.replace('"', '""')))
                                        logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                except Exception as e:
                                    logger.error("Error finding LT language result for [%s], %s" % (isbn, str(e)))

                        if bookLanguage == "Unknown":
                            # still no earlier match, we'll have to search the goodreads api
                            try:
                                if book.find(find_field).text:
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                               book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    # crude rate limit: wait if the last uncached request was this second
                                    time_now = int(time.time())
                                    if time_now <= lazylibrarian.LAST_GOODREADS:
                                        time.sleep(1)

                                    bookLanguage = ""
                                    try:
                                        BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug('Error requesting book language code')
                                        else:
                                            if not in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            try:
                                                bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                            except Exception as e:
                                                logger.debug("Error finding language_code in book xml: %s" % str(e))
                                    except Exception as e:
                                        logger.debug("Error getting book xml: %s" % str(e))

                                    if not in_cache:
                                        gr_lang_hits += 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"
                                    # At this point, give up?
                                    # WhatWork on author/title doesn't give us a language.
                                    # It might give us the "original language" of the book (but not always)
                                    # and our copy might not be in the original language anyway
                                    # eg "The Girl With the Dragon Tattoo" original language Swedish
                                    # If we have an isbn, try WhatISBN to get alternatives
                                    # in case any of them give us a language, but it seems if thinglang doesn't
                                    # have a language for the first isbn code, it doesn't for any of the
                                    # alternatives either
                                    # Goodreads search results don't include the language. Although sometimes
                                    # it's in the html page, it's not in the xml results

                                    if isbnhead != "":
                                        # if GR didn't give an isbn we can't cache it, just use language for this book
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead.replace('"', '""'),
                                                     bookLanguage.replace('"', '""')))
                                        logger.debug("GoodReads reports language [%s] for %s" %
                                                     (bookLanguage, isbnhead))
                                    else:
                                        not_cached += 1

                                    logger.debug(u"GR language: " + bookLanguage)
                                else:
                                    logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                    # continue

                            except Exception as e:
                                logger.debug(u"Goodreads language search failed: %s" % str(e))

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped %s with language %s' % (book.find('title').text, bookLanguage))
                            ignored += 1
                            continue

                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text
                    bookname = unaccented(bookname)

                    bookname, booksub = split_title(authorNameResult, bookname)

                    dic = {':': '.', '"': ''}  # do we need to strip apostrophes , '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace
                    booksub = replace_all(booksub, dic)
                    booksub = booksub.strip()  # strip whitespace
                    if booksub:
                        series, seriesNum = bookSeries(booksub)
                    else:
                        series, seriesNum = bookSeries(bookname)

                    rejected = False
                    check_status = False

                    # re.match only tests the first character of the title
                    if re.match(r'[^\w-]', bookname):  # reject books with bad characters in title
                        logger.debug(u"removed result [" + bookname + "] for bad characters")
                        removedResults += 1
                        rejected = True

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        if pubyear > today()[:4]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, pubyear))
                            removedResults += 1
                            rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' %
                                     (bookid, authorNameResult))
                        removedResults += 1
                        rejected = True

                    if not rejected:
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName = ? COLLATE NOCASE and AuthorName = ? COLLATE NOCASE'
                        match = myDB.match(cmd, (bookname, authorNameResult))
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (bookid, authorNameResult, bookname, match['BookID']))
                                duplicates += 1
                                rejected = True

                    if not rejected:
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:
                            # we have a book with this bookid already
                            if bookname != match['BookName'] or authorNameResult != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorNameResult, bookname,
                                              match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorNameResult, bookname))
                                # same book as before: still refresh its details below
                                check_status = True
                            duplicates += 1
                            rejected = True

                    if check_status or not rejected:
                        existing_book = myDB.match('SELECT Status,Manual FROM books WHERE BookID = ?', (bookid,))
                        if existing_book:
                            book_status = existing_book['Status']
                            locked = existing_book['Manual']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))
                        else:
                            book_status = bookstatus  # new_book status, or new_author status
                            locked = False

                        # Is the book already in the database?
                        # Leave alone if locked or status "ignore"
                        if not locked and book_status != "Ignored":
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": "",
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": pubyear,
                                "BookLang": bookLanguage,
                                "Status": book_status,
                                "BookAdded": today()
                            }

                            updated = False

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True

                            elif bookimg and bookimg.startswith('http'):
                                link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                                else:
                                    logger.debug('Failed to cache image for %s' % bookimg)

                            seriesdict = {}
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                # prefer series info from librarything
                                seriesdict = getWorkSeries(bookid)
                                if seriesdict:
                                    logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
                                    updated = True
                                else:
                                    if series:
                                        seriesdict = {cleanName(unaccented(series)): seriesNum}
                                setSeries(seriesdict, bookid)

                            new_status = setStatus(bookid, seriesdict, bookstatus)

                            if not new_status == book_status:
                                book_status = new_status
                                updated = True

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not existing_book:
                                logger.debug(u"[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, bookLanguage, book_status))
                                added_count += 1
                            elif updated:
                                logger.debug(u"[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, bookLanguage, book_status))
                                updated_count += 1
                        else:
                            book_ignore_count += 1

                # fetch the next page of results; the loop ends on an empty or failed page
                loopCount += 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits += 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % str(e))

                if resultxml:
                    if all(False for _ in resultxml):  # returns True if iterator is empty
                        resultxml = None

        deleteEmptySeries()
        lastbook = myDB.match('SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=? '
                              'AND Status != "Ignored" order by BookDate DESC', (authorid,))
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s" %
            (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s" % (modified_count, plural(modified_count)))

        myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GR.get_author_books: %s' % traceback.format_exc())
def find_book_in_db(myDB, author, book):
    """Return the BookID of the library book best matching author/book, or 0.

    An exact author and title match in the books table wins. Otherwise each
    book by the author is scored against the wanted title with three fuzzy
    measures and the first measure beating its hard-coded threshold decides:
    full ratio (>90), partial ratio (>85), then partial ratio of the title
    with its subtitle split off (>95).

    :param myDB: database connection offering match() and select()
    :param author: author name as stored in the books table
    :param book: wanted book title
    :return: BookID of the matching book, or 0 when nothing matched
    """
    # PAB fuzzy search for book in library, return LL bookid if found or zero
    # if not, return bookid to more easily update status
    # prefer an exact match on author & book
    exact = myDB.match('SELECT BookID FROM books where AuthorName="%s" and BookName="%s"' %
                       (author.replace('"', '""'), book.replace('"', '""')))
    if exact:
        logger.debug('Exact match [%s]' % book)
        return exact['BookID']

    # Try a more complex fuzzy match against each book in the db by this author
    # Using hard-coded ratios for now, ratio high (>90), partial_ratio lower (>85)
    # These are results that work well on my library, minimal false matches and no misses
    # on books that should be matched
    # Maybe make ratios configurable in config.ini later
    candidates = myDB.select('SELECT BookID,BookName,BookISBN FROM books where AuthorName="%s"' %
                             author.replace('"', '""'))

    # best score, title and id seen so far for each of the three measures
    top_ratio, top_ratio_name, top_ratio_id = 0, "", 0
    top_partial, top_partial_name, top_partial_id = 0, "", 0
    top_partname, top_partname_name, top_partname_id = 0, "", 0
    partname_score = 0

    wanted = unaccented(book.lower())
    wanted_main, _ = split_title(author, wanted)
    if wanted_main == wanted:
        # nothing was split off, so the third measure would add nothing
        wanted_main = ''

    for entry in candidates:
        entry_name = entry['BookName']
        entry_id = entry['BookID']
        # tidy up everything to raise fuzziness scores
        # still need to lowercase for matching against partial_name later on
        entry_lower = unaccented(entry_name.lower())

        ratio_score = fuzz.ratio(wanted, entry_lower)
        partial_score = fuzz.partial_ratio(wanted, entry_lower)
        if wanted_main:
            partname_score = fuzz.partial_ratio(wanted_main, entry_lower)

        # lose a point for each extra word in the fuzzy matches so we get the closest match
        word_gap = abs(len(getList(wanted)) - len(getList(entry_lower)))
        ratio_score -= word_gap
        partial_score -= word_gap

        if ratio_score > top_ratio:
            top_ratio, top_ratio_name, top_ratio_id = ratio_score, entry_name, entry_id
        if partial_score > top_partial:
            top_partial, top_partial_name, top_partial_id = partial_score, entry_name, entry_id
        if partname_score > top_partname:
            top_partname, top_partname_name, top_partname_id = partname_score, entry_name, entry_id

        if partial_score != top_partial:
            continue

        # prefer the match closest to the left, ie prefer starting with a match and ignoring the rest
        # this eliminates most false matches against omnibuses when we want a single book
        # find the position of the shortest string in the longest
        if len(getList(wanted)) >= len(getList(entry_lower)):
            pos_entry = wanted.find(entry_lower)
        else:
            pos_entry = entry_lower.find(wanted)

        holder_lower = top_partial_name.lower()
        if len(getList(wanted)) >= len(getList(holder_lower)):
            pos_holder = wanted.find(holder_lower)
        else:
            pos_holder = holder_lower.find(wanted)

        if pos_entry < pos_holder:
            logger.debug(
                "Fuzz left change, prefer [%s] over [%s] for [%s]" %
                (entry_name, top_partial_name, book))
            top_partial, top_partial_name, top_partial_id = partial_score, entry_name, entry_id

    # the measures are tried in this order; the first to beat its threshold wins
    verdicts = (
        (top_ratio, 90, "Fuzz match ratio [%d] [%s] [%s]", top_ratio_name, top_ratio_id),
        (top_partial, 85, "Fuzz match partial [%d] [%s] [%s]", top_partial_name, top_partial_id),
        (top_partname, 95, "Fuzz match partname [%d] [%s] [%s]", top_partname_name, top_partname_id),
    )
    for score, threshold, template, matched_name, matched_id in verdicts:
        if score > threshold:
            logger.debug(template % (score, book, matched_name))
            return matched_id

    logger.debug(
        'Fuzz failed [%s - %s] ratio [%d,%s], partial [%d,%s], partname [%d,%s]' %
        (author, book, top_ratio, top_ratio_name, top_partial, top_partial_name,
         top_partname, top_partname_name))
    return 0
def find_book_in_db(myDB, author, book):
    """Find a book in the database by author and title.

    Books are joined to authors on AuthorID and names are compared
    case-insensitively (``COLLATE NOCASE``). An exact match is tried first.
    If that fails, every book stored for the author is scored against
    ``book`` with three fuzzy comparisons, and the first best score to clear
    its hard-coded threshold wins: whole-title ratio (> 90), partial ratio
    (> 85), then partial ratio of the title with its subtitle split off (> 95).

    :param myDB: database connection providing ``match`` and ``select``
    :param author: author name to look for
    :param book: book title to look for
    :return: BookID of the matching book, or 0 if nothing matched
    """
    # PAB fuzzy search for book in library, return LL bookid if found or zero
    # if not, return bookid to more easily update status
    # prefer an exact match on author & book
    # Values are bound as parameters instead of being pasted into the SQL text,
    # so quotes in names no longer need manual escaping.
    cmd = 'SELECT BookID FROM books,authors where books.AuthorID = authors.AuthorID '
    cmd += 'and AuthorName=? COLLATE NOCASE and BookName=? COLLATE NOCASE'
    match = myDB.match(cmd, (author, book))
    if match:
        logger.debug('Exact match [%s]' % book)
        return match['BookID']

    # Try a more complex fuzzy match against each book in the db by this author
    # Using hard-coded ratios for now, ratio high (>90), partial_ratio lower (>85)
    # These are results that work well on my library, minimal false matches and no misses
    # on books that should be matched
    # Maybe make ratios configurable in config.ini later
    cmd = 'SELECT BookID,BookName,BookISBN FROM books,authors where books.AuthorID = authors.AuthorID '
    cmd += 'and AuthorName=? COLLATE NOCASE'
    books = myDB.select(cmd, (author,))

    best_ratio = 0
    best_partial = 0
    best_partname = 0
    ratio_name = ""
    partial_name = ""
    partial_lower = ""  # normalised form of partial_name, used by the tie-break
    partname_name = ""
    ratio_id = 0
    partial_id = 0
    partname_id = 0
    partname = 0

    book_lower = unaccented(book.lower())
    book_partname, _ = split_title(author, book_lower)
    if book_partname == book_lower:
        # no subtitle was split off, so the partname comparison adds nothing
        book_partname = ''
    book_words = len(getList(book_lower))  # loop invariant

    def left_position(candidate):
        # position of the shortest string (by word count) in the longest, -1 if absent
        if book_words >= len(getList(candidate)):
            return book_lower.find(candidate)
        return candidate.find(book_lower)

    for a_book in books:
        # tidy up everything to raise fuzziness scores
        # still need to lowercase for matching against partial_name later on
        a_book_lower = unaccented(a_book['BookName'].lower())

        ratio = fuzz.ratio(book_lower, a_book_lower)
        partial = fuzz.partial_ratio(book_lower, a_book_lower)
        if book_partname:
            partname = fuzz.partial_ratio(book_partname, a_book_lower)

        # lose a point for each extra word in the fuzzy matches so we get the closest match
        extra_words = abs(book_words - len(getList(a_book_lower)))
        ratio -= extra_words
        partial -= extra_words

        if ratio > best_ratio:
            best_ratio = ratio
            ratio_name = a_book['BookName']
            ratio_id = a_book['BookID']
        if partial > best_partial:
            best_partial = partial
            partial_name = a_book['BookName']
            partial_lower = a_book_lower
            partial_id = a_book['BookID']
        if partname > best_partname:
            best_partname = partname
            partname_name = a_book['BookName']
            partname_id = a_book['BookID']

        if partial == best_partial:
            # prefer the match closest to the left, ie prefer starting with a match and ignoring the rest
            # this eliminates most false matches against omnibuses when we want a single book
            match1 = left_position(a_book_lower)
            match2 = left_position(partial_lower)
            # find() returns -1 when the title is not contained at all; that must
            # never count as "further left" than a real position.
            if match1 >= 0 and (match2 < 0 or match1 < match2):
                logger.debug(
                    "Fuzz left change, prefer [%s] over [%s] for [%s]" %
                    (a_book['BookName'], partial_name, book))
                best_partial = partial
                partial_name = a_book['BookName']
                partial_lower = a_book_lower
                partial_id = a_book['BookID']

    if best_ratio > 90:
        logger.debug(
            "Fuzz match ratio [%d] [%s] [%s]" % (best_ratio, book, ratio_name))
        return ratio_id
    if best_partial > 85:
        logger.debug(
            "Fuzz match partial [%d] [%s] [%s]" % (best_partial, book, partial_name))
        return partial_id
    if best_partname > 95:
        logger.debug(
            "Fuzz match partname [%d] [%s] [%s]" % (best_partname, book, partname_name))
        return partname_id

    if books:
        logger.debug(
            'Fuzz failed [%s - %s] ratio [%d,%s], partial [%d,%s], partname [%d,%s]' %
            (author, book, best_ratio, ratio_name, best_partial, partial_name, best_partname, partname_name))
    else:
        logger.debug('No books found in database for %s' % author)
    return 0
def find_book(self, bookid=None, queue=None):
    """Fetch one book from GoodReads by id and store it with Status "Wanted".

    The book's XML record is requested, its fields are read, the author is
    resolved to an AuthorID via ``GoodReads(name).find_author_id()``, and the
    book row is upserted. The cover, series and work page are then updated in
    follow-up upserts when the helper lookups return a value.

    :param bookid: GoodReads book id as a string (concatenated into the URL)
    :param queue: not used by this method
    :return: None; request failures and an unknown author are logged and abort
    """
    if not bookid:
        # the default of None cannot be concatenated into the request URL
        logger.debug("Error requesting book: no bookid given")
        return

    myDB = database.DBConnection()

    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, in_cache = get_xml_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % str(e))
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for unwanted language, just warn
    #
    valid_langs = ([valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])
    if bookLanguage not in valid_langs:
        logger.debug('Book %s language does not match preference, %s' % (bookname, bookLanguage))

    bookdate = rootxml.find('./book/publication_year').text
    if bookdate is None:
        bookdate = "0000"

    # get_author_books reads the cover from 'image_url'; look there first and
    # keep 'img_url' as a fallback. A missing element or empty text means no cover.
    img_node = rootxml.find('./book/image_url')
    if img_node is None:
        img_node = rootxml.find('./book/img_url')
    bookimg = img_node.text if img_node is not None else None
    if not bookimg or 'assets/nocover' in bookimg:
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    # path written as './book/...' like every other lookup in this method
    bookpages = rootxml.find('./book/num_pages').text

    name = authorname
    GR = GoodReads(name)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
    else:
        logger.warning("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    bookname = unaccented(bookname)
    bookname, booksub = split_title(authorname, bookname)
    # strip characters that are not wanted in stored titles
    dic = {':': '', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic).strip()
    booksub = replace_all(booksub, dic).strip()
    # series details are taken from the subtitle when there is one
    if booksub:
        series, seriesNum = bookSeries(booksub)
    else:
        series, seriesNum = bookSeries(bookname)

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": None,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": None,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": today(),
        "Series": series,
        "SeriesNum": seriesNum
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)

    elif bookimg and bookimg.startswith('http'):
        link = cache_cover(bookid, bookimg)
        if link:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)

    if seriesNum is None:
        # try to get series info from librarything
        series, seriesNum = getWorkSeries(bookid)
        if seriesNum:
            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
            controlValueDict = {"BookID": bookid}
            newValueDict = {
                "Series": series,
                "SeriesNum": seriesNum
            }
            myDB.upsert("books", newValueDict, controlValueDict)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Import or refresh every book GoodReads lists for an author.

    The author row is set to Status "Loading", then the paged author book
    list is walked. For each book a language is worked out (unless "All" is
    in the preferred languages), using in turn: the ISBN group tables, the
    ``languages`` cache table, LibraryThing's thingLang service and finally
    the GoodReads book record. Books in an unwanted language, with a bad or
    empty title, or duplicating an existing author/title or BookID are
    skipped. Accepted books are upserted and then topped up with cover,
    series and work-page details. Finally the author row is set to "Active"
    with its latest book, and a row of counters is written to ``stats``.

    :param authorid: GoodReads author id as a string (concatenated into URLs)
    :param authorname: author name used in log messages and the stats row
    :param refresh: when True the page requests are made with useCache=False
    :return: None; any unexpected exception is logged with its traceback
    """
    try:
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        # Result counters live here, not in the "books found" branch, so the
        # summary section below can always read them.
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0

        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("Error fetching author books: %s" % str(e))
            return
        if rootxml is None:
            logger.debug("Error requesting author books")
            return
        if not in_cache:
            api_hits = api_hits + 1
        resultxml = rootxml.getiterator('book')

        valid_langs = ([valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" % authorname)
            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            logger.debug(u"author name " + authorNameResult)
            loopCount = 1

            # ISBN registration-group prefixes mapped to language codes
            isbn_979_dict = {
                "10": "fre",
                "11": "kor",
                "12": "ita"
            }
            isbn_978_dict = {
                "0": "eng",
                "1": "eng",
                "2": "fre",
                "3": "ger",
                "4": "jap",
                "5": "rus"
            }

            while resultxml:
                for book in resultxml:
                    total_count = total_count + 1

                    pubyear = book.find('publication_year').text
                    if pubyear is None:
                        pubyear = "0000"

                    try:
                        bookimg = book.find('image_url').text
                        if ('nocover' in bookimg):
                            bookimg = 'images/nocover.png'
                    except (KeyError, AttributeError):
                        bookimg = 'images/nocover.png'

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        if book.find('isbn').text:
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        else:
                            if book.find('isbn13').text:
                                find_field = "isbn13"
                                isbn = book.find('isbn13').text
                                # skip the 978/979 prefix
                                isbnhead = isbn[3:6]
                        if (find_field != 'id'):  # isbn10 or isbn13 found
                            # Try to use shortcut of ISBN identifier codes described here...
                            # https://en.wikipedia.org/wiki/List_of_ISBN_identifier_groups
                            if isbnhead != "":
                                if find_field == "isbn13" and isbn.startswith('979'):
                                    for item in isbn_979_dict:
                                        if isbnhead.startswith(item):
                                            bookLanguage = isbn_979_dict[item]
                                            break
                                    if bookLanguage != "Unknown":
                                        logger.debug("ISBN979 returned %s for %s" % (bookLanguage, isbnhead))
                                elif (find_field == "isbn") or (find_field == "isbn13" and isbn.startswith('978')):
                                    for item in isbn_978_dict:
                                        if isbnhead.startswith(item):
                                            bookLanguage = isbn_978_dict[item]
                                            break
                                    if bookLanguage != "Unknown":
                                        logger.debug("ISBN978 returned %s for %s" % (bookLanguage, isbnhead))

                            if bookLanguage == "Unknown":
                                # Nothing in the isbn dictionary, try any cached results
                                match = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                if match:
                                    bookLanguage = match['lang']
                                    cache_hits = cache_hits + 1
                                    logger.debug("Found cached language [%s] for %s [%s]" %
                                                 (bookLanguage, find_field, isbnhead))
                                else:
                                    # no match in cache, try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                    try:
                                        librarything_wait()
                                        lt_response = urllib2.urlopen(BOOK_URL, timeout=30)
                                        try:
                                            resp = lt_response.read()
                                        finally:
                                            # release the connection even if read() fails
                                            lt_response.close()
                                        lt_lang_hits = lt_lang_hits + 1
                                        logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                        if ('invalid' in resp or 'Unknown' in resp):
                                            bookLanguage = "Unknown"
                                        else:
                                            bookLanguage = resp  # found a language code
                                            # resp is remote text: bind it, never paste it into SQL
                                            myDB.action('insert into languages values (?, ?)',
                                                        (isbnhead, bookLanguage))
                                            logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                    except Exception as e:
                                        logger.error("Error finding LT language result for [%s], %s" % (isbn, str(e)))

                        if bookLanguage == "Unknown":
                            # still no earlier match, we'll have to search the goodreads api
                            try:
                                if book.find(find_field).text:
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                        book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    try:
                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug('Error requesting book language code')
                                            bookLanguage = ""
                                        else:
                                            if not in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                    except Exception as e:
                                        logger.error("Error finding book results: %s" % str(e))
                                    if not in_cache:
                                        gr_lang_hits = gr_lang_hits + 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"

                                    # At this point, give up?
                                    # WhatWork on author/title doesn't give us a language.
                                    # It might give us the "original language" of the book (but not always)
                                    # and our copy might not be in the original language anyway
                                    # eg "The Girl With the Dragon Tattoo" original language Swedish
                                    # If we have an isbn, try WhatISBN to get alternatives
                                    # in case any of them give us a language, but it seems if thinglang doesn't
                                    # have a language for the first isbn code, it doesn't for any of the
                                    # alternatives either
                                    # Goodreads search results don't include the language. Although sometimes
                                    # it's in the html page, it's not in the xml results

                                    if (isbnhead != ""):
                                        # if GR didn't give an isbn we can't cache it, just use language for this book
                                        myDB.action('insert into languages values (?, ?)',
                                                    (isbnhead, bookLanguage))
                                        logger.debug("GoodReads reports language [%s] for %s" %
                                                     (bookLanguage, isbnhead))
                                    else:
                                        not_cached = not_cached + 1

                                    logger.debug(u"GR language: " + bookLanguage)
                                else:
                                    logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                    # continue

                            except Exception as e:
                                logger.debug(u"Goodreads language search failed: %s" % str(e))

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped %s with language %s' % (book.find('title').text, bookLanguage))
                            ignored = ignored + 1
                            continue

                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text
                    bookname = unaccented(bookname)

                    bookname, booksub = split_title(authorNameResult, bookname)

                    dic = {':': '', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace
                    booksub = replace_all(booksub, dic)
                    booksub = booksub.strip()  # strip whitespace
                    if booksub:
                        series, seriesNum = bookSeries(booksub)
                    else:
                        series, seriesNum = bookSeries(bookname)

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                    # editions) and sometimes uses the same bookid if the book is the same but the title is
                    # slightly different
                    # We use bookid, then reject if another author/title has a different bookid so we just keep one...
                    find_book_status = myDB.select('SELECT * FROM books WHERE BookID=?', (bookid,))
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                            locked = resulted['Manual']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS
                        locked = False

                    rejected = False

                    if re.match(r'[^\w-]', bookname):  # reject books with bad characters in title
                        logger.debug(u"removed result [" + bookname + "] for bad characters")
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' %
                                     (bookid, authorNameResult))
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected:
                        find_books = myDB.select('SELECT * FROM books WHERE BookName=? and AuthorName=?',
                                                 (bookname, authorNameResult))
                        if find_books:
                            for find_book in find_books:
                                if find_book['BookID'] != bookid:
                                    # we have a book with this author/title already
                                    # the incoming bookid is the one dropped; the stored BookID is kept
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (bookid, authorNameResult, bookname, find_book['BookID']))
                                    duplicates = duplicates + 1
                                    rejected = True

                    if not rejected:
                        find_books = myDB.match('SELECT AuthorName,BookName FROM books WHERE BookID=?', (bookid,))
                        if find_books:
                            # we have a book with this bookid already
                            if bookname != find_books['BookName'] or authorNameResult != find_books['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorNameResult, bookname,
                                              find_books['AuthorName'], find_books['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorNameResult, bookname))
                            duplicates = duplicates + 1
                            rejected = True

                    if not rejected:
                        if book_status != "Ignored":
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorName": authorNameResult,
                                    "AuthorID": authorid,
                                    "AuthorLink": None,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": None,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": pubyear,
                                    "BookLang": bookLanguage,
                                    "Status": book_status,
                                    "BookAdded": today(),
                                    "Series": series,
                                    "SeriesNum": seriesNum
                                }

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)

                                elif bookimg and bookimg.startswith('http'):
                                    link = cache_cover(bookid, bookimg)
                                    if link:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)

                                if seriesNum is None:
                                    # try to get series info from librarything
                                    series, seriesNum = getWorkSeries(bookid)
                                    if seriesNum:
                                        logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {
                                            "Series": series,
                                            "SeriesNum": seriesNum
                                        }
                                        myDB.upsert("books", newValueDict, controlValueDict)

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not find_book_status:
                                    logger.debug(u"[%s] Added book: %s" % (authorname, bookname))
                                    added_count = added_count + 1
                                else:
                                    logger.debug(u"[%s] Updated book: %s" % (authorname, bookname))
                                    updated_count = updated_count + 1
                        else:
                            book_ignore_count = book_ignore_count + 1

                # fetch the next page of results; the loop ends when a page fails or is empty
                loopCount = loopCount + 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits = api_hits + 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % str(e))

                if resultxml:
                    if all(False for book in resultxml):  # returns True if iterator is empty
                        resultxml = None

        # Runs whether or not any books were listed, so the author row is set
        # to "Active" after every successful fetch of the first page.
        lastbook = myDB.match('SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? '
                              'AND Status != "Ignored" order by BookDate DESC', (authorid,))
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s for author" %
            (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (modified_count, plural(modified_count)))

        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GR.get_author_books: %s' % traceback.format_exc())