Exemplo n.º 1
0
 def search(self, name, type):
     """ Render the search results page for a query.

         A GoodReads helper is built from name and type. An empty name
         redirects to the "config" page; otherwise the output of
         find_results(name) is handed to the searchresults.html template. """
     goodreads = GoodReads(name, type)

     # Guard clause: nothing to look up for an empty query.
     if len(name) == 0:
         raise cherrypy.HTTPRedirect("config")

     found = goodreads.find_results(name)
     page_title = 'Search Results for: "' + name + '"'
     return serve_template(templatename="searchresults.html",
                           title=page_title,
                           searchresults=found,
                           type=type)
Exemplo n.º 2
0
def addAuthorToDB(authorname=None, refresh=False):
    """ Add a new author to the database, or refresh an existing one,
        then load the author's books.

        A row with Status "Loading" is upserted first. The author is then
        looked up with GoodReads.find_author_id(); when found, the row is
        filled in and the books are fetched through the API selected by
        lazylibrarian.BOOK_API. When nothing is found the row is deleted
        again and the function returns early.

        authorname: author name, used as the key into the authors table
        refresh: passed through to find_author_id and get_author_books """

    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Double any single quote so the name is a valid SQL string literal;
    # the same escaped form is reused for the DELETE further down.
    sql_name = authorname.replace("'", "''")
    query = "SELECT * from authors WHERE AuthorName='%s'" % sql_name
    dbauthor = myDB.match(query)
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary AuthorID until the real one is known.
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Previously this used an unescaped, double-quoted literal, which
        # broke for names containing a double quote.
        myDB.action("DELETE from authors WHERE AuthorName='%s'" % sql_name)
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    if 'nophoto' in authorimg:
        # The reported image contains 'nophoto': ask getAuthorImage instead.
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        # Keep the remote link when cache_cover returns nothing.
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
Exemplo n.º 3
0
def addAuthorToDB(authorname=None):
    """ Add or refresh an author in the database, load the author's books
        from GoodReads, then record the book count and most recent book.

        The current thread is renamed to "DBIMPORT". A row with Status
        "Loading" is upserted before the lookup; if GoodReads finds nothing
        an error is logged and the function returns without touching books.

        authorname: author name, used as the key into the authors table """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)

    # Double any single quote so the name is a valid SQL string literal.
    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary AuthorID until the real one is known.
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to
        # raise NameError in the book processing below.
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)

    lastbook = myDB.action("SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC" % authorid).fetchone()
    bookCount = myDB.select("SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" % authorid)
    for count in bookCount:
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": count['counter']
        }
        # fetchone() gives None when the author has no books; only write
        # the "last book" columns when there is a row to read them from.
        if lastbook:
            newValueDict["LastBook"] = lastbook['BookName']
            newValueDict["LastLink"] = lastbook['BookLink']
            newValueDict["LastDate"] = lastbook['BookDate']

        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.info("Processing complete: Added %s books to the database" % str(count['counter']))
Exemplo n.º 4
0
def addAuthorToDB(authorname=None, refresh=False):
    """ Add a new author to the database, or refresh an existing one,
        then load the author's books.

        A row with Status "Loading" is upserted first. The author is then
        looked up with GoodReads.find_author_id(); when found, the row is
        filled in and the books are fetched through the API selected by
        lazylibrarian.BOOK_API. When nothing is found the row is deleted
        again and the function returns early.

        authorname: author name, used as the key into the authors table
        refresh: passed through to find_author_id and get_author_books """

    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Double any single quote so the name is a valid SQL string literal;
    # the same escaped form is reused for the DELETE further down.
    escaped_name = authorname.replace("'", "''")
    query = "SELECT * from authors WHERE AuthorName='%s'" % escaped_name
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary AuthorID until the real one is known.
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Previously this used an unescaped, double-quoted literal, which
        # broke for names containing a double quote.
        myDB.action("DELETE from authors WHERE AuthorName='%s'" % escaped_name)
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    if 'nophoto' in authorimg:
        # The reported image contains 'nophoto': ask getAuthorImage instead.
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        # Keep the remote link when cache_cover returns nothing.
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
Exemplo n.º 5
0
 def search(self, name, type):
     """ Show the results of a search for name.

         Builds a GoodReads helper from name and type, redirects to the
         "config" page for an empty name, and otherwise renders
         searchresults.html with whatever find_results(name) returns. """
     lookup = GoodReads(name, type)

     # Empty query: bail out before asking for results.
     if len(name) == 0:
         raise cherrypy.HTTPRedirect("config")

     template_args = {
         "templatename": "searchresults.html",
         "title": 'Search Results for: "' + name + '"',
         "searchresults": lookup.find_results(name),
         "type": type,
     }
     return serve_template(**template_args)
Exemplo n.º 6
0
def addAuthorToDB(authorname=None, refresh=False):
    """ Add a new author to the database, or refresh an existing one,
        then load the author's books.

        The current thread is renamed to "DBIMPORT". A row with Status
        "Loading" is upserted first; the author is then looked up with
        GoodReads.find_author_id(). When found, the row is filled in and
        the books are fetched through the API selected by
        lazylibrarian.BOOK_API. When nothing is found the row is deleted
        again and the function returns early.

        authorname: author name, used as the key into the authors table
        refresh: passed through to find_author_id and get_author_books """
    threading.currentThread().name = "DBIMPORT"

    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Double any single quote so the name is a valid SQL string literal;
    # the same escaped form is reused for the DELETE further down.
    sql_name = authorname.replace("'", "''")
    query = "SELECT * from authors WHERE AuthorName='%s'" % sql_name
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary AuthorID until the real one is known.
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Previously this used an unescaped, double-quoted literal, which
        # broke for names containing a double quote.
        myDB.action("DELETE from authors WHERE AuthorName='%s'" % sql_name)
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.debug("[%s] Author update complete" % authorname)
Exemplo n.º 7
0
def addAuthorToDB(authorname=None, refresh=False):
    """ Add a new author to the database, or refresh an existing one,
        then load the author's books.

        The current thread is renamed to "DBIMPORT". A row with Status
        "Loading" is upserted first; the author is then looked up with
        GoodReads.find_author_id(). When found, the row is filled in and
        the books are fetched through the API selected by
        lazylibrarian.BOOK_API. When nothing is found an error is logged
        and the function returns without processing books.

        authorname: author name, used as the key into the authors table
        refresh: passed through to get_author_books """
    threading.currentThread().name = "DBIMPORT"

    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Double any single quote so the name is a valid SQL string literal.
    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary AuthorID until the real one is known.
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.info("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.info("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to
        # raise NameError in the book processing below.
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.info("[%s] Author update complete" % authorname)
Exemplo n.º 8
0
def import_book(bookid, wait=False):
    """ Look up bookid with the configured book API and import it as "Wanted".

        GoogleBooks is used when CONFIG['BOOK_API'] says so, GoodReads
        otherwise. With wait=False the lookup runs in a named background
        thread; with wait=True it runs in the calling thread. """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        finder = GoogleBooks(bookid).find_book
        thread_name = 'GB-IMPORT'
    else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        finder = GoodReads(bookid).find_book
        thread_name = 'GR-IMPORT'

    if wait:
        finder(bookid, "Wanted")
        return

    worker = threading.Thread(target=finder, name=thread_name, args=[bookid, "Wanted"])
    worker.start()
Exemplo n.º 9
0
def import_book(bookid, ebook=None, audio=None, wait=False):
    """ Look up bookid with the configured book API and import the book.

        ebook and audio are forwarded unchanged to find_book
        (ebook/audio=None makes find_book use the configured default).
        With wait=False the lookup runs in a named background thread;
        with wait=True it runs in the calling thread. """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        finder = GoogleBooks(bookid).find_book
        thread_name = 'GB-IMPORT'
    else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        finder = GoodReads(bookid).find_book
        thread_name = 'GR-IMPORT'

    if wait:
        finder(bookid, ebook, audio)
        return

    worker = threading.Thread(target=finder, name=thread_name, args=[bookid, ebook, audio])
    worker.start()
Exemplo n.º 10
0
    def search(self, name):
        """ Search the configured book API for name and render the results.

            An empty name redirects to the "config" page before any API
            object or worker thread is created. Otherwise find_results runs
            in a worker thread and puts its results on a queue; they are
            sorted by highest_fuzz then num_reviews (descending) and passed
            to searchresults.html together with the author names and book
            ids already in the database. """
        # Check first: the search thread used to be started even for an
        # empty query, and was then abandoned by the redirect.
        if not name:
            raise cherrypy.HTTPRedirect("config")

        myDB = database.DBConnection()

        # GoodReads is the fallback, so search_api is always bound.
        if lazylibrarian.BOOK_API == "GoogleBooks":
            api = GoogleBooks(name)
        else:
            api = GoodReads(name)

        result_queue = Queue.Queue()
        search_api = threading.Thread(target=api.find_results, args=[name, result_queue])
        search_api.start()
        search_api.join()
        searchresults = result_queue.get()

        authorlist = [item['AuthorName'] for item in myDB.select("SELECT * from authors")]

        booksearch = myDB.select("SELECT * from books")
        booklist = [item['BookID'] for item in booksearch]

        sortedlist_final = sorted(searchresults, key=itemgetter('highest_fuzz', 'num_reviews'), reverse=True)
        # NOTE(review): this method has no local called `type`, so the name
        # below resolves outside the method (the builtin unless the module
        # defines one) - confirm what the template expects.
        return serve_template(templatename="searchresults.html", title='Search Results for: "' + name + '"', searchresults=sortedlist_final, authorlist=authorlist, booklist=booklist, booksearch=booksearch, type=type)
Exemplo n.º 11
0
def addAuthorToDB(authorname=None):
    """ Add or refresh an author in the database and load the author's
        books from GoodReads.

        The current thread is renamed to "DBIMPORT". A row with Status
        "Loading" is upserted before the lookup; if GoodReads finds nothing
        an error is logged and the function returns without processing
        books.

        authorname: author name, used as the key into the authors table """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)

    # Double any single quote so the name is a valid SQL string literal.
    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace(
        "'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary AuthorID until the real one is known.
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to
        # raise NameError in the get_author_books call below.
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)
Exemplo n.º 12
0
def import_book(bookid, ebook=None, audio=None, wait=False):
    """ Import a book by id using goodreads or googlebooks.

        The API is chosen from CONFIG['BOOK_API'] (GoodReads unless it is
        "GoogleBooks"). ebook and audio are handed to find_book as given
        (ebook/audio=None makes find_book use the configured default).
        Runs inline when wait is true, otherwise in a named thread. """
    use_google = lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks"
    if use_google:
        api, label = GoogleBooks(bookid), 'GB-IMPORT'
    else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        api, label = GoodReads(bookid), 'GR-IMPORT'

    call_args = [bookid, ebook, audio]
    if not wait:
        # Fire and forget: the caller does not wait for the lookup.
        threading.Thread(target=api.find_book,
                         name=label,
                         args=call_args).start()
    else:
        api.find_book(*call_args)
Exemplo n.º 13
0
def addAuthorToDB(authorname=None):
    """ Add or refresh an author in the database and load the author's
        books from GoodReads.

        The current thread is renamed to "DBIMPORT". A row with Status
        "Loading" is upserted before the lookup; if GoodReads finds nothing
        an error is logged and the function returns without processing
        books.

        authorname: author name, used as the key into the authors table """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)

    # Double any single quote so the name is a valid SQL string literal.
    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary AuthorID until the real one is known.
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to
        # raise NameError in the get_author_books call below.
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)
Exemplo n.º 14
0
    def _addBook(self, **kwargs):
        """ Start a background lookup of the book given by the 'id' keyword.

            When 'id' is missing, self.data is set to an error message and
            nothing else happens. Otherwise find_book is started in a named
            thread on GoogleBooks or GoodReads, depending on
            CONFIG['BOOK_API']; the thread is not waited for. """
        if 'id' not in kwargs:
            self.data = 'Missing parameter: id'
            return

        book_id = kwargs['id']
        if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
            provider, thread_name = GoogleBooks(book_id), 'API-GBRESULTS'
        else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
            provider, thread_name = GoodReads(book_id), 'API-GRRESULTS'

        worker = threading.Thread(target=provider.find_book, name=thread_name, args=[book_id])
        worker.start()
Exemplo n.º 15
0
def import_book(bookid):
    """ Import a book by id using goodreads or googlebooks.

        find_book is always started in a named background thread on the
        API selected by CONFIG['BOOK_API'] (GoodReads unless it is
        "GoogleBooks"); the thread is not waited for. """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        finder = GoogleBooks(bookid).find_book
        thread_name = 'GB-IMPORT'
    else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        finder = GoodReads(bookid).find_book
        # NOTE(review): the GoogleBooks branch uses an '-IMPORT' suffix;
        # confirm whether 'GR-RESULTS' is intended here.
        thread_name = 'GR-RESULTS'

    worker = threading.Thread(target=finder, name=thread_name, args=[bookid])
    worker.start()
Exemplo n.º 16
0
    def addBook(self, bookid=None):
        """ Mark a book as Wanted and start a search for it.

            If bookid is already in the books table its Status is set to
            "Wanted" and the totals / last-book columns of its author are
            refreshed. Otherwise the book is looked up through the
            configured API (find_book runs in a thread that is joined
            before continuing); an empty bookid redirects to "config".
            Finally searchbook is started in a background thread and the
            request is redirected to "books". """
        myDB = database.DBConnection()

        booksearch = myDB.select("SELECT * from books WHERE BookID=?", [bookid])
        if booksearch:
            myDB.upsert("books", {'Status': 'Wanted'}, {'BookID': bookid})
            for book in booksearch:
                AuthorName = book['AuthorName']
                authorsearch = myDB.select("SELECT * from authors WHERE AuthorName=?", [AuthorName])
                if authorsearch:
                    # update authors needs to be updated every time a book is marked differently
                    # All queries bind AuthorName with "?" like the two
                    # above; string-built SQL broke on names containing
                    # quote characters.
                    lastbooks = myDB.select("SELECT BookName, BookLink, BookDate from books WHERE AuthorName=? AND Status != 'Ignored' order by BookDate DESC LIMIT 1", [AuthorName])
                    unignoredbooks = myDB.select("SELECT COUNT(BookName) as unignored FROM books WHERE AuthorName=? AND Status != 'Ignored'", [AuthorName])
                    bookCount = myDB.select("SELECT COUNT(BookName) as counter FROM books WHERE AuthorName=?", [AuthorName])
                    countbooks = myDB.select("SELECT COUNT(*) as havecount FROM books WHERE AuthorName=? AND (Status='Have' OR Status='Open')", [AuthorName])
                    havebooks = int(countbooks[0]['havecount'])

                    controlValueDict = {"AuthorName": AuthorName}
                    newValueDict = {
                        "TotalBooks": bookCount[0]['counter'],
                        "UnignoredBooks": unignoredbooks[0]['unignored'],
                        "HaveBooks": havebooks
                    }
                    # Only touch the last-book columns when a row exists.
                    if lastbooks:
                        lastbook = lastbooks[0]
                        newValueDict["LastBook"] = lastbook['BookName']
                        newValueDict["LastLink"] = lastbook['BookLink']
                        newValueDict["LastDate"] = lastbook['BookDate']
                    myDB.upsert("authors", newValueDict, controlValueDict)
        else:
            # Check before starting the lookup: the thread used to be
            # started first, and len(None) raised for the default bookid.
            if not bookid:
                raise cherrypy.HTTPRedirect("config")

            # GoodReads is the fallback, so find_book is always bound.
            if lazylibrarian.BOOK_API == "GoogleBooks":
                api = GoogleBooks(bookid)
            else:
                api = GoodReads(bookid)
            queue = Queue.Queue()
            find_book = threading.Thread(target=api.find_book, args=[bookid, queue])
            find_book.start()
            find_book.join()

        books = [{"bookid": bookid}]
        mags = False
        threading.Thread(target=searchbook, args=[books, mags]).start()

        raise cherrypy.HTTPRedirect("books")
Exemplo n.º 17
0
def search_for(searchterm):
    """ Search goodreads or googlebooks for searchterm and return the
        results as a list sorted by highest_fuzz, then num_reviews,
        both descending.

        find_results runs in a named worker thread that is joined before
        the results are read from the queue it was given. """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(searchterm)
        thread_name = 'GB-RESULTS'
    else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        provider = GoodReads(searchterm)
        thread_name = 'GR-RESULTS'

    results_queue = queue.Queue()
    worker = threading.Thread(target=provider.find_results, name=thread_name,
                              args=[searchterm, results_queue])
    worker.start()
    worker.join()

    return sorted(results_queue.get(),
                  key=itemgetter('highest_fuzz', 'num_reviews'),
                  reverse=True)
Exemplo n.º 18
0
    def _findBook(self, **kwargs):
        """ Search the configured book API for the 'name' keyword and store
            the results in self.data.

            When 'name' is missing, self.data is set to an error message
            instead. find_results runs in a named worker thread that is
            joined before its queue is read. """
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return

        term = kwargs['name']
        if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
            provider, thread_name = GoogleBooks(term), 'API-GBRESULTS'
        else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
            provider, thread_name = GoodReads(term), 'API-GRRESULTS'

        results = Queue.Queue()
        worker = threading.Thread(target=provider.find_results, name=thread_name, args=[term, results])
        worker.start()
        worker.join()

        self.data = results.get()
Exemplo n.º 19
0
    def _findAuthor(self, **kwargs):
        """ Search the configured book API for the 'name' keyword and store
            the results in self.data.

            When 'name' is missing, self.data is set to an error message
            instead. find_results runs in a named worker thread that is
            joined before its queue is read. """
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return

        name = kwargs['name']
        # GoodReads is the fallback, so search_api is always bound. The
        # database connection that used to be opened here was never read,
        # so it is no longer created.
        if lazylibrarian.BOOK_API == "GoogleBooks":
            api, thread_name = GoogleBooks(name), 'API-GBRESULTS'
        else:
            api, thread_name = GoodReads(name), 'API-GRRESULTS'

        queue = Queue.Queue()
        search_api = threading.Thread(target=api.find_results, name=thread_name, args=[name, queue])
        search_api.start()
        search_api.join()

        self.data = queue.get()
Exemplo n.º 20
0
    def _findAuthor(self, **kwargs):
        """ Search the configured book API for an author and store the
            results in self.data.

            When 'name' is missing, self.data is set to an error message
            instead. The name is passed through formatAuthorName before
            searching; find_results runs in a named worker thread that is
            joined before its queue is read. """
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return

        authorname = formatAuthorName(kwargs['name'])
        if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
            provider, thread_name = GoogleBooks(authorname), 'API-GBRESULTS'
        else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
            provider, thread_name = GoodReads(authorname), 'API-GRRESULTS'

        results = queue.Queue()
        worker = threading.Thread(target=provider.find_results,
                                  name=thread_name,
                                  args=[authorname, results])
        worker.start()
        worker.join()

        self.data = results.get()
Exemplo n.º 21
0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    if not startdir:
        if not lazylibrarian.DESTINATION_DIR:
            return 0
        else:
            startdir = lazylibrarian.DESTINATION_DIR

    if not os.path.isdir(startdir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % startdir)
        return 0

    myDB = database.DBConnection()

    # keep statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        myDB.action('DELETE from stats')

    logger.info('Scanning ebook directory: %s' % startdir)

    new_book_count = 0
    file_count = 0
    author = ""

    if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(startdir):
        for directory in d[:]:
            # prevent magazine being scanned
            if directory.startswith("_") or directory.startswith("."):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode(lazylibrarian.SYS_ENCODING)

            subdirectory = r.replace(startdir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (startdir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    extn = os.path.splitext(files)[1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == ".epub") or (extn == ".mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" %
                                (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' %
                                (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' %
                                    (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = unaccented(match_name)
                                match_auth = unaccented(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" %
                                        (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" %
                                        (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' %
                                        author).fetchone()
                                    if not check_exist_author:
                                        logger.info(
                                            "Adding new author [%s]" %
                                            author)
                                        try:
                                            addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' %
                                                author).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' %
                                    bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' %
                                        bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' %
                                        (book_filename, bookid))

                                    # update cover file to cover.jpg in book folder (if exists)
                                    bookdir = book_filename.rsplit(os.sep, 1)[0]
                                    coverimg = os.path.join(bookdir, 'cover.jpg')
                                    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
                                    cacheimg = os.path.join(cachedir, bookid + '.jpg')
                                    if os.path.isfile(coverimg):
                                        copyfile(coverimg, cacheimg)

                                    new_book_count += 1
                            else:
                                logger.debug(
                                    "Failed to match book [%s] by [%s] in database" %
                                    (book, author))


    logger.info("%s new/modified book%s found and added to the database" %
                (new_book_count, plural(new_book_count)))
    logger.info("%s file%s processed" % (file_count, plural(file_count)))

    # show statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats").fetchone()
        if stats['sum(GR_book_hits)'] is not None:
            # only show stats if new books added
            if lazylibrarian.BOOK_API == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                    (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                    (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)'])))
            if lazylibrarian.BOOK_API == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                    (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                    (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                (stats['sum(LT_lang_hits)'], plural (stats['sum(LT_lang_hits)'])))
            logger.debug("Language cache was hit %s time%s" %
                (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
            logger.debug("Unwanted language removed %s book%s" %
                (stats['sum(bad_lang)'], plural (stats['sum(bad_lang)'])))
            logger.debug("Unwanted characters removed %s book%s" %
                (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
            logger.debug("Unable to cache %s book%s with missing ISBN" %
                (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
            logger.debug("Found %s duplicate book%s" %
                (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
            logger.debug("Cache %s hit%s, %s miss" %
                (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
            nolang = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))

        authors = myDB.select('select AuthorID from authors')
        # Update bookcounts for all authors, not just new ones - refresh may have located
        # new books for existing authors especially if switched provider gb/gr
    else:
        # single author/book import
        authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author)

    logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
    for author in authors:
        update_totals(author['AuthorID'])

    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    if len(images):
        logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            bookid = item['bookid']
            bookimg = item['bookimg']
            bookname = item['bookname']
            newimg = cache_cover(bookid, bookimg)
            if newimg is not None:
                myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

    images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
    if len(images):
        logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            authorid = item['authorid']
            authorimg = item['authorimg']
            authorname = item['authorname']
            newimg = cache_cover(authorid, authorimg)
            if newimg is not None:
                myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))
    setWorkPages()
    logger.info('Library scan complete')
    return new_book_count
Exemplo n.º 22
0
    def find_book(self, bookid=None, queue=None):
        """
        Fetch one GoogleBooks volume and store it in the books table as "Wanted".

        The volume is requested through self.get_request, the fields we keep are
        read from its "volumeInfo" section, and the row is upserted into "books"
        with BookID as the control value. AuthorID comes from a GoodReads lookup
        of the first author listed on the volume.

        Returns None in every case. Nothing is written when the request gives
        no result, when the volume lists no author, or when GoodReads returns
        no match for the author.

        bookid: GoogleBooks volume id, appended to the volumes URL.
        queue:  not used by this method.
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()
        if not lazylibrarian.GB_API:
            logger.warn("No GoogleBooks API key, check config")
        URL = "https://www.googleapis.com/books/v1/volumes/" + str(bookid) + "?key=" + lazylibrarian.GB_API
        jsonresults, in_cache = self.get_request(URL)

        # nothing usable came back; bail out before subscripting the result
        if jsonresults is None:
            logger.debug("No results found for %s" % bookid)
            return

        volume = jsonresults["volumeInfo"]

        # drop colons and quotes from the title, then pass it through unidecode
        bookname = volume["title"]
        bookname = bookname.replace(":", "").replace('"', "").replace("'", "")
        bookname = unidecode(u"%s" % bookname)
        bookname = bookname.strip()  # strip whitespace

        try:
            authorname = volume["authors"][0]
        except (KeyError, IndexError):
            # IndexError covers an "authors" key holding an empty list
            logger.debug("Book %s does not contain author field, skipping" % bookname)
            return

        try:
            booklang = volume["language"]
        except KeyError:
            logger.debug("Book does not have language field")
            booklang = "Unknown"
        else:
            # warn if language is in ignore list, but user said they wanted this book
            valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(",")]
            if booklang not in valid_langs:
                logger.debug("Book %s language does not match preference" % bookname)

        # optional fields: fall back to the same defaults as a missing key
        bookpub = volume.get("publisher")
        booksub = volume.get("subtitle")
        bookdate = volume.get("publishedDate", "0000-00-00")
        bookimg = volume.get("imageLinks", {}).get("thumbnail", "images/nocover.png")
        bookrate = float(volume.get("averageRating", 0))
        bookpages = volume.get("pageCount", 0)
        bookdesc = volume.get("description")

        try:
            bookgenre = volume["categories"][0]
        except (KeyError, IndexError):
            bookgenre = None

        # only the first identifier is examined, and only kept if it is an ISBN-10
        try:
            first_identifier = volume["industryIdentifiers"][0]
            if first_identifier["type"] == "ISBN_10":
                bookisbn = first_identifier["identifier"]
            else:
                bookisbn = None
        except (KeyError, IndexError):
            bookisbn = None

        booklink = volume["canonicalVolumeLink"]

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # previously AuthorID was left unbound here and the upsert below crashed
            logger.warn(u"No GoodReads author found for %s, skipping %s" % (authorname, bookname))
            return
        AuthorID = author["authorid"]

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": formatter.today(),
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)
Exemplo n.º 23
0
def addAuthorToDB(authorname=None, refresh=False):
    """
    Add an author to the database, and get a list of all their books.
    If the author already exists in the database, refresh their details and booklist.

    authorname: author name; used as the AuthorName control value and for
                the GoodReads lookup.
    refresh:    passed through to GR.find_author_id and get_author_books.

    Returns None. If GoodReads returns nothing for the name, the author row is
    deleted again and no books are fetched. Any exception raised along the way
    is logged with its traceback and not re-raised.
    """
    try:
        myDB = database.DBConnection()

        GR = GoodReads(authorname)

        # Single quotes are doubled so the name can sit inside a single-quoted
        # SQL literal; the same escaped form is reused for the DELETE below.
        sql_name = authorname.replace("'", "''")
        dbauthor = myDB.match("SELECT * from authors WHERE AuthorName='%s'" % sql_name)
        controlValueDict = {"AuthorName": authorname}

        if not dbauthor:
            # placeholder id until GoodReads gives us the real one
            newValueDict = {
                "AuthorID": "0: %s" % (authorname),
                "Status": "Loading"
            }
            logger.debug("Now adding new author: %s to database" % authorname)
        else:
            newValueDict = {"Status": "Loading"}
            logger.debug("Now updating author: %s" % authorname)
        myDB.upsert("authors", newValueDict, controlValueDict)

        author = GR.find_author_id(refresh=refresh)
        if not author:
            logger.warn(u"Nothing found for %s" % authorname)
            # The raw name used to be dropped into a double-quoted literal here,
            # so a name containing '"' produced invalid SQL and the "Loading"
            # row written above was never removed.
            myDB.action("DELETE from authors WHERE AuthorName='%s'" % sql_name)
            return

        authorid = author['authorid']
        authorlink = author['authorlink']
        authorimg = author['authorimg']
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": authorid,
            "AuthorLink": authorlink,
            "AuthorImg": authorimg,
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # replace a 'nophoto' image via getAuthorImage, then cache any
        # remote (http) image through cache_cover
        new_img = False
        if authorimg and 'nophoto' in authorimg:
            authorimg = getAuthorImage(authorid)
            new_img = True
        if authorimg and authorimg.startswith('http'):
            newimg = cache_cover(authorid, authorimg)
            if newimg:
                authorimg = newimg
                new_img = True

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        # process books
        if lazylibrarian.BOOK_API == "GoogleBooks":
            book_api = GoogleBooks()
            book_api.get_author_books(authorid, authorname, refresh=refresh)
        elif lazylibrarian.BOOK_API == "GoodReads":
            GR.get_author_books(authorid, authorname, refresh=refresh)

        # update totals works for existing authors only.
        # New authors need their totals updating after libraryscan or import of books.
        if dbauthor:
            update_totals(authorid)
        logger.debug("[%s] Author update complete" % authorname)
    except Exception:
        logger.error('Unhandled exception in addAuthorToDB: %s' % traceback.format_exc())
Exemplo n.º 24
0
    def find_book(self, bookid=None, queue=None):
        """
        Fetch one GoogleBooks volume, store it as "Wanted", then enrich the row.

        The volume is requested through self.get_request and the fields we keep
        are read from its 'volumeInfo' section. Series name and number are
        parsed from a subtitle shaped like "... (<series> Series <num>)". The
        row is upserted into "books" with BookID as the control value, using an
        AuthorID obtained from a GoodReads lookup of the first listed author.

        After the upsert, bookwork is asked for a replacement cover (when the
        image is a nocover/nophoto placeholder), for series details (when none
        were parsed from the subtitle) and for a work page link.

        Returns None in every case. Nothing is written when the request gives
        no result, when the volume lists no author, or when GoodReads returns
        no match for the author.

        bookid: GoogleBooks volume id, appended to the volumes URL.
        queue:  not used by this method.
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()
        if not lazylibrarian.GB_API:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
            str(bookid) + "?key=" + lazylibrarian.GB_API
        jsonresults, in_cache = self.get_request(URL)

        if jsonresults is None:
            # bookname is not known yet at this point, so report the id instead
            logger.debug('No results found for %s' % bookid)
            return

        volume = jsonresults['volumeInfo']

        # drop colons and quotes from the title, then pass it through unidecode
        bookname = volume['title']
        bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
        bookname = unidecode(u'%s' % bookname)
        bookname = bookname.strip()  # strip whitespace

        try:
            authorname = volume['authors'][0]
        except (KeyError, IndexError):
            # IndexError covers an 'authors' key holding an empty list
            logger.debug(
                'Book %s does not contain author field, skipping' %
                bookname)
            return

        try:
            booklang = volume['language']
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"
        else:
            # warn if language is in ignore list, but user said they wanted
            # this book
            valid_langs = [valid_lang.strip()
                           for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
            if booklang not in valid_langs:
                logger.debug(
                    'Book %s language does not match preference' %
                    bookname)

        # Defaults first: these used to stay unbound when the volume had no
        # subtitle, which crashed when the values were stored further down.
        series = None
        seriesNum = None
        booksub = volume.get('subtitle')
        if booksub is not None:
            try:
                # text after the first '(' is split around ' Series '
                series_parts = booksub.split('(')[1].split(' Series ')
                # NOTE(review): series is set even when ' Series ' is absent,
                # i.e. to whatever follows the '(' — confirm this is wanted
                series = series_parts[0]
                seriesNum = series_parts[1].split(')')[0]
                if seriesNum[0] == '#':
                    seriesNum = seriesNum[1:]
            except IndexError:
                # no '(' at all, no ' Series ' marker, or an empty number
                seriesNum = None

        # optional fields: fall back to the same defaults as a missing key
        bookpub = volume.get('publisher')
        bookdate = volume.get('publishedDate', '0000-00-00')
        bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
        bookrate = float(volume.get('averageRating', 0))
        bookpages = volume.get('pageCount', 0)
        bookdesc = volume.get('description')

        try:
            bookgenre = volume['categories'][0]
        except (KeyError, IndexError):
            bookgenre = None

        # only the first identifier is examined, and only kept if it is an ISBN-10
        try:
            first_identifier = volume['industryIdentifiers'][0]
            if first_identifier['type'] == 'ISBN_10':
                bookisbn = first_identifier['identifier']
            else:
                bookisbn = None
        except (KeyError, IndexError):
            bookisbn = None

        booklink = volume['canonicalVolumeLink']

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # previously AuthorID was left unbound here and the upsert below crashed
            logger.warn(u'No GoodReads author found for %s, skipping %s' % (authorname, bookname))
            return
        AuthorID = author['authorid']

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": formatter.today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = bookwork.getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)
            # NOTE(review): this elif pairs with `if workcover`, so only
            # placeholder covers ever reach cache_cover — confirm whether
            # ordinary http covers were meant to be cached here as well
            elif bookimg.startswith('http'):
                link = bookwork.cache_cover(bookid, bookimg)
                if link is not None:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum is None:
            # try to get series info from librarything
            series, seriesNum = bookwork.getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {
                    "Series": series,
                    "SeriesNum": seriesNum
                }
                myDB.upsert("books", newValueDict, controlValueDict)

        worklink = bookwork.getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Exemplo n.º 25
0
def LibraryScan(dir=None):
    """
    Scan an ebook directory and mark every book found on disk as "Open".

    For each file with a valid book extension the author and title are taken
    from metadata embedded in the file (epub/mobi only), then from any .opf
    file in the same directory (which overrides the embedded metadata), and
    only if neither yields both values, from matching the filename against
    lazylibrarian.EBOOK_DEST_FILE.

    Side effects visible in this function:
      * the "stats" table is dropped and recreated, and read back at the end
      * with lazylibrarian.FULL_SCAN, "Open" books whose file is missing are
        set to lazylibrarian.NOTFOUND_STATUS and have BookFile cleared
      * language/ISBN prefixes found in metadata are added to "languages"
      * with lazylibrarian.ADD_AUTHOR, unknown authors confirmed by GoodReads
        are imported via importer.addAuthorToDB
      * matched books get Status "Open" and BookFile set to the file path
      * HaveBooks/TotalBooks/UnignoredBooks are recalculated for all authors
      * remote ("http...") book covers are passed to bookwork.cache_cover

    All values taken from files or from lookups are passed to the database as
    bound parameters, so quotes in names or paths cannot break the SQL.

    :param dir: directory to scan; when empty, lazylibrarian.DOWNLOAD_DIR is
                used. Nothing is done if neither is set or the directory
                does not exist.
    :return: None
    """
    if not dir:
        dir = lazylibrarian.DOWNLOAD_DIR
        if not dir:
            return

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    # Fresh statistics table for this scan. It is only read back here;
    # presumably the importer fills it while authors are added - TODO confirm.
    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookfile = book['BookFile']
            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status=?, BookFile="" where BookID=?',
                            (status, book['BookID']))
                logger.warn('Book %s - %s updated as not found on disk' %
                            (book['AuthorName'], book['BookName']))

    # to save repeat-scans of the same directory if it contains multiple formats
    # of the same book, keep track of which directories we've already looked at
    processed_subdirectories = set()

    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching: escape every character, then swap the
    # (escaped) $Author / $Title placeholders for named groups
    matchString = ''.join('\\' + char for char in lazylibrarian.EBOOK_DEST_FILE)
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    # the extensions must be an alternation group; a character class such as
    # [epub|mobi] would only match one single character after the dot
    booktypes = '|'.join(re.escape(book_type) for book_type in booktype_list)
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\\.(?:' + booktypes + ')'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        # prune hidden directories and "_" directories (magazines) in place so
        # that os.walk does not descend into them
        d[:] = [name for name in d if not name.startswith(('.', '_'))]

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode('utf-8')

            subdirectory = r.replace(dir, '')
            # Skip if we've done this directory before. Conditional on a config
            # switch in case user keeps multiple different books in the same
            # subdirectory
            if lazylibrarian.IMP_SINGLEBOOK and subdirectory in processed_subdirectories:
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            if not formatter.is_valid_booktype(files):
                continue

            logger.debug("[%s] Now scanning subdirectory %s" %
                         (dir, subdirectory))

            language = "Unknown"
            isbn = ""
            book = ""
            author = ""
            match = 0
            extn = files.split('.')[-1]

            # if it's an epub or a mobi we can try to read metadata from it
            if extn in ("epub", "mobi"):
                book_filename = os.path.join(
                    r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                try:
                    res = get_book_info(book_filename)
                except Exception:
                    # best effort: unreadable metadata is treated as "no metadata"
                    res = {}
                if 'title' in res and 'creator' in res:  # this is the minimum we need
                    match = 1
                    book = res['title']
                    author = res['creator']
                    if 'language' in res:
                        language = res['language']
                    if 'identifier' in res:
                        isbn = res['identifier']
                    if 'type' in res:
                        extn = res['type']

                    logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                 (isbn, language, author, book, extn))
                else:
                    logger.debug("Book meta incomplete in %s" % book_filename)

            # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
            # just look for any .opf file in the current directory since we don't know
            # LL preferred authorname/bookname at this point.
            # Allow metadata in file to override book contents as may be users pref
            metafile = opf_file(r)
            try:
                res = get_book_info(metafile)
            except Exception:
                # best effort: a missing/unreadable opf is treated as "no metadata"
                res = {}
            if 'title' in res and 'creator' in res:  # this is the minimum we need
                match = 1
                book = res['title']
                author = res['creator']
                if 'language' in res:
                    language = res['language']
                if 'identifier' in res:
                    isbn = res['identifier']
                logger.debug(
                    "file meta [%s] [%s] [%s] [%s]" %
                    (isbn, language, author, book))
            else:
                logger.debug("File meta incomplete in %s" % metafile)

            if not match:  # no author/book from metadata file, and not embedded either
                match = pattern.match(files)
                if match:
                    author = match.group("author")
                    book = match.group("book")
                else:
                    logger.debug("Pattern match failed [%s]" % files)

            if not match:
                continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.add(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown", add it to cache
            if language != "Unknown" and formatter.is_valid_isbn(isbn):
                logger.debug(
                    "Found Language [%s] ISBN [%s]" %
                    (language, isbn))
                # we need to add it to language cache if not already
                # there, is_valid_isbn has checked length is 10 or 13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached_lang = myDB.action(
                    'SELECT lang FROM languages where isbn = ?',
                    (isbnhead,)).fetchone()
                if not cached_lang:
                    myDB.action(
                        'insert into languages values (?, ?)',
                        (isbnhead, language))
                    logger.debug(
                        "Cached Lang [%s] ISBN [%s]" %
                        (language, isbnhead))
                else:
                    logger.debug(
                        "Already cached Lang [%s] ISBN [%s]" %
                        (language, isbnhead))

            # nothing can be looked up without an author name (metadata or the
            # filename pattern can legitimately yield an empty one)
            if not author:
                logger.debug("No author name found for [%s]" % files)
                continue

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(',')
                author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
            # "J R R Tolkien" -> "J.R.R.Tolkien"; length guard avoids an
            # IndexError on one-character names
            if len(author) > 1 and author[1] == ' ':
                author = author.replace(' ', '.')
                author = author.replace('..', '.')

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save repeating the search
            check_exist_author = myDB.action(
                'SELECT * FROM authors where AuthorName=?',
                (author,)).fetchone()
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to
                # add new ones
                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.warn(
                        "Error finding author id for [%s]" %
                        author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr['authorname']

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                    match_auth = _author_match_key(author)
                    match_name = _author_match_key(authorname)
                    # allow a degree of fuzziness to cater for different accented character handling.
                    # some author names have accents,
                    # filename may have the accented or un-accented version of the character
                    # The currently non-configurable value of fuzziness might need to go in config
                    # fuzz.ratio doesn't lowercase for us
                    match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                    if match_fuzz < 90:
                        logger.debug(
                            "Failed to match author [%s] fuzz [%d]" %
                            (author, match_fuzz))
                        logger.debug(
                            "Failed to match author [%s] to authorname [%s]" %
                            (match_auth, match_name))
                    elif author != "Unknown":
                        # To save loading hundreds of books by unknown
                        # authors at GR or GB, "Unknown" is never imported.
                        # use "intact" name for author that we stored in
                        # GR author_dict, not one of the various mangled versions
                        # otherwise the books appear to be by a different author!
                        author = authorname
                        # this new authorname may already be in the
                        # database, so check again
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName=?',
                            (author,)).fetchone()
                        if not check_exist_author:
                            logger.debug(
                                "Adding new author [%s]" %
                                author)
                            try:
                                importer.addAuthorToDB(author)
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName=?',
                                    (author,)).fetchone()
                            except Exception:
                                logger.warn(
                                    "Failed to add author [%s] to database" %
                                    author)
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.debug(
                    "Failed to match author [%s] in database" %
                    author)
                continue

            # author exists, check if this book by this author is in our database
            # metadata might have quotes in book name
            book = book.replace('"', '').replace("'", "")
            bookid = find_book_in_db(myDB, author, book)

            if bookid:
                # check if book is already marked as "Open" (if so,
                # we already had it)
                check_status = myDB.action(
                    'SELECT Status from books where BookID=?',
                    (bookid,)).fetchone()
                if check_status['Status'] != 'Open':
                    # update status as we've got this book, and store its
                    # location so we can check if it gets removed, or allow
                    # click-to-open
                    book_filename = os.path.join(r, files)
                    myDB.action(
                        'UPDATE books set Status="Open", BookFile=? where BookID=?',
                        (book_filename, bookid))

                    new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
    if stats['sum(GR_book_hits)'] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
    unknown_lang = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if unknown_lang:
        logger.warn("There are %s books in your library with unknown language" % unknown_lang)

    authors = myDB.select('select AuthorName from authors')
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug('Updating bookcounts for %i authors' % len(authors))
    for author in authors:
        name = author['AuthorName']
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName=? '
            'AND (Status="Have" OR Status="Open")',
            (name,)).fetchone()
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?',
                    (havebooks['counter'], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName=?',
            (name,)).fetchone()
        myDB.action('UPDATE authors set TotalBooks=? where AuthorName=?',
                    (totalbooks['counter'], name))
        unignoredbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName=? AND Status!="Ignored"',
            (name,)).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?',
                    (unignoredbooks['counter'], name))

    covers = myDB.action("select  count('bookimg') as counter from books where bookimg like 'http%'").fetchone()
    logger.info("Caching covers for %s books" % covers['counter'])

    # select() returns the complete result list, so the rows can be updated
    # safely while we loop over them
    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    for item in images:
        bookid = item['bookid']
        bookimg = item['bookimg']
        newimg = bookwork.cache_cover(bookid, bookimg)
        # cache_cover may return None on failure; keep the remote link then
        # rather than overwriting it
        if newimg and newimg != bookimg:
            myDB.action('update books set BookImg=? where BookID=?', (newimg, bookid))
    logger.info('Library scan complete')


def _author_match_key(name):
    """Return name with '.' and ' ' folded to single '_' and accents removed, for fuzzy comparison."""
    key = name.replace('.', '_')
    key = key.replace(' ', '_')
    key = key.replace('__', '_')
    return common.remove_accents(key)
Exemplo n.º 26
0
def addBookToDB(bookid, authorname):
    """
    Add (or refresh) a single book and its author in the database.

    The book row is first upserted with Status "Loading", then overwritten
    with the details returned by GoodReads.find_book() and Status "Skipped".
    The author row is upserted with the details from
    GoodReads.find_author_id(). If either lookup returns nothing a warning is
    logged and that table is left as it is.

    :param bookid: id of the book, used as the key for the "Loading" row
    :param authorname: author name, used for the GoodReads lookups and as the
                       key of the author row
    :return: None
    """
    myDB = database.DBConnection()
    GR = GoodReads(authorname, 'book')

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?',
                         [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {"BookID": "BookID: %s" % (bookid), "Status": "Loading"}
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        # %-formatting so that a non-string bookid cannot raise a TypeError
        logger.warn("Error fetching bookinfo for BookID: %s" % bookid)
    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # No "Loading" placeholder is written for the author: the row is only
    # upserted once GoodReads has returned the author details.
    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: %s" % authorname)
    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
Exemplo n.º 27
0
    def find_book(self, bookid=None, queue=None):
        """
        Fetch one volume from the Google Books API and upsert it as "Wanted".

        The volume's first listed author is looked up at GoodReads to obtain
        the AuthorID stored with the book. Optional fields missing from the
        response fall back to defaults (None, 0, "0000-00-00",
        "images/nocover.png", language "Unknown").

        Nothing is written, and the reason is logged, when the volume has no
        author or GoodReads returns no author for that name.

        A language outside lazylibrarian.IMP_PREFLANG is only logged; the
        book is still added.

        :param bookid: Google Books volume id
        :param queue: not used by this method
        :return: None
        :raises KeyError: if the response lacks "volumeInfo", "title" or
                          "canonicalVolumeLink"
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()

        URL = "https://www.googleapis.com/books/v1/volumes/" + str(bookid) + "?key=" + lazylibrarian.GB_API
        response = urllib2.urlopen(URL, timeout=30)
        try:
            jsonresults = json.JSONDecoder().decode(response.read())
        finally:
            # always release the connection, even if decoding fails
            response.close()

        volume = jsonresults["volumeInfo"]
        bookname = volume["title"]

        authors = volume.get("authors")
        if not authors:
            # the author is needed both for the row and for the GoodReads lookup
            logger.debug("Book %s does not contain author field, skipping" % bookname)
            return
        authorname = authors[0]

        booklang = volume.get("language")
        if booklang is None:
            logger.debug("Book does not have language field")
            booklang = "Unknown"
        else:
            valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(",")]
            if booklang not in valid_langs:
                logger.debug("Book %s language does not match preference" % bookname)

        bookpub = volume.get("publisher")
        booksub = volume.get("subtitle")
        bookdate = volume.get("publishedDate", "0000-00-00")
        bookimg = volume.get("imageLinks", {}).get("thumbnail", "images/nocover.png")
        bookrate = float(volume.get("averageRating", 0))
        bookpages = volume.get("pageCount", 0)
        bookdesc = volume.get("description")

        # guard against a present-but-empty categories list
        categories = volume.get("categories")
        bookgenre = categories[0] if categories else None

        # take the ISBN-10 wherever it appears in the identifier list
        bookisbn = None
        for identifier in volume.get("industryIdentifiers") or []:
            if identifier.get("type") == "ISBN_10":
                bookisbn = identifier.get("identifier")
                break

        booklink = volume["canonicalVolumeLink"]

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            logger.warn("No author id found for %s, unable to add %s" % (authorname, bookname))
            return
        AuthorID = author["authorid"]

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": formatter.today(),
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)
Exemplo n.º 28
0
def addAuthorToDB(authorname=None, refresh=False, authorid=None, addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        match = False
        author = None
        authorimg = ''
        new_author = not refresh
        entry_status = ''

        if authorid:
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (authorid,))
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Adding new author id %s to database" % authorid)
                new_author = True
            else:
                entry_status = dbauthor['Status']
                authorname = dbauthor['authorname']
                logger.debug("Updating author %s " % authorname)
                new_author = False

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            if new_author:
                newValueDict["AuthorName"] = "Loading"
                newValueDict["AuthorImg"] = "images/nophoto.png"
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorid)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                    if not dbauthor:
                        newValueDict["AuthorName"] = author['authorname']
                    elif dbauthor['authorname'] != author['authorname']:
                        authorname = dbauthor['authorname']
                        logger.warn("Authorname mismatch for %s [%s][%s]" %
                                    (authorid, dbauthor['authorname'], author['authorname']))
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn("Nothing found for %s" % authorid)
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorID=?', (authorid,))

        if authorname and author and not match:
            authorname = ' '.join(authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            author = GR.find_author_id(refresh=refresh)

            dbauthor = myDB.match("SELECT * from authors WHERE AuthorName=?", (authorname,))
            if author and not dbauthor:  # may have different name for same authorid (spelling?)
                dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (author['authorid'],))
                authorname = dbauthor['AuthorName']

            controlValueDict = {"AuthorName": authorname}

            if not dbauthor:
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" % authorname)
                entry_status = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                entry_status = dbauthor['Status']
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                if dbauthor:
                    if authorname != dbauthor['authorname']:
                        # name change might be users preference
                        logger.warn("Conflicting authorname for %s [%s][%s] Ignoring change" %
                                    (author['authorid'], authorname, dbauthor['authorname']))
                        authorname = dbauthor['authorname']
                        # cmd = 'UPDATE authors SET AuthorName=? WHERE AuthorName=?'
                        # myDB.action(cmd, (author['authorname'], dbauthor['authorname']))
                    if author['authorid'] != dbauthor['authorid']:
                        # GoodReads may have altered authorid?
                        logger.warn("Conflicting authorid for %s (%s:%s) Moving to new authorid" %
                                    (authorname, author['authorid'], dbauthor['authorid']))
                        cmd = 'UPDATE books SET AuthorID=? WHERE AuthorID=?'
                        myDB.action(cmd, (author['authorid'], dbauthor['authorid']))
                        myDB.action('DELETE from authors WHERE AuthorID=?', (dbauthor['authorid'],))
                        dbauthor = None

                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn("Nothing found for %s" % authorname)
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
                return
        if not match:
            logger.error("No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?", (authorid,))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success, _ = cache_img("author", authorid, authorimg, refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                audiostatus = lazylibrarian.CONFIG['NEWAUTHOR_AUDIO']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']
                audiostatus = lazylibrarian.CONFIG['NEWAUDIO_STATUS']

            if entry_status not in ['Active', 'Wanted', 'Ignored', 'Paused']:
                entry_status = 'Active'  # default for invalid/unknown or "loading"
            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                if lazylibrarian.CONFIG['GB_API']:
                    book_api = GoogleBooks()
                    book_api.get_author_books(authorid, authorname, bookstatus=bookstatus,
                                              audiostatus=audiostatus, entrystatus=entry_status,
                                              refresh=refresh)
                # if lazylibrarian.CONFIG['GR_API']:
                #     book_api = GoodReads(authorname)
                #     book_api.get_author_books(authorid, authorname, bookstatus=bookstatus,
                #                               ausiostatus=audiostatus, entrystatus=entry_status,
                #                               refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                if lazylibrarian.CONFIG['GR_API']:
                    book_api = GoodReads(authorname)
                    book_api.get_author_books(authorid, authorname, bookstatus=bookstatus,
                                              audiostatus=audiostatus, entrystatus=entry_status,
                                              refresh=refresh)
                # if lazylibrarian.CONFIG['GB_API']:
                #     book_api = GoogleBooks()
                #     book_api.get_author_books(authorid, authorname, bookstatus=bookstatus,
                #                               audiostatus=audiostatus, entrystatus=entry_status,
                #                               refresh=refresh)

            update_totals(authorid)

            if new_author and lazylibrarian.CONFIG['GR_FOLLOWNEW']:
                res = grfollow(authorid, True)
                if res.startswith('Unable'):
                    logger.warn(res)
                try:
                    followid = res.split("followid=")[1]
                    logger.debug('%s marked followed' % authorname)
                except IndexError:
                    followid = ''
                myDB.action('UPDATE authors SET GRfollow=? WHERE AuthorID=?', (followid, authorid))
        else:
            # if we're not loading any books, mark author as ignored
            entry_status = 'Ignored'

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": entry_status}
        myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete, status %s" % (authorname, entry_status)
        logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Exemplo n.º 29
0
def _ebook_filename_pattern(dest_file, book_types):
    """Compile the regex used to recognise ebook filenames.

    ``dest_file`` is the filename template containing the ``$Author`` and
    ``$Title`` placeholders; ``book_types`` is the list of accepted file
    extensions.  The compiled pattern exposes the named groups ``author``
    and ``book`` and only matches names ending in one of the extensions.
    """
    # re.escape keeps literal template text literal. Prefixing every
    # character with a backslash would turn letters into regex escapes
    # such as \b or \d.
    template = re.escape(dest_file)
    template = template.replace(re.escape('$Author'), '(?P<author>.*?)')
    template = template.replace(re.escape('$Title'), '(?P<book>.*?)')
    extensions = '|'.join(re.escape(book_type) for book_type in book_types)
    # A group, not a character class: "[epub|mobi]" matches a single letter.
    return re.compile(template + r'\.(?:' + extensions + r')$')


def LibraryScan(dir=None):
    """Synchronise the books table with the ebook files found under ``dir``.

    dir: directory to scan; falls back to lazylibrarian.DOWNLOAD_DIR. The
        function returns without doing anything when neither is usable.

    Steps:
      1. With FULL_SCAN enabled, every book whose Status is 'Open' but whose
         expected file is missing is set to NOTFOUND_STATUS.
      2. The directory tree is walked (skipping directories starting with
         "." or "_"); files matching the EBOOK_DEST_FILE template are marked
         'Open'.  Unknown authors are added through importer.addAuthorToDB
         when ADD_AUTHOR is set and the GoodReads author link matches.
      3. HaveBooks and UnignoredBooks are recalculated for every author
         touched by steps 1 and 2.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()
    new_authors = []

    logger.info('Scanning ebook directory: %s' %
                dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    book_types = getList(lazylibrarian.EBOOK_TYPE)

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName from books where Status=?', [u'Open'])
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            # Default destination path, should be allowed change per config file.
            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                '$Author', bookAuthor).replace('$Title', bookName)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                '$Author', bookAuthor).replace('$Title', bookName)

            # Must be reset for every book, otherwise the first file found
            # would hide all later missing books.
            book_exists = False
            encoded_book_path = None
            for book_type in book_types:
                encoded_book_path = os.path.join(
                    dir, dest_path, global_name + "." + book_type).encode(
                        lazylibrarian.SYS_ENCODING)
                if os.path.isfile(encoded_book_path):
                    book_exists = True
                    break

            if book_exists or encoded_book_path is None:
                # found on disk, or no ebook types configured to look for
                continue

            myDB.action(
                'update books set Status=? where AuthorName=? and BookName=?',
                [status, bookAuthor, bookName])
            logger.info('Book %s updated as not found on disk' %
                        encoded_book_path.decode(
                            lazylibrarian.SYS_ENCODING, 'replace'))
            if bookAuthor not in new_authors:
                new_authors.append(bookAuthor)

    # The filename pattern only depends on configuration, so build it once
    # instead of once per file.
    pattern = _ebook_filename_pattern(lazylibrarian.EBOOK_DEST_FILE, book_types)

    for r, d, f in os.walk(dir):
        # Prune in place so os.walk does not descend into hidden directories
        # or "_" directories (used for magazines).
        d[:] = [directory for directory in d
                if not directory.startswith((".", "_"))]

        for files in f:
            subdirectory = r.replace(dir, '')
            logger.info(
                "[%s] Now scanning subdirectory %s" %
                (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'),
                 subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))

            match = pattern.match(files)
            if not match:
                continue

            author = match.group("author")
            book = match.group("book")

            # check if book is in database, and not marked as in library
            check_exist_book = myDB.action(
                "SELECT * FROM books where AuthorName=? and BookName=? and Status!=?",
                [author, book, 'Open']).fetchone()
            if check_exist_book:
                if author not in new_authors:
                    new_authors.append(author)
                myDB.action(
                    'UPDATE books set Status=? where AuthorName=? and BookName=?',
                    ['Open', author, book])
                new_book_count += 1
                continue

            check_exist_author = myDB.action(
                "SELECT * FROM authors where AuthorName=?",
                [author]).fetchone()
            if check_exist_author or not lazylibrarian.ADD_AUTHOR:
                continue

            GR = GoodReads(author)
            try:
                author_gr = GR.find_author_id()
            except Exception as e:
                # best effort: skip this file but leave a trace of the failure
                logger.debug("GoodReads lookup failed for %s: %s" % (author, e))
                continue

            # only try to add if GR data matches found author data
            if not author_gr:
                continue

            authorid = author_gr['authorid']
            authorlink = author_gr['authorlink']
            authorlink = authorlink[authorlink.rfind('/') + 1:]
            # Rebuild the "<id>.<Name_With_Underscores>" tail of the GoodReads
            # author link from the name parsed out of the filename.
            match_auth = author.replace('.', '_')
            match_auth = match_auth.replace(' ', '_')
            match_auth = match_auth.replace('__', '_')
            match_auth = authorid + "." + match_auth
            logger.debug(match_auth)
            logger.debug(authorlink)
            if match_auth != authorlink:
                logger.info(
                    "Unable to match %s in GoodReads database" % author)
                continue

            logger.info("Adding %s" % author)
            try:
                importer.addAuthorToDB(author)
            except Exception as e:
                logger.debug("Unable to add author %s: %s" % (author, e))
                continue

            check_exist_book = myDB.action(
                "SELECT * FROM books where AuthorName=? and BookName=?",
                [author, book]).fetchone()
            if check_exist_book:
                if author not in new_authors:
                    new_authors.append(author)
                myDB.action(
                    'UPDATE books set Status=? where AuthorName=? and BookName=?',
                    ['Open', author, book])
                new_book_count += 1

    logger.info("%s new/modified books found and added to the database" %
                new_book_count)
    logger.info('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(
            myDB.select(
                'select BookName from Books where status=? and AuthorName=?',
                ['Open', auth]))
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?',
                    [havebooks, auth])
        totalbooks = len(
            myDB.select(
                'select BookName from Books where status!=? and AuthorName=?',
                ['Ignored', auth]))
        myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?',
                    [totalbooks, auth])

    logger.info('Library scan complete')
Exemplo n.º 30
0
    def find_book(self, bookid=None, bookstatus="None"):
        """Look up one GoogleBooks volume and store it in the books table.

        The volume is fetched with gb_json_request and its author is resolved
        through GoodReads; the author is added to the authors table when
        neither the id nor the name is already known.  The book row is then
        upserted and its cover, series and work page are filled in.

        bookid: GoogleBooks volume id.
        bookstatus: status stored on the book row; when unset (or left at the
            "None" default) the NEWBOOK_STATUS config value is used.

        Returns None in all cases; nothing is stored when the request yields
        no result, the volume has no author, or GoodReads finds no author.
        """
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, _ = gb_json_request(URL)

        if jsonresults is None:
            logger.debug('No results found for %s' % bookid)
            return

        # The signature default is the *string* "None", which is truthy, so it
        # has to be checked explicitly for the config fallback to apply.
        if not bookstatus or bookstatus == "None":
            bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

        book = bookdict(jsonresults)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(book['name'], dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        if not book['author']:
            logger.debug('Book %s does not contain author field, skipping' % bookname)
            return
        # warn if language is in ignore list, but user said they wanted this book
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if book['lang'] not in valid_langs and 'All' not in valid_langs:
            logger.debug('Book %s googlebooks language does not match preference, %s' % (bookname, book['lang']))

        if lazylibrarian.CONFIG['NO_PUBDATE']:
            if not book['date'] or book['date'] == '0000':
                logger.warn('Book %s Publication date does not match preference, %s' % (bookname, book['date']))

        if lazylibrarian.CONFIG['NO_FUTURE']:
            # guard against a missing date before comparing strings
            if book['date'] and book['date'] > today()[:4]:
                logger.warn('Book %s Future publication date does not match preference, %s' % (bookname, book['date']))

        authorname = book['author']
        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
            return

        AuthorID = author['authorid']
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:  # no author but request to add book, add author with newauthor status
                # User hit "add book" button from a search or a wishlist import
                newauthor_status = 'Active'
                if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                    newauthor_status = 'Paused'
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": newauthor_status
                }
                authorname = author['authorname']
                myDB.upsert("authors", newValueDict, controlValueDict)
                if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                    self.get_author_books(AuthorID, entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": book['sub'],
            "BookDesc": book['desc'],
            "BookIsbn": book['isbn'],
            "BookPub": book['pub'],
            "BookGenre": book['genre'],
            "BookImg": book['img'],
            "BookLink": book['link'],
            "BookRate": float(book['rate']),
            "BookPages": book['pages'],
            "BookDate": book['date'],
            "BookLang": book['lang'],
            "Status": bookstatus,
            "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s by %s added to the books database" % (bookname, authorname))

        bookimg = book['img'] or ''  # tolerate a missing image link
        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from another source
            workcover, source = getBookCover(bookid)
            if workcover:
                logger.debug('Updated cover for %s using %s' % (bookname, source))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)
        elif bookimg.startswith('http'):
            # A real remote cover: cache it.  This branch belongs to the outer
            # "placeholder image?" test, not to the workcover lookup.
            link, success, _ = cache_img("book", bookid, bookimg)
            if success:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('Failed to cache image for %s' % bookimg)

        serieslist = []
        if book['series']:
            serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
        if lazylibrarian.CONFIG['ADD_SERIES']:
            newserieslist = getWorkSeries(bookid)
            if newserieslist:
                serieslist = newserieslist
                logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
        setSeries(serieslist, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Exemplo n.º 31
0
    def find_book(self, bookid=None, queue=None):
        """Fetch one GoogleBooks volume and upsert it into the books table as "Wanted".

        bookid: GoogleBooks volume id used to build the request URL.
        queue: accepted for interface compatibility; not used here.

        Returns None.  Nothing is stored when the volume has no author or
        when GoodReads cannot resolve the author to an id.
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()

        URL = 'https://www.googleapis.com/books/v1/volumes/' + str(
            bookid) + "?key=" + lazylibrarian.GB_API
        response = urllib2.urlopen(URL, timeout=30)
        try:
            jsonresults = json.JSONDecoder().decode(response.read())
        finally:
            # always release the connection, even if decoding fails
            response.close()

        volume = jsonresults['volumeInfo']
        bookname = volume['title']

        authors = volume.get('authors')
        if not authors:
            # Without an author there is nothing to resolve against GoodReads,
            # and the row below could not be built.
            logger.debug('Book %s does not contain author field' % bookname)
            return
        authorname = authors[0]

        booklang = volume.get('language')
        if booklang is None:
            logger.debug('Book does not have language field')
            booklang = "Unknown"
        else:
            # only reported: the user explicitly asked for this book
            valid_langs = [
                valid_lang.strip()
                for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')
            ]
            if booklang not in valid_langs:
                logger.debug('Book %s language does not match preference' %
                             bookname)

        bookpub = volume.get('publisher')
        booksub = volume.get('subtitle')
        bookdate = volume.get('publishedDate', '0000-00-00')
        bookimg = volume.get('imageLinks', {}).get('thumbnail',
                                                   'images/nocover.png')
        bookpages = volume.get('pageCount', 0)
        bookdesc = volume.get('description')

        categories = volume.get('categories')
        bookgenre = categories[0] if categories else None

        # Only the first identifier is considered, and only if it is an ISBN-10.
        bookisbn = None
        identifiers = volume.get('industryIdentifiers')
        if identifiers and identifiers[0].get('type') == 'ISBN_10':
            bookisbn = identifiers[0].get('identifier')

        booklink = volume['canonicalVolumeLink']
        bookrate = float(volume.get('averageRating', 0))

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            logger.debug('No AuthorID for %s, unable to add book %s' %
                         (authorname, bookname))
            return
        AuthorID = author['authorid']

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)
Exemplo n.º 32
0
def addAuthorToDB(authorname=None, refresh=False):
    """Add or refresh an author and their books, then update the book counters.

    authorname: author name used both for the GoodReads lookup and as the
        key of the authors row.
    refresh: passed through to the GoodReads / GoogleBooks calls.

    The authors row is set to "Loading" first.  If GoodReads finds nothing,
    the row is deleted again and the function returns.  Otherwise the
    author details are stored, the books are loaded through the API named
    by lazylibrarian.BOOK_API, and HaveBooks / TotalBooks / UnignoredBooks
    are recalculated.  Returns None.
    """
    threading.currentThread().name = "DBIMPORT"

    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Bound parameters instead of string-built SQL: author names routinely
    # contain quotes (O'Brien, "Doc" Smith).
    dbauthor = myDB.action('SELECT * from authors WHERE AuthorName=?',
                           [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        myDB.action('DELETE from authors WHERE AuthorName=?', [authorname])
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    # (authors column, extra condition on the books rows that are counted)
    counters = (
        ('HaveBooks', " AND (Status='Have' OR Status='Open')"),
        ('TotalBooks', ''),
        ('UnignoredBooks', " AND Status!='Ignored'"),
    )
    for column, condition in counters:
        row = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName=?' + condition,
            [authorname]).fetchone()
        # column comes from the constant table above, never from user input
        myDB.action('UPDATE authors set %s=? where AuthorName=?' % column,
                    [row['counter'], authorname])

    logger.debug("[%s] Author update complete" % authorname)
Exemplo n.º 33
0
    def find_book(self, bookid=None, queue=None):
        """Fetch one GoogleBooks volume and upsert it into the books table as "Wanted".

        The title has German umlauts transliterated and ":" removed before it
        is stored.

        bookid: GoogleBooks volume id used to build the request URL.
        queue: accepted for interface compatibility; not used here.

        Returns None.  Nothing is stored when the volume has no author or
        when GoodReads cannot resolve the author to an id.
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()

        URL = 'https://www.googleapis.com/books/v1/volumes/' + str(bookid) + "?key="+lazylibrarian.GB_API
        response = urllib2.urlopen(URL, timeout=30)
        try:
            jsonresults = json.JSONDecoder().decode(response.read())
        finally:
            # always release the connection, even if decoding fails
            response.close()

        volume = jsonresults['volumeInfo']

        # Darkie67: replace German umlauts and filter out ":"
        title_replacements = (
            (u'\xf6', u'oe'), (u'\xe4', u'ae'), (u'\xdf', u'ss'),
            (u'\xc4', u'Ae'), (u'\xdc', u'Ue'), (u'\xd6', u'Oe'),
            (':', ''), (u'\xfc', u'ue'),
        )
        bookname = volume['title']
        for char, replacement in title_replacements:
            bookname = bookname.replace(char, replacement)

        authors = volume.get('authors')
        if not authors:
            # Without an author there is nothing to resolve against GoodReads,
            # and the row below could not be built.
            logger.debug('Book %s does not contain author field' % bookname)
            return
        authorname = authors[0]

        booklang = volume.get('language')
        if booklang is None:
            logger.debug('Book does not have language field')
            booklang = "Unknown"
        else:
            # only reported: the user explicitly asked for this book
            valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
            if booklang not in valid_langs:
                logger.debug('Book %s language does not match preference' % bookname)

        bookpub = volume.get('publisher')
        booksub = volume.get('subtitle')
        bookdate = volume.get('publishedDate', '0000-00-00')
        bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
        bookpages = volume.get('pageCount', 0)
        bookdesc = volume.get('description')

        categories = volume.get('categories')
        bookgenre = categories[0] if categories else None

        # Only the first identifier is considered, and only if it is an ISBN-10.
        bookisbn = None
        identifiers = volume.get('industryIdentifiers')
        if identifiers and identifiers[0].get('type') == 'ISBN_10':
            bookisbn = identifiers[0].get('identifier')

        booklink = volume['canonicalVolumeLink']
        bookrate = float(volume.get('averageRating', 0))

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            logger.debug('No AuthorID for %s, unable to add book %s' % (authorname, bookname))
            return
        AuthorID = author['authorid']

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName":   authorname,
            "AuthorID":     AuthorID,
            "AuthorLink":   "",
            "BookName":     bookname,
            "BookSub":      booksub,
            "BookDesc":     bookdesc,
            "BookIsbn":     bookisbn,
            "BookPub":      bookpub,
            "BookGenre":    bookgenre,
            "BookImg":      bookimg,
            "BookLink":     booklink,
            "BookRate":     bookrate,
            "BookPages":    bookpages,
            "BookDate":     bookdate,
            "BookLang":     booklang,
            "Status":       "Wanted",
            "BookAdded":    formatter.today()
            }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)
Exemplo n.º 34
0
    def find_book(self, bookid=None, queue=None):
        """Fetch one GoogleBooks volume and upsert it into the books table as "Wanted".

        The title is stripped of ":", quotes and surrounding whitespace and
        passed through unidecode before it is stored.

        bookid: GoogleBooks volume id used to build the request URL.
        queue: accepted for interface compatibility; not used here.

        Returns None.  Nothing is stored when the volume has no author or
        when GoodReads cannot resolve the author to an id.
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()
        if not lazylibrarian.GB_API:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + str(
            bookid) + "?key=" + lazylibrarian.GB_API
        jsonresults, in_cache = self.get_request(URL)

        volume = jsonresults['volumeInfo']

        bookname = volume['title']
        bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
        bookname = unidecode(u'%s' % bookname)
        bookname = bookname.strip()  # strip whitespace

        authors = volume.get('authors')
        if not authors:
            # covers both a missing field and an empty author list
            logger.debug('Book %s does not contain author field, skipping' %
                         bookname)
            return
        authorname = authors[0]

        booklang = volume.get('language')
        if booklang is None:
            logger.debug('Book does not have language field')
            booklang = "Unknown"
        else:
            # warn if language is in ignore list, but user said they wanted this book
            valid_langs = [
                valid_lang.strip()
                for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')
            ]
            if booklang not in valid_langs:
                logger.debug('Book %s language does not match preference' %
                             bookname)

        bookpub = volume.get('publisher')
        booksub = volume.get('subtitle')
        bookdate = volume.get('publishedDate', '0000-00-00')
        bookimg = volume.get('imageLinks', {}).get('thumbnail',
                                                   'images/nocover.png')
        bookpages = volume.get('pageCount', 0)
        bookdesc = volume.get('description')

        categories = volume.get('categories')
        bookgenre = categories[0] if categories else None

        # Only the first identifier is considered, and only if it is an ISBN-10.
        bookisbn = None
        identifiers = volume.get('industryIdentifiers')
        if identifiers and identifiers[0].get('type') == 'ISBN_10':
            bookisbn = identifiers[0].get('identifier')

        booklink = volume['canonicalVolumeLink']
        bookrate = float(volume.get('averageRating', 0))

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # AuthorID would otherwise be unbound when the row is built
            logger.warn('No AuthorID for %s, unable to add book %s' %
                        (authorname, bookname))
            return
        AuthorID = author['authorid']

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)
Exemplo n.º 35
0
def addAuthorToDB(authorname=None):
    """
    Add (or refresh) an author and their books in the database.

    The author is looked up at GoodReads for id, link, image and dates. Every
    GoogleBooks result whose author name is exactly ``authorname`` is stored in
    the books table with status "Skipped". Finally the author is marked
    "Active" with the number of books processed and the most recent book.

    :param authorname: name of the author to import
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)
    GB = GoogleBooks(authorname, search_type)

    # Bound parameter, so names containing quotes (O'Brien) cannot break the SQL
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    # authorid/authorlink get usable fallbacks here so that the book loop below
    # still works when GoodReads returns nothing for this author
    if dbauthor is None:
        authorid = "0: %s" % (authorname)
        newValueDict = {
            "AuthorID":   authorid,
            "Status":       "Loading"
            }
    else:
        authorid = dbauthor['AuthorID']
        newValueDict = {"Status": "Loading"}
    authorlink = ""
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if author:
        authorid = author['authorid']
        authorlink = author['authorlink']
        authorimg = author['authorimg']
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID":     authorid,
            "AuthorLink":   authorlink,
            "AuthorImg":    authorimg,
            "AuthorBorn":   author['authorborn'],
            "AuthorDeath":  author['authordeath'],
            "DateAdded":    formatter.today(),
            "Status":       "Loading"
            }
        myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.error("Nothing found")

    # process books
    bookscount = 0
    books = GB.find_results()
    for book in books:

        # this is for rare cases where google returns multiple authors who share nameparts
        if book['authorname'] == authorname:

            controlValueDict = {"BookID": book['bookid']}
            newValueDict = {
                "AuthorName":   book['authorname'],
                "AuthorID":     authorid,
                "AuthorLink":   authorlink,
                "BookName":     book['bookname'],
                "BookSub":      book['booksub'],
                "BookDesc":     book['bookdesc'],
                "BookIsbn":     book['bookisbn'],
                "BookPub":      book['bookpub'],
                "BookGenre":    book['bookgenre'],
                "BookImg":      book['bookimg'],
                "BookLink":     book['booklink'],
                "BookRate":     book['bookrate'],
                "BookPages":    book['bookpages'],
                "BookDate":     book['bookdate'],
                "BookLang":     book['booklang'],
                "Status":       "Skipped",
                "BookAdded":    formatter.today()
                }

            myDB.upsert("books", newValueDict, controlValueDict)
            bookscount += 1

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorName=? order by BookDate DESC",
        [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}
    # the author may have no books at all, in which case there is no "last book"
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookscount,
        "LastBook": lastbook['BookName'] if lastbook is not None else None,
        "LastLink": lastbook['BookLink'] if lastbook is not None else None,
        "LastDate": lastbook['BookDate'] if lastbook is not None else None
        }

    myDB.upsert("authors", newValueDict, controlValueDict)
    logger.info("Processing complete: Added %s books to the database" % bookscount)
Exemplo n.º 36
0
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    """
    Resolve an author name to a database entry, importing the author if allowed.

    The name is normalised with formatAuthorName, then looked up by exact name,
    then by a fuzzy comparison (ratio >= 95) against every stored author name.
    If still unknown and CONFIG['ADD_AUTHOR'] is set, GoodReads is queried and
    the author is added when the GoodReads name is a close match (ratio >= 90)
    and the name is not "Unknown".

    Returns a tuple (authorname, authorid, new):
      authorname  our preferred name, or "" if not found or unable to add
      authorid    the AuthorID from the database, or ""
      new         True only if the author was added by this call
    """
    new = False
    if not author or len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    myDB = database.DBConnection()

    # exact match on the formatted name first
    db_row = myDB.match('SELECT AuthorID FROM authors where AuthorName=?', (author,))

    # otherwise accept a near-identical stored name (misspellings, accents)
    if not db_row:
        wanted = author.lower()
        for candidate in myDB.action('select AuthorID,AuthorName from authors'):
            known_name = candidate['AuthorName']
            if not known_name:
                continue
            score = fuzz.ratio(known_name.lower(), wanted)
            if score < 95:
                continue
            logger.debug("Fuzzy match [%s] %s%% for [%s]" % (candidate['AuthorName'], score, author))
            db_row = candidate
            author = candidate['AuthorName']
            break

    if not db_row and lazylibrarian.CONFIG['ADD_AUTHOR']:
        # nothing in the database, but we are allowed to add new authors
        logger.debug('Author %s not found in database, trying to add' % author)
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("%s finding author id for [%s] %s" % (type(e).__name__, author, str(e)))
            return "", "", False

        # only add the author if the GoodReads name agrees with the one we were given
        if author_gr:
            gr_name = author_gr['authorname']
            # "J.R.R. Tolkien", "J. R. R. Tolkien" and "J R R Tolkien" are the same person:
            # turn dots into spaces, collapse whitespace and strip accents on both sides
            supplied_norm = ' '.join(author.replace('.', ' ').split())
            gr_norm = ' '.join(gr_name.replace('.', ' ').split())
            gr_norm = unaccented(gr_norm)
            supplied_norm = unaccented(supplied_norm)

            # fuzz.ratio is case sensitive, so lowercase both names ourselves.
            # The threshold of 90 is fixed here; it might belong in config.
            score = fuzz.ratio(supplied_norm.lower(), gr_norm.lower())
            if score < 90:
                logger.debug("Failed to match author [%s] to authorname [%s] fuzz [%d]" %
                             (author, gr_norm, score))
            elif author != "Unknown":
                # "Unknown" is skipped to avoid loading hundreds of books by unknown authors.
                # Store under the unmodified GoodReads name, otherwise the books
                # appear to be by a different author.
                author = gr_name
                authorid = author_gr['authorid']
                # the GoodReads author may already be in the database under this id
                db_row = myDB.match('SELECT AuthorID FROM authors where AuthorID=?', (authorid,))
                if db_row:
                    logger.debug('Found goodreads authorname %s in database' % author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author, refresh=refresh, authorid=authorid, addbooks=addbooks)
                        db_row = myDB.match('SELECT AuthorID FROM authors where AuthorID=?', (authorid,))
                    except Exception as e:
                        logger.error('Failed to add author [%s] to db: %s %s' % (author, type(e).__name__, str(e)))
                    else:
                        if db_row:
                            new = True

    # by now the author is either in the database (old or new) or we give up
    if not db_row:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False
    return makeUnicode(author), db_row['AuthorID'], new
Exemplo n.º 37
0
def addBookToDB(bookid, authorname):
    """
    Add a single book, and the details of its author, to the database.

    The book row is first marked "Loading", then replaced with the details
    returned by GoodReads (status "Skipped"). The author's GoodReads details
    are then written to the authors table with status "Loading".

    :param bookid: id of the book to add
    :param authorname: name of the book's author
    """
    search_type = 'book'
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {
            "BookID":   "BookID: %s" % (bookid),
            "Status":       "Loading"
            }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        # %-formatting so a non-string bookid cannot raise while logging
        logger.warn("Error fetching bookinfo for BookID: %s" % bookid)

    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName":   book['authorname'],
            "BookName":     book['bookname'],
            "BookDesc":     book['bookdesc'],
            "BookIsbn":     book['bookisbn'],
            "BookImg":      book['bookimg'],
            "BookLink":     book['booklink'],
            "BookRate":     book['bookrate'],
            "BookPages":    book['bookpages'],
            "BookDate":     book['bookdate'],
            "BookLang":     book['booklang'],
            "Status":       "Skipped",
            "BookAdded":    formatter.today()
            }

        myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # Only the GoodReads result is written to the authors table; no interim
    # "Loading" row is stored for the author.
    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: %s" % authorname)

    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID":     author['authorid'],
            "AuthorLink":   author['authorlink'],
            "AuthorImg":    author['authorimg'],
            "AuthorBorn":   author['authorborn'],
            "AuthorDeath":  author['authordeath'],
            "DateAdded":    formatter.today(),
            "Status":       "Loading"
            }
        myDB.upsert("authors", newValueDict, controlValueDict)
Exemplo n.º 38
0
def addAuthorToDB(authorname=None,
                  refresh=False,
                  authorid=None,
                  addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    :param authorname: author name, used for the lookup when authorid is not given
                       or the lookup by id finds nothing
    :param refresh: passed on to the GoodReads lookup, the image cache and the book import
    :param authorid: GoodReads author id; tried before the name when supplied
    :param addbooks: if True load the author's books using the configured BOOK_API,
                     if False the author is marked "Ignored"
    :return: a status message, or None when no matching author could be found.
             Unexpected exceptions are logged and returned as the message.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh
        entry_status = ''

        if authorid:
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?",
                                  (authorid, ))
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Adding new author id %s to database" % authorid)
                new_author = True
            else:
                entry_status = dbauthor['Status']
                authorname = dbauthor['authorname']
                logger.debug("Updating author %s " % authorname)
                new_author = False

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # entries flagged manual keep their stored name, image and dates
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # remove the "Loading" placeholder row we just created
                    myDB.action('DELETE from authors WHERE AuthorID=?',
                                (authorid, ))

        if authorname and not match:
            authorname = ' '.join(
                authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            author = GR.find_author_id(refresh=refresh)

            # the name is a bound parameter, so it must not be SQL-escaped by hand
            query = "SELECT * from authors WHERE AuthorName=?"
            dbauthor = myDB.match(query, (authorname, ))
            if author and not dbauthor:  # may have different name for same authorid (spelling?)
                query = "SELECT * from authors WHERE AuthorID=?"
                dbauthor = myDB.match(query, (author['authorid'], ))
                if dbauthor:
                    # a genuinely new author has no row under this id either
                    authorname = dbauthor['AuthorName']

            controlValueDict = {"AuthorName": authorname}

            if not dbauthor:
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" %
                             authorname)
                entry_status = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                entry_status = dbauthor['Status']
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                # entries flagged manual keep their stored image and dates
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    # remove the "Loading" placeholder row we just created
                    myDB.action('DELETE from authors WHERE AuthorName=?',
                                (authorname, ))
                return
        if not match:
            logger.error(
                "AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?",
                           (authorid, ))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author",
                                        authorid,
                                        authorimg,
                                        refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            # audiostatus = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            if entry_status not in ['Active', 'Wanted', 'Ignored', 'Paused']:
                entry_status = 'Active'  # default for invalid/unknown or "loading"
            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid,
                                          authorname,
                                          bookstatus,
                                          entrystatus=entry_status,
                                          refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                book_api = GoodReads(authorname)
                book_api.get_author_books(authorid,
                                          authorname,
                                          bookstatus,
                                          entrystatus=entry_status,
                                          refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)

            if new_author and lazylibrarian.CONFIG['GR_FOLLOWNEW']:
                res = grfollow(authorid, True)
                if res.startswith('Unable'):
                    logger.warn(res)
                # the follow id is taken from the text after "followid=" in the
                # reply; if that marker is absent an empty id is stored
                try:
                    followid = res.split("followid=")[1]
                    logger.debug('%s marked followed' % authorname)
                except IndexError:
                    followid = ''
                myDB.action('UPDATE authors SET GRfollow=? WHERE AuthorID=?',
                            (followid, authorid))
        else:
            # if we're not loading any books, mark author as ignored
            entry_status = 'Ignored'

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": entry_status}
        myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete, status %s" % (authorname,
                                                          entry_status)
        logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc(
        )
        logger.error(msg)
        return msg
Exemplo n.º 39
0
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    """
    Get the author's name in a consistent format and look them up in the database.
    If not in the database, try to import them (when CONFIG['ADD_AUTHOR'] is set).

    :param author: author name as found by the caller
    :param refresh: passed on to addAuthorToDB when the author has to be added
    :param addbooks: passed on to addAuthorToDB when the author has to be added
    :return: tuple (authorname, authorid, new) where new=False if the author was
             already in the database and new=True if added by this call.
             authorname is our preferred name; ("", "", False) is returned if
             the author is not found or could not be added.
    """
    myDB = database.DBConnection()
    new = False
    # author defaults to None, which has no len()
    if not author or len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    # Check if the author exists, and import the author if not,
    check_exist_author = myDB.match(
        'SELECT AuthorID FROM authors where AuthorName=?', (author, ))

    if not check_exist_author and lazylibrarian.CONFIG['ADD_AUTHOR']:
        logger.debug('Author %s not found in database, trying to add' % author)
        # no match for supplied author, but we're allowed to add new ones
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("Error finding author id for [%s] %s" %
                        (author, str(e)))
            return "", "", False

        # only try to add if GR data matches found author data
        if author_gr:
            authorname = author_gr['authorname']
            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
            match_auth = author.replace('.', ' ')
            match_auth = ' '.join(match_auth.split())

            match_name = authorname.replace('.', ' ')
            match_name = ' '.join(match_name.split())

            match_name = unaccented(match_name)
            match_auth = unaccented(match_auth)

            # allow a degree of fuzziness to cater for different accented character handling.
            # some author names have accents,
            # filename may have the accented or un-accented version of the character
            # The currently non-configurable value of fuzziness might need to go in config
            # We stored GoodReads unmodified author name in
            # author_gr, so store in LL db under that
            # fuzz.ratio doesn't lowercase for us
            match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
            if match_fuzz < 90:
                logger.debug(
                    "Failed to match author [%s] to authorname [%s] fuzz [%d]"
                    % (author, match_name, match_fuzz))

            # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown
            if (author != "Unknown") and (match_fuzz >= 90):
                # use "intact" name for author that we stored in
                # GR author_dict, not one of the various mangled versions
                # otherwise the books appear to be by a different author!
                author = author_gr['authorname']
                authorid = author_gr['authorid']
                # this new authorname may already be in the
                # database, so check again
                check_exist_author = myDB.match(
                    'SELECT AuthorID FROM authors where AuthorID=?',
                    (authorid, ))
                if check_exist_author:
                    logger.debug('Found goodreads authorname %s in database' %
                                 author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author,
                                      refresh=refresh,
                                      authorid=authorid,
                                      addbooks=addbooks)
                        check_exist_author = myDB.match(
                            'SELECT AuthorID FROM authors where AuthorID=?',
                            (authorid, ))
                        if check_exist_author:
                            new = True
                    except Exception as e:
                        # best effort: report why the add failed, then fall
                        # through to the "not in database" result below
                        logger.error(
                            'Failed to add author [%s] to db: %s %s' %
                            (author, type(e).__name__, str(e)))
    # check author exists in db, either newly loaded or already there
    if not check_exist_author:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False
    return author, check_exist_author['AuthorID'], new
Exemplo n.º 40
0
def addAuthorToDB(authorname=None):
    """
    Add (or refresh) an author and their books in the database.

    The author is looked up at GoodReads for id, link, image and dates. Every
    GoogleBooks result whose author name is exactly ``authorname`` is stored in
    the books table with status "Skipped". Finally the author is marked
    "Active" with the number of books processed and the most recent book.

    :param authorname: name of the author to import
    """
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)
    GB = GoogleBooks(authorname, search_type)

    # Bound parameter, so names containing quotes (O'Brien) cannot break the SQL
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?",
                           [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    # authorid/authorlink get usable fallbacks here so that the book loop below
    # still works when GoodReads returns nothing for this author
    if dbauthor is None:
        authorid = "0: %s" % (authorname)
        newValueDict = {
            "AuthorID": authorid,
            "Status": "Loading"
        }
    else:
        authorid = dbauthor['AuthorID']
        newValueDict = {"Status": "Loading"}
    authorlink = ""
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if author:
        authorid = author['authorid']
        authorlink = author['authorlink']
        authorimg = author['authorimg']
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": authorid,
            "AuthorLink": authorlink,
            "AuthorImg": authorimg,
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.error("Nothing found")

    # process books
    bookscount = 0
    books = GB.find_results()
    for book in books:

        # this is for rare cases where google returns multiple authors who share nameparts
        if book['authorname'] == authorname:

            controlValueDict = {"BookID": book['bookid']}
            newValueDict = {
                "AuthorName": book['authorname'],
                "AuthorID": authorid,
                "AuthorLink": authorlink,
                "BookName": book['bookname'],
                "BookDesc": book['bookdesc'],
                "BookIsbn": book['bookisbn'],
                "BookImg": book['bookimg'],
                "BookLink": book['booklink'],
                "BookRate": book['bookrate'],
                "BookPages": book['bookpages'],
                "BookDate": book['bookdate'],
                "BookLang": book['booklang'],
                "Status": "Skipped",
                "BookAdded": formatter.today()
            }

            myDB.upsert("books", newValueDict, controlValueDict)
            bookscount += 1

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorName=? order by BookDate DESC",
        [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}
    # the author may have no books at all, in which case there is no "last book"
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookscount,
        "LastBook": lastbook['BookName'] if lastbook is not None else None,
        "LastLink": lastbook['BookLink'] if lastbook is not None else None,
        "LastDate": lastbook['BookDate'] if lastbook is not None else None
    }

    myDB.upsert("authors", newValueDict, controlValueDict)
    logger.info("Processing complete: Added %s books to the database" %
                bookscount)
Exemplo n.º 41
0
def _grsync_ll_bookids(myDB, status):
    """Return the bookids of all books with the given status ('Open' also includes 'Have')."""
    cmd = 'select bookid from books where status=?'
    args = [status]
    if status == 'Open':
        cmd += ' or status=?'
        args.append('Have')
    return [terms['bookid'] for terms in myDB.select(cmd, tuple(args))]


def grsync(status, shelf):
    """
    Two-way sync between the books of one status and a goodreads shelf.

    The list stored by the previous sync is compared with the current database
    list and the current shelf contents, so that additions and removals on
    either side since the last sync are applied to the other side. The
    resulting database list is then stored for the next sync.

    :param status: book status to sync; 'Wanted' and 'Open' are handled,
                   'Open' also covers books marked 'Have'
    :param shelf: name of the goodreads shelf (lower-cased before use),
                  created if it does not exist
    :return: tuple (shelf_changed, ll_changed) counting changes made to the
             shelf and to the database; (0, 0) if the shelf could not be
             created or an unexpected exception occurred
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        ll_list = _grsync_ll_bookids(myDB, status)

        GA = grauth()
        GR = None  # only created if a book has to be imported
        shelves = GA.get_shelf_list()
        found = False
        for item in shelves:  # type: dict
            if item['name'] == shelf:
                found = True
                break
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID=? and Label=?'
        res = myDB.match(cmd, ("goodreads", shelf))
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        logger.info("%s missing from lazylibrarian %s" %
                    (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            try:
                res, content = GA.BookToList(book, shelf, action='remove')
            except Exception as e:
                logger.debug("Error removing %s from %s: %s %s" %
                             (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s removed from %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to remove %s from %s shelf: %s" %
                            (book, shelf, content))

        # book ids are bound as parameters in the lookups below
        status_cmd = 'select Status from books where BookID=?'

        logger.info("%s missing from goodreads %s" %
                    (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            # deleted from goodreads
            res = myDB.match(status_cmd, (book, ))
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(status_cmd, (book, ))
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if res['Status'] in ['Have', 'Wanted']:
                    myDB.action(
                        'UPDATE books SET Status="Skipped" WHERE BookID=?',
                        (book, ))
                    ll_changed += 1
                    logger.debug("%10s set to Skipped" % book)
                else:
                    logger.warn("Not removing %s, book is marked %s" %
                                (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            try:
                res, content = GA.BookToList(book, shelf, action='add')
            except Exception as e:
                logger.debug("Error adding %s to %s: %s %s" %
                             (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s added to %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to add %s to %s shelf: %s" %
                            (book, shelf, content))

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            res = myDB.match(status_cmd, (book, ))
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(status_cmd, (book, ))
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if status == 'Open':
                    if res['Status'] == 'Open':
                        logger.warn("Book %s is already marked Open" % book)
                    else:
                        myDB.action(
                            'UPDATE books SET Status="Have" WHERE BookID=?',
                            (book, ))
                        ll_changed += 1
                        logger.debug("%10s set to Have" % book)
                elif status == 'Wanted':
                    # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                    # (depending on user prefs, to-read and wanted might not be the same thing)
                    if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in [
                            'Open', 'Have'
                    ]:
                        try:
                            res, content = GA.BookToList(book,
                                                         shelf,
                                                         action='remove')
                        except Exception as e:
                            logger.debug(
                                "Error removing %s from %s: %s %s" %
                                (book, shelf, type(e).__name__, str(e)))
                            res = None
                            content = ''
                        if res:
                            logger.debug("%10s removed from %s shelf" %
                                         (book, shelf))
                            shelf_changed += 1
                        else:
                            logger.warn(
                                "Failed to remove %s from %s shelf: %s" %
                                (book, shelf, content))
                    elif res['Status'] != 'Open':
                        myDB.action(
                            'UPDATE books SET Status="Wanted" WHERE BookID=?',
                            (book, ))
                        ll_changed += 1
                        logger.debug("%10s set to Wanted" % book)
                    else:
                        logger.warn(
                            "Not setting %s as Wanted, already marked Open" %
                            book)

        # get new definitive list from ll
        ll_list = _grsync_ll_bookids(myDB, status)

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(ll_list)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' %
                     traceback.format_exc())
        return 0, 0
Exemplo n.º 42
0
def addAuthorToDB(authorname=None,
                  refresh=False,
                  authorid=None,
                  addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books.
    If author already exists in database, refresh their details and optionally booklist.

    :param authorname: author name to look up; used when no authorid is given
                       or when the lookup by authorid did not produce a match
    :param refresh: passed through to the author lookup, the image cache
                    and the book list retrieval
    :param authorid: author id to look up; tried before authorname
    :param addbooks: if true, fetch the author's books from the configured
                     BOOK_API; otherwise the author is marked "Ignored"
    :return: a status message on completion (or describing an unhandled
             exception), None when no matching author was found
    """
    current = threading.current_thread()
    if "Thread-" in current.name:
        current.name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh
        if authorid:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}

            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID='%s'" %
                                  authorid)
            if not dbauthor:
                # NOTE(review): this placeholder replaces any authorname the
                # caller supplied and is then used by the name lookup below
                # if the id lookup fails - confirm that is intended.
                authorname = 'unknown author'
                logger.debug("Now adding new author id: %s to database" %
                             authorid)
                new_author = True
            else:
                authorname = dbauthor['authorname']
                logger.debug("Now updating author %s " % authorname)
                new_author = False

            # flag the row as "Loading" while we fetch details
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # don't overwrite details of authors flagged as manual
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # remove the "Loading" row we created above; bound
                    # parameter so the value cannot break the statement
                    myDB.action('DELETE from authors WHERE AuthorID=?',
                                (authorid, ))

        if authorname and not match:
            authorname = ' '.join(
                authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)

            query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace(
                "'", "''")
            dbauthor = myDB.match(query)
            controlValueDict = {"AuthorName": authorname}

            if not dbauthor:
                # temporary id until the real one is known
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" %
                             authorname)
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            author = GR.find_author_id(refresh=refresh)
            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                # don't overwrite details of authors flagged as manual
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    # remove the "Loading" row we created above; bound
                    # parameter so quotes in the name cannot break the SQL
                    myDB.action('DELETE from authors WHERE AuthorName=?',
                                (authorname, ))
                return
        if not match:
            logger.error(
                "AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        manual_row = myDB.match(
            "SELECT Manual from authors WHERE AuthorID='%s'" % authorid)
        if not manual_row or not manual_row['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author",
                                        authorid,
                                        authorimg,
                                        refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid,
                                          authorname,
                                          bookstatus,
                                          refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                GR = GoodReads(authorname)
                GR.get_author_books(authorid,
                                    authorname,
                                    bookstatus,
                                    refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)
        else:
            # if we're not loading any books, mark author as ignored
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Ignored"}
            myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete" % authorname
        logger.debug(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc(
        )
        logger.error(msg)
        return msg
Exemplo n.º 43
0
def _sql_dq(value):
    """Double any embedded double quotes so value can sit inside a "..." SQL literal."""
    return value.replace('"', '""')


def LibraryScan(dir=None):
    """
    Walk an ebook directory and mark books found on disk as "Open" in the database.

    For each file with a valid book type, author and title are taken from
    embedded metadata (epub/mobi), then from any .opf file in the same
    directory (which overrides embedded metadata), and finally from a
    filename pattern built from EBOOK_DEST_FILE. Unknown authors are added
    via GoodReads when ADD_AUTHOR is set and the names match closely enough.
    Per-author book counts are refreshed at the end.

    :param dir: directory to scan; defaults to lazylibrarian.DOWNLOAD_DIR.
                Nothing is done if neither is set or the directory is missing.
    :return: None
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        # check every book we think we have is still on disk
        books = myDB.select('select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn("Book %s - %s updated as not found on disk" % (bookAuthor, bookName))

    # subdirectories in which a book has already been found; consulted below
    # when IMP_SINGLEBOOK is set to avoid rescanning other formats of the same book
    processed_subdirectories = []

    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching. re.escape leaves the placeholders as
    # "\$Author" / "\$Title" and, unlike backslash-prefixing every character,
    # does not turn ordinary letters into regex escapes such as \b or \d
    matchString = re.escape(lazylibrarian.EBOOK_DEST_FILE)
    matchString = matchString.replace("\\$Author", "(?P<author>.*?)").replace("\\$Title", "(?P<book>.*?)")
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    booktypes = "|".join(booktype_list)
    # alternation group, not a character class: "[epub|mobi]" would match a
    # single character from the set rather than a whole extension
    matchString += r"\.(?:" + booktypes + ")"
    pattern = re.compile(matchString, re.VERBOSE)

    # NOTE(review): the queries below are still built by string formatting.
    # Embedded double quotes are doubled, but bound parameters would be
    # preferable once it is confirmed the connection accepts the (possibly
    # non-ASCII byte string) values this scan produces.
    author_query = 'SELECT * FROM authors where AuthorName="%s"'

    for r, d, f in os.walk(dir):
        # prune hidden directories, and "_" directories to prevent magazines
        # being scanned; modified in place so os.walk does not descend into them
        d[:] = [directory for directory in d if not directory.startswith((".", "_"))]

        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")
            # Skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if lazylibrarian.IMP_SINGLEBOOK and subdirectory in processed_subdirectories:
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            if not formatter.is_valid_booktype(files):
                continue

            # If this is a book, try to get author/title/isbn/language
            # if epub or mobi, read metadata from the book
            # If metadata.opf exists, use that allowing it to override
            # embedded metadata. User may have edited metadata.opf
            # to merge author aliases together
            # If all else fails, try pattern match for author/title
            # and look up isbn/lang from LT or GR later
            match = 0

            logger.debug(
                "[%s] Now scanning subdirectory %s"
                % (
                    dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                    subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                )
            )
            language = "Unknown"
            isbn = ""
            book = ""
            author = ""
            extn = files.split(".")[-1]

            # if it's an epub or a mobi we can try to read metadata from it
            if extn in ("epub", "mobi"):
                book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                try:
                    res = get_book_info(book_filename)
                except Exception:
                    res = {}
                if "title" in res and "creator" in res:  # this is the minimum we need
                    match = 1
                    book = res["title"]
                    author = res["creator"]
                    if "language" in res:
                        language = res["language"]
                    if "identifier" in res:
                        isbn = res["identifier"]
                    if "type" in res:
                        extn = res["type"]
                    logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn))
                else:
                    logger.debug("Book meta incomplete in %s" % book_filename)

            # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
            # just look for any .opf file in the current directory since we don't know
            # LL preferred authorname/bookname at this point.
            # Allow metadata in file to override book contents as may be users pref
            metafile = opf_file(r)
            try:
                res = get_book_info(metafile)
            except Exception:
                res = {}
            if "title" in res and "creator" in res:  # this is the minimum we need
                match = 1
                book = res["title"]
                author = res["creator"]
                if "language" in res:
                    language = res["language"]
                if "identifier" in res:
                    isbn = res["identifier"]
                logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
            else:
                logger.debug("File meta incomplete in %s" % metafile)

            if not match:  # no author/book from metadata file, and not embedded either
                match = pattern.match(files)
                if match:
                    author = match.group("author")
                    book = match.group("book")
                else:
                    logger.debug("Pattern match failed [%s]" % files)

            if not match:
                continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.append(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown", add it to cache
            if language != "Unknown" and formatter.is_valid_isbn(isbn):
                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                # we need to add it to language cache if not already
                # there, is_valid_isbn has checked length is 10 or 13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached_lang = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                if not cached_lang:
                    myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                else:
                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

            # metadata or the filename pattern can yield an empty author;
            # there is nothing to look up in that case
            if not author:
                logger.debug("No author found for [%s]" % files)
                continue

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(",")
                author = words[1].strip() + " " + words[0].strip()  # "forename surname"
            # a leading single initial: join initials with dots instead of spaces
            # (length check avoids an IndexError on one-character names)
            if len(author) > 1 and author[1] == " ":
                author = author.replace(" ", ".")
                author = author.replace("..", ".")

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save repeating the search
            check_exist_author = myDB.action(author_query % _sql_dq(author)).fetchone()
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to
                # add new ones

                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.warn("Error finding author id for [%s]" % author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr["authorname"]

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                    match_auth = author.replace(".", "_")
                    match_auth = match_auth.replace(" ", "_")
                    match_auth = match_auth.replace("__", "_")
                    match_name = authorname.replace(".", "_")
                    match_name = match_name.replace(" ", "_")
                    match_name = match_name.replace("__", "_")
                    match_name = common.remove_accents(match_name)
                    match_auth = common.remove_accents(match_auth)
                    # allow a degree of fuzziness to cater for different accented character handling.
                    # some author names have accents,
                    # filename may have the accented or un-accented version of the character
                    # The currently non-configurable value of fuzziness might need to go in config
                    # We stored GoodReads unmodified author name in
                    # author_gr, so store in LL db under that
                    # fuzz.ratio doesn't lowercase for us
                    match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                    if match_fuzz < 90:
                        logger.debug("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                        logger.debug(
                            "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)
                        )

                    # To save loading hundreds of books by unknown
                    # authors at GR or GB, ignore if author "Unknown"
                    if (author != "Unknown") and (match_fuzz >= 90):
                        # use "intact" name for author that we stored in
                        # GR author_dict, not one of the various mangled versions
                        # otherwise the books appear to be by a different author!
                        author = author_gr["authorname"]
                        # this new authorname may already be in the
                        # database, so check again
                        check_exist_author = myDB.action(author_query % _sql_dq(author)).fetchone()
                        if not check_exist_author:
                            logger.debug("Adding new author [%s]" % author)
                            try:
                                importer.addAuthorToDB(author)
                                check_exist_author = myDB.action(author_query % _sql_dq(author)).fetchone()
                            except Exception:
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.debug("Failed to match author [%s] in database" % author)
                continue

            # author exists, check if this book by this author is in our database
            # metadata might have quotes in book name
            book = book.replace('"', "").replace("'", "")
            bookid = find_book_in_db(myDB, author, book)
            if not bookid:
                continue

            # check if book is already marked as "Open" (if so,
            # we already had it)
            check_status = myDB.action(
                'SELECT Status from books where BookID="%s"' % bookid
            ).fetchone()
            if check_status["Status"] != "Open":
                # update status as we've got this book
                myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)
                book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                # update book location so we can check if it
                # gets removed, or allow click-to-open
                myDB.action(
                    'UPDATE books set BookFile="%s" where BookID="%s"' % (_sql_dq(book_filename), bookid)
                )
                new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if stats["sum(GR_book_hits)"] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
        logger.debug("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
        logger.debug("Language cache was hit %s times" % stats["sum(cache_hits)"])
        logger.debug("Unwanted language removed %s books" % stats["sum(bad_lang)"])
        logger.debug("Unwanted characters removed %s books" % stats["sum(bad_char)"])
        logger.debug("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize["counter"])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select("select AuthorName from authors")
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug("Updating bookcounts for %i authors" % len(authors))
    for author in authors:
        name = _sql_dq(author["AuthorName"])
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")'
            % name
        ).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks["counter"], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' % name
        ).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks["counter"], name))

    logger.info("Library scan complete")
Exemplo n.º 44
0
def grsync(status, shelf):
    """
    Synchronise books of a given status with a goodreads shelf.

    Books with this status that are missing from the shelf are added to the
    shelf; books on the shelf that do not have this status are updated in,
    or added to, the database. The shelf is created if it does not exist.

    :param status: book status to sync; "Open" also includes "Have"
    :param shelf: goodreads shelf name (compared in lower case)
    :return: tuple (number of books added to the shelf, number of new books
             added to the database); (0, 0) if the shelf could not be
             created or an unhandled exception occurred
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = [terms['bookid'] for terms in results]

        GA = grauth()
        GR = None  # created lazily, only if a book has to be looked up
        shelves = GA.get_shelf_list()
        if not any(item['name'] == shelf for item in shelves):
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                # same shape as the success return so callers can always
                # unpack two values
                return 0, 0
            logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        not_on_shelf = [book for book in ll_list if book not in gr_shelf]
        not_in_ll = [book for book in gr_shelf if book not in ll_list]

        to_shelf = 0
        to_ll = 0
        # these need adding to shelf
        if not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.debug('Not connected to goodreads')
        else:
            for book in not_on_shelf:
                try:
                    res, content = GA.BookToList(book, shelf)
                except Exception as e:
                    logger.debug("Error in BookToList: %s %s" % (type(e).__name__, str(e)))
                    res = None

                if res:
                    # count every successful addition; only the log line
                    # depends on the log level
                    to_shelf += 1
                    if lazylibrarian.LOGLEVEL > 2:
                        logger.debug("%10s added to %s shelf" % (book, shelf))
                else:
                    logger.debug("Failed to add %s to %s shelf" % (book, shelf))

        # "to-read" books need adding to lazylibrarian as "wanted" if not already Open/Have,
        # if in "to-read" and already marked "Open/Have", optionally delete from "to-read"
        # (depending on user prefs, to-read and wanted might not be the same thing)
        # "owned" need adding as "Have" as librarysync will pick up "Open" ones or change Have to Open

        for book in not_in_ll:
            cmd = 'select Status from books where bookid="%s"' % book
            result = myDB.match(cmd)
            if result:
                if result['Status'] in ['Have', 'Open']:  # don't change status if we have it
                    logger.debug("%10s is already marked %s" % (book, result['Status']))
                    if lazylibrarian.CONFIG['GR_UNIQUE']:
                        try:
                            res, content = GA.BookToList(book, shelf, action='remove')
                        except Exception as e:
                            logger.debug("Error in BookToList: %s %s" % (type(e).__name__, str(e)))
                            res = None
                        if res:
                            logger.debug("%10s removed from %s shelf" % (book, shelf))
                        else:
                            logger.debug("Failed to remove %s from %s shelf" % (book, shelf))

                elif shelf == 'owned':
                    myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                else:
                    myDB.action('UPDATE books SET Status=? WHERE BookID=?', (status, book))
            else:  # add book to database as wanted
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                to_ll += 1

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return to_shelf, to_ll

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
Exemplo n.º 45
0
    def find_book(bookid=None, queue=None):
        """Fetch one volume from the Google Books API and store it as a Wanted book.

        The volume metadata is written to the ``books`` table. The author is
        looked up via ``GoodReads(...).find_author_id()``; if no matching row
        exists in ``authors`` (by id or by name) the author is inserted with
        status "Ignored". Cover image, series and work page are then updated
        where the helper functions return something.

        :param bookid: Google Books volume id of the book to add.
        :param queue: not used by this function.
        :return: None. Returns early, storing nothing, when the API returns no
            data, the volume lists no author, or no author id can be found.
        """
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            # only a warning: the request below is still attempted
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, in_cache = get_json_request(URL)

        if not jsonresults:
            logger.debug('No results found for %s' % bookid)
            return

        volume = jsonresults['volumeInfo']

        bookname = volume['title']
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic)
        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        # a missing key and an empty list are both "no author"
        authors = volume.get('authors')
        if not authors:
            logger.debug('Book %s does not contain author field, skipping' %
                         bookname)
            return
        authorname = authors[0]

        try:
            # warn if language is in ignore list, but user said they wanted this book
            booklang = volume['language']
            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            if booklang not in valid_langs and 'All' not in valid_langs:
                logger.debug(
                    'Book %s googlebooks language does not match preference, %s'
                    % (bookname, booklang))
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"

        bookpub = volume.get('publisher', "")

        # Series details are parsed out of a subtitle shaped like
        # "... (<series> Series <number>)"; anything else leaves them empty
        # (or, with no " Series " marker, puts the whole tail in series).
        series = ""
        seriesNum = ""
        booksub = volume.get('subtitle', "")
        subtitle_parts = booksub.split('(')
        if len(subtitle_parts) > 1:
            series_parts = subtitle_parts[1].split(' Series ')
            series = series_parts[0]
            if len(series_parts) > 1:
                seriesNum = series_parts[1].split(')')[0]
                if seriesNum.startswith('#'):
                    seriesNum = seriesNum[1:]

        bookdate = volume.get('publishedDate', '0000-00-00')
        bookimg = volume.get('imageLinks', {}).get('thumbnail',
                                                   'images/nocover.png')
        bookrate = float(volume.get('averageRating', 0))
        bookpages = volume.get('pageCount', 0)

        categories = volume.get('categories')
        bookgenre = categories[0] if categories else ""

        bookdesc = volume.get('description', "")

        # take the ISBN-10 wherever it sits in the identifier list,
        # not only when it happens to be the first entry
        bookisbn = ""
        for identifier in volume.get('industryIdentifiers') or []:
            if identifier.get('type') == 'ISBN_10':
                bookisbn = identifier.get('identifier', "")
                break

        booklink = volume['canonicalVolumeLink']

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            logger.warn("No AuthorID for %s, unable to add book %s" %
                        (authorname, bookname))
            return

        AuthorID = author['authorid']
        # bound parameters: author names may contain quote characters
        match = myDB.match(
            'SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            match = myDB.match(
                'SELECT AuthorID from authors WHERE AuthorName=?',
                (author['authorname'],))
            if match:
                logger.debug(
                    '%s: Changing authorid from %s to %s' %
                    (author['authorname'], AuthorID, match['AuthorID']))
                # we have a different authorid for that authorname
                AuthorID = match['AuthorID']
            else:
                # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)

        # Placeholder covers are replaced via getBookCover(); any remaining
        # remote image is cached locally. The cache step used to be nested
        # under the placeholder test, so real thumbnails never reached it.
        workcover = None
        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' %
                         (bookname, workcover))
            myDB.upsert("books", {"BookImg": workcover}, {"BookID": bookid})
        elif bookimg and bookimg.startswith('http'):
            link, success = cache_img("book", bookid, bookimg)
            if success:
                myDB.upsert("books", {"BookImg": link}, {"BookID": bookid})
            else:
                logger.debug('Failed to cache image for %s' % bookimg)

        if lazylibrarian.CONFIG['ADD_SERIES']:
            # prefer series info from librarything
            seriesdict = getWorkSeries(bookid)
            if seriesdict:
                logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
            elif series:
                seriesdict = {cleanName(unaccented(series)): seriesNum}
            setSeries(seriesdict, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            myDB.upsert("books", {"WorkPage": worklink}, {"BookID": bookid})
Exemplo n.º 46
0
def processAlternate(source_dir=None):
    """Import a book from an alternate directory, recursing into subdirectories.

    Subdirectories are processed first (their results are not inspected),
    then at most one book is imported from ``source_dir`` itself. Author and
    title come from a matching ``.opf`` file, any ``.opf`` in the folder, or
    the book file's own metadata. An author that is not in the database is
    looked up on GoodReads and added if still unknown.

    :param source_dir: directory to import from.
    :return: the result of ``import_book`` when a matching book is found in
        the database, otherwise False (including on any unhandled error).
    """
    try:
        if not source_dir:
            logger.warn("Alternate Directory not configured")
            return False
        if not os.path.isdir(source_dir):
            logger.warn("Alternate Directory [%s] not found" % source_dir)
            return False
        if source_dir == lazylibrarian.DIRECTORY('Destination'):
            logger.warn('Alternate directory must not be the same as Destination')
            return False

        logger.debug('Processing alternate directory %s' % source_dir)
        # first, recursively process any books in subdirectories
        for fname in os.listdir(source_dir):
            subdir = os.path.join(source_dir, fname)
            if os.path.isdir(subdir):
                processAlternate(subdir)

        # only import one book from each alternate (sub)directory, this is because
        # the importer may delete the directory after importing a book,
        # depending on lazylibrarian.DESTINATION_COPY setting
        # also if multiple books in a folder and only a "metadata.opf"
        # which book is it for?
        new_book = book_file(source_dir, booktype='book')
        if not new_book:
            logger.warn("No book file found in %s" % source_dir)
            return False

        metadata = {}
        # see if there is a metadata file in this folder with the info we need
        # try book_name.opf first, or fall back to any filename.opf
        metafile = os.path.splitext(new_book)[0] + '.opf'
        if not os.path.isfile(metafile):
            metafile = opf_file(source_dir)
        if metafile and os.path.isfile(metafile):
            try:
                # "or {}" keeps the membership tests below safe if nothing is returned
                metadata = get_book_info(metafile) or {}
            except Exception as e:
                logger.debug('Failed to read metadata from %s, %s' % (metafile, str(e)))
        else:
            logger.debug('No metadata file found for %s' % new_book)

        if 'title' not in metadata or 'creator' not in metadata:
            # if not got both, try to get metadata from the book file
            try:
                metadata = get_book_info(new_book) or {}
            except Exception as e:
                logger.debug('No metadata found in %s, %s' % (new_book, str(e)))

        if 'title' not in metadata or 'creator' not in metadata:
            logger.warn('Book %s has no metadata, unable to import' % new_book)
            return False

        authorname = metadata['creator']
        bookname = metadata['title']
        myDB = database.DBConnection()

        # bound parameter: metadata is untrusted and names may contain quotes
        author_query = 'SELECT * FROM authors where AuthorName=?'
        authmatch = myDB.match(author_query, (authorname,))

        if not authmatch:
            # try goodreads preferred authorname
            logger.debug("Checking GoodReads for [%s]" % authorname)
            GR = GoodReads(authorname)
            try:
                author_gr = GR.find_author_id()
            except Exception:
                logger.debug("No author id for [%s]" % authorname)
                # must be bound, otherwise the test below raises and
                # aborts the whole import
                author_gr = None
            if author_gr:
                grauthorname = author_gr['authorname']
                logger.debug("GoodReads reports [%s] for [%s]" % (grauthorname, authorname))
                authorname = grauthorname
                authmatch = myDB.match(author_query, (authorname,))

        if authmatch:
            logger.debug("ALT: Author %s found in database" % (authorname))
        else:
            logger.debug("ALT: Author %s not found, adding to database" % (authorname))
            addAuthorToDB(authorname)

        bookid = find_book_in_db(myDB, authorname, bookname)
        if bookid:
            return import_book(source_dir, bookid)

        logger.warn("Book %s by %s not found in database" % (bookname, authorname))
        return False
    except Exception:
        logger.error('Unhandled exception in processAlternate: %s' % traceback.format_exc())
        return False
Exemplo n.º 47
0
def grsync(status, shelf):
    """Synchronise books of one LazyLibrarian status with a goodreads shelf.

    Changes on either side since the previously stored sync list are applied
    to the other side, then the current LazyLibrarian list is stored for the
    next run.

    :param status: book status to sync ("Open" also covers "Have").
    :param shelf: goodreads shelf name; lower-cased before use and created
        if it is not in the user's shelf list.
    :return: tuple ``(shelf_changed, ll_changed)`` counting changes made to
        the shelf and to the database; ``(0, 0)`` if the shelf cannot be
        created or an unhandled exception occurs.
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()

        def current_ll_list():
            # bookids currently holding the requested status
            query = 'select bookid from books where status="%s"' % status
            if status == 'Open':
                query += ' or status="Have"'
            return [row['bookid'] for row in myDB.select(query)]

        # (error, success, failure) log templates for each shelf action
        shelf_messages = {
            'remove': ("Error removing %s from %s: %s %s",
                       "%10s removed from %s shelf",
                       "Failed to remove %s from %s shelf: %s"),
            'add': ("Error adding %s to %s: %s %s",
                    "%10s added to %s shelf",
                    "Failed to add %s to %s shelf: %s"),
        }

        def change_shelf(bookid, action):
            # apply one add/remove on goodreads; returns 1 on success else 0
            error_msg, done_msg, failed_msg = shelf_messages[action]
            try:
                ok, content = GA.BookToList(bookid, shelf, action=action)
            except Exception as e:
                logger.error(error_msg % (bookid, shelf, type(e).__name__, str(e)))
                ok = None
                content = ''
            if ok:
                logger.debug(done_msg % (bookid, shelf))
                return 1
            logger.warn(failed_msg % (bookid, shelf, content))
            return 0

        def book_status(bookid, gr_api):
            # look the book up, asking goodreads to add it first if missing;
            # returns the row (or falsy) and the possibly newly created api
            query = 'select Status from books where BookID="%s"' % bookid
            row = myDB.match(query)
            if not row:
                logger.debug('Adding new book %s to database' % bookid)
                if not gr_api:
                    gr_api = GoodReads(bookid)
                gr_api.find_book(bookid)
                row = myDB.match(query)
            return row, gr_api

        ll_list = current_ll_list()

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        if not any(item['name'] == shelf for item in shelves):
            created, msg = GA.create_shelf(shelf=shelf)
            if not created:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = "Open/Have" if status == "Open" else status

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        stored = myDB.match('select SyncList from sync where UserID="%s" and Label="%s"' % ("goodreads", shelf))
        shelf_changed = 0
        ll_changed = 0
        last_sync = getList(stored['SyncList']) if stored else []

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        # first the deletions since last sync...
        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            shelf_changed += change_shelf(book, 'remove')

        # deleted from goodreads
        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            row, GR = book_status(book, GR)
            if not row:
                logger.warn('Book %s not found in database' % book)
                continue
            if row['Status'] in ['Have', 'Wanted']:
                myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                ll_changed += 1
                logger.debug("%10s set to Skipped" % book)
            else:
                logger.warn("Not removing %s, book is marked %s" % (book, row['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            shelf_changed += change_shelf(book, 'add')

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            row, GR = book_status(book, GR)
            if not row:
                logger.warn('Book %s not found in database' % book)
                continue
            if status == 'Open':
                if row['Status'] == 'Open':
                    logger.warn("Book %s is already marked Open" % book)
                else:
                    myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Have" % book)
            elif status == 'Wanted':
                # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                # (depending on user prefs, to-read and wanted might not be the same thing)
                if lazylibrarian.CONFIG['GR_UNIQUE'] and row['Status'] in ['Open', 'Have']:
                    shelf_changed += change_shelf(book, 'remove')
                elif row['Status'] != 'Open':
                    myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Wanted" % book)
                else:
                    logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        ll_list = current_ll_list()

        # store as comparison for next sync
        myDB.upsert("sync",
                    {"Date": str(time.time()), "Synclist": ', '.join(ll_list)},
                    {"UserID": "goodreads", "Label": shelf})

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
Exemplo n.º 48
0
def LibraryScan(dir=None):
    """Reconcile the books table with the ebook files found under a directory.

    With ``lazylibrarian.FULL_SCAN`` set, every book marked "Open" whose
    expected file is absent for all configured ebook types is set to
    ``lazylibrarian.NOTFOUND_STATUS``. The directory tree is then walked
    (skipping directories starting with "." or "_") and each file name
    matching the ``EBOOK_DEST_FILE`` template is marked "Open" if the book is
    in the database; unknown authors are added when ``ADD_AUTHOR`` allows it.
    Finally the HaveBooks/UnignoredBooks counts of affected authors are
    refreshed.

    :param dir: directory to scan; defaults to ``lazylibrarian.DOWNLOAD_DIR``.
    :return: None. Returns early if no directory is configured or it does
        not exist.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()
    new_authors = []
    new_book_count = 0

    logger.info('Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    booktype_list = getList(lazylibrarian.EBOOK_TYPE)

    if lazylibrarian.FULL_SCAN:
        books = myDB.select('select AuthorName, BookName from books where Status=?', [u'Open'])
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            # Default destination path, should be allowed change per config file.
            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName)

            # Existence is decided per book; a flag carried over from an
            # earlier book would hide every later missing file.
            candidate_paths = [
                os.path.join(dir, dest_path, global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
                for book_type in booktype_list
            ]
            if any(os.path.isfile(path) for path in candidate_paths):
                continue

            myDB.action('update books set Status=? where AuthorName=? and BookName=?', [status, bookAuthor, bookName])
            if candidate_paths:
                missing = candidate_paths[-1].decode(lazylibrarian.SYS_ENCODING, 'replace')
            else:
                # no ebook types configured, so there is no path to report
                missing = global_name
            logger.info('Book %s updated as not found on disk' % missing)
            if bookAuthor not in new_authors:
                new_authors.append(bookAuthor)

    # the pattern depends only on configuration, so build it once
    pattern = _library_filename_pattern(lazylibrarian.EBOOK_DEST_FILE, booktype_list)

    for r, d, f in os.walk(dir):
        # prune in place so os.walk does not descend into hidden directories
        # or "_" directories (prevent magazine being scanned)
        d[:] = [name for name in d if not name.startswith((".", "_"))]

        for files in f:
            subdirectory = r.replace(dir, '')
            logger.info("[%s] Now scanning subdirectory %s" % (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))

            match = pattern.match(files)
            if not match:
                continue
            author = match.group("author")
            book = match.group("book")

            # check if book is in database, and not marked as in library
            check_exist_book = myDB.action("SELECT * FROM books where AuthorName=? and BookName=? and Status!=?", [author, book, 'Open']).fetchone()
            if not check_exist_book:
                check_exist_author = myDB.action("SELECT * FROM authors where AuthorName=?", [author]).fetchone()
                if check_exist_author or not lazylibrarian.ADD_AUTHOR:
                    continue

                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception as e:
                    logger.error("Error finding author id for [%s]: %s" % (author, str(e)))
                    continue
                if not author_gr:
                    continue

                # only try to add if GR data matches found author data:
                # compare "<id>.<name>" with the last segment of the author link
                # NOTE(review): only ". " is replaced with "_", plain spaces are
                # not - confirm this matches the link format for all names
                authorid = author_gr['authorid']
                authorlink = author_gr['authorlink']
                authorlink = authorlink[authorlink.rfind('/') + 1:]
                match_auth = authorid + "." + author.replace('. ', '_')
                logger.debug(match_auth)
                logger.debug(authorlink)
                if match_auth != authorlink:
                    logger.info("Unable to match %s in GoodReads database" % author)
                    continue

                logger.info("Adding %s" % author)
                try:
                    importer.addAuthorToDB(author)
                except Exception as e:
                    logger.error("Error adding author [%s]: %s" % (author, str(e)))
                    continue
                check_exist_book = myDB.action("SELECT * FROM books where AuthorName=? and BookName=?", [author, book]).fetchone()
                if not check_exist_book:
                    continue

            if author not in new_authors:
                new_authors.append(author)
            myDB.action('UPDATE books set Status=? where AuthorName=? and BookName=?', ['Open', author, book])
            new_book_count += 1

    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(myDB.select('select BookName from Books where status=? and AuthorName=?', ['Open', auth]))
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?', [havebooks, auth])
        totalbooks = len(myDB.select('select BookName from Books where status!=? and AuthorName=?', ['Ignored', auth]))
        myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?', [totalbooks, auth])

    logger.info('Library scan complete')


def _library_filename_pattern(dest_file, booktypes):
    """Compile a regex that extracts author and book from an ebook file name.

    :param dest_file: file name template containing ``$Author`` and ``$Title``.
    :param booktypes: iterable of accepted file extensions (without the dot).
    :return: compiled pattern with named groups ``author`` and ``book`` that
        must match the whole file name, including one of the extensions.
    """
    # re.escape keeps literal template text literal; prefixing every
    # character with a backslash turns letters into escapes such as \b or \d
    template = re.escape(dest_file)
    template = template.replace(re.escape('$Author'), '(?P<author>.*?)')
    template = template.replace(re.escape('$Title'), '(?P<book>.*?)')
    # alternation of whole extensions, anchored at the end: "[epub|mobi]"
    # would be a character class matching a single letter
    extensions = '|'.join(re.escape(book_type) for book_type in booktypes)
    return re.compile(template + r'\.(?:' + extensions + r')$')
Exemplo n.º 49
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    new_authors = []

    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    book_list = []
    new_book_count = 0
    file_count = 0
    book_exists = False

    if lazylibrarian.FULL_SCAN:
        books = myDB.select("select AuthorName, BookName, BookFile, BookID from books where Status=?", [u"Open"])
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]

            if os.path.isfile(bookfile):
                book_exists = True
            else:
                myDB.action("update books set Status=? where BookID=?", [status, bookID])
                myDB.action('update books set BookFile="" where BookID=?', [bookID])
                logger.info("Book %s updated as not found on disk" % bookfile)
                # for book_type in getList(lazylibrarian.EBOOK_TYPE):
                # 	bookName = book['BookName']
                # 	bookAuthor = book['AuthorName']
                # 	#Default destination path, should be allowed change per config file.
                # 	dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName)
                # 	#dest_path = authorname+'/'+bookname
                # 	global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName)
                #
                # 	encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
                # 	if os.path.isfile(encoded_book_path):
                # 		book_exists = True
                # if not book_exists:
                # 	myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName])
                # 	logger.info('Book %s updated as not found on disk' % encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') )
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

                # guess this was meant to save repeat-scans of the same directory
                # if it contains multiple formats of the same book, but there was no code
                # that looked at the array. renamed from latest to processed to make purpose clearer
    processed_subdirectories = []

    matchString = ""
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + "\\" + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use with regular expression matching
    booktypes = ""
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + "|" + book_type
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + "\.["
        + booktypes
        + "]"
    )
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
                # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")
            # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                logger.info(
                    "[%s] Now scanning subdirectory %s"
                    % (
                        dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                        subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                    )
                )

                # 			If this is a book, try to get author/title/isbn/language
                # 			If metadata.opf exists, use that
                # 			else if epub or mobi, read metadata from the book
                # 			else have to try pattern match for author/title	and look up isbn/lang from LT or GR later
                #
                # 			Is it a book (extension found in booktypes)
                match = 0
                words = files.split(".")
                extn = words[len(words) - 1]
                if extn in booktypes:
                    # see if there is a metadata file in this folder with the info we need
                    try:
                        metafile = os.path.join(r, "metadata.opf").encode(lazylibrarian.SYS_ENCODING)
                        res = get_book_info(metafile)
                        if res:
                            book = res["title"]
                            author = res["creator"]
                            language = res["language"]
                            isbn = res["identifier"]
                            match = 1
                            logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))

                    except:
                        logger.debug("No metadata file in %s" % r)

                    if not match:
                        # it's a book, but no external metadata found
                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == "epub") or (extn == "mobi"):
                            book_file = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                            res = get_book_info(book_file)
                            if res:
                                book = res["title"]
                                author = res["creator"]
                                language = res["language"]
                                isbn = res["identifier"]
                                match = 1
                                logger.debug("book meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))

                if not match:
                    match = pattern.match(files)
                    if match:
                        author = match.group("author")
                        book = match.group("book")
                    else:
                        logger.debug("Pattern match failed [%s]" % files)

                else:
                    processed_subdirectories.append(subdirectory)  # flag that we found a book in this subdirectory
                    #
                    # If we have a valid looking isbn, and language != "Unknown", add it to cache
                    #
                    if not language:
                        language = "Unknown"

                        # strip any formatting from the isbn
                    isbn = re.sub("[- ]", "", isbn)
                    if len(isbn) != 10 and len(isbn) != 13:
                        isbn = ""
                    if not isbn.isdigit():
                        isbn = ""
                    if isbn != "" and language != "Unknown":
                        logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                        # we need to add it to language cache if not already there
                        if len(isbn) == 10:
                            isbnhead = isbn[0:3]
                        else:
                            isbnhead = isbn[3:6]
                        match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                        if not match:
                            myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                            logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                        else:
                            logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            # get authors name in a consistent format
                    if "," in author:  # "surname, forename"
                        words = author.split(",")
                        author = words[1].strip() + " " + words[0].strip()  # "forename surname"
                    author = author.replace(". ", " ")
                    author = author.replace(".", " ")
                    author = author.replace("  ", " ")

                    # Check if the author exists, and import the author if not,
                    # before starting any complicated book-name matching to save repeating the search
                    #
                    check_exist_author = myDB.action("SELECT * FROM authors where AuthorName=?", [author]).fetchone()
                    if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                        # no match for supplied author, but we're allowed to add new ones

                        GR = GoodReads(author)
                        try:
                            author_gr = GR.find_author_id()
                        except:
                            logger.error("Error finding author id for [%s]" % author)
                            continue

                            # only try to add if GR data matches found author data
                            # not sure what this is for, never seems to fail??
                        if author_gr:
                            authorname = author_gr["authorname"]

                            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                            match_auth = author.replace(".", "_")
                            match_auth = match_auth.replace(" ", "_")
                            match_auth = match_auth.replace("__", "_")
                            match_name = authorname.replace(".", "_")
                            match_name = match_name.replace(" ", "_")
                            match_name = match_name.replace("__", "_")

                            # allow a degree of fuzziness to cater for different accented character handling.
                            # some author names have accents,
                            # filename may have the accented or un-accented version of the character
                            # The (currently non-configurable) value of fuzziness works for one accented character
                            # We stored GoodReads unmodified author name in author_gr, so store in LL db under that
                            match_fuzz = fuzz.ratio(match_auth, match_name)
                            if match_fuzz < 90:
                                logger.info("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                logger.info("match author [%s] authorname [%s]" % (match_auth, match_name))

                                # To save loading hundreds of books by unknown authors at GR or GB, ignore if author "Unknown"
                            if (author != "Unknown") and (match_fuzz >= 90):
                                # use "intact" name for author that we stored in
                                # GR author_dict, not one of the various mangled versions
                                # otherwise the books appear to be by a different author!
                                author = author_gr["authorname"]
                                # this new authorname may already be in the database, so check again
                                check_exist_author = myDB.action(
                                    "SELECT * FROM authors where AuthorName=?", [author]
                                ).fetchone()
                                if not check_exist_author:
                                    logger.info("Adding new author [%s]" % author)
                                    if author not in new_authors:
                                        new_authors.append(author)
                                    try:
                                        importer.addAuthorToDB(author)
                                        check_exist_author = myDB.action(
                                            "SELECT * FROM authors where AuthorName=?", [author]
                                        ).fetchone()
                                    except:
                                        continue

                                        # check author exists in db, either newly loaded or already there
                    if not check_exist_author:
                        logger.info("Failed to match author [%s] in database" % author)
                    else:
                        # author exists, check if this book by this author is in our database
                        bookid = find_book_in_db(myDB, author, book)
                        if bookid:
                            # check if book is already marked as "Open" (if so, we already had it)
                            check_status = myDB.action("SELECT Status from books where BookID=?", [bookid]).fetchone()
                            if check_status["Status"] != "Open":
                                # update status as we've got this book
                                myDB.action("UPDATE books set Status=? where BookID=?", [u"Open", bookid])
                                book_file = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                                # update book location so we can check if it gets removed, or maybe allow click-to-open?
                                myDB.action("UPDATE books set BookFile=? where BookID=?", [book_file, bookid])
                                new_book_count += 1

    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if lazylibrarian.BOOK_API == "GoogleBooks":
        logger.info("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
        logger.info("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
    if lazylibrarian.BOOK_API == "GoodReads":
        logger.info("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
        logger.info("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
    logger.info("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
    logger.info("Language cache was hit %s times" % stats["sum(cache_hits)"])
    logger.info("Unwanted language removed %s books" % stats["sum(bad_lang)"])
    logger.info("Unwanted characters removed %s books" % stats["sum(bad_char)"])
    logger.info("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
    logger.info("ISBN Language cache holds %s entries" % cachesize["count(*)"])
    stats = len(myDB.select("select BookID from Books where status=? and BookLang=?", ["Open", "Unknown"]))
    logger.info("There are %s books in your library with unknown language" % stats)

    logger.info("Updating %i authors" % len(new_authors))
    for auth in new_authors:
        havebooks = len(myDB.select("select BookName from Books where status=? and AuthorName=?", ["Open", auth]))
        myDB.action("UPDATE authors set HaveBooks=? where AuthorName=?", [havebooks, auth])
        totalbooks = len(myDB.select("select BookName from Books where status!=? and AuthorName=?", ["Ignored", auth]))
        myDB.action("UPDATE authors set UnignoredBooks=? where AuthorName=?", [totalbooks, auth])

    logger.info("Library scan complete")
Exemplo n.º 50
0
def _gr_throttle():
    """Sleep one second when the last recorded Goodreads call is not in the past.

    This is the throttle that grsync applies before every Goodreads request.
    """
    time_now = int(time.time())
    if time_now <= lazylibrarian.LAST_GOODREADS:
        time.sleep(1)
        # NOTE(review): the stamp is only refreshed on this branch, and with the
        # pre-sleep time -- confirm this is the intended rate limiting.
        lazylibrarian.LAST_GOODREADS = time_now


def _gr_book_to_list(GA, *args):
    """Throttle, then call GA.BookToList(*args) without letting it raise.

    Returns the (res, content) pair from BookToList, or (None, None) when the
    call (or unpacking its result) raised; the error is logged at debug level.
    """
    _gr_throttle()
    try:
        res, content = GA.BookToList(*args)
    except Exception as e:
        logger.debug("Error in BookToList: %s" % str(e))
        return None, None
    return res, content


def grsync(status, shelf):
    """Synchronise books of a given status with a goodreads shelf.

    Books in the database with this status (plus "Have" when status is "Open")
    that are missing from the shelf are added to the shelf. Books on the shelf
    that are not in that database selection are either moved from the to-read
    shelf to the owned shelf (when already Have/Open), have their database
    status updated, or are looked up and added to the database.

    :param status: book status in the database to sync from
    :param shelf: name of the goodreads shelf to sync with
    :return: tuple (to_shelf, to_ll, moved): books added to the shelf, books
             added to the database, and books moved from to-read to owned.
             Returns (0, 0, 0) if an unhandled exception occurred (it is logged).
    """
    try:
        logger.debug('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        # use bound parameters rather than building the SQL by hand
        if status == 'Open':
            results = myDB.select('select bookid from books where status=? or status=?',
                                  (status, 'Have'))
        else:
            results = myDB.select('select bookid from books where status=?', (status,))
        ll_list = [terms['bookid'] for terms in results]

        GA = grauth()
        GR = None
        gr_shelf = GA.get_gr_shelf(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.debug("There are %s %s books, %s books on goodreads %s shelf" %
                     (len(ll_list), dstatus, len(gr_shelf), shelf))

        # set lookups keep the two difference lists linear; list order is preserved
        shelf_ids = set(gr_shelf)
        ll_ids = set(ll_list)
        not_on_shelf = [book for book in ll_list if book not in shelf_ids]
        not_in_ll = [book for book in gr_shelf if book not in ll_ids]

        to_shelf = 0
        to_ll = 0
        moved = 0
        # these need adding to shelf
        if not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.debug('Not connected to goodreads')
        else:
            for book in not_on_shelf:
                res, _ = _gr_book_to_list(GA, book, shelf)
                if res:
                    logger.debug("%10s added to %s shelf" % (book, shelf))
                    to_shelf += 1
                else:
                    logger.debug("Failed to add %s to %s shelf" % (book, shelf))

        # "to-read" books need adding to lazylibrarian as "wanted" if not already Open/Have,
        # if they are already Open/Have, remove from goodreads to-read shelf, add to owned shelf
        # "owned" need adding as "Have" as librarysync will pick up "Open" ones or change Have to Open

        for book in not_in_ll:
            # book ids come from the remote shelf, so always bind them as parameters
            result = myDB.match('select Status from books where bookid=?', (book,))
            if not result:
                # add book to database as wanted
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                to_ll += 1
                continue

            if result['Status'] in ['Have', 'Open']:  # don't change status if we have it
                if shelf != 'to-read':
                    logger.debug("%10s is already marked %s" % (book, result['Status']))
                    continue

                # need to move it from to-read shelf to owned shelf
                # a failure of one remote call must not abort the whole sync
                res, _ = _gr_book_to_list(GA, book, 'to-read', 'remove')
                if res:
                    logger.debug("%10s removed from to-read shelf" % book)
                else:
                    logger.debug("Failed to remove %s from to-read shelf" % book)

                res, _ = _gr_book_to_list(GA, book, 'owned', 'add')
                if res:
                    logger.debug("%10s added to owned shelf" % book)
                    moved += 1
                else:
                    logger.debug("Failed to add %s to owned shelf" % book)
            elif shelf == 'owned':
                myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
            else:
                myDB.action('UPDATE books SET Status=? WHERE BookID=?', (status, book))

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return to_shelf, to_ll, moved

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0, 0
Exemplo n.º 51
0
    def find_book(self, bookid=None, bookstatus="None"):
        """Look up one GoogleBooks volume and store it in the books table.

        The volume is requested from the GoogleBooks volumes endpoint, its
        author is resolved through GoodReads (and added to the authors table
        when not already present), then the book row, cover image, series
        and work page are written to the database.

        :param bookid: GoogleBooks volume id to fetch
        :param bookstatus: status to store for the book; when empty (or left at
                           the "None" default) CONFIG['NEWBOOK_STATUS'] is used
        :return: None. Returns early, without storing anything, when the
                 request gave no results, the volume has no author, or no
                 author id could be found.
        """
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, _ = gb_json_request(URL)

        if jsonresults is None:
            logger.debug('No results found for %s' % bookid)
            return

        # The signature default is the *string* "None", which is truthy, so a
        # plain "not bookstatus" test never fell back to the configured status.
        if not bookstatus or bookstatus == "None":
            bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

        book = bookdict(jsonresults)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(book['name'], dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        if not book['author']:
            logger.debug('Book %s does not contain author field, skipping' %
                         bookname)
            return

        # The preference checks below only log: the user explicitly asked for
        # this book, so it is added regardless.
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if book['lang'] not in valid_langs and 'All' not in valid_langs:
            logger.debug(
                'Book %s googlebooks language does not match preference, %s' %
                (bookname, book['lang']))

        if lazylibrarian.CONFIG['NO_PUBDATE']:
            if not book['date'] or book['date'] == '0000':
                logger.warn(
                    'Book %s Publication date does not match preference, %s' %
                    (bookname, book['date']))

        if lazylibrarian.CONFIG['NO_FUTURE']:
            # a missing date cannot be "in the future"; also avoids comparing
            # None with a string
            if book['date'] and book['date'] > today()[:4]:
                logger.warn(
                    'Book %s Future publication date does not match preference, %s'
                    % (bookname, book['date']))

        authorname = book['author']
        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            logger.warn("No AuthorID for %s, unable to add book %s" %
                        (book['author'], bookname))
            return

        AuthorID = author['authorid']
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?',
                           (AuthorID, ))
        if not match:
            match = myDB.match(
                'SELECT AuthorID from authors WHERE AuthorName=?',
                (author['authorname'], ))
            if match:
                # we have a different authorid for that authorname
                logger.debug(
                    '%s: Changing authorid from %s to %s' %
                    (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']
            else:
                # no author but request to add book, add author with newauthor status
                # User hit "add book" button from a search or a wishlist import
                newauthor_status = 'Active'
                if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in [
                        'Skipped', 'Ignored'
                ]:
                    newauthor_status = 'Paused'
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": newauthor_status
                }
                authorname = author['authorname']
                myDB.upsert("authors", newValueDict, controlValueDict)
                if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                    self.get_author_books(
                        AuthorID,
                        entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": book['sub'],
            "BookDesc": book['desc'],
            "BookIsbn": book['isbn'],
            "BookPub": book['pub'],
            "BookGenre": book['genre'],
            "BookImg": book['img'],
            "BookLink": book['link'],
            "BookRate": float(book['rate']),
            "BookPages": book['pages'],
            "BookDate": book['date'],
            "BookLang": book['lang'],
            "Status": bookstatus,
            "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s by %s added to the books database" %
                    (bookname, authorname))

        bookimg = book['img'] or ''  # tolerate a missing image link
        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # placeholder image: try to get a cover from another source
            workcover, source = getBookCover(bookid)
            if workcover:
                logger.debug('Updated cover for %s using %s' %
                             (bookname, source))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)
        elif bookimg.startswith('http'):
            # Real remote cover: cache it. This branch used to be nested under
            # the placeholder test above, so it never ran for real covers.
            link, success, _ = cache_img("book", bookid, bookimg)
            if success:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('Failed to cache image for %s' % bookimg)

        serieslist = []
        if book['series']:
            serieslist = [('', book['seriesNum'],
                           cleanName(unaccented(book['series']), '&/'))]
        if lazylibrarian.CONFIG['ADD_SERIES']:
            # series found via getWorkSeries replace the one from the volume data
            newserieslist = getWorkSeries(bookid)
            if newserieslist:
                serieslist = newserieslist
                logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
        setSeries(serieslist, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Exemplo n.º 52
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    new_authors = []

    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action(
                    'update books set Status="%s" where BookID="%s"' %
                    (status, bookID))
                myDB.action(
                    'update books set BookFile="" where BookID="%s"' %
                    bookID)
                logger.warn(
                    'Book %s - %s updated as not found on disk' %
                    (bookAuthor, bookName))
            # for book_type in getList(lazylibrarian.EBOOK_TYPE):
            #	bookName = book['BookName']
            #	bookAuthor = book['AuthorName']
            # Default destination path, should be allowed change per config file.
            #	dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName)
            # dest_path = authorname+'/'+bookname
            #	global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName)
#
            #	encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
            #	if os.path.isfile(encoded_book_path):
            #		book_exists = True
            # if not book_exists:
            #	myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName])
            # logger.info('Book %s updated as not found on disk' %
            # encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') )
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same
            # subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
# 			If this is a book, try to get author/title/isbn/language
# 			If metadata.opf exists, use that
# 			else if epub or mobi, read metadata from the book
# 			else have to try pattern match for author/title	and look up isbn/lang from LT or GR late
                match = 0
                extn = ""
                
                if '.' in files:
                    words = files.split('.')
                    extn = words[len(words) - 1]
                    
                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s" %
                        (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))
                    
                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know 
                    # LL preferred authorname/bookname at this point
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        else:
                            language = ""
                        if 'identifier' in res:
                            isbn = res['identifier']
                        else:
                            isbn = ""
                        match = 1
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:
                        # it's a book, but no external metadata found
                        # if it's an epub or a mobi we can try to read metadata
                        # from it
                        if (extn == "epub") or (extn == "mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                            try:
                                res = get_book_info(book_filename)
                            except:
                                res = {}
                            if 'title' in res and 'creator' in res:  # this is the minimum we need
                                book = res['title']
                                author = res['creator']
                                if 'language' in res:
                                    language = res['language']
                                else:
                                    language = ""
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                else:
                                    isbn = ""
                                logger.debug("book meta [%s] [%s] [%s] [%s]" %
                                    (isbn, language, author, book))
                                match = 1
                            else:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                if not match:
                    match = pattern.match(files)
                    if match:
                        author = match.group("author")
                        book = match.group("book")
                    else:
                        logger.debug("Pattern match failed [%s]" % files)

                if match:
                    processed_subdirectories.append(
                        subdirectory)  # flag that we found a book in this subdirectory
                    #
                    # If we have a valid looking isbn, and language != "Unknown", add it to cache
                    #
                    if not language:
                        language = "Unknown"

                    if not formatter.is_valid_isbn(isbn):
                        isbn = ""
                    if isbn != "" and language != "Unknown":
                        logger.debug(
                            "Found Language [%s] ISBN [%s]" %
                            (language, isbn))
                        # we need to add it to language cache if not already
                        # there, is_valid_isbn has checked length is 10 or 13
                        if len(isbn) == 10:
                            isbnhead = isbn[0:3]
                        else:
                            isbnhead = isbn[3:6]
                        match = myDB.action(
                            'SELECT lang FROM languages where isbn = "%s"' %
                            (isbnhead)).fetchone()
                        if not match:
                            myDB.action(
                                'insert into languages values ("%s", "%s")' %
                                (isbnhead, language))
                            logger.debug(
                                "Cached Lang [%s] ISBN [%s]" %
                                (language, isbnhead))
                        else:
                            logger.debug(
                                "Already cached Lang [%s] ISBN [%s]" %
                                (language, isbnhead))

                    # get authors name in a consistent format
                    if "," in author:  # "surname, forename"
                        words = author.split(',')
                        author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                    if author[1] == ' ':        
                        author = author.replace(' ', '.')
                        author = author.replace('..', '.')

                    # Check if the author exists, and import the author if not,
                    # before starting any complicated book-name matching to save repeating the search
                    #
                    check_exist_author = myDB.action(
                        'SELECT * FROM authors where AuthorName="%s"' %
                        author).fetchone()
                    if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                        # no match for supplied author, but we're allowed to
                        # add new ones

                        GR = GoodReads(author)
                        try:
                            author_gr = GR.find_author_id()
                        except:
                            logger.warn(
                                "Error finding author id for [%s]" %
                                author)
                            continue

                        # only try to add if GR data matches found author data
                        # not sure what this is for, never seems to fail??
                        if author_gr:
                            authorname = author_gr['authorname']

                            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                            match_auth = author.replace('.', '_')
                            match_auth = match_auth.replace(' ', '_')
                            match_auth = match_auth.replace('__', '_')
                            match_name = authorname.replace('.', '_')
                            match_name = match_name.replace(' ', '_')
                            match_name = match_name.replace('__', '_')
                            match_name = common.remove_accents(match_name)
                            match_auth = common.remove_accents(match_auth)
                            # allow a degree of fuzziness to cater for different accented character handling.
                            # some author names have accents,
                            # filename may have the accented or un-accented version of the character
                            # The currently non-configurable value of fuzziness might need to go in config
                            # We stored GoodReads unmodified author name in
                            # author_gr, so store in LL db under that
                            match_fuzz = fuzz.ratio(match_auth, match_name)
                            if match_fuzz < 90:
                                logger.debug(
                                    "Failed to match author [%s] fuzz [%d]" %
                                    (author, match_fuzz))
                                logger.debug(
                                    "Failed to match author [%s] to authorname [%s]" %
                                    (match_auth, match_name))

                            # To save loading hundreds of books by unknown
                            # authors at GR or GB, ignore if author "Unknown"
                            if (author != "Unknown") and (match_fuzz >= 90):
                                # use "intact" name for author that we stored in
                                # GR author_dict, not one of the various mangled versions
                                # otherwise the books appear to be by a
                                # different author!
                                author = author_gr['authorname']
                                # this new authorname may already be in the
                                # database, so check again
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName="%s"' %
                                    author).fetchone()
                                if not check_exist_author:
                                    logger.debug(
                                        "Adding new author [%s]" %
                                        author)
                                    if author not in new_authors:
                                        new_authors.append(author)
                                    try:
                                        importer.addAuthorToDB(author)
                                        check_exist_author = myDB.action(
                                            'SELECT * FROM authors where AuthorName="%s"' %
                                            author).fetchone()
                                    except:
                                        continue

                    # check author exists in db, either newly loaded or already
                    # there
                    if not check_exist_author:
                        logger.debug(
                            "Failed to match author [%s] in database" %
                            author)
                    else:
                        # author exists, check if this book by this author is in our database
                        # metadata might have quotes in book name
                        book = book.replace('"', '').replace("'", "")
                        bookid = find_book_in_db(myDB, author, book)
                        if bookid:
                            # check if book is already marked as "Open" (if so,
                            # we already had it)
                            check_status = myDB.action(
                                'SELECT Status from books where BookID="%s"' %
                                bookid).fetchone()
                            if check_status['Status'] != 'Open':
                                # update status as we've got this book
                                myDB.action(
                                    'UPDATE books set Status="Open" where BookID="%s"' %
                                    bookid)
                                book_filename = os.path.join(
                                    r,
                                    files).encode(
                                        lazylibrarian.SYS_ENCODING)
                                # update book location so we can check if it
                                # gets removed, or allow click-to-open
                                myDB.action(
                                    'UPDATE books set BookFile="%s" where BookID="%s"' %
                                    (book_filename, bookid))
                                new_book_count += 1

    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    if new_book_count:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
    logger.debug("ISBN Language cache holds %s entries" % cachesize['count(*)'])
    stats = len(
        myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn(
            "There are %s books in your library with unknown language" %
            stats)

    logger.debug('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(
            myDB.select('select BookName from Books where status="%s" and AuthorName="%s"' %
                        ('Open', auth)))
        myDB.action(
            'UPDATE authors set HaveBooks="%s" where AuthorName="%s"' %
            (havebooks, auth))
        totalbooks = len(
            myDB.select('select BookName from Books where status!="%s" and AuthorName="%s"' %
                        ('Ignored', auth)))
        myDB.action(
            'UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' %
            (totalbooks, auth))

    logger.info('Library scan complete')
Exemplo n.º 53
0
def addAuthorToDB(authorname=None):
    """Add (or refresh) an author and their books in the database.

    The author row is first upserted with status "Loading" (using a
    placeholder AuthorID of "0: <name>" when the author is not yet known).
    The author is then looked up through GoodReads; if found, the author
    details are stored, every book returned by ``GR.get_author_books`` is
    upserted with status "Skipped", and finally the author row is marked
    "Active" with its total book count and, when available, the details of
    the book with the latest BookDate.

    If GoodReads returns nothing for the author, an error is logged and the
    function returns without touching the books table.

    :param authorname: name of the author to add; must be a string.
    :returns: None
    """
    threading.currentThread().name = "DBIMPORT"
    myDB = database.DBConnection()

    GR = GoodReads(authorname, 'author')

    # Double any single quotes so the name can sit inside the quoted literal.
    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace(
        "'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # New author: store a placeholder id until the real one is known.
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Without an author record there is no id to fetch books for, so
        # stop here instead of failing later on undefined author details.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    books = GR.get_author_books(authorid)
    for book in books:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "AuthorID": authorid,
            # NOTE(review): the book's AuthorLink is populated with the
            # author image rather than authorlink - kept as-is, confirm
            # whether consumers of this column rely on that.
            "AuthorLink": authorimg,
            "BookName": book['bookname'],
            "BookSub": book['booksub'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookPub": book['bookpub'],
            "BookGenre": book['bookgenre'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)

    # First row when ordered by BookDate descending; None when the author
    # has no books in the database.
    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC"
        % authorid).fetchone()
    bookCount = myDB.select(
        "SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" %
        authorid)

    for count in bookCount:
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": count['counter']
        }
        if lastbook is not None:
            # Only record last-book details when there is a book to
            # describe; the author is still marked Active either way.
            newValueDict["LastBook"] = lastbook['BookName']
            newValueDict["LastLink"] = lastbook['BookLink']
            newValueDict["LastDate"] = lastbook['BookDate']

        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.info("Processing complete: Added %s books to the database" %
                    str(count['counter']))