def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None, book_id=None): pp_path = pp_path.encode(lazylibrarian.SYS_ENCODING) # check we got a book/magazine in the downloaded files pp = False if bookname: # None if it's a magazine booktype = 'book' else: booktype = 'mag' for bookfile in os.listdir(pp_path): if formatter.is_valid_booktype(bookfile, booktype=booktype): pp = True if pp is False: # no book/mag found in a format we wanted. Leave for the user to delete or convert manually logger.debug('Failed to locate a book/magazine in downloaded files, leaving for manual processing') return pp try: if not os.path.exists(dest_path): logger.debug('%s does not exist, so it\'s safe to create it' % dest_path) else: logger.debug('%s already exists. Removing existing tree.' % dest_path) try: shutil.rmtree(dest_path) except Exception as why: logger.debug("Failed to rmtree %s, %s" % (dest_path, str(why))) logger.debug('Attempting to copy/move tree') if lazylibrarian.DESTINATION_COPY == True and lazylibrarian.DOWNLOAD_DIR != pp_path: try: shutil.copytree(pp_path, dest_path) logger.debug('Successfully copied %s to %s' % (pp_path, dest_path)) except Exception as why: logger.debug('Failed to copy %s to %s, %s' % (pp_path, dest_path, str(why))) elif lazylibrarian.DOWNLOAD_DIR == pp_path: for file3 in os.listdir(pp_path): if formatter.is_valid_booktype(file3, booktype=booktype): bookID = str(file3).split("LL.(")[1].split(")")[0] if bookID == book_id: logger.debug('Processing %s' % bookID) if not os.path.exists(dest_path): try: os.makedirs(dest_path) except Exception, e: logger.debug("Unable to makedir %s, %s" % (dest_path, str(e))) if lazylibrarian.DESTINATION_COPY == True: try: shutil.copyfile(os.path.join(pp_path, file3), os.path.join(dest_path, file3)) except Exception as why: logger.debug("Failed to copy file %s to %s, %s" % (file3, dest_path, str(why))) else: try: shutil.move(os.path.join(pp_path, file3), os.path.join(dest_path, 
file3)) except Exception as why: logger.debug("Failed to move file %s to %s, %s" % (file3, dest_path, str(why))) else:
def book_file(search_dir=None, booktype=None):
    """Locate any book/mag file of the wanted type in search_dir.

    Any matching file will do; returns the full pathname of the first
    match, or an empty string when none is found.
    """
    if search_dir is None or not os.path.isdir(search_dir):
        return ""
    hit = next((name for name in os.listdir(search_dir)
                if is_valid_booktype(name, booktype=booktype)), None)
    return os.path.join(search_dir, hit) if hit is not None else ""
def book_file(search_dir=None):
    """Find a book file in search_dir; any book will do.

    Returns the full pathname of the first book found, encoded to the
    filesystem encoding, or an empty string when no book is found.
    """
    if not search_dir or not os.path.isdir(search_dir):
        return ""
    for entry in os.listdir(search_dir):
        if not formatter.is_valid_booktype(entry):
            continue
        full_path = os.path.join(search_dir, entry)
        return full_path.encode(lazylibrarian.SYS_ENCODING)
    return ""
def book_file(search_dir=None, booktype=None):
    """Find a book/mag file of the given type in search_dir.

    Returns the full pathname of the first match, or "" if none found.
    """
    if search_dir is None:
        return ""
    if not os.path.isdir(search_dir):
        return ""
    for candidate in os.listdir(search_dir):
        if formatter.is_valid_booktype(candidate, booktype=booktype):
            # path is returned unencoded; encoding is left to the caller
            return os.path.join(search_dir, candidate)
    return ""
def book_file(search_dir=None, booktype=None):
    """Return full pathname of any book/mag of the wanted type, else ""."""
    usable = bool(search_dir) and os.path.isdir(search_dir)
    if not usable:
        return ""
    match = next((f for f in os.listdir(search_dir)
                  if is_valid_booktype(f, booktype=booktype)), None)
    return "" if match is None else os.path.join(search_dir, match)
def bookRename(bookid):
    """Rename the book file(s) for bookid to the configured naming pattern.

    Looks the book up in the database and, unless its folder looks like a
    calibre-managed directory (name ending in "(digits)"), renames the book
    file plus matching .jpg/.opf sidecars to EBOOK_DEST_FILE with
    $Author/$Title substituted. cover.jpg and metadata.opf are left alone.
    Returns the (possibly renamed) preferred book filename, or '' when the
    bookid is unknown or has no file recorded.
    """
    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,BookFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid, ))
    if not exists:
        logger.debug("Invalid bookid in bookRename %s" % bookid)
        return ''
    else:
        f = exists['BookFile']
        if not f:
            # BUGFIX: format string had two %s placeholders but only one
            # argument, raising TypeError instead of logging the message
            logger.debug("No filename for %s in BookRename" % bookid)
            return ''
    r = os.path.dirname(f)
    try:
        # calibre library folders end in "(digits)"; don't rename inside those
        calibreid = r.rsplit('(', 1)[1].split(')')[0]
        if not calibreid.isdigit():
            calibreid = ''
    except IndexError:
        calibreid = ''
    if calibreid:
        msg = '[%s] looks like a calibre directory: not renaming book' % os.path.basename(r)
        logger.debug(msg)
    else:
        book_basename, prefextn = os.path.splitext(os.path.basename(f))
        new_basename = lazylibrarian.CONFIG['EBOOK_DEST_FILE']
        new_basename = new_basename.replace('$Author', exists['AuthorName']).replace(
            '$Title', exists['BookName'])
        if book_basename != new_basename:
            # only rename bookname.type, bookname.jpg, bookname.opf, not cover.jpg or metadata.opf
            for fname in os.listdir(r):
                extn = ''
                if is_valid_booktype(fname, booktype='ebook'):
                    extn = os.path.splitext(fname)[1]
                elif fname.endswith('.opf') and not fname == 'metadata.opf':
                    extn = '.opf'
                elif fname.endswith('.jpg') and not fname == 'cover.jpg':
                    extn = '.jpg'
                if extn:
                    ofname = os.path.join(r, fname)
                    nfname = os.path.join(r, new_basename + extn)
                    try:
                        shutil.move(ofname, nfname)
                        logger.debug("bookRename %s to %s" % (ofname, nfname))
                        if ofname == exists['BookFile']:
                            # if we renamed the preferred filetype, return new name
                            f = nfname
                    except Exception as e:
                        logger.error('Unable to rename [%s] to [%s] %s %s' %
                                     (ofname, nfname, type(e).__name__, str(e)))
    return f
def book_file(search_dir=None, booktype=None):
    """Find any book/mag file of the given type in search_dir.

    Returns the full pathname of the first match, or "" when there is
    nothing to search or nothing matches.
    """
    if search_dir is None or booktype is None:
        return ""
    # listdir on a unicode path gives unicode entries back (py2)
    if isinstance(search_dir, str):
        search_dir = search_dir.decode(lazylibrarian.SYS_ENCODING)
    if not search_dir or not os.path.isdir(search_dir):
        return ""
    for entry in os.listdir(search_dir):
        if is_valid_booktype(entry, booktype=booktype):
            return os.path.join(search_dir, entry)
    return ""
def book_file(search_dir=None, booktype=None):
    """Return the full pathname of any book/mag of the wanted type.

    Scans search_dir (non-recursively) and returns the first matching
    entry, or "" when parameters are missing, the directory is absent,
    or nothing matches. Directory read errors are logged, not raised.
    """
    if search_dir is None or booktype is None:
        return ""
    if not (search_dir and os.path.isdir(search_dir)):
        return ""
    try:
        for raw in os.listdir(makeBytestr(search_dir)):
            name = makeUnicode(raw)
            if is_valid_booktype(name, booktype=booktype):
                return os.path.join(search_dir, name)
    except Exception as e:
        logger.warn('Listdir error [%s]: %s %s' % (search_dir, type(e).__name__, str(e)))
    return ""
def book_file(search_dir=None, booktype=None):
    """Pick any book/mag file of the given type out of search_dir.

    Returns the full pathname of the first match, "" otherwise.
    Listdir failures are logged rather than raised.
    """
    if search_dir is None or booktype is None:
        return ""
    # decode byte paths so listdir yields unicode (no-op where str lacks decode)
    if isinstance(search_dir, str) and hasattr(search_dir, 'decode'):
        search_dir = search_dir.decode(lazylibrarian.SYS_ENCODING)
    found = ""
    if search_dir and os.path.isdir(search_dir):
        try:
            for name in os.listdir(search_dir):
                if is_valid_booktype(name, booktype=booktype):
                    found = os.path.join(search_dir, name)
                    break
        except Exception as e:
            logger.warn('Listdir error [%s]: %s %s' % (search_dir, type(e).__name__, str(e)))
    return found
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None):
    """Move/copy a finished download from pp_path into the library.

    If IMP_CALIBREDB is configured and this is a book, hand the import over
    to calibredb (writing an .opf first) and rescan the library; otherwise
    copy the wanted files (book/mag, .jpg, .opf) into dest_path ourselves,
    renaming them to global_name on the fly. Returns True on success,
    False on any failure (caller decides what to do with the leftovers).
    """
    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'
    got_book = False
    for bookfile in os.listdir(pp_path):
        if is_valid_booktype(bookfile, booktype=booktype):
            # keep the filename, not just a flag - calibre path needs it below
            got_book = bookfile
            break
    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn(
            'Failed to locate a book/magazine in %s, leaving for manual processing' % pp_path)
        return False
    # Do we want calibre to import the book for us
    if bookname and len(lazylibrarian.IMP_CALIBREDB):
        try:
            logger.debug('Creating metadata for calibre')
            dest_path = pp_path
            global_name = os.path.splitext(got_book)[0]
            bookid = ''
            booklang = ''
            bookisbn = ''
            bookpub = ''
            bookdate = ''
            bookdesc = ''
            processOPF(dest_path, authorname, bookname, bookisbn, bookid, bookpub,
                       bookdate, bookdesc, booklang, global_name)
            logger.debug('Importing %s, %s into calibre library' % (authorname, bookname))
            params = [
                lazylibrarian.IMP_CALIBREDB,
                'add',
                '-1',
                '--with-library',
                lazylibrarian.DESTINATION_DIR,
                pp_path
            ]
            logger.debug(str(params))
            res = subprocess.check_output(params, stderr=subprocess.STDOUT)
            if res:
                logger.debug(
                    '%s reports: %s' % (lazylibrarian.IMP_CALIBREDB, unaccented_str(res)))
            calibre_dir = os.path.join(lazylibrarian.DESTINATION_DIR, unaccented_str(authorname), '')
            if os.path.isdir(calibre_dir):
                imported = LibraryScan(
                    calibre_dir)  # rescan authors directory so we get the new book in our database
            else:
                imported = LibraryScan(
                    lazylibrarian.DESTINATION_DIR)  # may have to rescan whole library instead
            # "already exist" in calibredb's output means the book was there before,
            # which we treat as success even though nothing new was imported
            if not imported and not 'already exist' in res:
                return False
        except subprocess.CalledProcessError as e:
            logger.debug(params)
            logger.debug('calibredb import failed: %s' % e.output)
            return False
        except OSError as e:
            logger.debug('calibredb failed, %s' % e.strerror)
            return False
    else:
        # we are copying the files ourselves
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
        elif not os.path.isdir(dest_path):
            logger.debug('%s exists but is not a directory, deleting it' % dest_path)
            try:
                os.remove(dest_path)
            except OSError as why:
                logger.debug('Failed to delete %s, %s' % (dest_path, why.strerror))
                return False
        if not os.path.exists(dest_path):
            try:
                os.makedirs(dest_path)
            except OSError as why:
                logger.debug('Failed to create directory %s, %s' % (dest_path, why.strerror))
                return False
        # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
        # After the copy completes, delete source files if DESTINATION_COPY not set,
        # but don't delete source files if copy failed or if in root of download dir
        for fname in os.listdir(pp_path):
            if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                    is_valid_booktype(fname, booktype=booktype):
                logger.debug('Copying %s to directory %s' % (fname, dest_path))
                try:
                    shutil.copyfile(
                        os.path.join(pp_path, fname),
                        os.path.join(dest_path, global_name + os.path.splitext(fname)[1]))
                except Exception as why:
                    logger.debug("Failed to copy file %s to %s, %s" % (fname, dest_path, str(why)))
                    return False
            else:
                logger.debug('Ignoring unwanted file: %s' % fname)
    # calibre or ll copied the files we want, now delete source files if not in download root dir
    if not lazylibrarian.DESTINATION_COPY:
        if pp_path != lazylibrarian.DOWNLOAD_DIR:
            try:
                shutil.rmtree(pp_path)
            except Exception as why:
                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))
                return False
    return True
def bookRename(bookid):
    """Move/rename the files for bookid to the configured folder/file pattern.

    Looks the book up in the database, skips calibre-managed folders (unless
    CALIBRE_RENAME is set) and folders containing multiple books, moves the
    whole folder to the name produced by nameVars(), then renames the book
    file and its .jpg/.opf sidecars (leaving cover.jpg and metadata.opf
    untouched). Returns the (possibly new) preferred book filename, or ''
    when the bookid is unknown or has no file recorded.
    """
    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,BookFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid,))
    if not exists:
        logger.debug("Invalid bookid in bookRename %s" % bookid)
        return ''
    f = exists['BookFile']
    if not f:
        # BUGFIX: format string had two %s placeholders but only one
        # argument, raising TypeError instead of logging the message
        logger.debug("No filename for %s in BookRename" % bookid)
        return ''
    r = os.path.dirname(f)
    if not lazylibrarian.CONFIG['CALIBRE_RENAME']:
        try:
            # calibre library folders end in "(digits)"; don't touch those
            # noinspection PyTypeChecker
            calibreid = r.rsplit('(', 1)[1].split(')')[0]
            if not calibreid.isdigit():
                calibreid = ''
        except IndexError:
            calibreid = ''
        if calibreid:
            msg = '[%s] looks like a calibre directory: not renaming book' % os.path.basename(r)
            logger.debug(msg)
            return f
    reject = multibook(r)
    if reject:
        # more than one book shares this folder; renaming would clobber files
        logger.debug("Not renaming %s, found multiple %s" % (f, reject))
        return f
    seriesinfo = nameVars(bookid)
    dest_path = seriesinfo['FolderName']
    dest_dir = lazylibrarian.DIRECTORY('eBook')
    dest_path = os.path.join(dest_dir, dest_path)
    dest_path = stripspaces(dest_path)
    oldpath = r
    if oldpath != dest_path:
        try:
            dest_path = safe_move(oldpath, dest_path)
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))
    book_basename, prefextn = os.path.splitext(os.path.basename(f))
    new_basename = seriesinfo['BookFile']
    if ' / ' in new_basename:
        # used as a separator in goodreads omnibus
        logger.warn("bookRename [%s] looks like an omnibus? Not renaming %s" %
                    (new_basename, book_basename))
        new_basename = book_basename
    if book_basename != new_basename:
        # only rename bookname.type, bookname.jpg, bookname.opf, not cover.jpg or metadata.opf
        for fname in os.listdir(makeBytestr(dest_path)):
            fname = makeUnicode(fname)
            extn = ''
            if is_valid_booktype(fname, booktype='ebook'):
                extn = os.path.splitext(fname)[1]
            elif fname.endswith('.opf') and not fname == 'metadata.opf':
                extn = '.opf'
            elif fname.endswith('.jpg') and not fname == 'cover.jpg':
                extn = '.jpg'
            if extn:
                ofname = os.path.join(dest_path, fname)
                nfname = os.path.join(dest_path, new_basename + extn)
                if ofname != nfname:
                    try:
                        nfname = safe_move(ofname, nfname)
                        logger.debug("bookRename %s to %s" % (ofname, nfname))
                        oldname = os.path.join(oldpath, fname)
                        if oldname == exists['BookFile']:
                            # if we renamed/moved the preferred file, return new name
                            f = nfname
                    except Exception as e:
                        logger.error('Unable to rename [%s] to [%s] %s %s' %
                                     (ofname, nfname, type(e).__name__, str(e)))
    return f
def audioRename(bookid):
    """Rename all parts of a multi-file audiobook to AUDIOBOOK_DEST_FILE.

    Reads id3 tags from each audio file in the book's folder to discover
    author/title/track numbers (falling back to filename tokens like "01."
    when tags carry no track info), sanity-checks that every part is present
    and consistent, then renames each part using the $Author/$Title/$Part/
    $Total pattern. Returns the filename of part 1 (the preferred file),
    or the unchanged filename if any check fails, or '' on bad bookid.
    """
    for item in ['$Part', '$Title']:
        if item not in lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE']:
            logger.error("Unable to audioRename, check AUDIOBOOK_DEST_FILE")
            return ''
    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,AudioFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid, ))
    if exists:
        book_filename = exists['AudioFile']
        if book_filename:
            r = os.path.dirname(book_filename)
        else:
            # BUGFIX: format string had two %s placeholders but only one
            # argument, raising TypeError instead of logging the message
            logger.debug("No filename for %s in audioRename" % bookid)
            return ''
    else:
        logger.debug("Invalid bookid in audioRename %s" % bookid)
        return ''
    cnt = 0
    parts = []
    author = book = track = ''
    for f in os.listdir(r):
        if is_valid_booktype(f, booktype='audiobook'):
            cnt += 1
            try:
                id3r = id3reader.Reader(os.path.join(r, f))
                author = id3r.getValue('performer')
                book = id3r.getValue('album')
                track = id3r.getValue('track')
                if not track:
                    track = '0'
                if author and book:
                    parts.append([track, book, author, f])
            except Exception as e:
                logger.debug("id3reader %s %s" % (type(e).__name__, str(e)))
                pass
    logger.debug("%s found %s audiofiles" % (exists['BookName'], cnt))
    if cnt != len(parts):
        logger.debug("%s: Incorrect number of parts (found %i from %i)" %
                     (exists['BookName'], len(parts), cnt))
        return book_filename
    # does the track include total (eg 1/12)
    if '/' in track:
        a, b = track.split('/')
        if check_int(b, 0) and check_int(b, 0) != cnt:
            logger.debug("%s: Expected %s parts, got %i" % (exists['BookName'], b, cnt))
            return book_filename
    # check all parts have the same author and title
    for part in parts:
        if part[1] != book:
            logger.debug("%s: Inconsistent title: [%s][%s]" % (exists['BookName'], part[1], book))
            return book_filename
        if part[2] != author:
            logger.debug("%s: Inconsistent author: [%s][%s]" % (exists['BookName'], part[2], author))
            return book_filename
    # strip out just part number
    for part in parts:
        if '/' in part[0]:
            part[0] = part[0].split('/')[0]
    # do we have any track info (value is 0 if not)
    if check_int(parts[0][0], 0) == 0:
        tokmatch = ''
        # try to extract part information from filename. Search for token style of part 1 in this order...
        for token in ['001.', '01.', '1.', ' 01 ', '01']:
            if tokmatch:
                break
            for part in parts:
                if token in part[3]:
                    tokmatch = token
                    break
        if tokmatch:
            # we know the numbering style, get numbers for the other parts
            cnt = 0
            while cnt < len(parts):
                cnt += 1
                if tokmatch == '001.':
                    pattern = '%s.' % str(cnt).zfill(3)
                elif tokmatch == '01.':
                    pattern = '%s.' % str(cnt).zfill(2)
                elif tokmatch == '1.':
                    pattern = '%s.' % str(cnt)
                elif tokmatch == ' 01 ':
                    pattern = ' %s ' % str(cnt).zfill(2)
                else:
                    pattern = '%s' % str(cnt).zfill(2)
                # standardise numbering of the parts
                for part in parts:
                    if pattern in part[3]:
                        part[0] = str(cnt)
                        break
    # check all parts are present
    cnt = 0
    found = True
    while found and cnt < len(parts):
        found = False
        cnt += 1
        for part in parts:
            trk = check_int(part[0], 0)
            if trk == cnt:
                found = True
                break
    if not found:
        logger.debug("%s: No part %i found" % (exists['BookName'], cnt))
        return book_filename
    # if we get here, looks like we have all the parts needed to rename properly
    for part in parts:
        pattern = lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE']
        pattern = pattern.replace('$Author', author).replace(
            '$Title', book).replace('$Part', part[0].zfill(len(str(
                len(parts))))).replace('$Total', str(len(parts)))
        n = os.path.join(r, pattern + os.path.splitext(part[3])[1])
        o = os.path.join(r, part[3])
        if o != n:
            try:
                shutil.move(o, n)
                if check_int(part[0], 0) == 1:
                    book_filename = n  # return part 1 of set
                logger.debug('%s: audioRename [%s] to [%s]' % (exists['BookName'], o, n))
            except Exception as e:
                logger.error('Unable to rename [%s] to [%s] %s %s' %
                             (o, n, type(e).__name__, str(e)))
    return book_filename
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None):
    """Copy the wanted files from a download folder into the library.

    Verifies pp_path actually contains a book/magazine of the wanted type,
    creates dest_path, then copies book/.jpg/.opf files renamed to
    global_name. Source files are removed afterwards unless DESTINATION_COPY
    is set or pp_path is the download root. Returns True on success.
    """
    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'
    got_book = False
    for bookfile in os.listdir(pp_path):
        if formatter.is_valid_booktype(bookfile, booktype=booktype):
            got_book = True
            break
    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn('Failed to locate a book/magazine in %s, leaving for manual processing' % pp_path)
        return False
    if not os.path.exists(dest_path):
        logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
    elif not os.path.isdir(dest_path):
        # something (not a directory) is squatting on our target name
        logger.debug('%s exists but is not a directory, deleting it' % dest_path)
        try:
            os.remove(dest_path)
        except OSError as why:
            logger.debug('Failed to delete %s, %s' % (dest_path, why.strerror))
            return False
    if not os.path.exists(dest_path):
        try:
            os.makedirs(dest_path)
        except OSError as why:
            logger.debug('Failed to create directory %s, %s' % (dest_path, why.strerror))
            return False
    # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
    # After the copy completes, delete source files if DESTINATION_COPY not set,
    # but don't delete source files if copy failed or if in root of download dir
    for fname in os.listdir(pp_path):
        if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                formatter.is_valid_booktype(fname, booktype=booktype):
            logger.debug('Copying %s to directory %s' % (fname, dest_path))
            try:
                shutil.copyfile(os.path.join(pp_path, fname), os.path.join(
                    dest_path, global_name + os.path.splitext(fname)[1]))
            except Exception as why:
                logger.debug("Failed to copy file %s to %s, %s" % (
                    fname, dest_path, str(why)))
                return False
        else:
            logger.debug('Ignoring unwanted file: %s' % fname)
    # copied the files we want, now delete source files if not in download root dir
    if not lazylibrarian.DESTINATION_COPY:
        if pp_path != lazylibrarian.DOWNLOAD_DIR:
            try:
                shutil.rmtree(pp_path)
            except Exception as why:
                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))
                return False
    return True
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None):
    """Copy the wanted files from a download folder into the library.

    Variant that renames copies using the last dot-separated filename
    component as the extension. Verifies pp_path contains a book/magazine
    of the wanted type, creates dest_path, copies book/.jpg/.opf files as
    global_name.<ext>, then removes the source folder unless
    DESTINATION_COPY is set or pp_path is the download root.
    Returns True on success, False on any failure.
    """
    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'
    got_book = False
    for bookfile in os.listdir(pp_path):
        if formatter.is_valid_booktype(bookfile, booktype=booktype):
            got_book = True
            break
    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn('Failed to locate a book/magazine in %s, leaving for manual processing' % pp_path)
        return False
    if not os.path.exists(dest_path):
        logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
    elif not os.path.isdir(dest_path):
        logger.debug('%s exists but is not a directory, deleting it' % dest_path)
        try:
            os.remove(dest_path)
        except OSError as why:
            logger.debug('Failed to delete %s, %s' % (dest_path, str(why)))
            return False
    if not os.path.exists(dest_path):
        try:
            os.makedirs(dest_path)
        except OSError as why:
            logger.debug('Failed to create directory %s, %s' % (dest_path, str(why)))
            return False
    # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
    # After the copy completes, delete source files if DESTINATION_COPY not set,
    # but don't delete source files if copy failed or if in root of download dir
    for fname in os.listdir(pp_path):
        if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                formatter.is_valid_booktype(fname, booktype=booktype):
            logger.debug('Copying %s to directory %s' % (fname, dest_path))
            try:
                # NOTE: keeps only the last dot-component as extension
                shutil.copyfile(os.path.join(pp_path, fname),
                                os.path.join(dest_path, global_name + '.'
                                             + str(fname).split('.')[-1]))
            except Exception as why:
                logger.debug("Failed to copy file %s to %s, %s" % (fname, dest_path, str(why)))
                return False
        else:
            logger.debug('Ignoring unwanted file: %s' % fname)
    # copied the files we want, now delete source files if not in download root dir
    if not lazylibrarian.DESTINATION_COPY:
        if pp_path != lazylibrarian.DOWNLOAD_DIR:
            try:
                shutil.rmtree(pp_path)
            except Exception as why:
                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))
                return False
    return True
try: shutil.move(os.path.join(pp_path, file3), os.path.join(dest_path, file3)) except Exception as why: logger.debug("Failed to move file %s to %s, %s" % (file3, dest_path, str(why))) else: try: shutil.move(pp_path, dest_path) logger.debug('Successfully moved %s to %s.' % (pp_path, dest_path)) except Exception as why: logger.debug("Failed to move %s to %s, %s" % (pp_path, dest_path, str(why))) pp = True # try and rename the actual book file & remove unwanted non-book files for file2 in os.listdir(dest_path): if file2.lower().endswith(".jpg") or file2.lower().endswith(".opf") or formatter.is_valid_booktype(file2, booktype=booktype): logger.debug('Moving %s to directory %s' % (file2, dest_path)) os.rename(os.path.join(dest_path, file2), os.path.join(dest_path, global_name + '.' + str(file2).split('.')[-1])) else: logger.debug('Removing unwanted file: %s' % str(file2)) os.remove(os.path.join(dest_path, file2)) #try: # os.chmod(dest_path, 0777) #except Exception, e: # logger.debug("Could not chmod path: " + str(dest_path)) except OSError, e: logger.error('Could not create destination folder or rename the downloaded ebook/magazine. Check permissions of: ' + lazylibrarian.DESTINATION_DIR) logger.error(str(e)) pp = False
def LibraryScan(dir=None):
    """Scan an ebook directory tree and reconcile it with the database.

    Walks dir (defaults to DOWNLOAD_DIR), identifies books from embedded
    epub/mobi metadata, any .opf sidecar, or an EBOOK_DEST_FILE-derived
    filename pattern; adds unknown authors via GoodReads (fuzzy-matched),
    marks found books "Open" and records their location, caches ISBN
    language info, then refreshes per-author book counts and caches cover
    images. With FULL_SCAN set, books missing from disk are flagged first.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR
    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return
    myDB = database.DBConnection()
    # stats table is rebuilt every scan; other modules add rows as they run
    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
 GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')
    logger.info(
        'Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
    new_book_count = 0
    file_count = 0
    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']
            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))
    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []
    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)
    for r, d, f in os.walk(dir):
        for directory in d[:]:
            # skip hidden directories
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            if isinstance(r, str):
                r = r.decode('utf-8')
            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if formatter.is_valid_booktype(files):
                    logger.debug("[%s] Now scanning subdirectory %s" % (dir, subdirectory))
                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split('.')
                    extn = words[len(words) - 1]
                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))
                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:
                            # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']
                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:
                            logger.debug("Book meta incomplete in %s" % book_filename)
                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:
                        # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)
                    if not match:
                        # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)
                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)
                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" % (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            # NOTE: `match` is reused here for the DB lookup result
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' % (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        # NOTE(review): author[1] assumes at least 2 chars — a
                        # single-character author name would raise IndexError; verify
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')
                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones
                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" % author)
                                continue
                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']
                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name))
                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                                    if not check_exist_author:
                                        logger.debug(
                                            "Adding new author [%s]" % author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                                        except:
                                            continue
                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" % author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)
                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)
                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' % bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book
                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                    book_filename = os.path.join(r, files)
                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open
                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid))
                                    new_book_count += 1
    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
 sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
    if stats['sum(GR_book_hits)'] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" % stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" % stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" % stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" % stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" % stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" % stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" % stats['sum(uncached)'])
        logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)
    authors = myDB.select('select AuthorName from authors')
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug('Updating bookcounts for %i authors' % len(authors))
    for author in authors:
        name = author['AuthorName']
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' %
            name).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks['counter'], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s"' % name).fetchone()
        myDB.action('UPDATE authors set TotalBooks="%s" where AuthorName="%s"' % (totalbooks['counter'], name))
        unignoredbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' %
            name).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (unignoredbooks['counter'], name))
    covers = myDB.action("select count('bookimg') as counter from books where bookimg like 'http%'").fetchone()
    logger.info("Caching covers for %s books" % covers['counter'])
    images = myDB.action('select bookid, bookimg, bookname from books where bookimg like "http%"')
    for item in images:
        bookid = item['bookid']
        bookimg = item['bookimg']
        bookname = item['bookname']
        newimg = bookwork.cache_cover(bookid, bookimg)
        if newimg != bookimg:
            myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))
    logger.info('Library scan complete')
def magazineScan(thread=None):
    """Scan MAG_DEST_FOLDER for magazine issues and sync the database.

    With FULL_SCAN set, first deletes Issues rows whose file is gone and
    magazines rows with no remaining issues, then walks the magazine
    directory matching filenames against a regex built from MAG_DEST_FILE,
    upserting magazines/issues rows and refreshing the magazine's
    MagazineAdded / LastAcquired / IssueDate summary columns.

    thread -- if None, renames the current thread to "MAGAZINESCAN".
    Returns nothing; all results are written to the database.
    """
    if thread is None:
        threading.currentThread().name = "MAGAZINESCAN"

    myDB = database.DBConnection()

    mag_path = lazylibrarian.MAG_DEST_FOLDER
    # only scan the fixed prefix of the folder template, not the $Title part
    if '$' in mag_path:
        mag_path = mag_path.split('$')[0]

    if lazylibrarian.MAG_RELATIVE:
        # leading '.' or '_' keeps the magazine tree out of the ebook scan
        if mag_path[0] not in '._':
            mag_path = '_' + mag_path
        mag_path = os.path.join(lazylibrarian.DESTINATION_DIR, mag_path).encode(lazylibrarian.SYS_ENCODING)
    else:
        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

    if lazylibrarian.FULL_SCAN:
        mags = myDB.select('select * from Issues')
        # check all the issues are still there, delete entry if not
        for mag in mags:
            title = mag['Title']
            issuedate = mag['IssueDate']
            issuefile = mag['IssueFile']

            if issuefile and not os.path.isfile(issuefile):
                myDB.action('DELETE from Issues where issuefile="%s"' % issuefile)
                logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                controlValueDict = {"Title": title}
                newValueDict = {
                    "LastAcquired": None,       # clear magazine dates
                    "IssueDate": None,          # we will fill them in again later
                    "IssueStatus": "Skipped"    # assume there are no issues now
                }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                logger.debug('Magazine %s details reset' % title)

        mags = myDB.select('SELECT * from magazines')
        # now check the magazine titles and delete any with no issues
        for mag in mags:
            title = mag['Title']
            count = myDB.select('SELECT COUNT(Title) as counter FROM issues WHERE Title="%s"' % title)
            issues = count[0]['counter']
            if not issues:
                logger.debug('Magazine %s deleted as no issues found' % title)
                myDB.action('DELETE from magazines WHERE Title="%s"' % title)

    logger.info(' Checking [%s] for magazines' % mag_path)

    # massage the MAG_DEST_FILE config parameter into something we can use
    # with regular expression matching: escape every character, then swap
    # the escaped $IssueDate/$Title placeholders for named capture groups
    matchString = ''
    for char in lazylibrarian.MAG_DEST_FILE:
        matchString = matchString + '\\' + char

    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.MAG_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    # NOTE(review): '[' + booktypes + ']' is a character CLASS, not a group,
    # so multi-char extensions (and the '|' separators) are matched as single
    # characters. It happens to work because pattern.match() is unanchored at
    # the end, but '(?:' + booktypes + ')' would be the correct construct.
    matchString = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for dirname, dirnames, filenames in os.walk(mag_path):
        for fname in filenames[:]:
            # maybe not all magazines will be pdf?
            if formatter.is_valid_booktype(fname, booktype='mag'):
                try:
                    match = pattern.match(fname)
                    if match:
                        issuedate = match.group("issuedate")
                        title = match.group("title")
                    else:
                        logger.debug("Pattern match failed for [%s]" % fname)
                        continue
                except:
                    logger.debug("Invalid name format for [%s]" % fname)
                    continue

                logger.debug("Found Issue %s" % fname)

                issuefile = os.path.join(dirname, fname)  # full path to issue.pdf
                mtime = os.path.getmtime(issuefile)
                iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                # magazines : Title, Frequency, Regex, Status, MagazineAdded, LastAcquired, IssueDate, IssueStatus
                # issues    : Title, IssueAcquired, IssueDate, IssueFile

                controlValueDict = {"Title": title}

                # is this magazine already in the database?
                mag_entry = myDB.select('SELECT * from magazines WHERE Title="%s"' % title)
                if not mag_entry:
                    # need to add a new magazine to the database
                    newValueDict = {
                        "Frequency": None,  # unused currently
                        "Regex": None,
                        "Status": "Active",
                        "MagazineAdded": None,
                        "LastAcquired": None,
                        "IssueDate": None,
                        "IssueStatus": "Skipped"
                    }
                    logger.debug("Adding magazine %s" % title)
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # bugfix: was "lastacquired = None", leaving maglastacquired
                    # undefined on this path; only the magazineadded-is-None
                    # branch below prevented a NameError
                    maglastacquired = None
                    magissuedate = None
                    magazineadded = None
                else:
                    maglastacquired = mag_entry[0]['LastAcquired']
                    magissuedate = mag_entry[0]['IssueDate']
                    magazineadded = mag_entry[0]['MagazineAdded']

                # is this issue already in the database?
                controlValueDict = {"Title": title, "IssueDate": issuedate}
                issue_id = create_id("%s %s" % (title, issuedate))
                iss_entry = myDB.select('SELECT * from issues WHERE Title="%s" and IssueDate="%s"' % (
                    title, issuedate))
                if not iss_entry:
                    newValueDict = {
                        "IssueAcquired": iss_acquired,
                        "IssueID": issue_id,
                        "IssueFile": issuefile
                    }
                    logger.debug("Adding issue %s %s" % (title, issuedate))
                else:
                    # don't really need to do this each time
                    newValueDict = {"IssueID": issue_id}
                myDB.upsert("Issues", newValueDict, controlValueDict)

                create_cover(issuefile)

                # see if this issues date values are useful
                # if its a new magazine, magazineadded,magissuedate,lastacquired are all None
                # if magazineadded is NOT None, but the others are, we've deleted one or more issues
                # so the most recent dates may be wrong and need to be updated.
                # Set magazine_issuedate to issuedate of most recent issue we have
                # Set magazine_added to acquired date of earliest issue we have
                # Set magazine_lastacquired to acquired date of most recent issue we have
                # acquired dates are read from magazine file timestamps
                if magazineadded is None:  # new magazine, this might be the only issue
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "MagazineAdded": iss_acquired,
                        "LastAcquired": iss_acquired,
                        "IssueDate": issuedate,
                        "IssueStatus": "Open"
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                else:
                    if iss_acquired < magazineadded:
                        controlValueDict = {"Title": title}
                        newValueDict = {"MagazineAdded": iss_acquired}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    if maglastacquired is None or iss_acquired > maglastacquired:
                        controlValueDict = {"Title": title}
                        newValueDict = {"LastAcquired": iss_acquired}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    if magissuedate is None or issuedate > magissuedate:
                        controlValueDict = {"Title": title}
                        newValueDict = {"IssueDate": issuedate}
                        myDB.upsert("magazines", newValueDict, controlValueDict)

    magcount = myDB.action("select count(*) from magazines").fetchone()
    isscount = myDB.action("select count(*) from issues").fetchone()
    logger.info("Magazine scan complete, found %s magazines, %s issues" % (magcount['count(*)'], isscount['count(*)']))
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None, mode=None):
    """Move/copy a downloaded book or magazine into the library.

    For books with IMP_CALIBREDB configured, renames the files and lets
    calibredb import them, then rescans the calibre author directory.
    Otherwise copies the wanted files (book/mag format, .jpg, .opf) into
    dest_path, renamed to global_name.  Finally removes the source folder
    unless DESTINATION_COPY is set, it is the download root, or the
    download was a torrent/magnet (we may still be seeding).

    pp_path     -- post-processing source directory
    dest_path   -- target directory (unused on the calibre path)
    authorname  -- author, used to locate the calibre directory; None for mags
    bookname    -- book title; None means we are processing a magazine
    global_name -- preferred "Title - Author" style base filename
    mode        -- download mode ('torrent', 'magnet', ...); None for
                   alternate imports
    Returns True on success, False on any failure.
    """
    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'

    got_book = False
    for bookfile in os.listdir(pp_path):
        if is_valid_booktype(bookfile, booktype=booktype):
            got_book = bookfile
            break

    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn(
            'Failed to locate a book/magazine in %s, leaving for manual processing' % pp_path)
        return False

    # Do we want calibre to import the book for us
    if bookname and len(lazylibrarian.IMP_CALIBREDB):
        try:
            logger.debug('Importing %s into calibre library' % (global_name))
            # calibre is broken, ignores metadata.opf and book_name.opf
            # also ignores --title and --author as parameters
            # so we have to configure calibre to parse the filename for author/title
            # and rename the book to the format we want calibre to use
            for bookfile in os.listdir(pp_path):
                filename, extn = os.path.splitext(bookfile)
                # calibre does not like quotes in author names
                os.rename(
                    os.path.join(pp_path, filename + extn),
                    os.path.join(pp_path, global_name.replace('"', '_') + extn))

            params = [
                lazylibrarian.IMP_CALIBREDB,
                'add',
                # '--title="%s"' % bookname,
                # '--author="%s"' % unaccented(authorname),
                '-1',
                '--with-library',
                lazylibrarian.DESTINATION_DIR, pp_path
            ]
            logger.debug(str(params))
            res = subprocess.check_output(params, stderr=subprocess.STDOUT)
            if res:
                logger.debug(
                    '%s reports: %s' % (lazylibrarian.IMP_CALIBREDB, unaccented_str(res)))

            # calibre does not like quotes in author names
            calibre_dir = os.path.join(
                lazylibrarian.DESTINATION_DIR, unaccented_str(authorname.replace('"', '_')), '')
            if os.path.isdir(calibre_dir):
                # rescan authors directory so we get the new book in our database
                imported = LibraryScan(calibre_dir)
            else:
                logger.error("Failed to locate calibre dir [%s]" % calibre_dir)
                imported = False
                # imported = LibraryScan(lazylibrarian.DESTINATION_DIR)  # may have to rescan whole library instead
            if not imported and 'already exist' not in res:
                return False
        except subprocess.CalledProcessError as e:
            logger.debug(params)
            logger.debug('calibredb import failed: %s' % e.output)
            return False
        except OSError as e:
            logger.debug('calibredb failed, %s' % e.strerror)
            return False
    else:
        # we are copying the files ourselves, either it's a magazine or we don't want to use calibre
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
        elif not os.path.isdir(dest_path):
            logger.debug('%s exists but is not a directory, deleting it' % dest_path)
            try:
                os.remove(dest_path)
            except OSError as why:
                logger.debug('Failed to delete %s, %s' % (dest_path, why.strerror))
                return False

        if not os.path.exists(dest_path):
            try:
                os.makedirs(dest_path)
            except OSError as why:
                logger.debug('Failed to create directory %s, %s' % (dest_path, why.strerror))
                return False

        # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
        for fname in os.listdir(pp_path):
            if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                    is_valid_booktype(fname, booktype=booktype):
                logger.debug('Copying %s to directory %s' % (fname, dest_path))
                try:
                    shutil.copyfile(
                        os.path.join(pp_path, fname),
                        os.path.join(dest_path, global_name + os.path.splitext(fname)[1]))
                except Exception as why:
                    logger.debug("Failed to copy file %s to %s, %s" % (fname, dest_path, str(why)))
                    return False
            else:
                logger.debug('Ignoring unwanted file: %s' % fname)

    # calibre or ll copied the files we want, now delete source files if not in download root dir
    # and if DESTINATION_COPY not set, but don't delete source files if copy failed
    # also we shouldn't delete if source was a torrent as we may be seeding
    if mode is None:
        mode = 'unknown'  # no mode for alternate_import
    # bugfix: was "mode is not 'torrent'" - identity comparison with a string
    # literal relies on CPython interning; use a value comparison instead
    if mode not in ('torrent', 'magnet'):
        if not lazylibrarian.DESTINATION_COPY:
            if pp_path != lazylibrarian.DOWNLOAD_DIR:
                if os.path.isdir(pp_path):
                    # calibre might have already deleted it
                    try:
                        shutil.rmtree(pp_path)
                    except Exception as why:
                        logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))
                        return False
    return True
def LibraryScan(dir=None):
    """Scan a directory tree for ebooks and sync them into the database.

    For each book file found, tries to identify author/title from (in
    increasing priority) the filename pattern, embedded epub/mobi metadata,
    and any .opf file in the same directory; adds unknown authors via
    GoodReads (when ADD_AUTHOR is set), marks matched books "Open", caches
    ISBN->language mappings, and finally refreshes per-author book counts.

    dir -- directory to scan; defaults to lazylibrarian.DOWNLOAD_DIR.
    Returns None; results are written to the database and the log.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR
    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    # rebuild the per-scan stats table from scratch
    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
         GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        # mark "Open" books whose file has vanished as not found
        books = myDB.select('select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]
            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn("Book %s - %s updated as not found on disk" % (bookAuthor, bookName))

    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []

    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching: escape every character, then swap the
    # escaped $Author/$Title placeholders for named capture groups
    matchString = ""
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + "\\" + char

    booktypes = ""
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + "|" + book_type
    # NOTE(review): '[' + booktypes + ']' builds a character class, not an
    # alternation group; it only works because match() is unanchored at the end
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + "\.[" + booktypes + "]"
    )
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            # skip hidden directories
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0  # NOTE: reused below first as a flag, then as a regex match, then as a DB row
                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s" % (
                            dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                            subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                        )
                    )
                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split(".")
                    extn = words[len(words) - 1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if "title" in res and "creator" in res:  # this is the minimum we need
                            match = 1
                            book = res["title"]
                            author = res["creator"]
                            if "language" in res:
                                language = res["language"]
                            if "identifier" in res:
                                isbn = res["identifier"]
                            if "type" in res:
                                extn = res["type"]
                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn))
                        else:
                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if "title" in res and "creator" in res:  # this is the minimum we need
                        match = 1
                        book = res["title"]
                        author = res["creator"]
                        if "language" in res:
                            language = res["language"]
                        if "identifier" in res:
                            isbn = res["identifier"]
                        logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                            if not match:
                                myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                            else:
                                logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(",")
                            author = words[1].strip() + " " + words[0].strip()  # "forename surname"
                        # single-letter forename: normalise "J R R" style to "J.R.R."
                        # NOTE(review): author[1] raises IndexError on a 1-char author name - confirm upstream guards
                        if author[1] == " ":
                            author = author.replace(" ", ".")
                            author = author.replace("..", ".")

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author
                        ).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones
                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn("Error finding author id for [%s]" % author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr["authorname"]
                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace(".", "_")
                                match_auth = match_auth.replace(" ", "_")
                                match_auth = match_auth.replace("__", "_")
                                match_name = authorname.replace(".", "_")
                                match_name = match_name.replace(" ", "_")
                                match_name = match_name.replace("__", "_")
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)
                                    )

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr["authorname"]
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' % author
                                    ).fetchone()
                                    if not check_exist_author:
                                        logger.debug("Adding new author [%s]" % author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' % author
                                            ).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug("Failed to match author [%s] in database" % author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', "").replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)
                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)
                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' % bookid
                                ).fetchone()
                                if check_status["Status"] != "Open":
                                    # update status as we've got this book
                                    myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                                # update book location so we can check if it
                                # gets removed, or allow click-to-open
                                myDB.action(
                                    'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)
                                )
                                new_book_count += 1

    # scan finished - report statistics
    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if stats["sum(GR_book_hits)"] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
        logger.debug("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
        logger.debug("Language cache was hit %s times" % stats["sum(cache_hits)"])
        logger.debug("Unwanted language removed %s books" % stats["sum(bad_lang)"])
        logger.debug("Unwanted characters removed %s books" % stats["sum(bad_char)"])
        logger.debug("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize["counter"])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select("select AuthorName from authors")
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug("Updating bookcounts for %i authors" % len(authors))
    for author in authors:
        name = author["AuthorName"]
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % name
        ).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks["counter"], name))
        # NOTE(review): despite the name, totalbooks counts non-Ignored books
        # and feeds UnignoredBooks; the TotalBooks column is not updated here
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' % name
        ).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks["counter"], name))

    logger.info("Library scan complete")
def processDir(reset=False): try: threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "POSTPROCESS" processpath = lazylibrarian.DIRECTORY('Download') logger.debug('Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError as why: logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror)) return myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) == 0: logger.info('Nothing marked as snatched.') scheduleJob(action='Stop', target='processDir') return if len(downloads) == 0: logger.info('No downloads are found. Nothing to process yet.') return logger.info("Checking %s download%s for %s snatched file%s" % (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched)))) ppcount = 0 for book in snatched: # if torrent, see if we can get current status from the downloader as the name # may have been changed once magnet resolved, or download started or completed # depending on torrent downloader. Usenet doesn't change the name. We like usenet. 
torrentname = '' try: logger.debug("%s was sent to %s" % (book['NZBtitle'], book['Source'])) if book['Source'] == 'TRANSMISSION': torrentname = transmission.getTorrentFolder(book['DownloadID']) elif book['Source'] == 'UTORRENT': torrentname = utorrent.nameTorrent(book['DownloadID']) elif book['Source'] == 'RTORRENT': torrentname = rtorrent.getName(book['DownloadID']) elif book['Source'] == 'QBITTORRENT': torrentname = qbittorrent.getName(book['DownloadID']) elif book['Source'] == 'SYNOLOGY_TOR': torrentname = synology.getName(book['DownloadID']) elif book['Source'] == 'DELUGEWEBUI': torrentname = deluge.getTorrentFolder(book['DownloadID']) elif book['Source'] == 'DELUGERPC': client = DelugeRPCClient(lazylibrarian.DELUGE_HOST, int(lazylibrarian.DELUGE_PORT), lazylibrarian.DELUGE_USER, lazylibrarian.DELUGE_PASS) try: client.connect() result = client.call('core.get_torrent_status', book['DownloadID'], {}) # for item in result: # logger.debug ('Deluge RPC result %s: %s' % (item, result[item])) if 'name' in result: torrentname = unaccented_str(result['name']) except Exception as e: logger.debug('DelugeRPC failed %s' % str(e)) except Exception as e: logger.debug("Failed to get updated torrent name from %s for %s: %s" % (book['Source'], book['DownloadID'], str(e))) matchtitle = unaccented_str(book['NZBtitle']) if torrentname and torrentname != matchtitle: logger.debug("%s Changing [%s] to [%s]" % (book['Source'], matchtitle, torrentname)) myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl = "%s"' % (torrentname, book['NZBurl'])) matchtitle = torrentname # here we could also check percentage downloaded or eta or status? # If downloader says it hasn't completed, no need to look for it. 
matches = [] logger.info('Looking for %s in %s' % (matchtitle, processpath)) for fname in downloads: # skip if failed before or incomplete torrents, or incomplete btsync extn = os.path.splitext(fname)[1] if extn not in ['.fail', '.part', '.bts', '.!ut']: # This is to get round differences in torrent filenames. # Usenet is ok, but Torrents aren't always returned with the name we searched for # We ask the torrent downloader for the torrent name, but don't always get an answer # so we try to do a "best match" on the name, there might be a better way... if isinstance(fname, str): matchname = fname.decode(lazylibrarian.SYS_ENCODING) else: matchname = fname if ' LL.(' in matchname: matchname = matchname.split(' LL.(')[0] match = 0 if matchtitle: if ' LL.(' in matchtitle: matchtitle = matchtitle.split(' LL.(')[0] match = fuzz.token_set_ratio(matchtitle, matchname) if match and match >= lazylibrarian.DLOAD_RATIO: fname = matchname if os.path.isfile(os.path.join(processpath, fname)): # handle single file downloads here. Book/mag file in download root. 
# move the file into it's own subdirectory so we don't move/delete things that aren't ours logger.debug('filename [%s] is a file' % os.path.join(processpath, fname)) if is_valid_booktype(fname, booktype="book") \ or is_valid_booktype(fname, booktype="mag"): logger.debug('filename [%s] is a valid book/mag' % os.path.join(processpath, fname)) if bts_file(processpath): logger.debug("Skipping %s, found a .bts file" % processpath) else: fname = os.path.splitext(fname)[0] dirname = os.path.join(processpath, fname) if not os.path.exists(dirname): try: os.makedirs(dirname) setperm(dirname) except OSError as why: logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror)) if os.path.exists(dirname): # move the book and any related files too # ie other book formats, or opf, jpg with same title # can't move metadata.opf or cover.jpg or similar # as can't be sure they are ours # not sure if we need a new listdir here, or whether we can use the old one list_dir = os.listdir(processpath) for ourfile in list_dir: if ourfile.startswith(fname): if is_valid_booktype(ourfile, booktype="book") \ or is_valid_booktype(ourfile, booktype="mag") \ or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']: try: if lazylibrarian.DESTINATION_COPY: shutil.copyfile(os.path.join(processpath, ourfile), os.path.join(dirname, ourfile)) setperm(os.path.join(dirname, ourfile)) else: shutil.move(os.path.join(processpath, ourfile), os.path.join(dirname, ourfile)) setperm(os.path.join(dirname, ourfile)) except Exception as why: logger.debug("Failed to copy/move file %s to %s, %s" % (ourfile, dirname, str(why))) pp_path = os.path.join(processpath, fname) if os.path.isdir(pp_path): logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, matchtitle)) if not os.listdir(pp_path): logger.debug("Skipping %s, folder is empty" % pp_path) elif bts_file(pp_path): logger.debug("Skipping %s, found a .bts file" % pp_path) else: matches.append([match, pp_path, book]) else: pp_path = 
os.path.join(processpath, fname) matches.append([match, pp_path, book]) # so we can report closest match else: logger.debug('Skipping %s' % fname) match = 0 if matches: highest = max(matches, key=lambda x: x[0]) match = highest[0] pp_path = highest[1] book = highest[2] if match and match >= lazylibrarian.DLOAD_RATIO: logger.debug(u'Found match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle'])) data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: # it's a book logger.debug(u'Processing book %s' % book['BookID']) authorname = data['AuthorName'] bookname = data['BookName'] if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace( '$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace( '$Title', bookname) global_name = unaccented(global_name) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? 
dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = unaccented_str(replace_all(dest_path, dic)) dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING) else: data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # it's a magazine logger.debug(u'Processing magazine %s' % book['BookID']) # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data['IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} mag_name = unaccented_str(replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join(processpath, dest_path).encode( lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace( '$Title', mag_name) global_name = unaccented(global_name) else: # not recognised logger.debug('Nothing in database matching "%s"' % book['BookID']) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) if match: logger.debug(u'Closest match (%s%%): %s' % (match, pp_path)) #for match in matches: # 
logger.info('Match: %s%% %s' % (match[0], match[1])) continue processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"BookID": book['BookID'], "NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Processed", "NZBDate": now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname: # it's a book, if None it's a magazine if len(lazylibrarian.IMP_CALIBREDB): logger.debug('Calibre should have created the extras for us') else: processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue: if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit(): older = int(mostrecentissue) > int(book['AuxInfo']) # issuenumber else: older = mostrecentissue > book['AuxInfo'] # YYYY-MM-DD else: older = False if older: # check this in case processing issues arriving out of order newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"} else: newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(), "IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']} newValueDict = {"IssueAcquired": today(), "IssueFile": dest_file, "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue create_cover(dest_file) # calibre or ll copied/moved the files we want, now delete source files to_delete = True if book['NZBmode'] in 
['torrent', 'magnet']: # Only delete torrents if we don't want to keep seeding if lazylibrarian.KEEP_SEEDING: logger.warn('%s is seeding %s %s' % (book['Source'], book['NZBmode'], book['NZBtitle'])) to_delete = False else: # ask downloader to delete the torrent, but not the files # we may delete them later, depending on other settings if book['DownloadID'] != "unknown": logger.debug('Removing %s from %s' % (book['NZBtitle'], book['Source'].lower())) delete_task(book['Source'], book['DownloadID'], False) else: logger.warn("Unable to remove %s from %s, no DownloadID" % (book['NZBtitle'], book['Source'].lower())) if to_delete: # only delete the files if not in download root dir and if DESTINATION_COPY not set if not lazylibrarian.DESTINATION_COPY and (pp_path != processpath): if os.path.isdir(pp_path): # calibre might have already deleted it? try: shutil.rmtree(pp_path) except Exception as why: logger.debug("Unable to remove %s, %s" % (pp_path, str(why))) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now())) else: logger.error('Postprocessing for %s has failed.' 
% global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Failed", "NZBDate": now()} myDB.upsert("wanted", newValueDict, controlValueDict) # if it's a book, reset status so we try for a different version # if it's a magazine, user can select a different one from pastissues table if bookname: myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID']) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except Exception as e: logger.debug("Unable to rename %s, %s" % (pp_path, str(e))) downloads = os.listdir(processpath) # check in case we processed/deleted some above for directory in downloads: dname, extn = os.path.splitext(directory) if "LL.(" in dname and extn not in ['.fail', '.part', '.bts', '.!ut']: bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " found in download directory") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount == 0: logger.info('No snatched books/mags have been found') else: logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount))) # Now check for any that are still marked snatched... if lazylibrarian.TASK_AGE: snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) > 0: for snatch in snatched: # FUTURE: we could check percentage downloaded or eta? 
# if percentage is increasing, it's just slow try: when_snatched = time.strptime(snatch['NZBdate'], '%Y-%m-%d %H:%M:%S') when_snatched = time.mktime(when_snatched) diff = time.time() - when_snatched # time difference in seconds except: diff = 0 hours = int(diff / 3600) if hours >= lazylibrarian.TASK_AGE: logger.warn('%s was sent to %s %s hours ago, deleting failed task' % (snatch['NZBtitle'], snatch['Source'].lower(), hours)) # change status to "Failed", and ask downloader to delete task and files if snatch['BookID'] != 'unknown': myDB.action('UPDATE wanted SET Status="Failed" WHERE BookID="%s"' % snatch['BookID']) myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % snatch['BookID']) delete_task(snatch['Source'], snatch['DownloadID'], True) if reset: scheduleJob(action='Restart', target='processDir') except Exception as e: logger.error('Unhandled exception in processDir: %s' % traceback.format_exc())
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added

        startdir: directory to scan; defaults to lazylibrarian.DESTINATION_DIR.
        Side effects: updates books/languages/stats/authors tables, caches
        cover images, and marks missing books with NOTFOUND_STATUS on a
        full scan of DESTINATION_DIR.
    """
    if not startdir:
        if not lazylibrarian.DESTINATION_DIR:
            # no directory given and none configured -- nothing to scan
            return 0
        else:
            startdir = lazylibrarian.DESTINATION_DIR

    if not os.path.isdir(startdir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % startdir)
        return 0

    myDB = database.DBConnection()

    # keep statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        myDB.action('DELETE from stats')

    logger.info('Scanning ebook directory: %s' % startdir)

    new_book_count = 0
    file_count = 0
    author = ""

    # On a full scan of the main library, first mark any "Open" books whose
    # file has vanished from disk as not-found and clear their location.
    if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    # Escape every character of the filename template so it is regex-safe.
    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    # NOTE(review): '[' + booktypes + ']' builds a character CLASS (e.g.
    # [epub|mobi]) rather than an alternation group (epub|mobi) -- it matches
    # a single character, not a whole extension. Appears intentional-enough
    # for prefix matching via pattern.match(), but worth confirming.
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(startdir):
        for directory in d[:]:
            # prevent magazine being scanned
            if directory.startswith("_") or directory.startswith("."):
                d.remove(directory)

        for files in f:
            file_count += 1

            # Py2: os.walk may yield bytestrings; normalise to unicode
            if isinstance(r, str):
                r = r.decode(lazylibrarian.SYS_ENCODING)

            subdirectory = r.replace(startdir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if is_valid_booktype(files):
                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (startdir, subdirectory))
                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    extn = os.path.splitext(files)[1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == ".epub") or (extn == ".mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))
                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:
                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" % (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' % (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        # single-letter forename initial: join initials with dots
                        # NOTE(review): author[1] raises IndexError on a
                        # one-character author name -- TODO confirm inputs.
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones
                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" % author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']
                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = unaccented(match_name)
                                match_auth = unaccented(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                                    if not check_exist_author:
                                        logger.info(
                                            "Adding new author [%s]" % author)
                                        try:
                                            addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" % author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)
                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)
                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' % bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book
                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                    book_filename = os.path.join(r, files)
                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open
                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid))
                                    # update cover file to cover.jpg in book folder (if exists)
                                    bookdir = book_filename.rsplit(os.sep, 1)[0]
                                    coverimg = os.path.join(bookdir, 'cover.jpg')
                                    cachedir = os.path.join(str(lazylibrarian.PROG_DIR),
                                                            'data' + os.sep + 'images' + os.sep + 'cache')
                                    cacheimg = os.path.join(cachedir, bookid + '.jpg')
                                    if os.path.isfile(coverimg):
                                        copyfile(coverimg, cacheimg)
                                    new_book_count += 1
                            else:
                                logger.debug(
                                    "Failed to match book [%s] by [%s] in database" % (book, author))

    logger.info("%s new/modified book%s found and added to the database" %
                (new_book_count, plural(new_book_count)))
    logger.info("%s file%s processed" % (file_count, plural(file_count)))

    # show statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats").fetchone()
        if stats['sum(GR_book_hits)'] is not None:
            # only show stats if new books added
            if lazylibrarian.BOOK_API == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)'])))
            if lazylibrarian.BOOK_API == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (stats['sum(LT_lang_hits)'], plural(stats['sum(LT_lang_hits)'])))
            logger.debug("Language cache was hit %s time%s" %
                         (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (stats['sum(bad_lang)'], plural(stats['sum(bad_lang)'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
            logger.debug("Unable to cache %s book%s with missing ISBN" %
                         (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
            logger.debug("Found %s duplicate book%s" %
                         (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
            nolang = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))

        authors = myDB.select('select AuthorID from authors')
        # Update bookcounts for all authors, not just new ones - refresh may have located
        # new books for existing authors especially if switched provider gb/gr
    else:
        # single author/book import
        authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author)

    logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
    for author in authors:
        update_totals(author['AuthorID'])

    # fetch and cache any book covers we only have remote URLs for
    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    if len(images):
        logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            bookid = item['bookid']
            bookimg = item['bookimg']
            bookname = item['bookname']
            newimg = cache_cover(bookid, bookimg)
            if newimg is not None:
                myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

    # likewise cache author images that are still remote URLs
    images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
    if len(images):
        logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            authorid = item['authorid']
            authorimg = item['authorimg']
            authorname = item['authorname']
            newimg = cache_cover(authorid, authorimg)
            if newimg is not None:
                myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))

    setWorkPages()
    logger.info('Library scan complete')
    return new_book_count
def magazineScan(title=None):
    """ Scan the magazine directory tree for issue files.

        title: scan only this magazine's folder; None scans all magazines.
        Side effects: sets lazylibrarian.MAG_UPDATE while running, prunes
        missing issues on a full scan, optionally renames issue files
        (MAG_RENAME), creates covers/OPF, and upserts magazines/issues tables.
        Never raises: all exceptions are logged and MAG_UPDATE is cleared.
    """
    lazylibrarian.MAG_UPDATE = 1
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        onetitle = title
        if onetitle:
            mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER'].replace('$Title', onetitle)
        else:
            mag_path = os.path.dirname(lazylibrarian.CONFIG['MAG_DEST_FOLDER'])
        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            # now check the magazine titles and delete any with no issues
            if lazylibrarian.CONFIG['MAG_DELFOLDER']:
                mags = myDB.select('SELECT Title,count(Title) as counter from issues group by Title')
                for mag in mags:
                    title = mag['Title']
                    issues = mag['counter']
                    if not issues:
                        logger.debug('Magazine %s deleted as no issues found' % title)
                        myDB.action('DELETE from magazines WHERE Title=?', (title,))

        logger.info(' Checking [%s] for magazines' % mag_path)

        # Escape every character of the filename template so it is regex-safe.
        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # title_pattern captures both $Title and $IssueDate;
        # date_pattern captures $IssueDate only (title taken from folder name)
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # first try: filename matches the full Title+IssueDate template
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            title = match.group("title")
                            issuedate = match.group("issuedate")
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Title pattern [%s][%s]" % (title, issuedate))
                            match = True
                        else:
                            logger.debug("Title pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False

                    # second try: date-only template, title from containing folder
                    if not match:
                        # noinspection PyBroadException
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                    logger.debug("Date pattern [%s][%s]" % (title, issuedate))
                                match = True
                            else:
                                logger.debug("Date pattern match failed for [%s]" % fname)
                        except Exception:
                            match = False

                    if not match:
                        title = os.path.basename(rootdir)
                        issuedate = ''

                    # normalise separators before tokenising for date extraction
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '',
                           '[': ' ', ']': ' ', '#': '# '}
                    if issuedate:
                        exploded = replace_all(issuedate, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Date regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    # fall back to extracting a date from the whole filename
                    if not issuedate:
                        exploded = replace_all(fname, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("File regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        logger.warn("Invalid name format for [%s]" % fname)
                        continue

                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                    # optionally rename the file (and its .jpg/.opf companions)
                    # to the configured MAG_DEST_FILE layout
                    if lazylibrarian.CONFIG['MAG_RENAME']:
                        filedate = issuedate
                        if issuedate and issuedate.isdigit():
                            if len(issuedate) == 8:
                                # 8 digits: either YYYYnnnn (issue in year) or VVVVIIII
                                if check_year(issuedate[:4]):
                                    filedate = 'Issue %d %s' % (int(issuedate[4:]), issuedate[:4])
                                else:
                                    filedate = 'Vol %d Iss %d' % (int(issuedate[:4]), int(issuedate[4:]))
                            elif len(issuedate) == 12:
                                # 12 digits: YYYYVVVVIIII
                                filedate = 'Vol %d Iss %d %s' % (int(issuedate[4:8]), int(issuedate[8:]),
                                                                 issuedate[:4])
                            else:
                                filedate = str(issuedate).zfill(4)

                        extn = os.path.splitext(fname)[1]
                        newfname = lazylibrarian.CONFIG['MAG_DEST_FILE'].replace('$Title', title).replace(
                            '$IssueDate', filedate)
                        newfname = newfname + extn
                        if newfname and newfname != fname:
                            logger.debug("Rename %s -> %s" % (fname, newfname))
                            newissuefile = os.path.join(rootdir, newfname)
                            newissuefile = safe_move(issuefile, newissuefile)
                            if os.path.exists(issuefile.replace(extn, '.jpg')):
                                safe_move(issuefile.replace(extn, '.jpg'), newissuefile.replace(extn, '.jpg'))
                            if os.path.exists(issuefile.replace(extn, '.opf')):
                                safe_move(issuefile.replace(extn, '.opf'), newissuefile.replace(extn, '.opf'))
                            issuefile = newissuefile

                    logger.debug("Found %s Issue %s" % (title, issuedate))
                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired,IssueDate,MagazineAdded,CoverPage from magazines WHERE Title=?', (title,))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None,
                            "CoverPage": 1
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                        maglastacquired = None
                        magcoverpage = 1
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)
                        magcoverpage = mag_entry['CoverPage']

                    issuedate = str(issuedate).zfill(4)  # for sorting issue numbers

                    # is this issue already in the database?
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match('SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                                           (title, issuedate))
                    new_entry = False
                    if not iss_entry or iss_entry['IssueFile'] != issuefile:
                        new_entry = True  # new entry or name changed
                        if not iss_entry:
                            logger.debug("Adding issue %s %s" % (title, issuedate))
                        else:
                            logger.debug("Updating issue %s %s" % (title, issuedate))
                        controlValueDict = {"Title": title, "IssueDate": issuedate}
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)

                    # touch a marker file so the ebook scanner skips this folder
                    ignorefile = os.path.join(os.path.dirname(issuefile), '.ll_ignore')
                    with open(ignorefile, 'a'):
                        os.utime(ignorefile, None)

                    createMagCover(issuefile, pagenum=magcoverpage, refresh=new_entry)
                    lazylibrarian.postprocess.processMAGOPF(issuefile, title, issuedate, issue_id,
                                                            overwrite=new_entry)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                        (magcount['count(*)'], plural(magcount['count(*)']),
                         isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")
        lazylibrarian.MAG_UPDATE = 0

    except Exception:
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
def processDir(force=False, reset=False):
    """ Post-process the download directory: match downloaded folders/files
        against snatched books and magazines and move them into the library.

        force: process even when nothing is marked Snatched.
        reset: reschedule the processDir job when done.
        Returns False only when the download directory cannot be listed.
        Side effects: moves/renames files, updates wanted/books/magazines/
        issues tables, sends download notifications.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    # fall back to the current working directory if no valid download dir
    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched)))
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round differences in torrent filenames.
                    # Torrents aren't always returned with the name we searched for
                    # there might be a better way...
                    if isinstance(fname, str):
                        # Py2: listdir may return bytestrings
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    # strip our " LL.(bookid)" suffix before fuzzy comparison
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    matchtitle = book['NZBtitle']
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match >= 95:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here...
                            if formatter.is_valid_booktype(fname, booktype="book") \
                                    or formatter.is_valid_booktype(fname, booktype="mag"):
                                # wrap the lone file in a folder named after it so the
                                # rest of the pipeline can treat it like a folder download
                                dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                                if not os.path.exists(dirname):
                                    try:
                                        os.makedirs(dirname)
                                    except OSError as why:
                                        logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                if os.path.exists(dirname):
                                    try:
                                        shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                        fname = os.path.splitext(fname)[0]
                                    except Exception as why:
                                        logger.debug("Failed to move file %s to %s, %s" %
                                                     (fname, dirname, str(why)))
                        if os.path.isdir(os.path.join(processpath, fname)):
                            pp_path = os.path.join(processpath, fname)
                            logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                            found = True
                            break
                    else:
                        logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
                else:
                    logger.debug('Skipping %s' % fname)

            if found:
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    # it's a book: build destination from the ebook templates
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = common.remove_accents(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # it's a magazine
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            # prefix with '_' so magazine folders sort away from author folders
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = common.remove_accents(global_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #    os.chmod(dest_path, 0777)
            # except Exception, e:
            #    logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    # NOTE(review): string comparison of dates -- relies on
                    # AuxInfo being zero-padded YYYY-MM-DD; verify for
                    # issue-numbered magazines.
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except:
                    # NOTE(review): bare except hides the reason; the newer
                    # variant of this function logs the exception detail.
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            # any remaining folders tagged with our "LL.(bookid)" marker get
            # imported directly
            if "LL.(" in directory and not directory.endswith('.fail'):
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset:
        common.schedule_job(action='Restart', target='processDir')
def magazineScan(title=None):
    """Scan the magazine destination folder(s) and sync issues with the database.

    If ``title`` is given, only that magazine's folder is scanned; otherwise the
    parent of MAG_DEST_FOLDER is walked. On a full scan (FULL_SCAN and no title)
    issues missing from disk are deleted first and empty magazines pruned.
    Sets lazylibrarian.MAG_UPDATE while running; never raises (logs traceback).
    """
    lazylibrarian.MAG_UPDATE = 1
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        onetitle = title  # remember caller's request; 'title' is reused as a loop variable below
        if onetitle:
            mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER'].replace('$Title', onetitle)
        else:
            mag_path = os.path.dirname(lazylibrarian.CONFIG['MAG_DEST_FOLDER'])
        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']
                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,       # clear magazine dates
                        "IssueDate": None,          # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"    # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)
            # now check the magazine titles and delete any with no issues
            if lazylibrarian.CONFIG['MAG_DELFOLDER']:
                mags = myDB.select('SELECT Title,count(Title) as counter from issues group by Title')
                for mag in mags:
                    title = mag['Title']
                    issues = mag['counter']
                    if not issues:
                        logger.debug('Magazine %s deleted as no issues found' % title)
                        myDB.action('DELETE from magazines WHERE Title=?', (title,))

        logger.info(' Checking [%s] for magazines' % mag_path)

        # Backslash-escape every character of MAG_DEST_FILE so it can be used
        # literally inside a regex, then substitute capture groups for the
        # $Title / $IssueDate placeholders.
        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # NOTE(review): '[' + booktypes + ']' builds a character class, not an
        # alternation group — '[pdf|epub]' matches any single one of those
        # characters. It works here because match() is unanchored at the end,
        # but '(' + booktypes + ')$' would be the intended expression — confirm.
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # First try the full pattern (title + date), then the
                    # date-only pattern using the folder name as the title.
                    # 'match' is reused: regex match object, then True/False.
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            title = match.group("title")
                            issuedate = match.group("issuedate")
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Title pattern [%s][%s]" % (title, issuedate))
                            match = True
                        else:
                            logger.debug("Title pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False

                    if not match:
                        # noinspection PyBroadException
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                    logger.debug("Date pattern [%s][%s]" % (title, issuedate))
                                match = True
                            else:
                                logger.debug("Date pattern match failed for [%s]" % fname)
                        except Exception:
                            match = False

                    if not match:
                        # fall back to folder name as title, date unknown
                        title = os.path.basename(rootdir)
                        issuedate = ''

                    # normalise separators to spaces before tokenising the date
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ',
                           '(': '', ')': '', '[': ' ', ']': ' ', '#': '# '}
                    if issuedate:
                        exploded = replace_all(issuedate, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Date regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''
                    if not issuedate:
                        # no usable date in the captured group, try the whole filename
                        exploded = replace_all(fname, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("File regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''
                    if not issuedate:
                        logger.warn("Invalid name format for [%s]" % fname)
                        continue

                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                    if lazylibrarian.CONFIG['MAG_RENAME']:
                        # Build a human-readable date/volume string for the new filename.
                        filedate = issuedate
                        if issuedate and issuedate.isdigit():
                            if len(issuedate) == 8:
                                # 8 digits: either YYYYnnnn (year + issue) or VVVVIIII (vol + issue)
                                if check_year(issuedate[:4]):
                                    filedate = 'Issue %d %s' % (int(issuedate[4:]), issuedate[:4])
                                else:
                                    filedate = 'Vol %d Iss %d' % (int(issuedate[:4]), int(issuedate[4:]))
                            elif len(issuedate) == 12:
                                # 12 digits: YYYYVVVVIIII
                                filedate = 'Vol %d Iss %d %s' % (int(issuedate[4:8]),
                                                                 int(issuedate[8:]), issuedate[:4])
                            else:
                                filedate = str(issuedate).zfill(4)
                        extn = os.path.splitext(fname)[1]
                        newfname = lazylibrarian.CONFIG['MAG_DEST_FILE'].replace(
                            '$Title', title).replace('$IssueDate', filedate)
                        newfname = newfname + extn
                        if newfname and newfname != fname:
                            logger.debug("Rename %s -> %s" % (fname, newfname))
                            newissuefile = os.path.join(rootdir, newfname)
                            newissuefile = safe_move(issuefile, newissuefile)
                            # move any sidecar cover/metadata files along with the issue
                            if os.path.exists(issuefile.replace(extn, '.jpg')):
                                safe_move(issuefile.replace(extn, '.jpg'),
                                          newissuefile.replace(extn, '.jpg'))
                            if os.path.exists(issuefile.replace(extn, '.opf')):
                                safe_move(issuefile.replace(extn, '.opf'),
                                          newissuefile.replace(extn, '.opf'))
                            issuefile = newissuefile

                    logger.debug("Found %s Issue %s" % (title, issuedate))
                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired,IssueDate,MagazineAdded,CoverPage from magazines WHERE Title=?',
                        (title,))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None,
                            "CoverPage": 1
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                        maglastacquired = None
                        magcoverpage = 1
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)
                        magcoverpage = mag_entry['CoverPage']

                    issuedate = str(issuedate).zfill(4)  # for sorting issue numbers

                    # is this issue already in the database?
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match(
                        'SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                        (title, issuedate))
                    new_entry = False
                    if not iss_entry or iss_entry['IssueFile'] != issuefile:
                        new_entry = True  # new entry or name changed
                        if not iss_entry:
                            logger.debug("Adding issue %s %s" % (title, issuedate))
                        else:
                            logger.debug("Updating issue %s %s" % (title, issuedate))
                        controlValueDict = {"Title": title, "IssueDate": issuedate}
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)

                    # touch a marker file so the ebook library scanner skips this folder
                    ignorefile = os.path.join(os.path.dirname(issuefile), '.ll_ignore')
                    with open(ignorefile, 'a'):
                        os.utime(ignorefile, None)

                    createMagCover(issuefile, pagenum=magcoverpage, refresh=new_entry)
                    lazylibrarian.postprocess.processMAGOPF(
                        issuefile, title, issuedate, issue_id, overwrite=new_entry)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:
                        # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" % (
                magcount['count(*)'], plural(magcount['count(*)']),
                isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")
        lazylibrarian.MAG_UPDATE = 0
    except Exception:
        # clear the busy flag even on failure so the scanner isn't wedged
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
def audioRename(bookid):
    """Rename/renumber the parts of an audiobook and move them to the library folder.

    Looks up the book's AudioFile in the database, reads ID3 tags from every
    audio file in that directory (via TinyTag), works out the part numbering
    (from track tags, or by matching numbering tokens in filenames), then moves
    the folder to the configured destination and renames each part according to
    AUDIOBOOK_DEST_FILE.

    :param bookid: database BookID of the audiobook to process
    :return: full path of part 1 of the renamed set, the original AudioFile
             path if renaming was abandoned, or '' if the book/config is invalid
    """
    # both placeholders are required to produce distinct, ordered filenames
    for item in ['$Part', '$Title']:
        if item not in lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE']:
            logger.error("Unable to audioRename, check AUDIOBOOK_DEST_FILE")
            return ''

    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,AudioFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid,))
    if exists:
        book_filename = exists['AudioFile']
        if book_filename:
            r = os.path.dirname(book_filename)
        else:
            # fixed: original used two %s placeholders with a single argument,
            # which raised TypeError instead of logging
            logger.debug("No filename for %s in audioRename" % bookid)
            return ''
    else:
        logger.debug("Invalid bookid in audioRename %s" % bookid)
        return ''

    if not TinyTag:
        logger.warn("TinyTag library not available")
        return ''

    cnt = 0
    parts = []          # list of [track, album, author, filename] per audio file
    author = ''
    book = ''
    total = 0
    audio_file = ''
    for f in os.listdir(makeBytestr(r)):
        f = makeUnicode(f)
        if is_valid_booktype(f, booktype='audiobook'):
            cnt += 1
            audio_file = f
            try:
                id3r = TinyTag.get(os.path.join(r, f))
                performer = id3r.artist
                composer = id3r.composer
                book = id3r.album
                track = id3r.track
                total = id3r.track_total
                track = check_int(track, 0)
                total = check_int(total, 0)
                if composer:        # if present, should be author
                    author = composer
                elif performer:     # author, or narrator if composer == author
                    author = performer
                if author and book:
                    parts.append([track, book, author, f])
            except Exception as e:
                logger.error("tinytag %s %s" % (type(e).__name__, str(e)))

    logger.debug("%s found %s audiofile%s" % (exists['BookName'], cnt, plural(cnt)))

    if cnt == 1 and not parts:  # single file audiobook with no usable tags
        # fixed: parts must be a list of part-lists, not a flat list, otherwise
        # the len(parts) check below sees 4 entries and single-file books are
        # never renamed. Use the database author/title since the tags were empty.
        book = exists['BookName']
        author = exists['AuthorName']
        parts = [[1, book, author, audio_file]]

    if cnt != len(parts):
        logger.warn("%s: Incorrect number of parts (found %i from %i)" %
                    (exists['BookName'], len(parts), cnt))
        return book_filename
    if total and total != cnt:
        logger.warn("%s: Reported %i parts, got %i" % (exists['BookName'], total, cnt))
        return book_filename

    # check all parts have the same author and title
    if len(parts) > 1:
        for part in parts:
            if part[1] != book:
                logger.warn("%s: Inconsistent title: [%s][%s]" % (exists['BookName'], part[1], book))
                return book_filename
            if part[2] != author:
                logger.warn("%s: Inconsistent author: [%s][%s]" % (exists['BookName'], part[2], author))
                return book_filename

    # do we have any track info (value is 0 if not)
    if parts[0][0] == 0:
        tokmatch = ''
        # try to extract part information from filename. Search for token style of part 1 in this order...
        for token in [' 001.', ' 01.', ' 1.', ' 001 ', ' 01 ', ' 1 ', '01']:
            if tokmatch:
                break
            for part in parts:
                if token in part[3]:
                    tokmatch = token
                    break
        if tokmatch:  # we know the numbering style, get numbers for the other parts
            cnt = 0
            while cnt < len(parts):
                cnt += 1
                if tokmatch == ' 001.':
                    pattern = ' %s.' % str(cnt).zfill(3)
                elif tokmatch == ' 01.':
                    pattern = ' %s.' % str(cnt).zfill(2)
                elif tokmatch == ' 1.':
                    pattern = ' %s.' % str(cnt)
                elif tokmatch == ' 001 ':
                    pattern = ' %s ' % str(cnt).zfill(3)
                elif tokmatch == ' 01 ':
                    pattern = ' %s ' % str(cnt).zfill(2)
                elif tokmatch == ' 1 ':
                    pattern = ' %s ' % str(cnt)
                else:
                    pattern = '%s' % str(cnt).zfill(2)
                # standardise numbering of the parts
                for part in parts:
                    if pattern in part[3]:
                        part[0] = cnt
                        break

    # check all parts are present (part numbers 1..len(parts) with no gaps)
    cnt = 0
    found = True
    while found and cnt < len(parts):
        found = False
        cnt += 1
        for part in parts:
            trk = part[0]
            if trk == cnt:
                found = True
                break
        if not found:
            logger.warn("%s: No part %i found" % (exists['BookName'], cnt))
            return book_filename

    # if we get here, looks like we have all the parts needed to rename properly
    # seriesInfo(bookid) is loop-invariant, fetch it once and reuse below
    seriesinfo = seriesInfo(bookid)
    # NOTE(review): destination folder pattern comes from EBOOK_DEST_FOLDER,
    # not AUDIOBOOK_DEST_FILE — presumably intentional (same layout for both
    # libraries) but worth confirming.
    dest_path = lazylibrarian.CONFIG['EBOOK_DEST_FOLDER'].replace(
        '$Author', author).replace(
        '$Title', book).replace(
        '$Series', seriesinfo['Full']).replace(
        '$SerName', seriesinfo['Name']).replace(
        '$SerNum', seriesinfo['Num']).replace(
        '$$', ' ')
    dest_path = ' '.join(dest_path.split()).strip()
    dest_path = replace_all(dest_path, __dic__)
    dest_dir = lazylibrarian.DIRECTORY('Audio')
    dest_path = os.path.join(dest_dir, dest_path)
    if r != dest_path:
        try:
            dest_path = safe_move(r, dest_path)
            r = dest_path
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))

    # rename each part according to AUDIOBOOK_DEST_FILE
    for part in parts:
        pattern = lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE']
        pattern = pattern.replace(
            '$Author', author).replace(
            '$Title', book).replace(
            '$Part', str(part[0]).zfill(len(str(len(parts))))).replace(
            '$Total', str(len(parts))).replace(
            '$Series', seriesinfo['Full']).replace(
            '$SerName', seriesinfo['Name']).replace(
            '$SerNum', seriesinfo['Num']).replace(
            '$$', ' ')
        pattern = ' '.join(pattern.split()).strip()
        n = os.path.join(r, pattern + os.path.splitext(part[3])[1])
        o = os.path.join(r, part[3])
        if o != n:
            try:
                n = safe_move(o, n)
                if part[0] == 1:
                    book_filename = n  # return part 1 of set
                logger.debug('%s: audioRename [%s] to [%s]' % (exists['BookName'], o, n))
            except Exception as e:
                logger.error('Unable to rename [%s] to [%s] %s %s' % (o, n, type(e).__name__, str(e)))
    return book_filename
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added

        If startdir is None the configured Destination directory is scanned
        (a "full" scan: stats are reset, missing books flagged, covers cached,
        all author bookcounts refreshed). Otherwise only the given subtree is
        scanned and just the affected author's counts are updated.
        Returns 0 on early exit (no directory / no internet); returns None if
        an unhandled exception is caught (logged with traceback).
    """
    try:
        destdir = lazylibrarian.DIRECTORY('Destination')
        if not startdir:
            if not destdir:
                logger.warn('Cannot find destination directory: %s. Not scanning' % destdir)
                return 0
            startdir = destdir
        if not os.path.isdir(startdir):
            logger.warn('Cannot find directory: %s. Not scanning' % startdir)
            return 0
        if not internet():
            logger.warn('Libraryscan: No internet connection')
            return 0

        myDB = database.DBConnection()
        # keep statistics of full library scans
        if startdir == destdir:
            myDB.action('DELETE from stats')

        # NOTE(review): the SQL in this function is built with %-interpolation
        # rather than bound parameters — vulnerable to quoting problems if a
        # name contains a double quote. Worth migrating to '?' placeholders.
        try:  # remove any extra whitespace in authornames
            authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "% %"')
            if authors:
                logger.info('Removing extra spaces from %s authorname%s' %
                            (len(authors), plural(len(authors))))
                for author in authors:
                    authorid = author["AuthorID"]
                    authorname = ' '.join(author['AuthorName'].split())
                    # Have we got author name both with-and-without extra spaces? If so, merge them
                    duplicate = myDB.match(
                        'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname)
                    if duplicate:
                        myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName'])
                        if author['AuthorID'] != duplicate['AuthorID']:
                            myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' %
                                        (duplicate['AuthorID'], author['AuthorID']))
                    else:
                        myDB.action(
                            'UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid))
        except Exception as e:
            logger.info('Error: ' + str(e))

        logger.info('Scanning ebook directory: %s' % startdir)
        new_book_count = 0
        modified_count = 0
        rescan_count = 0
        rescan_hits = 0
        file_count = 0
        author = ""

        if lazylibrarian.CONFIG['FULL_SCAN']:
            # mark any "Open" books whose file has vanished from disk
            cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors'
            cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"'
            if not startdir == destdir:
                cmd += ' and BookFile like "' + startdir + '%"'
            books = myDB.select(cmd)
            status = lazylibrarian.CONFIG['NOTFOUND_STATUS']
            logger.info('Missing books will be marked as %s' % status)
            for book in books:
                bookID = book['BookID']
                bookfile = book['BookFile']
                if not (bookfile and os.path.isfile(bookfile)):
                    myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                    myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                    logger.warn('Book %s - %s updated as not found on disk' %
                                (book['AuthorName'], book['BookName']))

        # to save repeat-scans of the same directory if it contains multiple formats of the same book,
        # keep track of which directories we've already looked at
        processed_subdirectories = []
        warned = False  # have we warned about no new authors setting

        # Backslash-escape EBOOK_DEST_FILE so it can be used literally in a regex,
        # then turn $Author/$Title placeholders into named capture groups.
        matchString = ''
        for char in lazylibrarian.CONFIG['EBOOK_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the EBOOK_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # NOTE(review): '[' + booktypes + ']' is a character class, not an
        # alternation — works only because match() is unanchored at the end.
        matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
        pattern = re.compile(matchString, re.VERBOSE)

        for r, d, f in os.walk(startdir):
            for directory in d[:]:
                # prevent magazine being scanned
                if directory.startswith("_") or directory.startswith("."):
                    d.remove(directory)
            for files in f:
                file_count += 1
                # Python 2: decode bytes path to unicode once per directory
                if isinstance(r, str):
                    r = r.decode(lazylibrarian.SYS_ENCODING)
                subdirectory = r.replace(startdir, '')
                # Added new code to skip if we've done this directory before.
                # Made this conditional with a switch in config.ini
                # in case user keeps multiple different books in the same subdirectory
                if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories):
                    logger.debug("[%s] already scanned" % subdirectory)
                else:
                    # If this is a book, try to get author/title/isbn/language
                    # if epub or mobi, read metadata from the book
                    # If metadata.opf exists, use that allowing it to override
                    # embedded metadata. User may have edited metadata.opf
                    # to merge author aliases together
                    # If all else fails, try pattern match for author/title
                    # and look up isbn/lang from LT or GR later
                    match = 0
                    if is_valid_booktype(files):
                        logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))
                        language = "Unknown"
                        isbn = ""
                        book = ""
                        author = ""
                        gr_id = ""
                        gb_id = ""
                        extn = os.path.splitext(files)[1]

                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == ".epub") or (extn == ".mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                            try:
                                res = get_book_info(book_filename)
                            except Exception as e:
                                logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e)))
                                res = {}
                            # title and creator are the minimum we need
                            if 'title' in res and 'creator' in res:
                                book = res['title']
                                author = res['creator']
                                if book and len(book) > 2 and author and len(author) > 2:
                                    match = 1
                                if 'language' in res:
                                    language = res['language']
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                if 'type' in res:
                                    extn = res['type']
                                logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                             (isbn, language, author, book, extn))
                            if not match:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                        # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                        # just look for any .opf file in the current directory since we don't know
                        # LL preferred authorname/bookname at this point.
                        # Allow metadata in file to override book contents as may be users pref
                        metafile = opf_file(r)
                        try:
                            res = get_book_info(metafile)
                        except Exception as e:
                            logger.debug('get_book_info failed for %s, %s' % (metafile, str(e)))
                            res = {}
                        # title and creator are the minimum we need
                        if 'title' in res and 'creator' in res:
                            book = res['title']
                            author = res['creator']
                            if book and len(book) > 2 and author and len(author) > 2:
                                match = 1
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'gr_id' in res:
                                gr_id = res['gr_id']
                            logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, gr_id))
                        if not match:
                            logger.debug("File meta incomplete in %s" % metafile)

                        if not match:
                            # no author/book from metadata file, and not embedded either
                            match = pattern.match(files)
                            if match:
                                author = match.group("author")
                                book = match.group("book")
                                if len(book) <= 2 or len(author) <= 2:
                                    match = 0
                            if not match:
                                logger.debug("Pattern match failed [%s]" % files)

                        if match:
                            # flag that we found a book in this subdirectory
                            processed_subdirectories.append(subdirectory)

                            # If we have a valid looking isbn, and language != "Unknown", add it to cache
                            if language != "Unknown" and is_valid_isbn(isbn):
                                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                                # we need to add it to language cache if not already
                                # there, is_valid_isbn has checked length is 10 or 13
                                if len(isbn) == 10:
                                    isbnhead = isbn[0:3]
                                else:
                                    isbnhead = isbn[3:6]
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if not match:
                                    myDB.action('insert into languages values ("%s", "%s")' %
                                                (isbnhead, language))
                                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                                else:
                                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            author, authorid, new = addAuthorNameToDB(author)  # get the author name as we know it...

                            if author:
                                # author exists, check if this book by this author is in our database
                                # metadata might have quotes in book name
                                # some books might be stored under a different author name
                                # eg books by multiple authors, books where author is "writing as"
                                # or books we moved to "merge" authors
                                book = book.replace("'", "")
                                # First try and find it under author and bookname
                                # as we may have it under a different bookid or isbn to goodreads/googlebooks
                                # which might have several bookid/isbn for the same book
                                bookid = find_book_in_db(myDB, author, book)
                                if not bookid:
                                    # Title or author name might not match or multiple authors
                                    # See if the gr_id, gb_id is already in our database
                                    if gr_id:
                                        bookid = gr_id
                                    elif gb_id:
                                        bookid = gb_id
                                    else:
                                        bookid = ""
                                    if bookid:
                                        match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid)
                                        if not match:
                                            msg = 'Unable to find book %s by %s in database, trying to add it using '
                                            if bookid == gr_id:
                                                msg += "GoodReads ID " + gr_id
                                            if bookid == gb_id:
                                                msg += "GoogleBooks ID " + gb_id
                                            logger.debug(msg % (book, author))
                                            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id:
                                                GR_ID = GoodReads(gr_id)
                                                GR_ID.find_book(gr_id, None)
                                            elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id:
                                                GB_ID = GoogleBooks(gb_id)
                                                GB_ID.find_book(gb_id, None)
                                            # see if it's there now...
                                            match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid)
                                            if not match:
                                                logger.debug("Unable to add bookid %s to database" % bookid)
                                                bookid = ""
                                if not bookid and isbn:
                                    # See if the isbn is in our database
                                    match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn)
                                    if match:
                                        bookid = match['BookID']
                                if not bookid:
                                    # get author name from parent directory of this book directory
                                    newauthor = os.path.basename(os.path.dirname(r))
                                    # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_
                                    if newauthor.endswith('_'):
                                        newauthor = newauthor[:-1] + '.'
                                    if author.lower() != newauthor.lower():
                                        logger.debug("Trying authorname [%s]" % newauthor)
                                        bookid = find_book_in_db(myDB, newauthor, book)
                                        if bookid:
                                            logger.warn("%s not found under [%s], found under [%s]" %
                                                        (book, author, newauthor))

                                # at this point if we still have no bookid, it looks like we
                                # have author and book title but no database entry for it
                                if not bookid:
                                    if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                                        # Either goodreads doesn't have the book or it didn't match language prefs
                                        # Since we have the book anyway, try and reload it ignoring language prefs
                                        rescan_count += 1
                                        base_url = 'http://www.goodreads.com/search.xml?q='
                                        params = {"key": lazylibrarian.CONFIG['GR_API']}
                                        # collapse leading initials ("J. K. Rowling" style) into "J.K."
                                        # NOTE(review): author[1] raises IndexError for a
                                        # single-character author name — TODO confirm inputs
                                        if author[1] in '. ':
                                            surname = author
                                            forename = ''
                                            while surname[1] in '. ':
                                                forename = forename + surname[0] + '.'
                                                surname = surname[2:].strip()
                                            if author != forename + ' ' + surname:
                                                logger.debug('Stripped authorname [%s] to [%s %s]' %
                                                             (author, forename, surname))
                                                author = forename + ' ' + surname
                                        author = ' '.join(author.split())  # ensure no extra whitespace
                                        searchname = author + ' ' + book
                                        searchname = cleanName(unaccented(searchname))
                                        searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
                                        set_url = base_url + searchterm + '&' + urllib.urlencode(params)
                                        try:
                                            rootxml, in_cache = get_xml_request(set_url)
                                            if not len(rootxml):
                                                logger.debug("Error requesting results from GoodReads")
                                            else:
                                                resultxml = rootxml.getiterator('work')
                                                for item in resultxml:
                                                    booktitle = item.find('./best_book/title').text
                                                    book_fuzz = fuzz.token_set_ratio(booktitle, book)
                                                    if book_fuzz >= 98:
                                                        logger.debug("Rescan found %s : %s" % (booktitle, language))
                                                        rescan_hits += 1
                                                        bookid = item.find('./best_book/id').text
                                                        GR_ID = GoodReads(bookid)
                                                        GR_ID.find_book(bookid, None)
                                                        if language and language != "Unknown":
                                                            # set language from book metadata
                                                            logger.debug("Setting language from metadata %s : %s" %
                                                                         (booktitle, language))
                                                            myDB.action('UPDATE books SET BookLang="%s" WHERE BookID="%s"' %
                                                                        (language, bookid))
                                                        break
                                                if not bookid:
                                                    logger.warn("GoodReads doesn't know about %s" % book)
                                        except Exception as e:
                                            logger.error("Error finding rescan results: %s" % str(e))
                                    elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                                        # if we get here using googlebooks it's because googlebooks
                                        # doesn't have the book. No point in looking for it again.
                                        logger.warn("GoogleBooks doesn't know about %s" % book)

                                # see if it's there now...
                                if bookid:
                                    cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors '
                                    cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid
                                    check_status = myDB.match(cmd)
                                    if not check_status:
                                        logger.debug('Unable to find bookid %s in database' % bookid)
                                    else:
                                        if check_status['Status'] != 'Open':
                                            # we found a new book
                                            new_book_count += 1
                                            myDB.action(
                                                'UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                        # store book location so we can check if it gets removed
                                        book_filename = os.path.join(r, files)
                                        if not check_status['BookFile']:  # no previous location
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))
                                        # location may have changed since last scan
                                        elif book_filename != check_status['BookFile']:
                                            modified_count += 1
                                            logger.warn("Updating book location for %s %s from %s to %s" %
                                                        (author, book, check_status['BookFile'], book_filename))
                                            logger.debug("%s %s matched %s BookID %s, [%s][%s]" %
                                                         (author, book, check_status['Status'], bookid,
                                                          check_status['AuthorName'], check_status['BookName']))
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))
                                        # update cover file to cover.jpg in book folder (if exists)
                                        bookdir = os.path.dirname(book_filename)
                                        coverimg = os.path.join(bookdir, 'cover.jpg')
                                        if os.path.isfile(coverimg):
                                            cachedir = lazylibrarian.CACHEDIR
                                            cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg')
                                            copyfile(coverimg, cacheimg)
                                else:
                                    logger.warn(
                                        "Failed to match book [%s] by [%s] in database" % (book, author))
                            else:
                                if not warned and not lazylibrarian.CONFIG['ADD_AUTHOR']:
                                    logger.warn("Add authors to database is disabled")
                                    warned = True

        logger.info("%s/%s new/modified book%s found and added to the database" %
                    (new_book_count, modified_count, plural(new_book_count + modified_count)))
        logger.info("%s file%s processed" % (file_count, plural(file_count)))

        if startdir == destdir:
            # On full library scans, check for missing workpages
            setWorkPages()
            # and books with unknown language
            nolang = myDB.match(
                "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'")
            nolang = nolang['counter']
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))
            # show stats if new books were added
            stats = myDB.match(
                "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats")
            # NOTE(review): GB_book_hits is populated from sum(GR_book_hits) — the
            # same counter tracks whichever provider is active; confirm intent.
            st = {'GR_book_hits': stats['sum(GR_book_hits)'],
                  'GB_book_hits': stats['sum(GR_book_hits)'],
                  'GR_lang_hits': stats['sum(GR_lang_hits)'],
                  'LT_lang_hits': stats['sum(LT_lang_hits)'],
                  'GB_lang_change': stats['sum(GB_lang_change)'],
                  'cache_hits': stats['sum(cache_hits)'],
                  'bad_lang': stats['sum(bad_lang)'],
                  'bad_char': stats['sum(bad_char)'],
                  'uncached': stats['sum(uncached)'],
                  'duplicates': stats['sum(duplicates)']}
            # SQL sum() returns NULL on an empty table; normalise to 0
            for item in st.keys():
                if st[item] is None:
                    st[item] = 0
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (st['GB_lang_change'], plural(st['GB_lang_change'])))
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (st['GR_lang_hits'], plural(st['GR_lang_hits'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (st['LT_lang_hits'], plural(st['LT_lang_hits'])))
            logger.debug("Language cache was hit %s time%s" %
                         (st['cache_hits'], plural(st['cache_hits'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (st['bad_lang'], plural(st['bad_lang'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (st['bad_char'], plural(st['bad_char'])))
            logger.debug("Unable to cache language for %s book%s with missing ISBN" %
                         (st['uncached'], plural(st['uncached'])))
            logger.debug("Found %s duplicate book%s" %
                         (st['duplicates'], plural(st['duplicates'])))
            logger.debug("Rescan %s hit%s, %s miss" %
                         (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.match("select count('ISBN') as counter from languages")
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

            # Cache any covers and images
            images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
            if len(images):
                logger.info("Caching cover%s for %i book%s" %
                            (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    bookid = item['bookid']
                    bookimg = item['bookimg']
                    # bookname = item['bookname']
                    newimg, success = cache_img("book", bookid, bookimg)
                    if success:
                        myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))
            images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
            if len(images):
                logger.info("Caching image%s for %i author%s" %
                            (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    authorid = item['authorid']
                    authorimg = item['authorimg']
                    # authorname = item['authorname']
                    newimg, success = cache_img("author", authorid, authorimg)
                    if success:
                        myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))
            # On full scan, update bookcounts for all authors, not just new ones - refresh may have located
            # new books for existing authors especially if switched provider gb/gr or changed wanted languages
            authors = myDB.select('select AuthorID from authors')
        else:
            # On single author/book import, just update bookcount for that author
            authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' %
                                  author.replace('"', '""'))

        logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
        for author in authors:
            update_totals(author['AuthorID'])
        logger.info('Library scan complete')
        return new_book_count

    except Exception:
        logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())
def processDir(reset=False):
    """Post-process the download directory.

    Matches each 'Snatched' entry in the wanted table against the files and
    folders in the download directory (fuzzy title match), moves/copies the
    best match to its destination via processDestination(), updates the
    wanted/books/magazines/issues tables, then imports any remaining
    'LL.(bookid)' tagged folders.

    :param reset: if True, restart the scheduled processDir job on completion
    :return: None
    """
    # Rename the generic worker thread so log lines identify the task.
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"
    # Fall back to the current working directory if no valid download dir is configured.
    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR
    logger.debug(' Checking [%s] for files to post process' % processpath)
    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return
    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')
    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return
    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return
    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        # Collect every candidate download for this snatch, scored by fuzzy match.
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    # Python 2: bytes on disk, decode for comparison.
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                # Strip our own ' LL.(bookid)' suffix before comparing titles.
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # handle single file downloads here...
                        if is_valid_booktype(fname, booktype="book") \
                                or is_valid_booktype(fname, booktype="mag"):
                            # Wrap a bare ebook/magazine file in its own folder
                            # so the rest of the pipeline only deals with folders.
                            dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                            if os.path.exists(dirname):
                                try:
                                    shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                    fname = os.path.splitext(fname)[0]
                                except Exception as why:
                                    logger.debug("Failed to move file %s to %s, %s" %
                                                 (fname, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)
        if matches:
            # Keep only the highest scoring candidate folder.
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.info(u'Best match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))
            data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
            if data:
                # It's an ebook: build destination folder/file names from the templates.
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')
                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                       ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                    lazylibrarian.SYS_ENCODING)
            else:
                # Not a book: try to match a snatched magazine by title.
                data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                if data:
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        # Prefix with '_' so magazine folders sort away from author folders.
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                            lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                        '$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
            continue
        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)
        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug('Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                else:
                    newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                    "IssueStatus": "Open"}
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                newValueDict = {"IssueAcquired": today(),
                                "IssueFile": dest_file,
                                "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                }
                myDB.upsert("issues", newValueDict, controlValueDict)
                # create a thumbnail cover for the new issue
                create_cover(dest_file)
            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s at %s" % (global_name, now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])
            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)
    # Second pass: import any remaining folders tagged with our LL.(bookid) marker.
    downloads = os.listdir(processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)
            if os.path.isfile(pp_path):
                # NOTE(review): single-arg os.path.join just returns processpath here,
                # so a tagged *file* falls back to importing from the download root — confirm intended.
                pp_path = os.path.join(processpath)
            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1
    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))
    if reset:
        scheduleJob(action='Restart', target='processDir')
def magazineScan():
    """Scan the magazine destination folder and sync the magazines/issues tables.

    Walks MAG_DEST_FOLDER matching filenames against patterns derived from
    MAG_DEST_FILE, adds any new issues found, and (on FULL_SCAN) removes
    database entries for issues no longer on disk and magazines with no
    issues.  Sets lazylibrarian.MAG_UPDATE as a busy flag for the duration.
    """
    lazylibrarian.MAG_UPDATE = 1
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        # Base scan directory is the folder template up to the first substitution variable.
        mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER']
        mag_path = mag_path.split('$')[0]
        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            if mag_path[0] not in '._':
                mag_path = '_' + mag_path
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)
        if lazylibrarian.CONFIG['FULL_SCAN']:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']
                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)
            mags = myDB.select('SELECT * from magazines')
            # now check the magazine titles and delete any with no issues
            for mag in mags:
                title = mag['Title']
                count = myDB.select('SELECT COUNT(Title) as counter FROM issues WHERE Title=?', (title,))
                issues = count[0]['counter']
                if not issues:
                    logger.debug('Magazine %s deleted as no issues found' % title)
                    myDB.action('DELETE from magazines WHERE Title=?', (title,))
        logger.info(' Checking [%s] for magazines' % mag_path)
        # Escape every character of the filename template so it is literal under re.VERBOSE.
        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # NOTE(review): '[' + booktypes + ']' builds a character class, not an
        # alternation group — '[pdf|epub]' matches ONE of those characters.  It
        # still succeeds because pattern.match() only needs a prefix match, but
        # '(?:' + booktypes + ')' would express the intent correctly — confirm.
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        # Fallback pattern: title comes from the directory name, only the date from the filename.
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)
        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            issuedate = match.group("issuedate")
                            title = match.group("title")
                            match = True
                        else:
                            match = False
                    except Exception:
                        match = False
                    if not match:
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                            else:
                                logger.debug("Pattern match failed for [%s]" % fname)
                                continue
                        except Exception as e:
                            logger.debug("Invalid name format for [%s] %s %s" %
                                         (fname, type(e).__name__, str(e)))
                            continue
                    logger.debug("Found %s Issue %s" % (title, fname))
                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))
                    controlValueDict = {"Title": title}
                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired, IssueDate, MagazineAdded from magazines WHERE Title=?', (title,))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)
                    issuedate = str(issuedate).zfill(4)  # for sorting issue numbers
                    # is this issue already in the database?
                    controlValueDict = {"Title": title, "IssueDate": issuedate}
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match('SELECT Title from issues WHERE Title=? and IssueDate=?',
                                           (title, issuedate))
                    if not iss_entry:
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)
                        logger.debug("Adding issue %s %s" % (title, issuedate))
                        create_cover(issuefile)
                        lazylibrarian.postprocess.processMAGOPF(issuefile, title, issuedate, issue_id)
                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:
                        # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)
        magcount = myDB.match("select count(*) from magazines")
        isscount = myDB.match("select count(*) from issues")
        logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                    (magcount['count(*)'], plural(magcount['count(*)']),
                     isscount['count(*)'], plural(isscount['count(*)'])))
        lazylibrarian.MAG_UPDATE = 0
    except Exception:
        # Always clear the busy flag, even on unexpected failure.
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None):
    """Move a downloaded book/magazine from pp_path into the library.

    Either hands the book to calibredb for import (when IMP_CALIBREDB is set)
    or copies the wanted files (book/mag plus .jpg/.opf sidecars) to dest_path
    renamed to global_name.  Source files are removed afterwards unless
    DESTINATION_COPY is set or pp_path is the download root.

    :param pp_path: source folder containing the downloaded files
    :param dest_path: target library folder (ignored for calibre imports)
    :param authorname: author, or None for magazines
    :param bookname: book title, or None for magazines
    :param global_name: base filename to use for the renamed files
    :return: True on success, False on any failure
    """
    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'
    got_book = False
    for bookfile in os.listdir(pp_path):
        if is_valid_booktype(bookfile, booktype=booktype):
            got_book = bookfile
            break
    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn('Failed to locate a book/magazine in %s, leaving for manual processing' % pp_path)
        return False
    # Do we want calibre to import the book for us
    if bookname and len(lazylibrarian.IMP_CALIBREDB):
        try:
            logger.debug('Creating metadata for calibre')
            # Import in place: write an .opf next to the book so calibre picks up metadata.
            dest_path = pp_path
            global_name = os.path.splitext(got_book)[0]
            bookid = ''
            booklang = ''
            bookisbn = ''
            bookpub = ''
            bookdate = ''
            bookdesc = ''
            processOPF(dest_path, authorname, bookname, bookisbn, bookid, bookpub, bookdate,
                       bookdesc, booklang, global_name)
            logger.debug('Importing %s, %s into calibre library' % (authorname, bookname))
            # '-1' = add one book per directory.
            params = [lazylibrarian.IMP_CALIBREDB, 'add', '-1', '--with-library',
                      lazylibrarian.DESTINATION_DIR, pp_path]
            logger.debug(str(params))
            res = subprocess.check_output(params, stderr=subprocess.STDOUT)
            if res:
                logger.debug('%s reports: %s' % (lazylibrarian.IMP_CALIBREDB, unaccented_str(res)))
            calibre_dir = os.path.join(lazylibrarian.DESTINATION_DIR, unaccented_str(authorname), '')
            if os.path.isdir(calibre_dir):
                imported = LibraryScan(calibre_dir)  # rescan authors directory so we get the new book in our database
            else:
                imported = LibraryScan(lazylibrarian.DESTINATION_DIR)  # may have to rescan whole library instead
            # "already exist" in calibre's output means the book was there before - not a failure.
            if not imported and not 'already exist' in res:
                return False
        except subprocess.CalledProcessError as e:
            logger.debug(params)
            logger.debug('calibredb import failed: %s' % e.output)
            return False
        except OSError as e:
            logger.debug('calibredb failed, %s' % e.strerror)
            return False
    else:
        # we are copying the files ourselves
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
        elif not os.path.isdir(dest_path):
            logger.debug('%s exists but is not a directory, deleting it' % dest_path)
            try:
                os.remove(dest_path)
            except OSError as why:
                logger.debug('Failed to delete %s, %s' % (dest_path, why.strerror))
                return False
        if not os.path.exists(dest_path):
            try:
                os.makedirs(dest_path)
            except OSError as why:
                logger.debug('Failed to create directory %s, %s' % (dest_path, why.strerror))
                return False
        # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
        # After the copy completes, delete source files if DESTINATION_COPY not set,
        # but don't delete source files if copy failed or if in root of download dir
        for fname in os.listdir(pp_path):
            if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                    is_valid_booktype(fname, booktype=booktype):
                logger.debug('Copying %s to directory %s' % (fname, dest_path))
                try:
                    shutil.copyfile(os.path.join(pp_path, fname), os.path.join(
                        dest_path, global_name + os.path.splitext(fname)[1]))
                except Exception as why:
                    logger.debug("Failed to copy file %s to %s, %s" % (fname, dest_path, str(why)))
                    return False
            else:
                logger.debug('Ignoring unwanted file: %s' % fname)
    # calibre or ll copied the files we want, now delete source files if not in download root dir
    if not lazylibrarian.DESTINATION_COPY:
        if pp_path != lazylibrarian.DOWNLOAD_DIR:
            try:
                shutil.rmtree(pp_path)
            except Exception as why:
                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))
                return False
    return True
def magazineScan(thread=None):
    """Scan the magazine folder and sync the magazines/issues tables (legacy version).

    Parses title and issue date out of each magazine filename, adds missing
    issues to the database and updates the per-magazine date fields.  On
    FULL_SCAN, first removes issue entries whose file no longer exists.

    NOTE(review): a later definition of magazineScan in this module shadows
    this one at import time - this copy appears to be a leftover older version.

    :param thread: existing thread name, or None to rename this thread
    """
    # rename this thread
    if thread is None:
        threading.currentThread().name = "MAGAZINESCAN"
    myDB = database.DBConnection()
    # Base scan directory is the folder template up to the first substitution variable.
    mag_path = lazylibrarian.MAG_DEST_FOLDER
    if "$" in mag_path:
        mag_path = mag_path.split("$")[0]
    if lazylibrarian.MAG_RELATIVE:
        if mag_path[0] not in "._":
            mag_path = "_" + mag_path
        mag_path = os.path.join(lazylibrarian.DESTINATION_DIR, mag_path).encode(lazylibrarian.SYS_ENCODING)
    else:
        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)
    if lazylibrarian.FULL_SCAN:
        mags = myDB.select("select * from Issues")
        # check all the issues are still there, delete entry if not
        for mag in mags:
            title = mag["Title"]
            issuedate = mag["IssueDate"]
            issuefile = mag["IssueFile"]
            # BUGFIX: condition was inverted ("not issuefile and os.path.isfile(issuefile)")
            # so stale issues were never removed, and a None issuefile raised TypeError.
            # We want: there IS a recorded file, but it is no longer on disk.
            if issuefile and not os.path.isfile(issuefile):
                myDB.action('DELETE from Issues where issuefile="%s"' % issuefile)
                logger.info("Issue %s - %s deleted as not found on disk" % (title, issuedate))
                controlValueDict = {"Title": title}
                newValueDict = {
                    "LastAcquired": None,  # clear magazine dates
                    "IssueDate": None,  # we will fill them in again later
                    "IssueStatus": "Skipped",  # assume there are no issues now
                }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                logger.debug("Magazine %s details reset" % title)
    logger.info(" Checking [%s] for magazines" % mag_path)
    for dirname, dirnames, filenames in os.walk(mag_path):
        for fname in filenames:
            # if fname.endswith('.pdf'): maybe not all magazines will be pdf?
            if formatter.is_valid_booktype(fname):
                try:
                    # Filename is expected to look like "date - x - x - title.ext";
                    # any parse failure (IndexError etc.) skips the file.
                    title = fname.split("-")[3]
                    title = title.split(".")[-2]
                    title = title.strip()
                    issuedate = fname.split(" ")[0]
                    issuefile = os.path.join(dirname, fname)  # full path to issue.pdf
                    logger.debug("Found Issue %s" % fname)
                except Exception:
                    logger.debug("Invalid name format for %s" % fname)
                    continue
                # Acquired date is taken from the file's modification timestamp.
                mtime = os.path.getmtime(issuefile)
                iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))
                # magazines table: Title, Frequency, Regex, Status, MagazineAdded, LastAcquired,
                #                  IssueDate, IssueStatus
                # issues table: Title, IssueAcquired, IssueDate, IssueFile
                controlValueDict = {"Title": title}
                # is this magazine already in the database?
                # NOTE(review): title is interpolated straight into the SQL - fine for
                # trusted local filenames, but parameterised queries would be safer.
                mag_entry = myDB.select('SELECT * from magazines WHERE Title="%s"' % title)
                if not mag_entry:
                    # need to add a new magazine to the database
                    newValueDict = {
                        "Frequency": "Monthly",  # no idea really, set a default value
                        "Regex": None,
                        "Status": "Active",
                        "MagazineAdded": None,
                        "LastAcquired": None,
                        "IssueDate": None,
                        "IssueStatus": "Skipped",
                    }
                    logger.debug("Adding magazine %s" % title)
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # maglastacquired is not needed here: the magazineadded is None
                    # branch below never reads it for a brand new magazine.
                    lastacquired = None
                    magissuedate = None
                    magazineadded = None
                else:
                    maglastacquired = mag_entry[0]["LastAcquired"]
                    magissuedate = mag_entry[0]["IssueDate"]
                    magazineadded = mag_entry[0]["MagazineAdded"]
                # is this issue already in the database?
                controlValueDict = {"Title": title, "IssueDate": issuedate}
                iss_entry = myDB.select('SELECT * from issues WHERE Title="%s" and IssueDate="%s"' %
                                        (title, issuedate))
                if not iss_entry:
                    newValueDict = {"IssueAcquired": iss_acquired, "IssueFile": issuefile}
                    logger.debug("Adding issue %s %s" % (title, issuedate))
                    myDB.upsert("Issues", newValueDict, controlValueDict)
                # see if this issues date values are useful
                # if its a new magazine, magazineadded,magissuedate,lastacquired are all None
                # if magazineadded is NOT None, but the others are, we've deleted one or more issues
                # so the most recent dates may be wrong and need to be updated.
                # Set magazine_issuedate to issuedate of most recent issue we have
                # Set magazine_added to acquired date of earliest issue we have
                # Set magazine_lastacquired to acquired date of most recent issue we have
                # acquired dates are read from magazine file timestamps
                if magazineadded is None:  # new magazine, this might be the only issue
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "MagazineAdded": iss_acquired,
                        "LastAcquired": iss_acquired,
                        "IssueDate": issuedate,
                        "IssueStatus": "Open",
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                else:
                    if iss_acquired < magazineadded:
                        controlValueDict = {"Title": title}
                        newValueDict = {"MagazineAdded": iss_acquired}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    if maglastacquired is None or iss_acquired > maglastacquired:
                        controlValueDict = {"Title": title}
                        newValueDict = {"LastAcquired": iss_acquired}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    if magissuedate is None or issuedate > magissuedate:
                        controlValueDict = {"Title": title}
                        newValueDict = {"IssueDate": issuedate}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
    magcount = myDB.action("select count(*) from magazines").fetchone()
    isscount = myDB.action("select count(*) from issues").fetchone()
    logger.info("Magazine scan complete, found %s magazines, %s issues" % (magcount["count(*)"],
                                                                           isscount["count(*)"]))
def audioProcess(bookid, rename=False, playlist=False):
    """
    :param bookid: book to process
    :param rename: rename to match audiobook filename pattern
    :param playlist: generate a playlist for popup
    :return: filename of part 01 of the audiobook
    """
    # Refuse to rename unless the filename template contains the variables we substitute.
    for item in ['$Part', '$Title']:
        if rename and item not in lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE']:
            logger.error("Unable to audioProcess, check AUDIOBOOK_DEST_FILE")
            return ''
    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,AudioFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid,))
    if exists:
        book_filename = exists['AudioFile']
        if book_filename:
            # All parts are expected to live in the directory of the recorded part-1 file.
            r = os.path.dirname(book_filename)
        else:
            logger.debug("No filename for %s in audioProcess" % bookid)
            return ''
    else:
        logger.debug("Invalid bookid in audioProcess %s" % bookid)
        return ''
    if not TinyTag:
        logger.warn("TinyTag library not available")
        return ''
    cnt = 0
    parts = []  # list of [track_number, album, author, filename]
    total = 0
    author = ''
    book = ''
    audio_file = ''
    abridged = ''
    # Read the audio tags of every audiobook file in the directory.
    for f in os.listdir(makeBytestr(r)):
        f = makeUnicode(f)
        if is_valid_booktype(f, booktype='audiobook'):
            cnt += 1
            audio_file = f
            try:
                audio_path = os.path.join(r, f)
                performer = ''
                composer = ''
                albumartist = ''
                book = ''
                title = ''
                track = 0
                total = 0
                if TinyTag.is_supported(audio_path):
                    id3r = TinyTag.get(audio_path)
                    performer = id3r.artist
                    composer = id3r.composer
                    albumartist = id3r.albumartist
                    book = id3r.album
                    title = id3r.title
                    track = id3r.track
                    total = id3r.track_total
                    track = check_int(track, 0)
                    total = check_int(total, 0)
                if performer:
                    performer = performer.strip()
                if composer:
                    composer = composer.strip()
                if book:
                    book = book.strip()
                if albumartist:
                    albumartist = albumartist.strip()
                if composer:  # if present, should be author
                    author = composer
                elif performer:  # author, or narrator if composer == author
                    author = performer
                elif albumartist:
                    author = albumartist
                if author and book:
                    parts.append([track, book, author, f])
                # Look for an "(un)abridged" marker in any of the text tags.
                if not abridged:
                    for tag in [book, title, albumartist, performer, composer]:
                        if tag and 'unabridged' in tag.lower():
                            abridged = 'Unabridged'
                            break
                if not abridged:
                    for tag in [book, title, albumartist, performer, composer]:
                        if tag and 'abridged' in tag.lower():
                            abridged = 'Abridged'
                            break
            except Exception as e:
                logger.error("tinytag %s %s" % (type(e).__name__, str(e)))
                pass
            finally:
                # Fall back to the filename for the (un)abridged marker.
                # NOTE(review): these break statements exit the whole directory
                # scan as soon as a filename matches, so remaining files are not
                # counted/tagged - confirm this is intended.
                if not abridged:
                    if audio_file and 'unabridged' in audio_file.lower():
                        abridged = 'Unabridged'
                        break
                if not abridged:
                    if audio_file and 'abridged' in audio_file.lower():
                        abridged = 'Abridged'
                        break
    logger.debug("%s found %s audiofile%s" % (exists['BookName'], cnt, plural(cnt)))
    if cnt == 1 and not parts:  # single file audiobook with no tags
        parts = [[1, exists['BookName'], exists['AuthorName'], audio_file]]
    if cnt != len(parts):
        logger.warn("%s: Incorrect number of parts (found %i from %i)" % (exists['BookName'], len(parts), cnt))
        return book_filename
    if total and total != cnt:
        logger.warn("%s: Reported %i parts, got %i" % (exists['BookName'], total, cnt))
        return book_filename
    # check all parts have the same author and title
    if len(parts) > 1:
        for part in parts:
            if part[1] != book:
                logger.warn("%s: Inconsistent title: [%s][%s]" % (exists['BookName'], part[1], book))
                return book_filename
            if part[2] != author:
                logger.warn("%s: Inconsistent author: [%s][%s]" % (exists['BookName'], part[2], author))
                return book_filename
    # do we have any track info (value is 0 if not)
    if parts[0][0] == 0:
        tokmatch = ''
        # try to extract part information from filename. Search for token style of part 1 in this order...
        for token in [' 001.', ' 01.', ' 1.', ' 001 ', ' 01 ', ' 1 ', '01']:
            if tokmatch:
                break
            for part in parts:
                if token in part[3]:
                    tokmatch = token
                    break
        if tokmatch:  # we know the numbering style, get numbers for the other parts
            cnt = 0
            while cnt < len(parts):
                cnt += 1
                # Build the token for part number 'cnt' in the detected style.
                if tokmatch == ' 001.':
                    pattern = ' %s.' % str(cnt).zfill(3)
                elif tokmatch == ' 01.':
                    pattern = ' %s.' % str(cnt).zfill(2)
                elif tokmatch == ' 1.':
                    pattern = ' %s.' % str(cnt)
                elif tokmatch == ' 001 ':
                    pattern = ' %s ' % str(cnt).zfill(3)
                elif tokmatch == ' 01 ':
                    pattern = ' %s ' % str(cnt).zfill(2)
                elif tokmatch == ' 1 ':
                    pattern = ' %s ' % str(cnt)
                else:
                    pattern = '%s' % str(cnt).zfill(2)
                # standardise numbering of the parts
                for part in parts:
                    if pattern in part[3]:
                        part[0] = cnt
                        break
    parts.sort(key=lambda x: x[0])
    # check all parts are present
    cnt = 0
    while cnt < len(parts):
        if parts[cnt][0] != cnt + 1:
            logger.warn("%s: No part %i found" % (exists['BookName'], cnt + 1))
            return book_filename
        cnt += 1
    if abridged:
        abridged = ' (%s)' % abridged
    # if we get here, looks like we have all the parts needed to rename properly
    seriesinfo = nameVars(bookid, abridged)
    dest_path = seriesinfo['FolderName']
    dest_dir = lazylibrarian.DIRECTORY('Audio')
    dest_path = os.path.join(dest_dir, dest_path)
    if rename and r != dest_path:
        try:
            dest_path = safe_move(r, dest_path)
            r = dest_path
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))
    if playlist:
        # Reuse the 'playlist' parameter as the open file handle (None on failure).
        try:
            playlist = open(os.path.join(r, 'playlist.ll'), 'w')
        except Exception as why:
            logger.error('Unable to create playlist in %s: %s' % (r, why))
            playlist = None
    for part in parts:
        # Zero-pad $Part to the width of the part count so names sort correctly.
        pattern = seriesinfo['AudioFile']
        pattern = pattern.replace(
            '$Part', str(part[0]).zfill(len(str(len(parts))))).replace(
            '$Total', str(len(parts)))
        pattern = ' '.join(pattern.split()).strip()
        pattern = pattern + os.path.splitext(part[3])[1]
        if playlist:
            if rename:
                playlist.write(pattern + '\n')
            else:
                playlist.write(part[3] + '\n')
        if rename:
            n = os.path.join(r, pattern)
            o = os.path.join(r, part[3])
            if o != n:
                try:
                    n = safe_move(o, n)
                    if part[0] == 1:
                        book_filename = n  # return part 1 of set
                    logger.debug('%s: audioProcess [%s] to [%s]' % (exists['BookName'], o, n))
                except Exception as e:
                    logger.error('Unable to rename [%s] to [%s] %s %s' % (o, n, type(e).__name__, str(e)))
    if playlist:
        playlist.close()
    return book_filename
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None):
    """Move a downloaded book/magazine from pp_path into the library.

    Either hands the book to calibredb for import (when IMP_CALIBREDB is
    configured and this is a book), or copies the wanted files (book/mag
    plus any .jpg/.opf) into dest_path renamed to global_name.

    Returns True on success, False on any failure (failures are logged,
    not raised).
    """
    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:  # bookname is None when we are processing a magazine
        booktype = 'book'
    else:
        booktype = 'mag'
    got_book = False
    for bookfile in os.listdir(pp_path):
        if is_valid_booktype(bookfile, booktype=booktype):
            got_book = bookfile  # keep the filename (truthy) as the found flag
            break
    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn('Failed to locate a book/magazine in %s, leaving for manual processing' % pp_path)
        return False
    # Do we want calibre to import the book for us
    if bookname and len(lazylibrarian.IMP_CALIBREDB):
        processpath = lazylibrarian.DIRECTORY('Destination')
        try:
            logger.debug('Importing %s into calibre library' % (global_name))
            # calibre is broken, ignores metadata.opf and book_name.opf
            # also ignores --title and --author as parameters
            # so we have to configure calibre to parse the filename for author/title
            # and rename the book to the format we want calibre to use
            for bookfile in os.listdir(pp_path):
                filename, extn = os.path.splitext(bookfile)
                # calibre does not like quotes in author names
                os.rename(os.path.join(pp_path, filename + extn), os.path.join(
                    pp_path, global_name.replace('"', '_') + extn))
            params = [lazylibrarian.IMP_CALIBREDB,
                      'add',
                      # '--title="%s"' % bookname,
                      # '--author="%s"' % unaccented(authorname),
                      '-1',
                      '--with-library',
                      processpath, pp_path
                      ]
            logger.debug(str(params))
            res = subprocess.check_output(params, stderr=subprocess.STDOUT)
            if res:
                logger.debug('%s reports: %s' % (lazylibrarian.IMP_CALIBREDB, unaccented_str(res)))
                if 'already exist' in res:
                    logger.warn('Calibre failed to import %s %s, reports book already exists' %
                                (authorname, bookname))
            # calibre does not like quotes in author names
            calibre_dir = os.path.join(processpath, unaccented_str(authorname.replace('"', '_')), '')
            if os.path.isdir(calibre_dir):
                # rescan authors directory so we get the new book in our database
                imported = LibraryScan(calibre_dir)
            else:
                logger.error("Failed to locate calibre dir [%s]" % calibre_dir)
                imported = False
                # imported = LibraryScan(processpath) # may have to rescan whole library instead
            if not imported:
                return False
        except subprocess.CalledProcessError as e:
            # calibredb exited non-zero; log the command and its output
            logger.debug(params)
            logger.debug('calibredb import failed: %s' % e.output)
            return False
        except OSError as e:
            # calibredb binary missing / not executable
            logger.debug('calibredb failed, %s' % e.strerror)
            return False
    else:
        # we are copying the files ourselves, either it's a magazine or we don't want to use calibre
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
        elif not os.path.isdir(dest_path):
            # a plain file is squatting on our target path; remove it
            logger.debug('%s exists but is not a directory, deleting it' % dest_path)
            try:
                os.remove(dest_path)
            except OSError as why:
                logger.debug('Failed to delete %s, %s' % (dest_path, why.strerror))
                return False
        if not os.path.exists(dest_path):
            try:
                os.makedirs(dest_path)
            except OSError as why:
                logger.debug('Failed to create directory %s, %s' % (dest_path, why.strerror))
                return False
        setperm(dest_path)
        # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
        for fname in os.listdir(pp_path):
            if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                    is_valid_booktype(fname, booktype=booktype):
                logger.debug('Copying %s to directory %s' % (fname, dest_path))
                try:
                    # copy keeping the original extension but using our standard name
                    shutil.copyfile(os.path.join(pp_path, fname), os.path.join(
                        dest_path, global_name + os.path.splitext(fname)[1]))
                    setperm(os.path.join(dest_path, global_name + os.path.splitext(fname)[1]))
                except Exception as why:
                    logger.debug("Failed to copy file %s to %s, %s" % (
                        fname, dest_path, str(why)))
                    return False
            else:
                logger.debug('Ignoring unwanted file: %s' % fname)
    return True
def bookRename(bookid):
    """Rename a book's folder and files to match the configured patterns.

    Moves the book's directory to the EBOOK_DEST_FOLDER layout and renames
    the book files (plus matching .jpg/.opf, but never cover.jpg or
    metadata.opf) to the EBOOK_DEST_FILE layout.  Calibre-managed
    directories (name ending in "(1234)") are left untouched.

    Returns the path of the preferred book file after any renaming, or ''
    if the bookid is unknown or has no file recorded.
    """

    def substitute(pattern, seriesinfo):
        # Expand $Author/$Title/$Series/$SerName/$SerNum/$$ placeholders
        # and collapse runs of whitespace.
        result = pattern.replace(
            '$Author', exists['AuthorName']).replace(
            '$Title', exists['BookName']).replace(
            '$Series', seriesinfo['Full']).replace(
            '$SerName', seriesinfo['Name']).replace(
            '$SerNum', seriesinfo['Num']).replace(
            '$$', ' ')
        return ' '.join(result.split()).strip()

    def deslash(name):
        # Replace all '/' not surrounded by whitespace with '_' as '/' is a
        # directory separator.  Fix: the original read name[slash + 1]
        # without a bounds check and raised IndexError when the name ended
        # in '/'; a trailing '/' is now treated like any other bare slash.
        slash = name.find('/')
        while slash > 0:
            if name[slash - 1] != ' ':
                if slash + 1 >= len(name) or name[slash + 1] != ' ':
                    name = name[:slash] + '_' + name[slash + 1:]
            slash = name.find('/', slash + 1)
        return name

    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,BookFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid,))
    if not exists:
        logger.debug("Invalid bookid in bookRename %s" % bookid)
        return ''
    f = exists['BookFile']
    if not f:
        # Fix: the original used "No filename for %s in BookRename %s" % bookid,
        # a two-placeholder format with a single non-tuple argument, which
        # raised TypeError instead of logging.
        logger.debug("No filename for %s in bookRename" % bookid)
        return ''
    r = os.path.dirname(f)
    # a directory name ending in "(1234)" is calibre's numbering scheme;
    # calibre owns the naming there so we must not rename anything
    try:
        # noinspection PyTypeChecker
        calibreid = r.rsplit('(', 1)[1].split(')')[0]
        if not calibreid.isdigit():
            calibreid = ''
    except IndexError:
        calibreid = ''
    if calibreid:
        msg = '[%s] looks like a calibre directory: not renaming book' % os.path.basename(r)
        logger.debug(msg)
        return f
    seriesinfo = seriesInfo(bookid)
    dest_path = substitute(lazylibrarian.CONFIG['EBOOK_DEST_FOLDER'], seriesinfo)
    dest_path = replace_all(dest_path, __dic__)
    dest_dir = lazylibrarian.DIRECTORY('eBook')
    dest_path = os.path.join(dest_dir, dest_path)
    if r != dest_path:
        try:
            dest_path = safe_move(r, dest_path)
            r = dest_path
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))
    book_basename, prefextn = os.path.splitext(os.path.basename(f))
    # (the original called seriesInfo(bookid) a second time here; the result
    # is identical, so the redundant lookup was removed)
    new_basename = substitute(lazylibrarian.CONFIG['EBOOK_DEST_FILE'], seriesinfo)
    # replace all '/' not surrounded by whitespace with '_' as '/' is a directory separator
    new_basename = deslash(new_basename)
    if ' / ' in new_basename:  # used as a separator in goodreads omnibus
        logger.warn("bookRename [%s] looks like an omnibus? Not renaming %s" %
                    (new_basename, book_basename))
        new_basename = book_basename
    if book_basename != new_basename:
        # only rename bookname.type, bookname.jpg, bookname.opf, not cover.jpg or metadata.opf
        for fname in os.listdir(makeBytestr(r)):
            fname = makeUnicode(fname)
            extn = ''
            if is_valid_booktype(fname, booktype='ebook'):
                extn = os.path.splitext(fname)[1]
            elif fname.endswith('.opf') and not fname == 'metadata.opf':
                extn = '.opf'
            elif fname.endswith('.jpg') and not fname == 'cover.jpg':
                extn = '.jpg'
            if extn:
                ofname = os.path.join(r, fname)
                nfname = os.path.join(r, new_basename + extn)
                try:
                    nfname = safe_move(ofname, nfname)
                    logger.debug("bookRename %s to %s" % (ofname, nfname))
                    if ofname == exists['BookFile']:
                        # we renamed the preferred filetype, so return the new name
                        f = nfname
                except Exception as e:
                    logger.error('Unable to rename [%s] to [%s] %s %s' %
                                 (ofname, nfname, type(e).__name__, str(e)))
    return f
def LibraryScan(dir=None):
    """Scan an ebook directory tree and reconcile it with the database.

    Identifies books by (in order of preference) a sibling .opf file,
    embedded epub/mobi metadata, or a filename match against the
    EBOOK_DEST_FILE pattern, then marks matched books as "Open" and
    records their file path.  With no argument, scans DOWNLOAD_DIR.
    Note: the parameter name shadows the builtin dir().
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR
    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return
    myDB = database.DBConnection()
    # scratch table for API/cache hit statistics gathered elsewhere during the scan
    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')
    new_authors = []
    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
    new_book_count = 0
    file_count = 0
    if lazylibrarian.FULL_SCAN:
        # full scan: first mark any "Open" books whose file has vanished from disk
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']
            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action(
                    'update books set Status="%s" where BookID="%s"' %
                    (status, bookID))
                myDB.action(
                    'update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn(
                    'Book %s - %s updated as not found on disk' %
                    (bookAuthor, bookName))
                # (an obsolete commented-out per-filetype existence check that
                #  rebuilt dest_path/global_name from EBOOK_DEST_* was removed here)
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)
    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching: escape every character, then swap the
    # $Author/$Title placeholders for named capture groups
    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    # NOTE(review): the extension alternatives end up inside a character
    # class '[...]', which matches single characters, not whole extensions —
    # confirm this is the intended (historical) behaviour
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)
    # r = current root dir, d = subdirectories (pruned in place), f = files
    for r, d, f in os.walk(dir):
        for directory in d[:]:
            # skip hidden directories
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same
            # subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # If metadata.opf exists, use that
                # else if epub or mobi, read metadata from the book
                # else have to try pattern match for author/title and look up isbn/lang from LT or GR late
                match = 0
                extn = ""
                if '.' in files:
                    words = files.split('.')
                    extn = words[len(words) - 1]
                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s" %
                        (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'),
                         subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))
                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        else:
                            language = ""
                        if 'identifier' in res:
                            isbn = res['identifier']
                        else:
                            isbn = ""
                        match = 1
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)
                    if not match:
                        # it's a book, but no external metadata found
                        # if it's an epub or a mobi we can try to read metadata
                        # from it
                        if (extn == "epub") or (extn == "mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                            try:
                                res = get_book_info(book_filename)
                            except:
                                res = {}
                            if 'title' in res and 'creator' in res:  # this is the minimum we need
                                book = res['title']
                                author = res['creator']
                                if 'language' in res:
                                    language = res['language']
                                else:
                                    language = ""
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                else:
                                    isbn = ""
                                logger.debug("book meta [%s] [%s] [%s] [%s]" %
                                             (isbn, language, author, book))
                                match = 1
                            else:
                                logger.debug("Book meta incomplete in %s" % book_filename)
                    if not match:
                        # last resort: match the filename against the naming pattern
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)
                    if match:
                        processed_subdirectories.append(
                            subdirectory)  # flag that we found a book in this subdirectory
                        #
                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        #
                        if not language:
                            language = "Unknown"
                        if not formatter.is_valid_isbn(isbn):
                            isbn = ""
                        if isbn != "" and language != "Unknown":
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" %
                                (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            # NOTE(review): 'match' is reused here, clobbering the
                            # book-match flag/regex match object set above
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' %
                                (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' %
                                    (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        # NOTE(review): assumes author has at least 2 characters;
                        # a single-character name would raise IndexError here
                        if author[1] == ' ':
                            # "J R R Tolkien" style -> dotted initials
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')
                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones
                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" % author)
                                continue
                            # only try to add if GR data matches found author data
                            # not sure what this is for, never seems to fail??
                            if author_gr:
                                authorname = author_gr['authorname']
                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                match_fuzz = fuzz.ratio(match_auth, match_name)
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" %
                                        (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" %
                                        (match_auth, match_name))
                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a
                                    # different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                                    if not check_exist_author:
                                        logger.debug(
                                            "Adding new author [%s]" % author)
                                        if author not in new_authors:
                                            new_authors.append(author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                                        except:
                                            continue
                        # check author exists in db, either newly loaded or already
                        # there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" % author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)
                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)
                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' % bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book
                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                    book_filename = os.path.join(
                                        r, files).encode(
                                        lazylibrarian.SYS_ENCODING)
                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open
                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' %
                                        (book_filename, bookid))
                                    new_book_count += 1
    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    if new_book_count:
        # dump the statistics collected in the scratch table during the scan
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
        logger.debug("ISBN Language cache holds %s entries" % cachesize['count(*)'])
        # 'stats' is reused here as a simple count of unknown-language books
        stats = len(
            myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
        if stats:
            logger.warn(
                "There are %s books in your library with unknown language" % stats)
    # refresh per-author book counts for every author we touched
    logger.debug('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(
            myDB.select('select BookName from Books where status="%s" and AuthorName="%s"' %
                        ('Open', auth)))
        myDB.action(
            'UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks, auth))
        totalbooks = len(
            myDB.select('select BookName from Books where status!="%s" and AuthorName="%s"' %
                        ('Ignored', auth)))
        myDB.action(
            'UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks, auth))
    logger.info('Library scan complete')
def processDir(reset=False):
    """Post-process the download directory.

    Matches items marked "Snatched" in the wanted table against the
    downloaded files/folders (fuzzy name match), moves each best match to
    its library destination via processDestination(), updates the
    books/magazines/issues tables, then sweeps for any leftover "LL.(id)"
    folders.  With reset=True the processDir job is rescheduled at the end.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"
    # fall back to the current working directory if no usable download dir
    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(
            lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR
    logger.debug(' Checking [%s] for files to post process' % processpath)
    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return
    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')
    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return
    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return
    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        # collect candidate download entries for this snatched item by fuzzy match
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # not a directory, handle single file downloads here. Book/mag file in download root.
                        # move the file into it's own subdirectory so we don't move/delete things that aren't ours
                        if is_valid_booktype(fname, booktype="book") \
                                or is_valid_booktype(fname, booktype="mag"):
                            fname = os.path.splitext(fname)[0]
                            dirname = os.path.join(processpath, fname)
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug(
                                        'Failed to create directory %s, %s' % (dirname, why.strerror))
                            if os.path.exists(dirname):
                                # move the book and any related files too
                                # ie other book formats, or opf, jpg with same title
                                # can't move metadata.opf or cover.jpg or similar
                                # as can't be sure they are ours
                                # not sure if we need a new listdir here, or whether we can use the old one
                                list_dir = os.listdir(processpath)
                                for ourfile in list_dir:
                                    if ourfile.startswith(fname):
                                        if is_valid_booktype(ourfile, booktype="book") \
                                                or is_valid_booktype(ourfile, booktype="mag") \
                                                or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                            try:
                                                shutil.move(
                                                    os.path.join(processpath, ourfile),
                                                    os.path.join(dirname, ourfile))
                                            except Exception as why:
                                                logger.debug(
                                                    "Failed to move file %s to %s, %s" %
                                                    (ourfile, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)
        if matches:
            # process only the best-scoring candidate folder
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.debug(u'Best match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))
            data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID'])
            if data:
                logger.debug(u'Processing book %s' % book['BookID'])
                authorname = data['AuthorName']
                bookname = data['BookName']
                if 'windows' in platform.system().lower(
                ) and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                        '/', '\\')
                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                dic = {
                    '<': '',
                    '>': '',
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': '',
                    ':': '',
                    ';': '',
                    '\'': ''
                }
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                    lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                if data:
                    logger.debug(u'Processing magazine %s' % book['BookID'])
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[
                        'IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                    dic = {
                        '<': '',
                        '>': '',
                        '...': '',
                        ' & ': ' ',
                        ' = ': ' ',
                        '?': '',
                        '$': 's',
                        ' + ': ' ',
                        '"': '',
                        ',': '',
                        '*': '',
                        ':': '',
                        ';': '',
                        '\'': ''
                    }
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(
                            lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(
                            lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace(
                        '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug(
                        "Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
            continue
        # NOTE(review): the processDestination() definition visible in this
        # chunk takes five parameters but this call passes a sixth
        # (book['NZBmode']); an older six-parameter variant also exists in
        # this file — confirm which definition is actually in effect
        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, book['NZBmode'])
        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {
                "Status": "Processed",
                "NZBDate": now()
            }  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug(
                        'Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(
                            book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(
                            book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                else:
                    newValueDict = {
                        "IssueDate": book['AuxInfo'],
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {
                    "Title": book['BookID'],
                    "IssueDate": book['AuxInfo']
                }
                newValueDict = {
                    "IssueAcquired": today(),
                    "IssueFile": dest_file,
                    "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                }
                myDB.upsert("issues", newValueDict, controlValueDict)
                # create a thumbnail cover for the new issue
                create_cover(dest_file)
            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action(
                    'UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])
            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except Exception as e:
                logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))
    downloads = os.listdir(
        processpath)  # check in case we processed/deleted some above
    # sweep for any remaining "LL.(bookid)" tagged folders and import them
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)
            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)
            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1
    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))
    if reset:
        scheduleJob(action='Restart', target='processDir')