def setTorrentPath(result):
    """Ask Deluge to move the torrent to the download directory on completion.

    Two JSON-RPC calls are posted to the Deluge web UI: the first enables
    "move completed" for ``result['hash']``, the second sets the target path
    (the directory is created first if it does not exist).

    :param result: mapping holding the torrent ``'hash'``
    :return: True if no download directory is configured, otherwise True only
             when the reply to the second call carries a falsy ``'error'``
    """
    logger.debug('Deluge: Setting download path')
    if not any(delugeweb_auth):
        _get_auth()

    # No download directory configured: nothing to tell Deluge
    if not lazylibrarian.DIRECTORY('Download'):
        return True

    def _rpc(method, params, request_id):
        # encode one JSON-RPC request and post it to the Deluge web UI
        payload = json.dumps({"method": method, "params": params, "id": request_id})
        payload = payload.encode(lazylibrarian.SYS_ENCODING)
        return requests.post(delugeweb_url, data=payload, cookies=delugeweb_auth, headers=headers)

    _rpc("core.set_torrent_move_completed", [result['hash'], True], 7)

    move_to = lazylibrarian.DIRECTORY('Download')
    if not os.path.exists(move_to):
        logger.debug("Deluge: %s directory doesn't exist, let's create it" % move_to)
        os.makedirs(move_to)
        setperm(move_to)

    reply = _rpc("core.set_torrent_move_completed_path", [result['hash'], move_to], 8)
    return not json.loads(reply.text)['error']
def addTorrent(link, directory=None):
    """Send a torrent link to Transmission via the ``torrent-add`` method.

    :param link: torrent url/filename passed as ``filename``
    :param directory: download directory; defaults to the configured Download directory
    :return: the torrent id reported by Transmission (for a new or duplicate
             torrent), or False if there was no response, the result was not
             'success', or no id was reported
    """
    if directory is None:
        directory = lazylibrarian.DIRECTORY('Download')

    response = torrentAction('torrent-add',
                             {'filename': link, 'download-dir': directory})  # type: dict
    if not response:
        return False

    if response['result'] != 'success':
        logger.debug('Transmission returned status %s' % response['result'])
        return False

    # a new torrent takes priority over a duplicate when both keys are present
    reply_args = response['arguments']
    retid = False
    for key in ('torrent-added', 'torrent-duplicate'):
        if key in reply_args:
            retid = reply_args[key]['id']
            break

    logger.debug("Torrent sent to Transmission successfully")
    return retid
def addTorrent(link, directory=None):
    """Queue ``link`` in Transmission and return the id it assigns.

    The link is always passed as ``filename``; sending the base64 metainfo of
    a local .torrent file is not implemented here.

    :param link: torrent url/filename
    :param directory: target directory, or None to use the configured Download directory
    :return: torrent id on success (False if Transmission reported neither an
             added nor a duplicate torrent), False on any failure
    """
    download_dir = lazylibrarian.DIRECTORY('Download') if directory is None else directory
    request_args = {'filename': link, 'download-dir': download_dir}

    reply = torrentAction('torrent-add', request_args)
    if not reply:
        return False

    if reply['result'] == 'success':
        details = reply['arguments']
        if 'torrent-added' in details:
            torrent_id = details['torrent-added']['id']
        elif 'torrent-duplicate' in details:
            torrent_id = details['torrent-duplicate']['id']
        else:
            torrent_id = False
        logger.debug(u"Torrent sent to Transmission successfully")
        return torrent_id

    logger.debug('Transmission returned status %s' % reply['result'])
    return False
def addTorrent(link, hashid):
    """Add a torrent to qBittorrent and confirm that it was accepted.

    The configured label is sent as ``label`` when the client api is between
    7 and 9, and as ``category`` when it is 10 or above.

    :param link: torrent url passed as ``urls``
    :param hashid: torrent hash, looked up in the torrent list when the add
                   command reports failure
    :return: True if the command succeeded or the hash shows up in the
             torrent list afterwards, otherwise False
    """
    logger.debug('addTorrent(%s)' % link)
    qbclient = qbittorrentclient()
    args = {'savepath': lazylibrarian.DIRECTORY('Download')}

    label = lazylibrarian.CONFIG['QBITTORRENT_LABEL']
    if label:
        if 6 < qbclient.api < 10:
            args['label'] = label
        elif qbclient.api >= 10:
            args['category'] = label
    logger.debug('addTorrent args(%s)' % args)
    args['urls'] = link

    # noinspection PyProtectedMember
    accepted = qbclient._command('command/download', args, 'multipart/form-data')
    if accepted:
        return True

    # the client sometimes answers "Fails." although the torrent was added,
    # so give it a moment and look for the hash in the torrent list
    logger.debug("qBittorrent: addTorrent thinks it failed")
    time.sleep(2)
    # noinspection PyProtectedMember
    torrents = qbclient._get_list()
    found = hashid.upper() in str(torrents).upper()
    if found:
        logger.debug("qBittorrent: hashid found in torrent list, assume success")
    else:
        logger.debug("qBittorrent: hashid not found in torrent list, addTorrent failed")
    return found
def addTorrent(link):
    """Submit a torrent url to qBittorrent's ``command/download`` endpoint.

    :param link: torrent url passed as ``urls``
    :return: whatever the client's ``_command`` call returns
    """
    logger.debug('addTorrent(%s)' % link)
    client = qbittorrentclient()

    request = dict(urls=link, savepath=lazylibrarian.DIRECTORY('Download'))
    label = lazylibrarian.CONFIG['QBITTORRENT_LABEL']
    if label:
        request['label'] = label

    # noinspection PyProtectedMember
    return client._command('command/download', request, 'application/x-www-form-urlencoded')
def DirectDownloadMethod(bookid=None, tor_title=None, tor_url=None, bookname=None):
    """Fetch a book straight from ``tor_url`` and store it in the download directory.

    The payload (gunzipped when the server reports a gzip Content-Encoding) is
    written to ``<Download dir>/<tor_title>/<bookname>``, where the last space
    in ``bookname`` is turned into the extension dot.  The ``books`` and
    ``wanted`` tables are then updated to reflect success or failure.

    :param bookid: BookID of the ``books`` row to mark as "Snatched"
    :param tor_title: download title, used as the destination folder name
    :param tor_url: url to fetch; also the ``NZBurl`` key of the ``wanted`` row
    :param bookname: file name whose last word is the extension
    :return: True if the file was written and the database updated, else False
    """
    import errno  # local import: only needed for the EEXIST check below

    myDB = database.DBConnection()
    downloadID = False
    Source = "DIRECT"
    full_url = tor_url  # keep the url as stored in "wanted" table

    request = urllib2.Request(u'%s' % tor_url)
    if lazylibrarian.CONFIG['PROXY_HOST']:
        request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], lazylibrarian.CONFIG['PROXY_TYPE'])
    request.add_header('Accept-encoding', 'gzip')
    request.add_header('User-Agent', USER_AGENT)

    try:
        response = urllib2.urlopen(request, timeout=90)
        if response.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read())
            f = gzip.GzipFile(fileobj=buf)
            fdata = f.read()
        else:
            fdata = response.read()
        bookname = '.'.join(bookname.rsplit(' ', 1))  # last word is the extension
        logger.debug("File download got %s bytes for %s/%s" % (len(fdata), tor_title, bookname))
        destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), tor_title)
        try:
            os.makedirs(destdir)
            setperm(destdir)
        except OSError as e:
            # "directory already exists" is ok. Compare the errno value (not the
            # message, which is localised) and use != rather than an identity test.
            if e.errno != errno.EEXIST:
                logger.debug("Error creating directory %s, %s" % (destdir, e.strerror))

        destfile = os.path.join(destdir, bookname)
        try:
            with open(destfile, 'wb') as bookfile:
                bookfile.write(fdata)
            setperm(destfile)
            downloadID = True
        except Exception as e:
            logger.debug("Error writing book to %s, %s" % (destfile, str(e)))

    except socket.timeout:
        logger.warn('Timeout fetching file from url: %s' % tor_url)
        return False
    except urllib2.URLError as e:
        logger.warn('Error fetching file from url: %s, %s' % (tor_url, e.reason))
        return False

    # Values are bound as parameters: the url and bookid come from outside and
    # must not be interpolated into the SQL text.
    if downloadID:
        logger.debug(u'File %s has been downloaded from %s' % (tor_title, tor_url))
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID=?', (bookid,))
        # str() keeps the stored DownloadID as the text "True", as before
        myDB.action('UPDATE wanted SET status = "Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, str(downloadID), full_url))
        return True
    else:
        logger.error(u'Failed to download file @ <a href="%s">%s</a>' % (full_url, tor_url))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl=?', (full_url,))
        return False
def DirectDownloadMethod(bookid=None, tor_title=None, tor_url=None, bookname=None, library='eBook'):
    """Download a file over http(s) into ``<Download dir>/<tor_title>/``.

    The response body is saved under ``bookname`` (its last space replaced by
    the extension dot) and the ``books`` / ``wanted`` tables are updated.

    :param bookid: BookID of the ``books`` row to mark as "Snatched"
    :param tor_title: download title, used as the destination folder name
    :param tor_url: url to fetch; also the ``NZBurl`` key in ``wanted``
    :param bookname: file name whose last word is the extension
    :param library: 'eBook' updates ``status``, 'AudioBook' updates ``audiostatus``
    :return: True when the file was written, False otherwise
    """
    myDB = database.DBConnection()
    Source = "DIRECT"

    logger.debug("Starting Direct Download for [%s]" % bookname)
    proxies = proxyList()
    headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
    try:
        r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
    except requests.exceptions.Timeout:
        logger.warn('Timeout fetching file from url: %s' % tor_url)
        return False
    except Exception as e:
        # prefer the exception's own "reason" when it provides one
        detail = e.reason if hasattr(e, 'reason') else str(e)
        logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, detail))
        return False

    bookname = '.'.join(bookname.rsplit(' ', 1))  # last word is the extension
    logger.debug("File download got %s bytes for %s/%s" % (len(r.content), tor_title, bookname))

    destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), tor_title)
    try:
        os.makedirs(destdir)
        setperm(destdir)
    except OSError as e:
        # only worth reporting if the directory really is not there
        if not os.path.isdir(destdir):
            logger.debug("Error creating directory %s, %s" % (destdir, e))

    destfile = os.path.join(destdir, bookname)
    downloadID = False
    try:
        with open(destfile, 'wb') as bookfile:
            bookfile.write(r.content)
        setperm(destfile)
        downloadID = True
    except Exception as e:
        logger.error("%s writing book to %s, %s" % (type(e).__name__, destfile, e))

    if not downloadID:
        logger.error('Failed to download file @ <a href="%s">%s</a>' % (tor_url, tor_url))
        myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (tor_url,))
        return False

    logger.debug('File %s has been downloaded from %s' % (tor_title, tor_url))
    if library == 'eBook':
        myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
    elif library == 'AudioBook':
        myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid,))
    myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                (Source, downloadID, tor_url))
    return True
def bookRename(bookid):
    """Move/rename a book's folder and files to match the configured patterns.

    The folder is moved to ``EBOOK_DEST_FOLDER`` (below the eBook directory)
    and the book file, plus any matching .opf/.jpg, is renamed to
    ``EBOOK_DEST_FILE``.  Folders that look like calibre folders (name ending
    in a numeric id in brackets) and omnibus titles (containing ' / ') are
    left alone.

    :param bookid: BookID to look up in the ``books`` table
    :return: '' if the book or its file name is unknown, otherwise the
             (possibly new) path of the preferred book file
    """
    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,BookFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid,))
    if not exists:
        logger.debug("Invalid bookid in bookRename %s" % bookid)
        return ''

    f = exists['BookFile']
    if not f:
        # previously had two %s placeholders for one argument -> TypeError
        logger.debug("No filename for %s in BookRename" % bookid)
        return ''

    r = os.path.dirname(f)
    oldpath = r  # folder the database knows about, before any move below
    try:
        # noinspection PyTypeChecker
        calibreid = r.rsplit('(', 1)[1].split(')')[0]
        if not calibreid.isdigit():
            calibreid = ''
    except IndexError:
        calibreid = ''

    if calibreid:
        msg = '[%s] looks like a calibre directory: not renaming book' % os.path.basename(r)
        logger.debug(msg)
        return f

    seriesinfo = seriesInfo(bookid)

    def _expand(template):
        # substitute the $placeholders and collapse runs of whitespace
        filled = template.replace(
            '$Author', exists['AuthorName']).replace(
            '$Title', exists['BookName']).replace(
            '$Series', seriesinfo['Full']).replace(
            '$SerName', seriesinfo['Name']).replace(
            '$SerNum', seriesinfo['Num']).replace(
            '$$', ' ')
        return ' '.join(filled.split()).strip()

    dest_path = _expand(lazylibrarian.CONFIG['EBOOK_DEST_FOLDER'])
    dest_path = replace_all(dest_path, __dic__)
    dest_dir = lazylibrarian.DIRECTORY('eBook')
    dest_path = os.path.join(dest_dir, dest_path)

    if r != dest_path:
        try:
            dest_path = safe_move(r, dest_path)
            r = dest_path
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))

    book_basename = os.path.splitext(os.path.basename(f))[0]
    new_basename = _expand(lazylibrarian.CONFIG['EBOOK_DEST_FILE'])

    # replace all '/' not surrounded by whitespace with '_' as '/' is a directory separator
    slash = new_basename.find('/')
    while slash > 0:
        before = new_basename[slash - 1]
        # slicing yields '' for a trailing '/', where indexing raised IndexError
        after = new_basename[slash + 1:slash + 2]
        if before != ' ' and after != ' ':
            new_basename = new_basename[:slash] + '_' + new_basename[slash + 1:]
        slash = new_basename.find('/', slash + 1)

    if ' / ' in new_basename:  # used as a separator in goodreads omnibus
        logger.warn("bookRename [%s] looks like an omnibus? Not renaming %s" % (new_basename, book_basename))
        new_basename = book_basename

    if book_basename != new_basename:
        # only rename bookname.type, bookname.jpg, bookname.opf, not cover.jpg or metadata.opf
        for fname in os.listdir(makeBytestr(r)):
            fname = makeUnicode(fname)
            extn = ''
            if is_valid_booktype(fname, booktype='ebook'):
                extn = os.path.splitext(fname)[1]
            elif fname.endswith('.opf') and not fname == 'metadata.opf':
                extn = '.opf'
            elif fname.endswith('.jpg') and not fname == 'cover.jpg':
                extn = '.jpg'
            if extn:
                ofname = os.path.join(r, fname)
                nfname = os.path.join(r, new_basename + extn)
                try:
                    nfname = safe_move(ofname, nfname)
                    logger.debug("bookRename %s to %s" % (ofname, nfname))
                    # BookFile still points into the old folder, so compare
                    # against the old location even if the folder was moved
                    if os.path.join(oldpath, fname) == exists['BookFile']:
                        # we renamed the preferred filetype, return new name
                        f = nfname
                except Exception as e:
                    logger.error('Unable to rename [%s] to [%s] %s %s' %
                                 (ofname, nfname, type(e).__name__, str(e)))
    return f
def _calibredb_loggable(params):
    """Return ``params`` as a string with any value following '--password' masked."""
    shown = list(params)
    for idx in range(1, len(shown)):
        if shown[idx - 1] == '--password':
            shown[idx] = '********'
    return str(shown)


def calibredb(cmd=None, prelib=None, postlib=None):
    """Run the configured calibredb executable and return its result.

    calibre-server needs to be started with --enable-auth and needs
    user/password to add/remove books; only basic features are available
    without auth.  calibre_server should look like http://address:port/#library
    (the default library is used if there is no #library in the url), or
    calibredb can talk to the database file as long as there is no running
    calibre.

    If a call against a server url fails and the output does not start with
    'Forbidden', the command is retried once against the eBook directory.

    :param cmd: calibredb sub-command (or "--version")
    :param prelib: extra arguments placed before ``--with-library``
    :param postlib: extra arguments placed after ``--with-library``
    :return: ``(res, err, rc)`` when rc is non-zero, otherwise
             ``(res, dest_url, 0)`` where dest_url is the library that was used
    """
    if not lazylibrarian.CONFIG['IMP_CALIBREDB']:
        return "No calibredb set in config", '', 1

    params = [lazylibrarian.CONFIG['IMP_CALIBREDB'], cmd]
    if lazylibrarian.CONFIG['CALIBRE_USE_SERVER']:
        dest_url = lazylibrarian.CONFIG['CALIBRE_SERVER']
        if lazylibrarian.CONFIG['CALIBRE_USER'] and lazylibrarian.CONFIG['CALIBRE_PASS']:
            params.extend(['--username', lazylibrarian.CONFIG['CALIBRE_USER'],
                           '--password', lazylibrarian.CONFIG['CALIBRE_PASS']])
    else:
        dest_url = lazylibrarian.DIRECTORY('eBook')
    if prelib:
        params.extend(prelib)
    if cmd != "--version":
        params.extend(['--with-library', '%s' % dest_url])
    if postlib:
        params.extend(postlib)
    # never write the calibre password to the log
    logger.debug(_calibredb_loggable(params))
    res = ''
    try:
        p = Popen(params, stdout=PIPE, stderr=PIPE)
        res, err = p.communicate()
        rc = p.returncode
        if rc:
            if 'Errno 111' in err:
                logger.debug("calibredb returned %s: Connection refused" % rc)
            else:
                logger.debug("calibredb returned %s: res[%s] err[%s]" % (rc, res, err))
    except Exception as e:
        err = "calibredb exception: %s %s" % (type(e).__name__, str(e))
        logger.debug(err)
        rc = 1

    if rc and dest_url.startswith('http') and not res.startswith('Forbidden'):
        # if not forbidden (auth issue), might be no server running, retry using file
        params = [lazylibrarian.CONFIG['IMP_CALIBREDB'], cmd]
        if prelib:
            params.extend(prelib)
        dest_url = lazylibrarian.DIRECTORY('eBook')
        params.extend(['--with-library', dest_url])
        if postlib:
            params.extend(postlib)
        logger.debug(_calibredb_loggable(params))
        try:
            q = Popen(params, stdout=PIPE, stderr=PIPE)
            res, err = q.communicate()
            rc = q.returncode
            if rc:
                logger.debug("calibredb retry returned %s: res[%s] err[%s]" % (rc, res, err))
        except Exception as e:
            err = "calibredb retry exception: %s %s" % (type(e).__name__, str(e))
            logger.debug(err)
            rc = 1
    if rc:
        return res, err, rc
    else:
        return res, dest_url, 0
def DirectDownloadMethod(bookid=None, dl_title=None, dl_url=None, library='eBook'):
    """Download a file over http(s) and store it below the download directory.

    Responses with a non-2xx status or fewer than 1000 bytes are rejected.
    The extension is taken from the last word of ``dl_title`` when that is a
    configured ebook type, otherwise from ``magic`` (if available).  The file
    is written to ``<Download dir>/<basename>/<basename><extension>`` and the
    ``books`` / ``wanted`` tables are updated.

    :param bookid: BookID of the ``books`` row to mark as "Snatched"
    :param dl_title: title of the download; its last word may be the extension
    :param dl_url: url to fetch; also the ``NZBurl`` key in ``wanted``
    :param library: 'eBook' updates ``status``, 'AudioBook' updates ``audiostatus``
    :return: True when the file was written, False otherwise
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = "DIRECT"

    logger.debug("Starting Direct Download for [%s]" % dl_title)
    proxies = proxyList()
    headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
    try:
        r = requests.get(dl_url, headers=headers, timeout=90, proxies=proxies)
    except requests.exceptions.Timeout:
        logger.warn('Timeout fetching file from url: %s' % dl_url)
        return False
    except Exception as e:
        if hasattr(e, 'reason'):
            logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, dl_url, e.reason))
        else:
            logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, dl_url, str(e)))
        return False

    if not str(r.status_code).startswith('2'):
        logger.debug("Got a %s response for %s" % (r.status_code, dl_url))
    elif len(r.content) < 1000:
        logger.debug("Only got %s bytes for %s, rejecting" % (len(r.content), dl_title))
    else:
        # extn always carries its leading dot (or is empty) so that
        # basename + extn is a proper file name in every branch
        extn = ''
        basename = dl_title
        stem = ''
        lastword = ''
        if ' ' in dl_title:
            # last word is often the extension - but not always...
            stem, lastword = dl_title.rsplit(' ', 1)

        if lastword and lastword in getList(lazylibrarian.CONFIG['EBOOK_TYPE']):
            basename = stem
            extn = '.' + lastword
            dl_title = basename + extn
        elif magic:
            mtype = magic.from_buffer(r.content)
            if 'EPUB' in mtype:
                extn = '.epub'
            elif 'Mobipocket' in mtype:  # also true for azw and azw3, does it matter?
                extn = '.mobi'
            elif 'PDF' in mtype:
                extn = '.pdf'
            else:
                logger.debug("magic reports %s" % mtype)
        else:
            logger.warn("Don't know the filetype for %s" % dl_title)

        logger.debug("File download got %s bytes for %s" % (len(r.content), dl_title))
        destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), basename)
        if not os.path.isdir(destdir):
            _ = mymakedirs(destdir)

        # use the md5 from the url as download id when present, else hash the url
        try:
            hashid = dl_url.split("md5=")[1].split("&")[0]
        except IndexError:
            hashid = sha1(encode(dl_url)).hexdigest()

        destfile = os.path.join(destdir, basename + extn)
        try:
            with open(destfile, 'wb') as bookfile:
                bookfile.write(r.content)
            setperm(destfile)
            downloadID = hashid
        except Exception as e:
            logger.error("%s writing book to %s, %s" % (type(e).__name__, destfile, e))

    if downloadID:
        logger.debug('File %s has been downloaded from %s' % (dl_title, dl_url))
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid, ))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid, ))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, dl_url))
        return True
    else:
        logger.error('Failed to download file @ <a href="%s">%s</a>' % (dl_url, dl_url))
        myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (dl_url, ))
        return False
def bookRename(bookid):
    """Move/rename a book's folder and files to the names given by ``nameVars``.

    Nothing is renamed when the folder looks like a calibre folder (name
    ending in a numeric id in brackets) and CALIBRE_RENAME is off, when
    ``multibook`` reports multiple books in the folder, or (for the file
    name only) when the new name contains ' / ' as used for omnibus titles.

    :param bookid: BookID to look up in the ``books`` table
    :return: '' if the book or its file name is unknown, otherwise the
             (possibly new) path of the preferred book file
    """
    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,BookFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid,))
    if not exists:
        logger.debug("Invalid bookid in bookRename %s" % bookid)
        return ''

    f = exists['BookFile']
    if not f:
        # previously had two %s placeholders for one argument -> TypeError
        logger.debug("No filename for %s in BookRename" % bookid)
        return ''

    r = os.path.dirname(f)
    if not lazylibrarian.CONFIG['CALIBRE_RENAME']:
        try:
            # noinspection PyTypeChecker
            calibreid = r.rsplit('(', 1)[1].split(')')[0]
            if not calibreid.isdigit():
                calibreid = ''
        except IndexError:
            calibreid = ''

        if calibreid:
            msg = '[%s] looks like a calibre directory: not renaming book' % os.path.basename(r)
            logger.debug(msg)
            return f

    reject = multibook(r)
    if reject:
        logger.debug("Not renaming %s, found multiple %s" % (f, reject))
        return f

    seriesinfo = nameVars(bookid)
    dest_path = seriesinfo['FolderName']
    dest_dir = lazylibrarian.DIRECTORY('eBook')
    dest_path = os.path.join(dest_dir, dest_path)
    dest_path = stripspaces(dest_path)
    oldpath = r

    if oldpath != dest_path:
        try:
            dest_path = safe_move(oldpath, dest_path)
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))

    book_basename = os.path.splitext(os.path.basename(f))[0]
    new_basename = seriesinfo['BookFile']

    if ' / ' in new_basename:  # used as a separator in goodreads omnibus
        logger.warn("bookRename [%s] looks like an omnibus? Not renaming %s" % (new_basename, book_basename))
        new_basename = book_basename

    if book_basename != new_basename:
        # only rename bookname.type, bookname.jpg, bookname.opf, not cover.jpg or metadata.opf
        for fname in os.listdir(makeBytestr(dest_path)):
            fname = makeUnicode(fname)
            extn = ''
            if is_valid_booktype(fname, booktype='ebook'):
                extn = os.path.splitext(fname)[1]
            elif fname.endswith('.opf') and not fname == 'metadata.opf':
                extn = '.opf'
            elif fname.endswith('.jpg') and not fname == 'cover.jpg':
                extn = '.jpg'
            if extn:
                ofname = os.path.join(dest_path, fname)
                nfname = os.path.join(dest_path, new_basename + extn)
                if ofname != nfname:
                    try:
                        nfname = safe_move(ofname, nfname)
                        logger.debug("bookRename %s to %s" % (ofname, nfname))
                        # BookFile still refers to the folder as it was before the move
                        oldname = os.path.join(oldpath, fname)
                        if oldname == exists['BookFile']:
                            # we renamed/moved the preferred file, return new name
                            f = nfname
                    except Exception as e:
                        logger.error('Unable to rename [%s] to [%s] %s %s' %
                                     (ofname, nfname, type(e).__name__, str(e)))
    return f
def magazineScan():
    """Scan the magazine folder and bring the magazines/issues tables in line with it.

    With FULL_SCAN set, issues whose file is no longer on disk are deleted
    first and magazines left without issues are removed.  Then every valid
    magazine file below the magazine folder is matched against patterns
    derived from MAG_DEST_FILE to obtain title and issue date, and the
    corresponding rows are inserted/updated.

    ``lazylibrarian.MAG_UPDATE`` is 1 while the scan runs and is reset to 0
    at the end, also when an exception is caught (which is logged, not raised).
    """
    lazylibrarian.MAG_UPDATE = 1
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER']
        # only the part of the folder pattern before the first $placeholder
        mag_path = mag_path.split('$')[0]
        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            # NOTE(review): mag_path is '' when the pattern starts with '$', in which
            # case mag_path[0] raises IndexError (caught by the outer handler) - confirm
            if mag_path[0] not in '._':
                mag_path = '_' + mag_path
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)
        if lazylibrarian.CONFIG['FULL_SCAN']:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']
                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile, ))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)
            mags = myDB.select('SELECT * from magazines')
            # now check the magazine titles and delete any with no issues
            for mag in mags:
                title = mag['Title']
                count = myDB.select(
                    'SELECT COUNT(Title) as counter FROM issues WHERE Title=?', (title, ))
                issues = count[0]['counter']
                if not issues:
                    logger.debug('Magazine %s deleted as no issues found' % title)
                    myDB.action('DELETE from magazines WHERE Title=?', (title, ))
        logger.info(' Checking [%s] for magazines' % mag_path)
        # backslash-escape every character of the file pattern so it is taken
        # literally by the regex (and survives re.VERBOSE)
        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        # join the configured magazine types with '|'
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # NOTE(review): '[' + booktypes + ']' is a regex character class, so it matches
        # one character out of the joined type names, not a whole extension - confirm intended
        # title_pattern captures both title and issue date from the file name
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        # date_pattern captures the issue date only; the title placeholder is dropped
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)
        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # first try to get title and date from the file name
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            issuedate = match.group("issuedate")
                            title = match.group("title")
                            match = True
                        else:
                            match = False
                    except Exception:
                        match = False
                    if not match:
                        # fall back to date only, taking the title from the folder name
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                            else:
                                logger.debug("Pattern match failed for [%s]" % fname)
                                continue
                        except Exception as e:
                            logger.debug("Invalid name format for [%s] %s %s" %
                                         (fname, type(e).__name__, str(e)))
                            continue
                    logger.debug("Found %s Issue %s" % (title, fname))
                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                    # the file's modification time is used as the acquired date
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(
                        datetime.date.fromtimestamp(mtime))
                    controlValueDict = {"Title": title}
                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired, IssueDate, MagazineAdded from magazines WHERE Title=?',
                        (title, ))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                    else:
                        # maglastacquired is only assigned here; it is only read in the
                        # matching "existing magazine" branch further down
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)
                    issuedate = str(issuedate).zfill(
                        4)  # for sorting issue numbers
                    # is this issue already in the database?
                    controlValueDict = {"Title": title, "IssueDate": issuedate}
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match(
                        'SELECT Title from issues WHERE Title=? and IssueDate=?',
                        (title, issuedate))
                    if not iss_entry:
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)
                        logger.debug("Adding issue %s %s" % (title, issuedate))
                    # NOTE(review): assumed to run for every issue found, not only for
                    # newly added ones - confirm the intended nesting
                    create_cover(issuefile)
                    lazylibrarian.postprocess.processMAGOPF(
                        issuefile, title, issuedate, issue_id)
                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(
                                issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)
        magcount = myDB.match("select count(*) from magazines")
        isscount = myDB.match("select count(*) from issues")
        logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                    (magcount['count(*)'], plural(magcount['count(*)']),
                     isscount['count(*)'], plural(isscount['count(*)'])))
        lazylibrarian.MAG_UPDATE = 0
    except Exception:
        # always clear the "scan running" flag, then report the traceback
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
def magazineScan(title=None):
    """Scan the magazine folder(s) and update the magazines/issues tables.

    :param title: when given, only the folder for this magazine title is
                  scanned and the FULL_SCAN clean-up of missing issues is skipped

    For each valid magazine file the title and issue date are taken from
    patterns derived from MAG_DEST_FILE, then verified/derived with
    ``lazylibrarian.searchmag.get_issue_date``.  With MAG_RENAME set, files
    (and their .jpg/.opf companions) are renamed to the configured pattern.

    ``lazylibrarian.MAG_UPDATE`` is 1 while the scan runs and is reset to 0
    at the end, also when an exception is caught (which is logged, not raised).
    """
    lazylibrarian.MAG_UPDATE = 1
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        # keep the requested title: "title" is reused as a loop variable below
        onetitle = title
        if onetitle:
            mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER'].replace(
                '$Title', onetitle)
        else:
            mag_path = os.path.dirname(lazylibrarian.CONFIG['MAG_DEST_FOLDER'])
        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)
        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']
                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile, ))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)
            # now check the magazine titles and delete any with no issues
            if lazylibrarian.CONFIG['MAG_DELFOLDER']:
                # NOTE(review): this groups rows of the issues table only, so a title
                # with no issues would presumably never be returned and "counter" can
                # never be 0 - confirm whether the delete below is reachable
                mags = myDB.select(
                    'SELECT Title,count(Title) as counter from issues group by Title'
                )
                for mag in mags:
                    title = mag['Title']
                    issues = mag['counter']
                    if not issues:
                        logger.debug('Magazine %s deleted as no issues found' % title)
                        myDB.action('DELETE from magazines WHERE Title=?', (title, ))
        logger.info(' Checking [%s] for magazines' % mag_path)
        # backslash-escape every character of the file pattern so it is taken
        # literally by the regex (and survives re.VERBOSE)
        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        # join the configured magazine types with '|'
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # NOTE(review): '[' + booktypes + ']' is a regex character class, so it matches
        # one character out of the joined type names, not a whole extension - confirm intended
        # title_pattern captures both title and issue date from the file name
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        # date_pattern captures the issue date only; the title placeholder is dropped
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)
        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # first try to get title and date from the file name
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            title = match.group("title")
                            issuedate = match.group("issuedate")
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Title pattern [%s][%s]" % (title, issuedate))
                            match = True
                        else:
                            logger.debug(
                                "Title pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False
                    if not match:
                        # fall back to date only, taking the title from the folder name
                        # noinspection PyBroadException
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                    logger.debug("Date pattern [%s][%s]" % (title, issuedate))
                                match = True
                            else:
                                logger.debug(
                                    "Date pattern match failed for [%s]" % fname)
                        except Exception:
                            match = False
                    if not match:
                        # neither pattern matched: title from the folder, date still unknown
                        title = os.path.basename(rootdir)
                        issuedate = ''
                    # separators are turned into spaces (brackets removed) before
                    # splitting the text into words for the date parser
                    dic = {
                        '.': ' ',
                        '-': ' ',
                        '/': ' ',
                        '+': ' ',
                        '_': ' ',
                        '(': '',
                        ')': '',
                        '[': ' ',
                        ']': ' ',
                        '#': '# '
                    }
                    if issuedate:
                        # check the date captured by the pattern
                        exploded = replace_all(issuedate, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(
                            exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Date regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''
                    if not issuedate:
                        # no usable date yet: try the whole file name
                        exploded = replace_all(fname, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(
                            exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("File regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''
                    if not issuedate:
                        logger.warn("Invalid name format for [%s]" % fname)
                        continue
                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                    # the file's modification time is used as the acquired date
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(
                        datetime.date.fromtimestamp(mtime))
                    if lazylibrarian.CONFIG['MAG_RENAME']:
                        filedate = issuedate
                        # all-digit dates are turned into a readable form by length:
                        # 8 digits -> issue+year or volume+issue, 12 -> year+volume+issue
                        if issuedate and issuedate.isdigit():
                            if len(issuedate) == 8:
                                if check_year(issuedate[:4]):
                                    filedate = 'Issue %d %s' % (int(
                                        issuedate[4:]), issuedate[:4])
                                else:
                                    filedate = 'Vol %d Iss %d' % (int(
                                        issuedate[:4]), int(issuedate[4:]))
                            elif len(issuedate) == 12:
                                filedate = 'Vol %d Iss %d %s' % (int(
                                    issuedate[4:8]), int(
                                        issuedate[8:]), issuedate[:4])
                            else:
                                filedate = str(issuedate).zfill(4)
                        extn = os.path.splitext(fname)[1]
                        newfname = lazylibrarian.CONFIG[
                            'MAG_DEST_FILE'].replace('$Title', title).replace(
                                '$IssueDate', filedate)
                        newfname = newfname + extn
                        if newfname and newfname != fname:
                            logger.debug("Rename %s -> %s" % (fname, newfname))
                            newissuefile = os.path.join(rootdir, newfname)
                            newissuefile = safe_move(issuefile, newissuefile)
                            # move the cover and opf that belong to the issue as well
                            if os.path.exists(issuefile.replace(extn, '.jpg')):
                                safe_move(issuefile.replace(extn, '.jpg'),
                                          newissuefile.replace(extn, '.jpg'))
                            if os.path.exists(issuefile.replace(extn, '.opf')):
                                safe_move(issuefile.replace(extn, '.opf'),
                                          newissuefile.replace(extn, '.opf'))
                            issuefile = newissuefile
                    logger.debug("Found %s Issue %s" % (title, issuedate))
                    controlValueDict = {"Title": title}
                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired,IssueDate,MagazineAdded,CoverPage from magazines WHERE Title=?',
                        (title, ))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None,
                            "CoverPage": 1
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                        maglastacquired = None
                        magcoverpage = 1
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)
                        magcoverpage = mag_entry['CoverPage']
                    issuedate = str(issuedate).zfill(
                        4)  # for sorting issue numbers
                    # is this issue already in the database?
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match(
                        'SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                        (title, issuedate))
                    new_entry = False
                    if not iss_entry or iss_entry['IssueFile'] != issuefile:
                        new_entry = True  # new entry or name changed
                        if not iss_entry:
                            logger.debug("Adding issue %s %s" % (title, issuedate))
                        else:
                            logger.debug("Updating issue %s %s" % (title, issuedate))
                        controlValueDict = {
                            "Title": title,
                            "IssueDate": issuedate
                        }
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)
                    # create (or touch) a .ll_ignore marker file in the issue's folder
                    ignorefile = os.path.join(os.path.dirname(issuefile), '.ll_ignore')
                    with open(ignorefile, 'a'):
                        os.utime(ignorefile, None)
                    # cover and opf are refreshed/overwritten only for new or renamed issues
                    createMagCover(issuefile, pagenum=magcoverpage, refresh=new_entry)
                    lazylibrarian.postprocess.processMAGOPF(
                        issuefile, title, issuedate, issue_id, overwrite=new_entry)
                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(
                                issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)
        # totals are only reported after a full scan of all titles
        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info(
                "Magazine scan complete, found %s magazine%s, %s issue%s" %
                (magcount['count(*)'], plural(magcount['count(*)']),
                 isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")
        lazylibrarian.MAG_UPDATE = 0
    except Exception:
        # always clear the "scan running" flag, then report the traceback
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
def calibredb(cmd=None, prelib=None, postlib=None):
    """Run the configured calibredb executable and report its outcome.

    calibre-server needs to be started with --enable-auth and needs
    user/password to add/remove books; only basic features are available
    without auth. CALIBRE_SERVER should look like http://address:port/#library
    (the default library is used if there is no #library in the url).
    Otherwise calibredb can talk to the database file directly, as long as
    there is no running calibre.

    :param cmd: calibredb sub-command, or "--version"
    :param prelib: optional list of arguments placed before --with-library
    :param postlib: optional list of arguments placed after --with-library
    :return: (res, err, rc) when rc is non-zero,
             (res, dest_url, 0) on success,
             ("No calibredb set in config", '', 1) when IMP_CALIBREDB is unset
    """
    if not lazylibrarian.CONFIG['IMP_CALIBREDB']:
        return "No calibredb set in config", '', 1

    params = [lazylibrarian.CONFIG['IMP_CALIBREDB'], cmd]
    if lazylibrarian.CONFIG['CALIBRE_USE_SERVER']:
        dest_url = lazylibrarian.CONFIG['CALIBRE_SERVER']
        if lazylibrarian.CONFIG['CALIBRE_USER'] and lazylibrarian.CONFIG['CALIBRE_PASS']:
            params.extend(['--username', lazylibrarian.CONFIG['CALIBRE_USER'],
                           '--password', lazylibrarian.CONFIG['CALIBRE_PASS']])
    else:
        dest_url = lazylibrarian.DIRECTORY('eBook')

    if prelib:
        params.extend(prelib)
    # "--version" is the only command that does not take a library
    if cmd != "--version":
        params.extend(['--with-library', '%s' % dest_url])
    if postlib:
        params.extend(postlib)

    # log the command line, but never write the calibre password to the log
    logged = []
    hide_next = False
    for arg in params:
        logged.append('********' if hide_next else arg)
        hide_next = arg == '--password' and not hide_next
    logger.debug(str(logged))

    res = ''
    err = ''
    try:
        p = Popen(params, stdout=PIPE, stderr=PIPE)
        res, err = p.communicate()
        rc = p.returncode
        logger.debug("calibredb rc %s" % rc)
        res = makeUnicode(res)
        err = makeUnicode(err)
        # strip linefeeds etc from calibre response (for the log only)
        wsp = re.escape(string.whitespace)
        nres = re.sub(r'[' + wsp + ']', ' ', res)
        nerr = re.sub(r'[' + wsp + ']', ' ', err)
        logger.debug("calibredb res %d[%s]" % (len(nres), nres))
        logger.debug("calibredb err %d[%s]" % (len(nerr), nerr))
        if rc:
            if 'Errno 111' in err:
                logger.warn("calibredb returned Errno 111: Connection refused")
            elif 'Errno 13' in err:
                logger.warn("calibredb returned Errno 13: Permission denied")
            elif cmd == 'list_categories' and res:
                rc = 0  # false error return of 1 on v2.xx calibredb
    except Exception as e:
        # best effort: any failure to launch or talk to calibredb is reported as rc 1
        logger.error("calibredb exception: %s %s" % (type(e).__name__, str(e)))
        rc = 1

    # NOTE: a retry against the eBook directory (when a server url was used and
    # the failure was not "Forbidden") used to be sketched here; it is disabled.

    if rc:
        return res, err, rc
    return res, dest_url, 0
def audioRename(bookid):
    """Rename the folder and audio files of one audiobook to the configured patterns.

    The audio files next to the book's stored AudioFile are read with TinyTag,
    checked for a complete and consistent set of parts, then the folder is moved
    and every part renamed using AUDIOBOOK_DEST_FILE.

    :param bookid: BookID of the audiobook to rename
    :return: '' if the config pattern is unusable, the book or its filename is
             unknown, or TinyTag is missing; the unchanged stored filename if
             the parts are incomplete or inconsistent; otherwise the filename
             of part 1 after renaming
    """
    for item in ['$Part', '$Title']:
        if item not in lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE']:
            logger.error("Unable to audioRename, check AUDIOBOOK_DEST_FILE")
            return ''

    myDB = database.DBConnection()
    cmd = ('select AuthorName,BookName,AudioFile from books,authors '
           'where books.AuthorID = authors.AuthorID and bookid=?')
    exists = myDB.match(cmd, (bookid,))
    if not exists:
        logger.debug("Invalid bookid in audioRename %s" % bookid)
        return ''
    book_filename = exists['AudioFile']
    if not book_filename:
        # the format string used to have two placeholders for one argument (TypeError)
        logger.debug("No filename for %s in audioRename" % bookid)
        return ''
    r = os.path.dirname(book_filename)

    if not TinyTag:
        logger.warn("TinyTag library not available")
        return ''

    cnt = 0
    parts = []  # one [track, book, author, filename] entry per tagged audio file
    author = ''
    book = ''
    total = 0
    audio_file = ''
    for f in os.listdir(makeBytestr(r)):
        f = makeUnicode(f)
        if not is_valid_booktype(f, booktype='audiobook'):
            continue
        cnt += 1
        audio_file = f
        try:
            id3r = TinyTag.get(os.path.join(r, f))
            performer = id3r.artist
            composer = id3r.composer
            book = id3r.album
            track = check_int(id3r.track, 0)
            total = check_int(id3r.track_total, 0)

            if composer:  # if present, should be author
                author = composer
            elif performer:  # author, or narrator if composer == author
                author = performer
            if author and book:
                parts.append([track, book, author, f])
        except Exception as e:
            # an unreadable file is simply not added to parts; the count check below rejects the set
            logger.error("tinytag %s %s" % (type(e).__name__, str(e)))

    logger.debug("%s found %s audiofile%s" % (exists['BookName'], cnt, plural(cnt)))

    if not cnt:
        # nothing to rename, and parts[0] below would raise IndexError
        logger.warn("%s: No audio files found in %s" % (exists['BookName'], r))
        return book_filename

    if cnt == 1 and not parts:
        # single file audiobook without usable tags: use the database names.
        # parts must stay a list of entries, otherwise the count check below fails.
        book = exists['BookName']
        author = exists['AuthorName']
        parts = [[1, book, author, audio_file]]

    if cnt != len(parts):
        logger.warn("%s: Incorrect number of parts (found %i from %i)" % (exists['BookName'], len(parts), cnt))
        return book_filename

    if total and total != cnt:
        logger.warn("%s: Reported %i parts, got %i" % (exists['BookName'], total, cnt))
        return book_filename

    # check all parts have the same author and title (compared with the last file read)
    if len(parts) > 1:
        for part in parts:
            if part[1] != book:
                logger.warn("%s: Inconsistent title: [%s][%s]" % (exists['BookName'], part[1], book))
                return book_filename
            if part[2] != author:
                logger.warn("%s: Inconsistent author: [%s][%s]" % (exists['BookName'], part[2], author))
                return book_filename

    # do we have any track info (value is 0 if not)
    if parts[0][0] == 0:
        tokmatch = ''
        # try to extract part information from filename. Search for token style of part 1 in this order...
        for token in [' 001.', ' 01.', ' 1.', ' 001 ', ' 01 ', ' 1 ', '01']:
            if any(token in part[3] for part in parts):
                tokmatch = token
                break
        if tokmatch:  # we know the numbering style, get numbers for the other parts
            digits = tokmatch.strip(' .')  # '001', '01' or '1'
            for number in range(1, len(parts) + 1):
                # same token with the part number substituted, zero padded to the same width
                pattern = tokmatch.replace(digits, str(number).zfill(len(digits)))
                # standardise numbering of the parts
                for part in parts:
                    if pattern in part[3]:
                        part[0] = number
                        break

    # check all parts are present
    tracks = set(part[0] for part in parts)
    for number in range(1, len(parts) + 1):
        if number not in tracks:
            logger.warn("%s: No part %i found" % (exists['BookName'], number))
            return book_filename

    # if we get here, looks like we have all the parts needed to rename properly
    seriesinfo = seriesInfo(bookid)
    # NOTE(review): the folder pattern comes from EBOOK_DEST_FOLDER although the
    # target directory is the 'Audio' one - confirm this is intended
    dest_path = lazylibrarian.CONFIG['EBOOK_DEST_FOLDER'].replace(
        '$Author', author).replace(
        '$Title', book).replace(
        '$Series', seriesinfo['Full']).replace(
        '$SerName', seriesinfo['Name']).replace(
        '$SerNum', seriesinfo['Num']).replace(
        '$$', ' ')
    dest_path = ' '.join(dest_path.split()).strip()
    dest_path = replace_all(dest_path, __dic__)
    dest_dir = lazylibrarian.DIRECTORY('Audio')
    dest_path = os.path.join(dest_dir, dest_path)
    if r != dest_path:
        try:
            dest_path = safe_move(r, dest_path)
            r = dest_path
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))

    total_parts = str(len(parts))
    for part in parts:
        pattern = lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE'].replace(
            '$Author', author).replace(
            '$Title', book).replace(
            '$Part', str(part[0]).zfill(len(total_parts))).replace(
            '$Total', total_parts).replace(
            '$Series', seriesinfo['Full']).replace(
            '$SerName', seriesinfo['Name']).replace(
            '$SerNum', seriesinfo['Num']).replace(
            '$$', ' ')
        pattern = ' '.join(pattern.split()).strip()

        n = os.path.join(r, pattern + os.path.splitext(part[3])[1])
        o = os.path.join(r, part[3])
        if o != n:
            try:
                n = safe_move(o, n)
                if part[0] == 1:
                    book_filename = n  # return part 1 of set
                logger.debug('%s: audioRename [%s] to [%s]' % (exists['BookName'], o, n))
            except Exception as e:
                logger.error('Unable to rename [%s] to [%s] %s %s' % (o, n, type(e).__name__, str(e)))
    return book_filename
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database.

    :param startdir: directory to scan. When empty, the 'Destination' directory
                     is used, which makes this a full library scan (stats are
                     reset, images cached, bookcounts updated for all authors).
    :return: how many books were newly marked "Open"; 0 if the directory cannot
             be used or there is no internet connection; None if an unexpected
             exception was caught and logged.
    """
    # NOTE(review): r.decode(), urllib.quote_plus and urllib.urlencode below look
    # Python 2 only - confirm the target interpreter before porting.
    # NOTE(review): most SQL in this function is built by string interpolation of
    # names taken from file metadata; a value containing a double quote would
    # presumably break the statement - consider "?" placeholders if the database
    # wrapper supports them for action/select as well as match.
    try:
        destdir = lazylibrarian.DIRECTORY('Destination')
        if not startdir:
            if not destdir:
                logger.warn('Cannot find destination directory: %s. Not scanning' % destdir)
                return 0
            startdir = destdir

        if not os.path.isdir(startdir):
            logger.warn('Cannot find directory: %s. Not scanning' % startdir)
            return 0

        if not internet():
            logger.warn('Libraryscan: No internet connection')
            return 0

        myDB = database.DBConnection()

        # keep statistics of full library scans
        if startdir == destdir:
            myDB.action('DELETE from stats')
            try:  # remove any extra whitespace in authornames
                # NOTE(review): as written this LIKE pattern selects every name that
                # contains a single space; presumably a double space was intended - confirm
                authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "% %"')
                if authors:
                    logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        authorname = ' '.join(author['AuthorName'].split())
                        # Have we got author name both with-and-without extra spaces? If so, merge them
                        duplicate = myDB.match(
                            'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname)
                        if duplicate:
                            myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName'])
                            if author['AuthorID'] != duplicate['AuthorID']:
                                myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' %
                                            (duplicate['AuthorID'], author['AuthorID']))
                        else:
                            myDB.action(
                                'UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid))
            except Exception as e:
                logger.info('Error: ' + str(e))

        logger.info('Scanning ebook directory: %s' % startdir)

        new_book_count = 0
        modified_count = 0
        rescan_count = 0
        rescan_hits = 0
        file_count = 0
        author = ""

        # on a full scan, first flag "Open" books whose stored file has vanished
        if lazylibrarian.CONFIG['FULL_SCAN']:
            cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors'
            cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"'
            if not startdir == destdir:
                cmd += ' and BookFile like "' + startdir + '%"'
            books = myDB.select(cmd)
            status = lazylibrarian.CONFIG['NOTFOUND_STATUS']
            logger.info('Missing books will be marked as %s' % status)
            for book in books:
                bookID = book['BookID']
                bookfile = book['BookFile']

                if not (bookfile and os.path.isfile(bookfile)):
                    myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                    myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                    logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName']))

        # to save repeat-scans of the same directory if it contains multiple formats of the same book,
        # keep track of which directories we've already looked at
        processed_subdirectories = []
        warned = False  # have we warned about no new authors setting
        # escape every character of the filename pattern so it is matched literally
        matchString = ''
        for char in lazylibrarian.CONFIG['EBOOK_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the EBOOK_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # NOTE(review): '[' + booktypes + ']' is a character class, so it matches ONE
        # character out of the joined extension names (and '|'), not a whole
        # extension; a group such as (?:epub|mobi) was presumably intended - confirm
        matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
        pattern = re.compile(matchString, re.VERBOSE)

        for r, d, f in os.walk(startdir):
            # pruning d in place stops os.walk descending into those directories
            for directory in d[:]:
                # prevent magazine being scanned
                if directory.startswith("_") or directory.startswith("."):
                    d.remove(directory)

            for files in f:
                file_count += 1

                if isinstance(r, str):
                    r = r.decode(lazylibrarian.SYS_ENCODING)

                subdirectory = r.replace(startdir, '')
                # Added new code to skip if we've done this directory before.
                # Made this conditional with a switch in config.ini
                # in case user keeps multiple different books in the same subdirectory
                if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories):
                    logger.debug("[%s] already scanned" % subdirectory)
                else:
                    # If this is a book, try to get author/title/isbn/language
                    # if epub or mobi, read metadata from the book
                    # If metadata.opf exists, use that allowing it to override
                    # embedded metadata. User may have edited metadata.opf
                    # to merge author aliases together
                    # If all else fails, try pattern match for author/title
                    # and look up isbn/lang from LT or GR later
                    match = 0
                    if is_valid_booktype(files):

                        logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))

                        language = "Unknown"
                        isbn = ""
                        book = ""
                        author = ""
                        gr_id = ""
                        gb_id = ""
                        extn = os.path.splitext(files)[1]

                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == ".epub") or (extn == ".mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)

                            try:
                                res = get_book_info(book_filename)
                            except Exception as e:
                                logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e)))
                                res = {}
                            # title and creator are the minimum we need
                            if 'title' in res and 'creator' in res:
                                book = res['title']
                                author = res['creator']
                                if book and len(book) > 2 and author and len(author) > 2:
                                    match = 1
                                if 'language' in res:
                                    language = res['language']
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                if 'type' in res:
                                    extn = res['type']
                                logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                             (isbn, language, author, book, extn))
                            if not match:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                        # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                        # just look for any .opf file in the current directory since we don't know
                        # LL preferred authorname/bookname at this point.
                        # Allow metadata in file to override book contents as may be users pref
                        metafile = opf_file(r)
                        try:
                            res = get_book_info(metafile)
                        except Exception as e:
                            logger.debug('get_book_info failed for %s, %s' % (metafile, str(e)))
                            res = {}
                        # title and creator are the minimum we need
                        if 'title' in res and 'creator' in res:
                            book = res['title']
                            author = res['creator']
                            if book and len(book) > 2 and author and len(author) > 2:
                                match = 1
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'gr_id' in res:
                                gr_id = res['gr_id']
                            logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, gr_id))
                        if not match:
                            logger.debug("File meta incomplete in %s" % metafile)

                        if not match:  # no author/book from metadata file, and not embedded either
                            match = pattern.match(files)
                            if match:
                                author = match.group("author")
                                book = match.group("book")
                                if len(book) <= 2 or len(author) <= 2:
                                    match = 0
                            if not match:
                                logger.debug("Pattern match failed [%s]" % files)

                        if match:
                            # flag that we found a book in this subdirectory
                            processed_subdirectories.append(subdirectory)

                            # If we have a valid looking isbn, and language != "Unknown", add it to cache
                            if language != "Unknown" and is_valid_isbn(isbn):
                                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                                # we need to add it to language cache if not already
                                # there, is_valid_isbn has checked length is 10 or 13
                                if len(isbn) == 10:
                                    isbnhead = isbn[0:3]
                                else:
                                    isbnhead = isbn[3:6]
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if not match:
                                    myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                                else:
                                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            author, authorid, new = addAuthorNameToDB(author)  # get the author name as we know it...

                            if author:
                                # author exists, check if this book by this author is in our database
                                # metadata might have quotes in book name
                                # some books might be stored under a different author name
                                # eg books by multiple authors, books where author is "writing as"
                                # or books we moved to "merge" authors
                                book = book.replace("'", "")

                                # First try and find it under author and bookname
                                # as we may have it under a different bookid or isbn to goodreads/googlebooks
                                # which might have several bookid/isbn for the same book
                                bookid = find_book_in_db(myDB, author, book)

                                if not bookid:
                                    # Title or author name might not match or multiple authors
                                    # See if the gr_id, gb_id is already in our database
                                    if gr_id:
                                        bookid = gr_id
                                    elif gb_id:
                                        bookid = gb_id
                                    else:
                                        bookid = ""

                                    if bookid:
                                        match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid)
                                        if not match:
                                            msg = 'Unable to find book %s by %s in database, trying to add it using '
                                            if bookid == gr_id:
                                                msg += "GoodReads ID " + gr_id
                                            if bookid == gb_id:
                                                msg += "GoogleBooks ID " + gb_id
                                            logger.debug(msg % (book, author))
                                            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id:
                                                GR_ID = GoodReads(gr_id)
                                                GR_ID.find_book(gr_id, None)
                                            elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id:
                                                GB_ID = GoogleBooks(gb_id)
                                                GB_ID.find_book(gb_id, None)
                                            # see if it's there now...
                                            match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid)
                                            if not match:
                                                logger.debug("Unable to add bookid %s to database" % bookid)
                                                bookid = ""

                                if not bookid and isbn:
                                    # See if the isbn is in our database
                                    match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn)
                                    if match:
                                        bookid = match['BookID']

                                if not bookid:
                                    # get author name from parent directory of this book directory
                                    newauthor = os.path.basename(os.path.dirname(r))
                                    # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_
                                    if newauthor.endswith('_'):
                                        newauthor = newauthor[:-1] + '.'
                                    if author.lower() != newauthor.lower():
                                        logger.debug("Trying authorname [%s]" % newauthor)
                                        bookid = find_book_in_db(myDB, newauthor, book)
                                        if bookid:
                                            logger.warn("%s not found under [%s], found under [%s]" %
                                                        (book, author, newauthor))

                                # at this point if we still have no bookid, it looks like we
                                # have author and book title but no database entry for it
                                if not bookid:
                                    if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                                        # Either goodreads doesn't have the book or it didn't match language prefs
                                        # Since we have the book anyway, try and reload it ignoring language prefs
                                        rescan_count += 1
                                        base_url = 'http://www.goodreads.com/search.xml?q='
                                        params = {"key": lazylibrarian.CONFIG['GR_API']}
                                        # turn leading initials ("J R Smith", "J.R. Smith") into "J.R. Smith"
                                        if author[1] in '. ':
                                            surname = author
                                            forename = ''
                                            while surname[1] in '. ':
                                                forename = forename + surname[0] + '.'
                                                surname = surname[2:].strip()
                                            if author != forename + ' ' + surname:
                                                logger.debug('Stripped authorname [%s] to [%s %s]' %
                                                             (author, forename, surname))
                                                author = forename + ' ' + surname

                                        author = ' '.join(author.split())  # ensure no extra whitespace

                                        searchname = author + ' ' + book
                                        searchname = cleanName(unaccented(searchname))
                                        searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
                                        set_url = base_url + searchterm + '&' + urllib.urlencode(params)
                                        try:
                                            rootxml, in_cache = get_xml_request(set_url)
                                            if not len(rootxml):
                                                logger.debug("Error requesting results from GoodReads")
                                            else:
                                                resultxml = rootxml.getiterator('work')
                                                for item in resultxml:
                                                    booktitle = item.find('./best_book/title').text
                                                    book_fuzz = fuzz.token_set_ratio(booktitle, book)
                                                    # only accept a (near) exact title match
                                                    if book_fuzz >= 98:
                                                        logger.debug("Rescan found %s : %s" % (booktitle, language))
                                                        rescan_hits += 1
                                                        bookid = item.find('./best_book/id').text
                                                        GR_ID = GoodReads(bookid)
                                                        GR_ID.find_book(bookid, None)
                                                        if language and language != "Unknown":
                                                            # set language from book metadata
                                                            logger.debug("Setting language from metadata %s : %s" %
                                                                         (booktitle, language))
                                                            myDB.action('UPDATE books SET BookLang="%s" WHERE BookID="%s"' %
                                                                        (language, bookid))
                                                        break
                                                if not bookid:
                                                    logger.warn("GoodReads doesn't know about %s" % book)
                                        except Exception as e:
                                            logger.error("Error finding rescan results: %s" % str(e))

                                    elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                                        # if we get here using googlebooks it's because googlebooks
                                        # doesn't have the book. No point in looking for it again.
                                        logger.warn("GoogleBooks doesn't know about %s" % book)

                                # see if it's there now...
                                if bookid:
                                    cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors '
                                    cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid
                                    check_status = myDB.match(cmd)

                                    if not check_status:
                                        logger.debug('Unable to find bookid %s in database' % bookid)
                                    else:
                                        if check_status['Status'] != 'Open':
                                            # we found a new book
                                            new_book_count += 1
                                            myDB.action(
                                                'UPDATE books set Status="Open" where BookID="%s"' % bookid)

                                        # store book location so we can check if it gets removed
                                        book_filename = os.path.join(r, files)
                                        if not check_status['BookFile']:  # no previous location
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))
                                        # location may have changed since last scan
                                        elif book_filename != check_status['BookFile']:
                                            modified_count += 1
                                            logger.warn("Updating book location for %s %s from %s to %s" %
                                                        (author, book, check_status['BookFile'], book_filename))
                                            logger.debug("%s %s matched %s BookID %s, [%s][%s]" %
                                                         (author, book, check_status['Status'], bookid,
                                                          check_status['AuthorName'], check_status['BookName']))
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))

                                        # update cover file to cover.jpg in book folder (if exists)
                                        bookdir = os.path.dirname(book_filename)
                                        coverimg = os.path.join(bookdir, 'cover.jpg')
                                        if os.path.isfile(coverimg):
                                            cachedir = lazylibrarian.CACHEDIR
                                            cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg')
                                            copyfile(coverimg, cacheimg)
                                else:
                                    logger.warn(
                                        "Failed to match book [%s] by [%s] in database" % (book, author))
                            else:
                                # addAuthorNameToDB gave no author name back; warn once if that is by config
                                if not warned and not lazylibrarian.CONFIG['ADD_AUTHOR']:
                                    logger.warn("Add authors to database is disabled")
                                    warned = True

        logger.info("%s/%s new/modified book%s found and added to the database" %
                    (new_book_count, modified_count, plural(new_book_count + modified_count)))
        logger.info("%s file%s processed" % (file_count, plural(file_count)))

        if startdir == destdir:
            # On full library scans, check for missing workpages
            setWorkPages()
            # and books with unknown language
            nolang = myDB.match(
                "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'")
            nolang = nolang['counter']
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))
            # show stats if new books were added
            stats = myDB.match(
                "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                    sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats")

            # NOTE(review): 'GB_book_hits' is filled from sum(GR_book_hits) and is not
            # read again in this function - confirm whether that is intentional
            st= {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'],
                 'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'],
                 'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'],
                 'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'],
                 'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']}

            # sum() over an empty table yields None; report those as 0
            for item in st.keys():
                if st[item] is None:
                    st[item] = 0

            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (st['GB_lang_change'], plural(st['GB_lang_change'])))
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (st['GR_lang_hits'], plural(st['GR_lang_hits'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (st['LT_lang_hits'], plural(st['LT_lang_hits'])))
            logger.debug("Language cache was hit %s time%s" %
                         (st['cache_hits'], plural(st['cache_hits'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (st['bad_lang'], plural(st['bad_lang'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (st['bad_char'], plural(st['bad_char'])))
            logger.debug("Unable to cache language for %s book%s with missing ISBN" %
                         (st['uncached'], plural(st['uncached'])))
            logger.debug("Found %s duplicate book%s" %
                         (st['duplicates'], plural(st['duplicates'])))
            logger.debug("Rescan %s hit%s, %s miss" %
                         (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.match("select count('ISBN') as counter from languages")
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

            # Cache any covers and images
            images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
            if len(images):
                logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    bookid = item['bookid']
                    bookimg = item['bookimg']
                    # bookname = item['bookname']
                    newimg, success = cache_img("book", bookid, bookimg)
                    if success:
                        myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

            images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
            if len(images):
                logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    authorid = item['authorid']
                    authorimg = item['authorimg']
                    # authorname = item['authorname']
                    newimg, success = cache_img("author", authorid, authorimg)
                    if success:
                        myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))

            # On full scan, update bookcounts for all authors, not just new ones - refresh may have located
            # new books for existing authors especially if switched provider gb/gr or changed wanted languages
            authors = myDB.select('select AuthorID from authors')
        else:
            # On single author/book import, just update bookcount for that author
            # (this is the author of the last book file processed above)
            authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""'))

        logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
        for author in authors:
            update_totals(author['AuthorID'])

        logger.info('Library scan complete')
        return new_book_count

    except Exception:
        # anything unexpected is logged and the function falls through, returning None
        logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())
def audioProcess(bookid, rename=False, playlist=False):
    """Check the parts of an audiobook and optionally rename them and/or write a playlist.

    The audio files next to the book's stored AudioFile are read with TinyTag,
    checked for a complete and consistent set of parts and put in part order.

    :param bookid: book to process
    :param rename: rename folder and files to match the audiobook filename pattern
    :param playlist: generate a playlist (playlist.ll in the book folder) for popup
    :return: '' if the config pattern is unusable, the book or its filename is
             unknown, or TinyTag is missing; the unchanged stored filename if
             the parts are incomplete or inconsistent; otherwise the filename
             of part 01 of the audiobook
    """
    for item in ['$Part', '$Title']:
        if rename and item not in lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE']:
            logger.error("Unable to audioProcess, check AUDIOBOOK_DEST_FILE")
            return ''

    myDB = database.DBConnection()
    cmd = ('select AuthorName,BookName,AudioFile from books,authors '
           'where books.AuthorID = authors.AuthorID and bookid=?')
    exists = myDB.match(cmd, (bookid,))
    if not exists:
        logger.debug("Invalid bookid in audioProcess %s" % bookid)
        return ''
    book_filename = exists['AudioFile']
    if not book_filename:
        logger.debug("No filename for %s in audioProcess" % bookid)
        return ''
    r = os.path.dirname(book_filename)

    if not TinyTag:
        logger.warn("TinyTag library not available")
        return ''

    cnt = 0
    parts = []  # one [track, book, author, filename] entry per tagged audio file
    total = 0
    author = ''
    book = ''
    audio_file = ''
    abridged = ''
    for f in os.listdir(makeBytestr(r)):
        f = makeUnicode(f)
        if not is_valid_booktype(f, booktype='audiobook'):
            continue
        cnt += 1
        audio_file = f
        try:
            audio_path = os.path.join(r, f)
            performer = ''
            composer = ''
            albumartist = ''
            book = ''
            title = ''
            track = 0
            total = 0

            if TinyTag.is_supported(audio_path):
                id3r = TinyTag.get(audio_path)
                performer = id3r.artist
                composer = id3r.composer
                albumartist = id3r.albumartist
                book = id3r.album
                title = id3r.title
                track = id3r.track
                total = id3r.track_total

            track = check_int(track, 0)
            total = check_int(total, 0)

            if performer:
                performer = performer.strip()
            if composer:
                composer = composer.strip()
            if book:
                book = book.strip()
            if albumartist:
                albumartist = albumartist.strip()

            if composer:  # if present, should be author
                author = composer
            elif performer:  # author, or narrator if composer == author
                author = performer
            elif albumartist:
                author = albumartist
            if author and book:
                parts.append([track, book, author, f])

            if not abridged:
                # 'unabridged' contains 'abridged', so it has to be looked for first
                tags = [tag.lower() for tag in (book, title, albumartist, performer, composer) if tag]
                if any('unabridged' in tag for tag in tags):
                    abridged = 'Unabridged'
                elif any('abridged' in tag for tag in tags):
                    abridged = 'Abridged'
        except Exception as e:
            # an unreadable file is simply not added to parts; the count check below rejects the set
            logger.error("tinytag %s %s" % (type(e).__name__, str(e)))

        # Fall back to the filename for the abridged marker. This used to live in a
        # "finally:" clause ending in "break", which left the file loop after the
        # first matching filename so the remaining parts were never counted.
        if not abridged and audio_file:
            lowname = audio_file.lower()
            if 'unabridged' in lowname:
                abridged = 'Unabridged'
            elif 'abridged' in lowname:
                abridged = 'Abridged'

    logger.debug("%s found %s audiofile%s" % (exists['BookName'], cnt, plural(cnt)))

    if not cnt:
        # nothing to process, and parts[0] below would raise IndexError
        logger.warn("%s: No audio files found in %s" % (exists['BookName'], r))
        return book_filename

    if cnt == 1 and not parts:  # single file audiobook with no tags
        parts = [[1, exists['BookName'], exists['AuthorName'], audio_file]]

    if cnt != len(parts):
        logger.warn("%s: Incorrect number of parts (found %i from %i)" % (exists['BookName'], len(parts), cnt))
        return book_filename

    if total and total != cnt:
        logger.warn("%s: Reported %i parts, got %i" % (exists['BookName'], total, cnt))
        return book_filename

    # check all parts have the same author and title (compared with the last file read)
    if len(parts) > 1:
        for part in parts:
            if part[1] != book:
                logger.warn("%s: Inconsistent title: [%s][%s]" % (exists['BookName'], part[1], book))
                return book_filename
            if part[2] != author:
                logger.warn("%s: Inconsistent author: [%s][%s]" % (exists['BookName'], part[2], author))
                return book_filename

    # do we have any track info (value is 0 if not)
    if parts[0][0] == 0:
        tokmatch = ''
        # try to extract part information from filename. Search for token style of part 1 in this order...
        for token in [' 001.', ' 01.', ' 1.', ' 001 ', ' 01 ', ' 1 ', '01']:
            if any(token in part[3] for part in parts):
                tokmatch = token
                break
        if tokmatch:  # we know the numbering style, get numbers for the other parts
            digits = tokmatch.strip(' .')  # '001', '01' or '1'
            for number in range(1, len(parts) + 1):
                # same token with the part number substituted, zero padded to the same width
                pattern = tokmatch.replace(digits, str(number).zfill(len(digits)))
                # standardise numbering of the parts
                for part in parts:
                    if pattern in part[3]:
                        part[0] = number
                        break

    # part order matters from here on: it is the order of the playlist
    parts.sort(key=lambda x: x[0])
    # check all parts are present
    for idx, part in enumerate(parts):
        if part[0] != idx + 1:
            logger.warn("%s: No part %i found" % (exists['BookName'], idx + 1))
            return book_filename

    if abridged:
        abridged = ' (%s)' % abridged
    # if we get here, looks like we have all the parts needed to rename properly
    seriesinfo = nameVars(bookid, abridged)
    dest_path = seriesinfo['FolderName']
    dest_dir = lazylibrarian.DIRECTORY('Audio')
    dest_path = os.path.join(dest_dir, dest_path)
    if rename and r != dest_path:
        try:
            dest_path = safe_move(r, dest_path)
            r = dest_path
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))

    playlist_file = None
    if playlist:
        try:
            playlist_file = open(os.path.join(r, 'playlist.ll'), 'w')
        except Exception as why:
            logger.error('Unable to create playlist in %s: %s' % (r, why))

    total_parts = str(len(parts))
    try:
        for part in parts:
            pattern = seriesinfo['AudioFile'].replace(
                '$Part', str(part[0]).zfill(len(total_parts))).replace(
                '$Total', total_parts)
            pattern = ' '.join(pattern.split()).strip()
            pattern = pattern + os.path.splitext(part[3])[1]

            if playlist_file:
                # list the name the file will have once this call has finished
                if rename:
                    playlist_file.write(pattern + '\n')
                else:
                    playlist_file.write(part[3] + '\n')

            if rename:
                n = os.path.join(r, pattern)
                o = os.path.join(r, part[3])
                if o != n:
                    try:
                        n = safe_move(o, n)
                        if part[0] == 1:
                            book_filename = n  # return part 1 of set
                        logger.debug('%s: audioProcess [%s] to [%s]' % (exists['BookName'], o, n))
                    except Exception as e:
                        logger.error('Unable to rename [%s] to [%s] %s %s' % (o, n, type(e).__name__, str(e)))
    finally:
        # close the playlist even if a write fails
        if playlist_file:
            playlist_file.close()
    return book_filename