def import_book(pp_path=None, bookID=None):
    """Move a downloaded book into the library folder structure.

    Kept as a separate function so books can be imported from an alternate
    directory given just the bookID, e.g.::

        if import_book(source_directory, bookID):
            ppcount = ppcount + 1

    The book is looked up in the ``books`` table, the destination folder and
    file name are built from the EBOOK_DEST_FOLDER / EBOOK_DEST_FILE
    templates, and the work is handed to ``processDestination``.  On success
    the matching ``wanted`` row is marked "Processed", ``processExtras`` is
    run and a download notification is sent.  On failure an attempt is made
    to rename ``pp_path`` to ``pp_path + '.fail'``.

    Returns True when the book was processed, False otherwise (including
    when ``bookID`` is not found in the database).
    """
    myDB = database.DBConnection()
    # Bind the id as a parameter rather than formatting it into the SQL text.
    data = myDB.select('SELECT * from books WHERE BookID=?', [bookID])
    if not data:
        return False

    authorname = data[0]['AuthorName']
    bookname = data[0]['BookName']

    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
        # The corrected template is written back to the module-level setting.
        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
    global_name = common.remove_accents(global_name)

    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

    if processDestination(pp_path, dest_path, authorname, bookname, global_name):
        # update nzbs
        controlValueDict = {"BookID": bookID}
        newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
        myDB.upsert("wanted", newValueDict, controlValueDict)
        processExtras(myDB, dest_path, global_name, data)
        logger.info('Successfully processed: %s' % global_name)
        notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
        return True

    logger.error('Postprocessing for %s has failed.' % global_name)
    logger.error('Warning - Residual files remain in %s.fail' % pp_path)
    try:
        os.rename(pp_path, pp_path + '.fail')
    except (OSError, TypeError):
        # Best effort only: the rename can fail on the filesystem (OSError)
        # or because pp_path was left at its None default (TypeError).
        logger.debug("Unable to rename %s" % pp_path)
    return False
def processResultList(resultlist, book, searchtype):
    """Pick the first torrent result that matches ``book`` and snatch it.

    Each entry of ``resultlist`` is read through its 'tor_title', 'tor_url',
    'tor_prov' and 'tor_size' keys; ``book`` through 'searchterm', 'bookid',
    'authorName' and 'bookName'.  A result matches when the fuzzy
    token-set ratio between the cleaned title and the search term is above
    ``lazylibrarian.MATCH_RATIO``.  Every match is upserted into ``wanted``
    with status "Skipped"; if the book is not already "Snatched" the
    download is started.

    Returns True once a download has been started, otherwise False.
    ``searchtype`` is only used in log messages.
    """
    myDB = database.DBConnection()
    # '\\s\\s' is the same literal backslash-s pair the original spelled as
    # '\s\s'; written this way it is no longer an invalid escape sequence.
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': '', '\\s\\s': ' ',
                ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}
    # The threshold does not change between results, so convert it once.
    match_ratio = int(lazylibrarian.MATCH_RATIO)

    for tor in resultlist:
        tor_Title = formatter.latinToAscii(formatter.replace_all(str(tor['tor_title']), dictrepl)).strip()
        tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace

        tor_Title_match = fuzz.token_set_ratio(book['searchterm'], tor_Title)
        logger.debug("Torrent Title Match %: " + str(tor_Title_match) + " for " + tor_Title)
        if tor_Title_match <= match_ratio:
            continue

        logger.debug(u'Found Torrent: %s using %s search' % (tor['tor_title'], searchtype))
        bookid = book['bookid']
        # From here on the title is the canonical "Author - Title LL.(id)" form.
        tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip()
        tor_url = tor['tor_url']
        tor_prov = tor['tor_prov']

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'

        controlValueDict = {"NZBurl": tor_url}
        newValueDict = {
            "NZBprov": tor_prov,
            "BookID": bookid,
            "NZBsize": tor_size,
            "NZBtitle": tor_Title,
            "NZBmode": "torrent",
            "Status": "Skipped"
        }
        myDB.upsert("wanted", newValueDict, controlValueDict)

        # Bind the id as a parameter rather than formatting it into the SQL text.
        snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                    [bookid]).fetchone()
        if not snatchedbooks:
            snatch = TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url)
            if snatch:
                notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now())
                postprocess.schedule_processor(action='Start')
                # NOTE(review): nesting of this return was reconstructed from
                # whitespace-collapsed source - confirm it belongs under
                # ``if snatch``.
                return True

    logger.debug("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
def log(self, message, level):
    """Record ``message`` in the in-memory log list and the 'lazylibrarian' logger.

    ``level`` is 'DEBUG', 'INFO' or 'WARNING'; any other value is logged as
    an error.  DEBUG messages are only added to ``lazylibrarian.LOGLIST``
    when ``lazylibrarian.LOGFULL`` is True.
    """
    logger = logging.getLogger('lazylibrarian')
    # current_thread().name replaces the deprecated currentThread().getName().
    threadname = threading.current_thread().name

    if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
        # Limit the size of the "in-memory" log, as gets slow if too long
        # Set a fairly arbitrary 500 message limit for now
        # TODO make this configurable?
        # Ensure messages are ascii as some author names contain accents
        # and the web page doesn't like them
        lazylibrarian.LOGLIST.insert(
            0, (formatter.now(), formatter.latinToAscii(message), level, threadname))
        if len(lazylibrarian.LOGLIST) > 500:
            del lazylibrarian.LOGLIST[-1]

    message = threadname + ' : ' + message

    if level == 'DEBUG':
        logger.debug(message)
    elif level == 'INFO':
        logger.info(message)
    elif level == 'WARNING':
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning(message)
    else:
        logger.error(message)
def log(self, message, level):
    """Record ``message`` in the in-memory log list and the 'lazylibrarian' logger.

    The message is first passed through ``formatter.latinToAscii`` because
    some author names contain accents and the web page doesn't like them.
    ``level`` is 'DEBUG', 'INFO' or 'WARNING'; any other value is logged as
    an error.  DEBUG messages are only added to ``lazylibrarian.LOGLIST``
    when ``lazylibrarian.LOGFULL`` is True, and the list is capped at
    ``lazylibrarian.LOGLIMIT`` entries.
    """
    logger = logging.getLogger('lazylibrarian')
    # current_thread().name replaces the deprecated currentThread().getName().
    threadname = threading.current_thread().name

    message = formatter.latinToAscii(message)

    if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
        # Limit the size of the "in-memory" log, as gets slow if too long
        lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message))
        if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT:
            del lazylibrarian.LOGLIST[-1]

    message = threadname + ' : ' + message

    if level == 'DEBUG':
        logger.debug(message)
    elif level == 'INFO':
        logger.info(message)
    elif level == 'WARNING':
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning(message)
    else:
        logger.error(message)
def processOPF(
    dest_path=None,
    authorname=None,
    bookname=None,
    bookisbn=None,
    bookid=None,
    bookpub=None,
    bookdate=None,
    bookdesc=None,
    booklang=None,
    global_name=None,
):
    """Write an OPF metadata file for a book unless one already exists.

    The file is created as ``<dest_path>/<global_name>.opf``.  Title,
    creator, language and the GoogleBooks identifier are always written;
    ISBN, publisher, date and description only when supplied.  After
    writing, the file is chmod'ed to 0777 on a best-effort basis.
    """
    # Local import so the block brings its own dependency into scope.
    from xml.sax.saxutils import escape

    def xml_text(value):
        # Values are interpolated into element content, so '&', '<' and '>'
        # must be escaped or the resulting document is not well-formed.
        return escape('%s' % (value,))

    # Indentation inside the template is cosmetic (insignificant XML whitespace).
    opfinfo = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<package version="2.0" xmlns="http://www.idpf.org/2007/opf" >\n'
        '    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
        '        <dc:title>%s</dc:title>\n'
        '        <creator>%s</creator>\n'
        '        <dc:language>%s</dc:language>\n'
        '        <dc:identifier scheme="GoogleBooks">%s</dc:identifier>\n'
        % (xml_text(bookname), xml_text(authorname), xml_text(booklang), xml_text(bookid))
    )

    if bookisbn:
        opfinfo += '        <dc:identifier scheme="ISBN">%s</dc:identifier>\n' % xml_text(bookisbn)
    if bookpub:
        opfinfo += "        <dc:publisher>%s</dc:publisher>\n" % xml_text(bookpub)
    if bookdate:
        opfinfo += "        <dc:date>%s</dc:date>\n" % xml_text(bookdate)
    if bookdesc:
        opfinfo += "        <dc:description>%s</dc:description>\n" % xml_text(bookdesc)

    opfinfo += (
        '        <guide>\n'
        '            <reference href="cover.jpg" type="cover" title="Cover"/>\n'
        '        </guide>\n'
        '    </metadata>\n'
        '</package>'
    )

    dic = {"...": "", " & ": " ", " = ": " ", "$": "s", " + ": " ", ",": "", "*": ""}
    opfinfo = formatter.latinToAscii(formatter.replace_all(opfinfo, dic))

    # handle metadata
    opfpath = os.path.join(dest_path, global_name + ".opf")
    if not os.path.exists(opfpath):
        # The context manager closes the handle even if the write fails.
        with open(opfpath, "wb") as opf:
            opf.write(opfinfo)

        try:
            os.chmod(opfpath, 0o777)
        except OSError:
            # Permissions are best effort; the metadata itself was written.
            logger.info("Could not chmod path: " + str(opfpath))

        logger.debug("Saved metadata to: " + opfpath)
def processOPF(dest_path=None, authorname=None, bookname=None, bookisbn=None, bookid=None,
               bookpub=None, bookdate=None, bookdesc=None, booklang=None):
    """Write ``metadata.opf`` into ``dest_path`` unless it already exists.

    Title, creator, language and the GoogleBooks identifier are always
    written; ISBN, publisher, date and description only when supplied.
    """
    # Local import so the block brings its own dependency into scope.
    from xml.sax.saxutils import escape

    def xml_text(value):
        # Values are interpolated into element content, so '&', '<' and '>'
        # must be escaped or the resulting document is not well-formed.
        return escape('%s' % (value,))

    # Indentation inside the template is cosmetic (insignificant XML whitespace).
    opfinfo = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<package version="2.0" xmlns="http://www.idpf.org/2007/opf" >\n'
        '    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
        '        <dc:title>%s</dc:title>\n'
        '        <creator>%s</creator>\n'
        '        <dc:language>%s</dc:language>\n'
        '        <dc:identifier scheme="GoogleBooks">%s</dc:identifier>\n'
        % (xml_text(bookname), xml_text(authorname), xml_text(booklang), xml_text(bookid))
    )

    if bookisbn:
        opfinfo += '        <dc:identifier scheme="ISBN">%s</dc:identifier>\n' % xml_text(bookisbn)
    if bookpub:
        opfinfo += '        <dc:publisher>%s</dc:publisher>\n' % xml_text(bookpub)
    if bookdate:
        opfinfo += '        <dc:date>%s</dc:date>\n' % xml_text(bookdate)
    if bookdesc:
        opfinfo += '        <dc:description>%s</dc:description>\n' % xml_text(bookdesc)

    opfinfo += (
        '        <guide>\n'
        '            <reference href="cover.jpg" type="cover" title="Cover"/>\n'
        '        </guide>\n'
        '    </metadata>\n'
        '</package>'
    )

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '$': 's', ' + ': ' ', ',': '', '*': ''}
    opfinfo = formatter.latinToAscii(formatter.replace_all(opfinfo, dic))

    # handle metadata
    opfpath = os.path.join(dest_path, 'metadata.opf')
    if not os.path.exists(opfpath):
        # The context manager closes the handle even if the write fails.
        with open(opfpath, 'wb') as opf:
            opf.write(opfinfo)
        logger.info('Saved metadata to: ' + opfpath)
    else:
        logger.info('%s allready exists. Did not create one.' % opfpath)
def MakeSearchTermWebSafe(insearchterm=None):
    """Return ``insearchterm`` cleaned up for use in a provider search query.

    A fixed set of punctuation sequences is removed or replaced, the result
    is passed through ``formatter.latinToAscii``, every '.', '-' and '/' is
    turned into a space, and the term is encoded as UTF-8.
    """
    dic = {"...": "", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "", ",": "", "*": ""}
    searchterm = formatter.latinToAscii(formatter.replace_all(insearchterm, dic))
    # Raw string: the original "[\.\-\/]" relied on invalid escape sequences.
    # The character class matches exactly the same three characters.
    searchterm = re.sub(r"[.\-/]", " ", searchterm).encode("utf-8")
    logger.debug("Converting Search Term [%s] to Web Safe Search Term [%s]" % (insearchterm, searchterm))
    return searchterm
def MakeSearchTermWebSafe(insearchterm=None):
    """Return ``insearchterm`` cleaned up for use in a provider search query.

    A fixed set of punctuation sequences is removed or replaced, the result
    is passed through ``formatter.latinToAscii``, every '.', '-' and '/' is
    turned into a space, and the term is encoded as UTF-8.
    """
    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
    searchterm = formatter.latinToAscii(formatter.replace_all(insearchterm, dic))
    # Raw string: the original '[\.\-\/]' relied on invalid escape sequences.
    # The character class matches exactly the same three characters.
    searchterm = re.sub(r'[.\-/]', ' ', searchterm).encode('utf-8')
    logger.debug("Converting Search Term [%s] to Web Safe Search Term [%s]" % (insearchterm, searchterm))
    return searchterm
def processOPF(dest_path=None, authorname=None, bookname=None, bookisbn=None, bookid=None,
               bookpub=None, bookdate=None, bookdesc=None, booklang=None, global_name=None):
    """Write an OPF metadata file for a book unless one already exists.

    The file is created as ``<dest_path>/<global_name>.opf``.  Title,
    creator, language and the GoogleBooks identifier are always written;
    ISBN, publisher, date and description only when supplied.
    """
    # Local import so the block brings its own dependency into scope.
    from xml.sax.saxutils import escape

    def xml_text(value):
        # Values are interpolated into element content, so '&', '<' and '>'
        # must be escaped or the resulting document is not well-formed.
        return escape('%s' % (value,))

    # Indentation inside the template is cosmetic (insignificant XML whitespace).
    opfinfo = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<package version="2.0" xmlns="http://www.idpf.org/2007/opf" >\n'
        '    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
        '        <dc:title>%s</dc:title>\n'
        '        <creator>%s</creator>\n'
        '        <dc:language>%s</dc:language>\n'
        '        <dc:identifier scheme="GoogleBooks">%s</dc:identifier>\n'
        % (xml_text(bookname), xml_text(authorname), xml_text(booklang), xml_text(bookid))
    )

    if bookisbn:
        opfinfo += '        <dc:identifier scheme="ISBN">%s</dc:identifier>\n' % xml_text(bookisbn)
    if bookpub:
        opfinfo += '        <dc:publisher>%s</dc:publisher>\n' % xml_text(bookpub)
    if bookdate:
        opfinfo += '        <dc:date>%s</dc:date>\n' % xml_text(bookdate)
    if bookdesc:
        opfinfo += '        <dc:description>%s</dc:description>\n' % xml_text(bookdesc)

    opfinfo += (
        '        <guide>\n'
        '            <reference href="cover.jpg" type="cover" title="Cover"/>\n'
        '        </guide>\n'
        '    </metadata>\n'
        '</package>'
    )

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '$': 's', ' + ': ' ', ',': '', '*': ''}
    opfinfo = formatter.latinToAscii(formatter.replace_all(opfinfo, dic))

    # handle metadata
    opfpath = os.path.join(dest_path, global_name + '.opf')
    if not os.path.exists(opfpath):
        with open(opfpath, 'wb') as opf:
            opf.write(opfinfo)
        logger.debug('Saved metadata to: ' + opfpath)
    else:
        logger.debug('%s allready exists. Did not create one.' % opfpath)
def openBook(self, bookLink=None, action=None, **args):
    """Serve the first book file found in the folder of the book at ``bookLink``.

    The book is looked up by its ``BookLink``; its folder is
    ``DESTINATION_DIR/<AuthorName>/<sanitised BookName>``.  Cover images
    (.jpg) and metadata (.opf) are skipped.  Returns the result of
    ``serve_file`` for the first other file, or None when the book or its
    folder is not found or holds nothing to serve.  ``action`` and ``args``
    are accepted but not used.
    """
    myDB = database.DBConnection()

    # find book
    # bookLink is request input: bind it as a parameter instead of
    # concatenating it into the SQL text.
    bookdata = myDB.select('SELECT * from books WHERE BookLink=?', [bookLink])
    logger.debug('SELECT * from books WHERE BookLink=\'%s\'' % bookLink)
    if not bookdata:
        return None

    authorName = bookdata[0]["AuthorName"]
    bookName = bookdata[0]["BookName"]

    dic = {'<': '', '>': '', '=': '', '?': '', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    bookName = formatter.latinToAscii(formatter.replace_all(bookName, dic))

    # os.path.join picks the separator for the running platform, replacing
    # the hand-built '\\' / '//' variants keyed on INSTALL_TYPE.
    dest_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorName, bookName)
    logger.debug('bookdir ' + dest_dir)

    if os.path.isdir(dest_dir):
        for file2 in os.listdir(dest_dir):
            # Test the extension itself: the original ``find(...) <= 0``
            # check matched '.jpg'/'.opf' anywhere in the name and missed
            # names that start with them.
            if not file2.lower().endswith(('.jpg', '.opf')):
                logger.info('Openning file ' + str(file2))
                return serve_file(os.path.join(dest_dir, file2), "application/x-download", "attachment")
    return None
def openMag(self, bookid=None, **args):
    """Serve the first issue file found in the folder of the magazine ``bookid``.

    ``bookid`` is matched against the magazine ``Title``.  The folder is
    ``DESTINATION_DIR`` joined with the MAG_DEST_FOLDER template after
    substituting ``$IssueDate`` and ``$Title``.  Cover images (.jpg) and
    metadata (.opf) are skipped.  Returns the result of ``serve_file`` for
    the first other file, or None when nothing can be served.  ``args`` is
    accepted but not used.
    """
    myDB = database.DBConnection()

    # find book
    bookdata = myDB.select("SELECT * from magazines WHERE Title=?", [bookid])
    if not bookdata:
        return None

    Title = bookdata[0]["Title"]
    IssueDate = bookdata[0]["IssueDate"]

    # NOTE(review): the original also computed a sanitised copy of Title but
    # never used it; the folder is built from the raw Title - confirm this
    # matches how magazine folders are created.
    mag_path = lazylibrarian.MAG_DEST_FOLDER.replace("$IssueDate", IssueDate).replace("$Title", Title)
    dest_dir = os.path.join(lazylibrarian.DESTINATION_DIR, mag_path)
    logger.debug("bookdir " + dest_dir)

    if os.path.isdir(dest_dir):
        for file2 in os.listdir(dest_dir):
            # Test the extension itself: the original ``find(...) <= 0``
            # check matched '.jpg'/'.opf' anywhere in the name and missed
            # names that start with them.
            if not file2.lower().endswith((".jpg", ".opf")):
                logger.info("Opening file " + str(file2))
                return serve_file(os.path.join(dest_dir, file2), "application/x-download", "attachment")
    return None
def openMag(self, bookid=None, **args):
    """Serve the first issue file found in the folder of the magazine ``bookid``.

    ``bookid`` is matched against the magazine ``Title``.  The folder is
    ``DESTINATION_DIR`` joined with the MAG_DEST_FOLDER template after
    substituting ``$IssueDate`` and ``$Title``.  Cover images (.jpg) and
    metadata (.opf) are skipped.  Returns the result of ``serve_file`` for
    the first other file, or None when nothing can be served.  ``args`` is
    accepted but not used.
    """
    myDB = database.DBConnection()

    # find book
    bookdata = myDB.select('SELECT * from magazines WHERE Title=?', [bookid])
    if not bookdata:
        return None

    Title = bookdata[0]["Title"]
    IssueDate = bookdata[0]["IssueDate"]

    # NOTE(review): the original also computed a sanitised copy of Title but
    # never used it; the folder is built from the raw Title - confirm this
    # matches how magazine folders are created.
    mag_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', IssueDate).replace('$Title', Title)
    dest_dir = os.path.join(lazylibrarian.DESTINATION_DIR, mag_path)
    logger.debug('bookdir ' + dest_dir)

    if os.path.isdir(dest_dir):
        for file2 in os.listdir(dest_dir):
            # Test the extension itself: the original ``find(...) <= 0``
            # check matched '.jpg'/'.opf' anywhere in the name and missed
            # names that start with them.
            if not file2.lower().endswith(('.jpg', '.opf')):
                logger.info('Opening file ' + str(file2))
                return serve_file(os.path.join(dest_dir, file2), "application/x-download", "attachment")
    return None
def openBook(self, bookid=None, **args):
    """Serve the first book file found in the folder of the book ``bookid``.

    The folder is ``DESTINATION_DIR`` joined with the EBOOK_DEST_FOLDER
    template after substituting ``$Author`` and the sanitised ``$Title``.
    Cover images (.jpg) and metadata (.opf) are skipped.  Returns the
    result of ``serve_file`` for the first other file, or None when nothing
    can be served.  ``args`` is accepted but not used.
    """
    myDB = database.DBConnection()

    # find book
    bookdata = myDB.select('SELECT * from books WHERE BookID=?', [bookid])
    if not bookdata:
        return None

    authorName = bookdata[0]["AuthorName"]
    bookName = bookdata[0]["BookName"]

    dic = {'<': '', '>': '', '=': '', '?': '', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    bookName = formatter.latinToAscii(formatter.replace_all(bookName, dic))

    ebook_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorName).replace('$Title', bookName)
    dest_dir = os.path.join(lazylibrarian.DESTINATION_DIR, ebook_path)
    logger.debug('bookdir ' + dest_dir)

    if os.path.isdir(dest_dir):
        for file2 in os.listdir(dest_dir):
            # Test the extension itself: the original ``find(...) <= 0``
            # check matched '.jpg'/'.opf' anywhere in the name and missed
            # names that start with them.
            if not file2.lower().endswith(('.jpg', '.opf')):
                logger.info('Opening file ' + str(file2))
                return serve_file(os.path.join(dest_dir, file2), "application/x-download", "attachment")
    return None
def openBook(self, bookid=None, **args):
    """Serve the first book file found in the folder of the book ``bookid``.

    The folder is ``DESTINATION_DIR`` joined with the EBOOK_DEST_FOLDER
    template after substituting ``$Author`` and the sanitised ``$Title``.
    Cover images (.jpg) and metadata (.opf) are skipped.  Returns the
    result of ``serve_file`` for the first other file, or None when nothing
    can be served.  ``args`` is accepted but not used.
    """
    myDB = database.DBConnection()

    # find book
    bookdata = myDB.select("SELECT * from books WHERE BookID=?", [bookid])
    if not bookdata:
        return None

    authorName = bookdata[0]["AuthorName"]
    bookName = bookdata[0]["BookName"]

    dic = {"<": "", ">": "", "=": "", "?": "", '"': "", ",": "", "*": "", ":": "", ";": "", "'": ""}
    bookName = formatter.latinToAscii(formatter.replace_all(bookName, dic))

    ebook_path = lazylibrarian.EBOOK_DEST_FOLDER.replace("$Author", authorName).replace("$Title", bookName)
    dest_dir = os.path.join(lazylibrarian.DESTINATION_DIR, ebook_path)
    logger.debug("bookdir " + dest_dir)

    if os.path.isdir(dest_dir):
        for file2 in os.listdir(dest_dir):
            # Test the extension itself: the original ``find(...) <= 0``
            # check matched '.jpg'/'.opf' anywhere in the name and missed
            # names that start with them.
            if not file2.lower().endswith((".jpg", ".opf")):
                logger.info("Opening file " + str(file2))
                return serve_file(os.path.join(dest_dir, file2), "application/x-download", "attachment")
    return None
def import_book(pp_path=None, bookID=None):
    """Move a downloaded book into the library folder structure.

    Kept as a separate function so books can be imported from an alternate
    directory given just the bookID, e.g.::

        if import_book(source_directory, bookID):
            ppcount = ppcount + 1

    The book is looked up in the ``books`` table, the author directory is
    chmod'ed to 0777 on a best-effort basis, the destination folder and file
    name are built from the EBOOK_DEST_FOLDER / EBOOK_DEST_FILE templates,
    and the work is handed to ``processDestination``.  On success the
    matching ``wanted`` row is marked "Processed" and ``processExtras`` is
    run.

    Returns True when the book was processed, False otherwise (including
    when ``bookID`` is not found in the database).
    """
    myDB = database.DBConnection()
    # Bind the id as a parameter rather than formatting it into the SQL text.
    data = myDB.select('SELECT * from books WHERE BookID=?', [bookID])
    if not data:
        return False

    authorname = data[0]['AuthorName']
    bookname = data[0]['BookName']

    # Build the path outside the try block so the handler below can always
    # refer to it; previously a failure here left auth_dir unbound and the
    # except clause itself raised.
    auth_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorname).encode(lazylibrarian.SYS_ENCODING)
    try:
        os.chmod(auth_dir, 0o777)
    except OSError:
        # Permissions are best effort; carry on with the import.
        logger.debug("Could not chmod author directory: " + str(auth_dir))

    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)

    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

    if processDestination(pp_path, dest_path, authorname, bookname, global_name, bookID):
        # update nzbs
        controlValueDict = {"BookID": bookID}
        newValueDict = {"Status": "Processed", "NZBDate": formatter.today()}  # say when we processed it
        myDB.upsert("wanted", newValueDict, controlValueDict)
        processExtras(myDB, dest_path, global_name, data)
        return True

    logger.error('Postprocessing for %s has failed.' % global_name)
    logger.error('Warning - Residual files remain in %s' % pp_path)
    return False
def openBook(self, bookLink=None, action=None, **args):
    """Serve the first book file found in the folder of the book at ``bookLink``.

    The book is looked up by its ``BookLink``; its folder is
    ``DESTINATION_DIR/<AuthorName>/<sanitised BookName>``.  Cover images
    (.jpg) and metadata (.opf) are skipped.  Returns the result of
    ``serve_file`` for the first other file, or None when the book or its
    folder is not found or holds nothing to serve.  ``action`` and ``args``
    are accepted but not used.
    """
    myDB = database.DBConnection()

    # find book
    # bookLink is request input: bind it as a parameter instead of
    # concatenating it into the SQL text.
    bookdata = myDB.select('SELECT * from books WHERE BookLink=?', [bookLink])
    logger.debug('SELECT * from books WHERE BookLink=\'%s\'' % bookLink)
    if not bookdata:
        return None

    authorName = bookdata[0]["AuthorName"]
    bookName = bookdata[0]["BookName"]

    dic = {'<': '', '>': '', '=': '', '?': '', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    bookName = formatter.latinToAscii(formatter.replace_all(bookName, dic))

    # os.path.join picks the separator for the running platform, replacing
    # the hand-built '\\' / '//' variants keyed on INSTALL_TYPE.
    dest_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorName, bookName)
    logger.debug('bookdir ' + dest_dir)

    if os.path.isdir(dest_dir):
        for file2 in os.listdir(dest_dir):
            # Test the extension itself: the original ``find(...) <= 0``
            # check matched '.jpg'/'.opf' anywhere in the name and missed
            # names that start with them.
            if not file2.lower().endswith(('.jpg', '.opf')):
                logger.info('Openning file ' + str(file2))
                return serve_file(os.path.join(dest_dir, file2), "application/x-download", "attachment")
    return None
def search_tor_book(books=None, mags=None):
    """Search the torrent providers for wanted books and snatch matches.

    With ``books`` None this is a backlog search: every book with status
    "Wanted" is searched, and the ``.ProviderCache`` directory and the
    request throttling timestamps are cleared first.  Otherwise ``books`` is
    an iterable of dicts with a 'bookid' key and only those still "Wanted"
    are searched.  Does nothing when ``lazylibrarian.USE_TOR`` is false.
    ``mags`` is accepted but not used.
    """
    if not lazylibrarian.USE_TOR:
        return

    # rename this thread
    threading.current_thread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink(os.path.join(".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        if books:
            for book in books:
                rows = myDB.select(
                    'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                    [book['bookid']])
                searchbooks.extend(rows)

    # Replacement tables are the same for every book, so build them once.
    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    for searchbook in searchbooks:
        bookid = searchbook[0]
        author = formatter.latinToAscii(formatter.replace_all(searchbook[1], dic))
        book = formatter.latinToAscii(formatter.replace_all(searchbook[2], dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + ' ' + book  # + ' ' + lazylibrarian.EBOOK_TYPE
        # Raw string: the original '[\.\-\/]' relied on invalid escapes.
        searchterm = re.sub(r'[.\-/]', ' ', searchterm).encode('utf-8')
        searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8')
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        searchlist.append({
            "bookid": bookid,
            "bookName": searchbook[2],
            "authorName": searchbook[1],
            "searchterm": searchterm.strip()
        })

    if not lazylibrarian.KAT:
        logger.info('No download method is set, use SABnzbd or blackhole')

    # '\\s\\s' is the same literal backslash-s pair the original spelled as
    # '\s\s'; written this way it is no longer an invalid escape sequence.
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': '', '\\s\\s': ' ',
                ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    for book in searchlist:
        resultlist = providers.IterateOverTorrentSites(book, 'book')

        # if you can't find the book specifically, you might find under general search
        if not resultlist:
            logger.info("Searching for type book failed to find any books...moving to general search")
            resultlist = providers.IterateOverTorrentSites(book, 'general')

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        logger.debug(u'searchterm %s' % book['searchterm'])
        match_ratio = int(lazylibrarian.MATCH_RATIO)
        addedCounter = 0

        for tor in resultlist:
            tor_Title = formatter.latinToAscii(
                formatter.replace_all(str(tor['tor_title']).lower(), dictrepl)).strip()
            tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace
            logger.debug(u'torName %s' % tor_Title)

            tor_Title_match = fuzz.token_sort_ratio(book['searchterm'].lower(), tor_Title)
            logger.debug("Torrent Title Match %: " + str(tor_Title_match))
            if tor_Title_match <= match_ratio:
                continue

            logger.info(u'Found Torrent: %s' % tor['tor_title'])
            addedCounter = addedCounter + 1
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']

            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action(
                'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
            if not snatchedbooks:
                DownloadMethod(bookid, tor_prov, tor_Title, tor_url)
                notifiers.notify_snatch(tor_Title + ' at ' + formatter.now())
            # Stop at the first matching result for this book.
            # NOTE(review): nesting of this break was reconstructed from
            # whitespace-collapsed source - confirm it is not meant to sit
            # under ``if not snatchedbooks``.
            break

        if addedCounter == 0:
            logger.info("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                        ". Adding book to queue.")

    logger.info("Search for Wanted items complete")
def search_tor_book(books=None, mags=None):
    """Search the torrent providers for wanted books and snatch matches.

    With ``books`` None this is a backlog search: every book with status
    "Wanted" is searched, and the ``.ProviderCache`` directory and the
    request throttling timestamps are cleared first.  Otherwise ``books`` is
    an iterable of dicts with a 'bookid' key and only those still "Wanted"
    are searched.  Does nothing when ``lazylibrarian.USE_TOR`` is false.
    ``mags`` is accepted but not used.
    """
    if not lazylibrarian.USE_TOR:
        return

    # rename this thread
    threading.current_thread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink(os.path.join(".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        if books:
            for book in books:
                rows = myDB.select(
                    'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                    [book['bookid']])
                searchbooks.extend(rows)

    # Replacement tables are the same for every book, so build them once.
    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    for searchbook in searchbooks:
        bookid = searchbook[0]
        author = formatter.latinToAscii(formatter.replace_all(searchbook[1], dic))
        book = formatter.latinToAscii(formatter.replace_all(searchbook[2], dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + ' ' + book  # + ' ' + lazylibrarian.EBOOK_TYPE
        # Raw string: the original '[\.\-\/]' relied on invalid escapes.
        searchterm = re.sub(r'[.\-/]', ' ', searchterm).encode('utf-8')
        searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8')
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        searchlist.append({
            "bookid": bookid,
            "bookName": searchbook[2],
            "authorName": searchbook[1],
            "searchterm": searchterm.strip()
        })

    if not lazylibrarian.KAT:
        logger.info('No download method is set, use SABnzbd or blackhole')

    # '\\s\\s' is the same literal backslash-s pair the original spelled as
    # '\s\s'; written this way it is no longer an invalid escape sequence.
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': '', '\\s\\s': ' ',
                ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    for book in searchlist:
        resultlist = providers.IterateOverTorrentSites(book, 'book')

        # if you can't find the book specifically, you might find under general search
        if not resultlist:
            logger.info("Searching for type book failed to find any books...moving to general search")
            resultlist = providers.IterateOverTorrentSites(book, 'general')

        if not resultlist:
            logger.debug("No result found, Adding book %s to queue " % book['searchterm'])
            continue

        logger.debug(u'searchterm %s' % book['searchterm'])
        match_ratio = int(lazylibrarian.MATCH_RATIO)
        addedCounter = 0

        for tor in resultlist:
            tor_Title = formatter.latinToAscii(
                formatter.replace_all(str(tor['tor_title']).lower(), dictrepl)).strip()
            tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace
            logger.debug(u'torName %s' % tor_Title)

            tor_Title_match = fuzz.token_sort_ratio(book['searchterm'].lower(), tor_Title)
            logger.debug("Torrent Title Match %: " + str(tor_Title_match))
            if tor_Title_match <= match_ratio:
                continue

            logger.info(u'Found Torrent: %s' % tor['tor_title'])
            addedCounter = addedCounter + 1
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']

            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action(
                'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
            if not snatchedbooks:
                DownloadMethod(bookid, tor_prov, tor_Title, tor_url)
                notifiers.notify_snatch(tor_Title + ' at ' + formatter.now())
            # Stop at the first matching result for this book.
            # NOTE(review): nesting of this break was reconstructed from
            # whitespace-collapsed source - confirm it is not meant to sit
            # under ``if not snatchedbooks``.
            break

        if addedCounter == 0:
            logger.info("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                        ". Adding book to queue.")

    logger.info("Search for Wanted items complete")
def searchbook(books=None):
    """Search the NZB providers for wanted books and snatch matches.

    With ``books`` None every book with status "Wanted" is searched;
    otherwise ``books`` is an iterable of dicts with a 'bookid' key and only
    those still "Wanted" are searched.  Two search terms are tried per book:
    "author title type" and "+author +type".  A result is accepted when
    every word of the cleaned book name occurs in the cleaned NZB title;
    accepted results are upserted into ``wanted`` as "Skipped" and
    downloaded unless the book is already "Snatched".
    """
    # rename this thread
    threading.current_thread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')
    else:
        searchbooks = []
        for book in books:
            # Named ``rows`` so the local no longer shadows this function.
            rows = myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                [book['bookid']])
            searchbooks.extend(rows)

    # Replacement tables are the same for every book, so build them once.
    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': ''}
    dicSearchFormatting = {' ': ' +', '.': ' +', ' + ': ' '}
    dicSearchFormatting1 = {' + ': ' '}

    for row in searchbooks:
        bookid = row[0]
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        book = formatter.latinToAscii(formatter.replace_all(row[2], dic))

        # OLD SEARCH TERM
        searchterm = author + ' ' + book + ' ' + lazylibrarian.EBOOK_TYPE
        # Raw string: the original '[\.\-\/]' relied on invalid escapes.
        searchterm = re.sub(r'[.\-/]', ' ', searchterm).encode('utf-8')
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        searchlist.append({"bookid": bookid, "bookName": row[2], "authorName": row[1],
                           "searchterm": searchterm.strip()})

        # TRY A SECOND SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting1))
        searchterm1 = '+' + author + ' +' + lazylibrarian.EBOOK_TYPE
        searchterm1 = re.sub(r'[.\-/]', ' ', searchterm1).encode('utf-8')
        searchterm1 = re.sub(r'\(.*?\)', '', searchterm1).encode('utf-8')
        searchterm1 = re.sub(r"\s\s+", " ", searchterm1)  # strip any double white space
        searchlist.append({"bookid": bookid, "bookName": row[2], "authorName": row[1],
                           "searchterm": searchterm1.strip()})

    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No download method is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB and not lazylibrarian.NZBMATRIX:
        logger.info('No providers are set. use NEWZNAB or NZBMATRIX')

    # '\\s\\s' is the same literal backslash-s pair the original spelled as
    # '\s\s'; written this way it is no longer an invalid escape sequence.
    dictrepl = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '',
                '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '',
                '\\s\\s': ' '}

    for book in searchlist:
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book)

        # Only fall back to the second provider when the first found nothing.
        if lazylibrarian.NZBMATRIX and not resultlist:
            logger.debug('Searching NZB at provider NZBMatrix ...')
            resultlist = providers.NZBMatrix(book)

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        bookName = book['bookName']
        bookName = re.sub(r'[.\-/]', ' ', bookName)
        bookName = re.sub(r'\(.*?\)', '', bookName)
        bookName = formatter.latinToAscii(formatter.replace_all(bookName, dictrepl)).strip()
        logger.debug(u'bookName %s' % bookName)
        bookNameList = bookName.split()
        addedCounter = 0

        for nzb in resultlist:
            nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
            logger.debug(u'nzbName %s' % nzbTitle)

            # Accept the result only if every word of the book name appears
            # somewhere in the NZB title (case-insensitive).
            lowered_title = nzbTitle.lower()
            if not all(word.lower() in lowered_title for word in bookNameList):
                continue

            logger.debug(u'FOUND %s' % lowered_title)
            addedCounter = addedCounter + 1
            bookid = nzb['bookid']
            nzbTitle = (book["authorName"] + ' ' + bookName).strip()
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']

            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.today(),
                "NZBtitle": nzbTitle,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action(
                'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
            if not snatchedbooks:
                DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
            # NOTE(review): nesting of this sleep was reconstructed from
            # whitespace-collapsed source - confirm it runs once per
            # accepted result.
            time.sleep(1)

        if addedCounter == 0:
            logger.info("No nzb's found for " + (book["authorName"] + ' ' + bookName).strip() +
                        ". Adding book to queue.")
def _parse_issue_date(title_words):
    """Derive an issue date from the trailing words of a release title.

    title_words -- list of at least three words, trailing junk already removed

    Three layouts are tried in turn, always reading from the end of the list:

      A: [DD] MonthName YYYY  (a leading number above 31 is treated as an
                               issue number and the day falls back to '01')
      B: MonthName DD YYYY
      C: YYYY MM  or  YYYY MM DD

    Returns the first candidate accepted by datetime.date, joined as
    'year-month-day', or None when no layout yields a valid date.
    """
    # OverflowError covers absurdly long digit strings handed to datetime.date
    bad_date = (ValueError, OverflowError)

    def checked(year, month, day):
        # build the string first, then let datetime.date validate the parts
        datish = year + '-' + month + '-' + day
        datetime.date(int(year), int(month), int(day))
        return datish

    last = title_words[-1]
    middle = title_words[-2]
    first = title_words[-3]

    # layouts A and B both need a plausible year as the final word
    if last.isdigit() and 1900 <= int(last) <= 2100:
        # A: [DD] MonthName YYYY
        day = first.zfill(2)
        if not day.isdigit() or int(day) > 31:
            day = '01'  # just MonthName YYYY, or an issue number
        month = formatter.month2num(common.remove_accents(middle))
        try:
            return checked(last, month, day)
        except bad_date:
            pass

        # B: MonthName DD YYYY
        month = formatter.month2num(common.remove_accents(first))
        try:
            return checked(last, month, middle.zfill(2))
        except bad_date:
            pass

    # C: YYYY MM, else YYYY MM DD (can't get MM/DD if named YYYY Issue nn)
    if middle.isdigit() and 1900 < int(middle) < 2100:
        year, month, day = middle, last.zfill(2), '01'
    elif first.isdigit() and 1900 < int(first) < 2100:
        year, month, day = first, middle.zfill(2), last.zfill(2)
    else:
        return None
    try:
        return checked(year, month, day)
    except bad_date:
        return None


def search_magazines(mags=None, reset=False):
    """Search the enabled providers for new issues of active magazines.

    mags  -- optional list of dicts whose 'bookid' entry is a magazine Title;
             when None every magazine with Status "Active" is searched
    reset -- when true, the 'search_magazines' job is restarted at the end

    A result is snatched when its title matches the magazine name and its
    issue date is newer than the magazine's IssueDate (or newer than 31 days
    ago when no IssueDate is stored).  Snatched results are written to the
    "wanted" table with Status "Wanted".  Returns None.
    """
    # produce a list of magazines to search for, tor, nzb, torznab
    myDB = database.DBConnection()
    searchlist = []

    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:  # backlog search
        searchmags = myDB.select(
            'SELECT Title, LastAcquired, IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            # bound parameter, so titles containing quotes cannot break the query
            searchmags.extend(myDB.select(
                'SELECT Title, LastAcquired, IssueDate from magazines '
                'WHERE Title=? AND Status="Active"', [magazine['bookid']]))

    if len(searchmags) == 1:
        logger.info('Searching for one magazine')
    else:
        logger.info('Searching for %i magazines' % len(searchmags))

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
    for searchmag in searchmags:
        bookid = searchmag[0]
        searchterm = formatter.latinToAscii(formatter.replace_all(bookid, dic))
        searchterm = re.sub(r'[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if not searchlist:
        logger.warn('There is nothing to search for. Mark some magazines as active.')

    for book in searchlist:
        resultlist = []

        if lazylibrarian.USE_NZB():
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                resultlist.append({
                    'bookid': item['bookid'],
                    'nzbprov': item['tor_prov'],
                    'nzbtitle': item['tor_title'],
                    'nzburl': item['tor_url'],
                    'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                    'nzbsize': item['tor_size'],
                    'nzbmode': 'torrent'
                })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])
            continue

        bad_regex = 0
        bad_date = 0
        old_date = 0
        total_nzbs = 0
        new_date = 0
        to_snatch = 0
        maglist = []
        issues = set()  # "title,date" keys already queued in this pass
        reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

        for nzb in resultlist:
            total_nzbs = total_nzbs + 1
            bookid = nzb['bookid']
            nzbtitle = (u'%s' % nzb['nzbtitle'])
            nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbsize_temp = nzb['nzbsize']
            if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
            nzbmode = nzb['nzbmode']

            checkifmag = myDB.select('SELECT * from magazines WHERE Title=?', [bookid])
            for results in checkifmag or []:
                control_date = results['IssueDate']

                nzbtitle_formatted = nzbtitle
                for separator in ('.', '-', '/', '+', '_'):
                    nzbtitle_formatted = nzbtitle_formatted.replace(separator, ' ')
                nzbtitle_formatted = nzbtitle_formatted.replace('(', '').replace(')', '').strip()

                # Need to make sure that substrings of magazine titles don't get found
                # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                # split() also collapses runs of spaces
                nzbtitle_exploded = nzbtitle_formatted.split()
                bookid_exploded = bookid.split(' ')

                # check nzb starts with magazine title, and ends with a date
                # eg The MagPI Issue 22 - July 2015
                name_match = True  # assume name matches for now
                if len(nzbtitle_exploded) > len(bookid_exploded):
                    # needs to be longer as it has to include a date
                    # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                    mag_title_match = fuzz.token_set_ratio(
                        common.remove_accents(bookid),
                        common.remove_accents(nzbtitle_formatted))
                    if mag_title_match < lazylibrarian.MATCH_RATIO:
                        logger.debug(u"Magazine token set Match failed: " + str(mag_title_match) +
                                     "% for " + nzbtitle_formatted)
                        name_match = False

                    lower_title = common.remove_accents(nzbtitle_formatted).lower()
                    lower_bookid = common.remove_accents(bookid).lower()
                    for word in reject_list:
                        if word in lower_title and word not in lower_bookid:
                            name_match = False
                            logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                            break

                if not name_match:
                    logger.debug('Magazine [%s] does not completely match search term [%s].' % (
                        nzbtitle_formatted, bookid))
                    bad_regex = bad_regex + 1
                    continue

                # some magazine torrent uploaders add their sig in [] or {}
                # Fortunately for us, they always seem to add it at the end
                # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                # so strip all the trailing junk...
                # (guarded so a title made only of junk cannot index an empty list)
                while nzbtitle_exploded and (
                        nzbtitle_exploded[-1].startswith(('[', '{')) or
                        nzbtitle_exploded[-1].lower() == 'pdf'):
                    nzbtitle_exploded.pop()

                # need at least one word magazine title and two date components
                if len(nzbtitle_exploded) <= 2:
                    continue

                newdatish = _parse_issue_date(nzbtitle_exploded)
                if newdatish is None:
                    logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                    bad_date = bad_date + 1
                    # allow issues with good name but bad date to be included
                    # so user can manually select them, incl those with issue numbers
                    newdatish = "1970-01-01"  # provide a fake date for bad-date issues

                if control_date is None:  # we haven't got any copies of this magazine yet
                    # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                    # could perhaps calc differently for weekly, biweekly etc
                    start_time = time.time()
                    start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                    control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

                # only grab a copy if it's newer than the most recent we have,
                # or newer than a month ago if we have none
                comp_date = formatter.datecompare(newdatish, control_date)
                if comp_date > 0:
                    # Should probably only upsert when downloaded and processed in case snatch fails
                    # keep track of what we're going to download so we don't download dupes
                    new_date = new_date + 1
                    issue = bookid + ',' + newdatish
                    if issue not in issues:
                        maglist.append({
                            'bookid': bookid,
                            'nzbprov': nzbprov,
                            'nzbtitle': nzbtitle,
                            'nzburl': nzburl,
                            'nzbmode': nzbmode
                        })
                        logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                        to_snatch = to_snatch + 1
                        issues.add(issue)

                        controlValueDict = {"NZBurl": nzburl}
                        newValueDict = {
                            "NZBprov": nzbprov,
                            "BookID": bookid,
                            "NZBdate": formatter.now(),  # when we asked for it
                            "NZBtitle": nzbtitle,
                            "AuxInfo": newdatish,
                            "Status": "Wanted",
                            "NZBsize": nzbsize,
                            "NZBmode": nzbmode
                        }
                        myDB.upsert("wanted", newValueDict, controlValueDict)
                    else:
                        logger.debug('This issue of %s is already flagged for download' % issue)
                elif newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                    old_date = old_date + 1

        logger.info('Found %s results for %s. %s are new, %s are old, %s fail date, %s fail name matching' % (
            total_nzbs, bookid, new_date, old_date, bad_date, bad_regex))
        logger.info("%s, %s issues to download" % (bookid, to_snatch))

        for items in maglist:
            # torznab and plain torrent results both go through the torrent downloader
            if items['nzbmode'] in ("torznab", "torrent"):
                snatch = TORDownloadMethod(items['bookid'], items['nzbprov'],
                                           items['nzbtitle'], items['nzburl'])
            else:
                snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'],
                                           items['nzbtitle'], items['nzburl'])
            if snatch:
                notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')

    if reset:
        common.schedule_job(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
def search_rss_book(books=None, reset=False):
    """Look for wanted books in the results returned by the RSS providers.

    books -- optional list of dicts with a 'bookid' entry; when None every
             book with Status "Wanted" is searched (backlog search)
    reset -- when true, the 'search_rss_book' job is restarted at the end

    Returns None.  The function returns early, without restarting the job,
    when RSS is disabled, when there are no books to search for, or when no
    RSS provider is configured.
    """
    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        common.schedule_job(action='Stop', target='search_rss_book')
        return

    # rename this thread
    threading.currentThread().name = "SEARCHRSSBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE Status="Wanted"')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            # bound parameter instead of string interpolation
            searchbooks.extend(myDB.select(
                'SELECT BookID, AuthorName, BookName from books '
                'WHERE BookID=? AND Status="Wanted"', [book['bookid']]))

    if len(searchbooks) == 0:
        logger.debug("RSS search requested for no books")
        return
    elif len(searchbooks) == 1:
        logger.info('RSS Searching for one book')
    else:
        logger.info('RSS Searching for %i books' % len(searchbooks))

    resultlist, nproviders = providers.IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    rss_count = 0
    for book in searchbooks:
        author = formatter.latinToAscii(formatter.replace_all(book['AuthorName'], dic))
        title = formatter.latinToAscii(formatter.replace_all(book['BookName'], dic))

        found = processResultList(resultlist, author, title, book)

        # if you can't find the book, try author without initials,
        # and title without any "(extended details, series etc)"
        if not found:
            # the length guard keeps very short author names from raising IndexError
            has_initial = len(author) > 1 and author[1] in '. '
            if has_initial or '(' in title:  # anything to shorten?
                while len(author) > 1 and author[1] in '. ':  # strip any initials
                    author = author[2:].strip()  # and leading whitespace
                if '(' in title:
                    title = title.split('(')[0]
                found = processResultList(resultlist, author, title, book)

        if not found:
            logger.debug("Searches returned no results. Adding book %s - %s to queue." % (author, title))
        else:
            rss_count = rss_count + 1

    if rss_count == 1:
        logger.info("RSS Search for Wanted items complete, found %s book" % rss_count)
    else:
        logger.info("RSS Search for Wanted items complete, found %s books" % rss_count)

    if reset:
        common.schedule_job(action='Restart', target='search_rss_book')
searchbooks.append(terms) if len(searchbooks) == 1: logger.info('TOR Searching for one book') else: logger.info('TOR Searching for %i books' % len(searchbooks)) for searchbook in searchbooks: bookid = searchbook[0] author = searchbook[1] book = searchbook[2] dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''} dicSearchFormatting = {'.': ' +', ' + ': ' '} author = formatter.latinToAscii(formatter.replace_all(author, dic)) book = formatter.latinToAscii(formatter.replace_all(book, dic)) # TRY SEARCH TERM just using author name and book type author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting)) searchterm = author + ' ' + book # + ' ' + lazylibrarian.EBOOK_TYPE searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8') searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8') searchterm = re.sub(r"\s\s+", " ", searchterm) # strip any double white space searchlist.append({"bookid": bookid, "bookName": searchbook[2], "authorName": searchbook[1], "searchterm": searchterm.strip()}) counter = 0 for book in searchlist: resultlist, nproviders = providers.IterateOverTorrentSites(book, 'book') if not nproviders: logger.warn('No torrent providers are set, check config for TORRENT providers')
def processDir():
    """Post-process downloaded books found in the download directory.

    Every entry of lazylibrarian.DOWNLOAD_DIR whose name contains
    "LL.(<BookID>)" is looked up in the books table and handed to
    processDestination.  On success the cover image and OPF metadata are
    written, the matching "wanted" row is set to "Success", the book to
    "Open", and the author's HaveBooks count is refreshed.  Returns None.
    """
    logger.debug('Postprocessing has begun.')

    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR
    downloads = os.listdir(processpath)
    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
        return
    if not downloads:  # os.listdir returns a list, never None
        logger.info('No downloads are found. Nothing to process.')
        return

    # characters we don't want in the destination path
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': '', "'": ''}

    ppcount = 0
    for directory in downloads:
        if "LL.(" not in directory:
            continue

        bookID = str(directory).split("LL.(")[1].split(")")[0]
        logger.debug("Book with id: " + str(bookID) + " is in downloads")
        pp_path = os.path.join(processpath, directory)

        if not os.path.exists(pp_path):
            continue
        logger.debug('Found folder %s.' % pp_path)

        # bookID comes from a directory name, so bind it rather than interpolate it
        data = myDB.select("SELECT * from books WHERE BookID=?", [bookID])
        if not data:
            # without this guard the previous folder's metadata would be reused
            logger.debug('No book with id %s in the database, skipping %s' % (bookID, pp_path))
            continue

        metadata = data[-1]  # the last row wins, as in a loop over all rows
        authorname = metadata['AuthorName']
        bookname = metadata['BookName']
        bookdesc = metadata['BookDesc']
        bookisbn = metadata['BookIsbn']
        bookimg = metadata['BookImg']
        bookdate = metadata['BookDate']
        booklang = metadata['BookLang']
        bookpub = metadata['BookPub']

        try:
            os.chmod(os.path.join(lazylibrarian.DESTINATION_DIR, authorname).encode(
                lazylibrarian.SYS_ENCODING), 0o777)
        except Exception:
            # best effort only
            logger.debug("Could not chmod author directory")

        dest_path = authorname + os.sep + bookname
        dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
            lazylibrarian.SYS_ENCODING)

        if not processDestination(pp_path, dest_path, authorname, bookname):
            logger.info('Postprocessing for %s has failed.' % bookname)
            continue

        ppcount = ppcount + 1

        # try image
        processIMG(dest_path, bookimg)

        # try metadata
        processOPF(dest_path, authorname, bookname, bookisbn, bookID, bookpub, bookdate, bookdesc, booklang)

        # update nzbs
        myDB.upsert("wanted", {"Status": "Success"}, {"NZBurl": directory})

        # update books
        myDB.upsert("books", {"Status": "Open"}, {"BookID": bookID})

        # update authors
        query = 'SELECT COUNT(*) FROM books WHERE AuthorName=? AND (Status="Have" OR Status="Open")'
        countbooks = myDB.action(query, [authorname]).fetchone()
        havebooks = int(countbooks[0])
        myDB.upsert("authors", {"HaveBooks": havebooks}, {"AuthorName": authorname})

        logger.info('Successfully processed: %s - %s' % (authorname, bookname))

    if ppcount:
        logger.debug('%s books are downloaded and processed.' % ppcount)
    else:
        logger.debug('No snatched books have been found')
def searchbook(books=None):
    """Search the NewzNab providers for wanted books and snatch a match.

    books -- optional list of dicts with a 'bookid' entry; when None every
             book with Status "Wanted" is searched (backlog search) and the
             ".ProviderCache" directory and throttling timeouts are cleared

    The search term is built from the author name only.  Each result title
    is compared with the book name using fuzz.partial_ratio; the first result
    scoring above 80 is stored in the "wanted" table and downloaded unless
    the book already has Status "Snatched".  Returns None.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink(os.path.join(".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbooks.extend(myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                [book['bookid']]))

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '',
           '*': '', ':': '', ';': ''}
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    for row in searchbooks:
        # TRY SEARCH TERM just using author name
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))

        searchterm1 = re.sub(r'[\.\-\/]', ' ', author)
        searchterm1 = re.sub(r'\(.*?\)', '', searchterm1)
        searchterm1 = re.sub(r"\s\s+", " ", searchterm1)  # strip any double white space
        # encode once, after all text substitutions are done
        searchterm1 = searchterm1.strip().encode('utf-8')
        searchlist.append({"bookid": row[0], "bookName": row[2], "authorName": row[1],
                           "searchterm": searchterm1})

    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No download method is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2:
        logger.info('No providers are set. use NEWZNAB.')

    # applied to both the book name and every result title before comparing them
    dictrepl = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '',
                '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', "'": '', ':': '',
                '!': '', '-': '', '\\s\\s': ' ', ' the ': ' ', ' a ': ' ', ' and ': ' ',
                ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ',
                ' with ': ' '}

    for book in searchlist:
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book, "1")

        if lazylibrarian.NEWZNAB2:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST2)
            resultlist += providers.NewzNab(book, "2")

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        bookID = book['bookid']
        bookName = re.sub(r'[\.\-\/]', ' ', book['bookName'])
        bookName = re.sub(r'\(.*?\)', '', bookName)
        bookName = formatter.latinToAscii(formatter.replace_all(bookName.lower(), dictrepl)).strip()
        logger.debug(u'bookName %s' % bookName)

        addedCounter = 0
        for nzb in resultlist:
            nzbTitle = formatter.latinToAscii(
                formatter.replace_all(str(nzb['nzbtitle']).lower(), dictrepl)).strip()
            logger.debug(u'nzbName %s' % nzbTitle)

            match_ratio = fuzz.partial_ratio(bookName, nzbTitle)  # computed once per result
            logger.debug("NZB Match %: " + str(match_ratio))
            if match_ratio <= 80:
                continue

            logger.debug(u'FOUND %s' % nzbTitle.lower())
            addedCounter = addedCounter + 1
            bookid = nzb['bookid']
            nzbTitle = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + bookID + ')').strip()
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']

            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.today(),
                "NZBtitle": nzbTitle,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                        [bookid]).fetchone()
            if not snatchedbooks:
                DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
            # NOTE(review): stops at the first acceptable match - confirm this
            # nesting of the break against the upstream file
            break

        if addedCounter == 0:
            logger.info("No nzb's found for " + (book["authorName"] + ' ' + bookName).strip() +
                        ". Adding book to queue.")
def processDir():
    """Post-process downloaded books found in the download directory.

    Every entry of lazylibrarian.DOWNLOAD_DIR whose name contains
    "LL.(<BookID>)" is looked up in the books table and handed to
    processDestination.  On success the cover image and OPF metadata are
    written, the matching "wanted" row is set to "Success", the book to
    "Open", and the author's HaveBooks count is refreshed.  Returns None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    logger.debug('Postprocessing has begun.')

    processpath = lazylibrarian.DOWNLOAD_DIR
    downloads = os.listdir(processpath)
    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
        return
    if not downloads:  # os.listdir returns a list, never None
        logger.info('No downloads are found. Nothing to process.')
        return

    # characters we don't want in the destination path
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': '', "'": ''}

    ppcount = 0
    for directory in downloads:
        if "LL.(" not in directory:
            continue

        bookID = str(directory).split("LL.(")[1].split(")")[0]
        logger.debug("Book with id: " + str(bookID) + " is in downloads")
        pp_path = os.path.join(processpath, directory)

        if not os.path.exists(pp_path):
            continue
        logger.debug('Found folder %s.' % pp_path)

        # bookID comes from a directory name, so bind it rather than interpolate it
        data = myDB.select("SELECT * from books WHERE BookID=?", [bookID])
        if not data:
            # without this guard the previous folder's metadata would be reused
            logger.debug('No book with id %s in the database, skipping %s' % (bookID, pp_path))
            continue

        metadata = data[-1]  # the last row wins, as in a loop over all rows
        authorname = metadata['AuthorName']
        bookname = metadata['BookName']
        bookdesc = metadata['BookDesc']
        bookisbn = metadata['BookIsbn']
        bookimg = metadata['BookImg']
        bookdate = metadata['BookDate']
        booklang = metadata['BookLang']
        bookpub = metadata['BookPub']

        author_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorname)
        try:
            os.chmod(author_dir.encode(lazylibrarian.SYS_ENCODING), 0o777)
        except Exception:
            # best effort only
            logger.debug("Could not chmod author directory")

        dest_path = authorname + os.sep + bookname
        dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
            lazylibrarian.SYS_ENCODING)

        processBook = processDestination(pp_path, dest_path, authorname, bookname)
        if not processBook:
            logger.info('Postprocessing for %s has failed.' % bookname)
            continue

        ppcount = ppcount + 1

        # try image
        processIMG(dest_path, bookimg)

        # try metadata
        processOPF(dest_path, authorname, bookname, bookisbn, bookID, bookpub, bookdate, bookdesc, booklang)

        # update nzbs
        myDB.upsert("wanted", {"Status": "Success"}, {"NZBurl": directory})

        # update books
        myDB.upsert("books", {"Status": "Open"}, {"BookID": bookID})

        # update authors
        query = 'SELECT COUNT(*) FROM books WHERE AuthorName=? AND (Status="Have" OR Status="Open")'
        countbooks = myDB.action(query, [authorname]).fetchone()
        havebooks = int(countbooks[0])
        myDB.upsert("authors", {"HaveBooks": havebooks}, {"AuthorName": authorname})

        logger.info('Successfully processed: %s - %s' % (authorname, bookname))

    if ppcount:
        logger.debug('%s books are downloaded and processed.' % ppcount)
    else:
        logger.debug('No snatched books have been found')
def _parse_issue_date(title_words):
    """Derive an issue date from the trailing words of a release title.

    title_words -- list of at least three words, trailing junk already removed

    Three layouts are tried in turn, always reading from the end of the list:

      A: [DD] MonthName YYYY  or  Issue nn, MonthName YYYY  (a leading number
         above 31 is treated as an issue number, the day falls back to '01')
      B: MonthName DD YYYY  or  MonthName DD, YYYY
      C: YYYY MM  or  YYYY MM DD

    Returns the first candidate accepted by datetime.date, joined as
    'year-month-day', or None when no layout yields a valid date.
    """
    # OverflowError covers absurdly long digit strings handed to datetime.date
    bad_date = (ValueError, OverflowError)

    def checked(year, month, day):
        # build the string first, then let datetime.date validate the parts
        datish = year + '-' + month + '-' + day
        datetime.date(int(year), int(month), int(day))
        return datish

    last = title_words[-1]
    middle = title_words[-2]
    first = title_words[-3]

    # layouts A and B both need a plausible year as the final word
    if last.isdigit() and 1900 <= int(last) <= 2100:
        # A: [DD] MonthName YYYY
        day = first.rstrip(',').zfill(2)
        if not day.isdigit() or int(day) > 31:
            day = '01'  # just MonthName YYYY, or an issue number
        month = formatter.month2num(common.remove_accents(middle))
        try:
            return checked(last, month, day)
        except bad_date:
            pass

        # B: MonthName DD YYYY or MonthName DD, YYYY
        month = formatter.month2num(common.remove_accents(first))
        try:
            return checked(last, month, middle.rstrip(',').zfill(2))
        except bad_date:
            pass

    # C: YYYY MM, else YYYY MM DD (can't get MM/DD if named YYYY Issue nn)
    if middle.isdigit() and 1900 < int(middle) < 2100:
        year, month, day = middle, last.zfill(2), '01'
    elif first.isdigit() and 1900 < int(first) < 2100:
        year, month, day = first, middle.zfill(2), last.zfill(2)
    else:
        return None
    try:
        return checked(year, month, day)
    except bad_date:
        return None


def search_magazines(mags=None, reset=False):
    """Search the enabled providers for new issues of active magazines.

    mags  -- optional list of dicts whose 'bookid' entry is a magazine Title;
             when None every magazine with Status "Active" is searched
    reset -- when true, the 'search_magazines' job is restarted at the end

    Every name-matching result not already in the "wanted" table (same title
    and provider) is stored there with Status "Skipped".  Results whose issue
    date is newer than the magazine's IssueDate (or newer than 31 days ago
    when no IssueDate is stored) are switched to "Wanted" and snatched.
    Returns None.
    """
    # produce a list of magazines to search for, tor, nzb, torznab
    myDB = database.DBConnection()
    searchlist = []

    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:  # backlog search
        searchmags = myDB.select(
            'SELECT Title, LastAcquired, IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            # bound parameter, so titles containing quotes cannot break the query
            searchmags.extend(myDB.select(
                'SELECT Title, LastAcquired, IssueDate from magazines '
                'WHERE Title=? AND Status="Active"', [magazine['bookid']]))

    if len(searchmags) == 1:
        logger.info('Searching for one magazine')
    else:
        logger.info('Searching for %i magazines' % len(searchmags))

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
    for searchmag in searchmags:
        bookid = searchmag[0]
        searchterm = formatter.latinToAscii(formatter.replace_all(bookid, dic))
        searchterm = re.sub(r'[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if not searchlist:
        logger.warn('There is nothing to search for. Mark some magazines as active.')

    for book in searchlist:
        resultlist = []

        if lazylibrarian.USE_NZB():
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                resultlist.append({
                    'bookid': item['bookid'],
                    'nzbprov': item['tor_prov'],
                    'nzbtitle': item['tor_title'],
                    'nzburl': item['tor_url'],
                    'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                    'nzbsize': item['tor_size'],
                    'nzbmode': 'torrent'
                })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])
            continue

        bad_regex = 0
        bad_date = 0
        old_date = 0
        total_nzbs = 0
        new_date = 0
        to_snatch = 0
        maglist = []
        issues = set()  # "title,date" keys already queued in this pass
        reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

        for nzb in resultlist:
            total_nzbs = total_nzbs + 1
            bookid = nzb['bookid']
            nzbtitle = (u'%s' % nzb['nzbtitle'])
            nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbsize_temp = nzb['nzbsize']
            if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
            nzbdate = formatter.nzbdate2format(nzb['nzbdate'])
            nzbmode = nzb['nzbmode']

            checkifmag = myDB.select('SELECT * from magazines WHERE Title=?', [bookid])
            for results in checkifmag or []:
                control_date = results['IssueDate']

                nzbtitle_formatted = nzbtitle
                for separator in ('.', '-', '/', '+', '_'):
                    nzbtitle_formatted = nzbtitle_formatted.replace(separator, ' ')
                nzbtitle_formatted = nzbtitle_formatted.replace('(', '').replace(')', '').strip()

                # Need to make sure that substrings of magazine titles don't get found
                # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                # split() also collapses runs of spaces
                nzbtitle_exploded = nzbtitle_formatted.split()
                bookid_exploded = bookid.split(' ')

                # check nzb starts with magazine title, and ends with a date
                # eg The MagPI Issue 22 - July 2015
                name_match = True  # assume name matches for now
                if len(nzbtitle_exploded) > len(bookid_exploded):
                    # needs to be longer as it has to include a date
                    # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                    mag_title_match = fuzz.token_set_ratio(
                        common.remove_accents(bookid),
                        common.remove_accents(nzbtitle_formatted))
                    if mag_title_match < lazylibrarian.MATCH_RATIO:
                        logger.debug(u"Magazine token set Match failed: " + str(mag_title_match) +
                                     "% for " + nzbtitle_formatted)
                        name_match = False

                    lower_title = common.remove_accents(nzbtitle_formatted).lower()
                    lower_bookid = common.remove_accents(bookid).lower()
                    for word in reject_list:
                        if word in lower_title and word not in lower_bookid:
                            name_match = False
                            logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                            break

                if not name_match:
                    logger.debug('Magazine [%s] does not completely match search term [%s].' % (
                        nzbtitle_formatted, bookid))
                    bad_regex = bad_regex + 1
                    continue

                # some magazine torrent uploaders add their sig in [] or {}
                # Fortunately for us, they always seem to add it at the end
                # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                # so strip all the trailing junk...
                # (guarded so a title made only of junk cannot index an empty list)
                while nzbtitle_exploded and (
                        nzbtitle_exploded[-1].startswith(('[', '{')) or
                        nzbtitle_exploded[-1].lower() == 'pdf'):
                    nzbtitle_exploded.pop()

                # need at least one word magazine title and two date components
                if len(nzbtitle_exploded) <= 2:
                    continue

                newdatish = _parse_issue_date(nzbtitle_exploded)
                if newdatish is None:
                    logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                    bad_date = bad_date + 1
                    # allow issues with good name but bad date to be included
                    # so user can manually select them, incl those with issue numbers
                    newdatish = "1970-01-01"  # provide a fake date for bad-date issues

                # store all the _new_ matching results, marking as "skipped" for now
                # we change the status to "wanted" on the ones we want to snatch later
                # don't add a new entry if this issue has been found on an earlier search
                # because status might have been user-set
                mag_entry = myDB.select('SELECT * from wanted WHERE NZBtitle=? and NZBprov=?',
                                        [nzbtitle, nzbprov])
                if not mag_entry:
                    controlValueDict = {
                        "NZBtitle": nzbtitle,
                        "NZBprov": nzbprov
                    }
                    newValueDict = {
                        "NZBurl": nzburl,
                        "BookID": bookid,
                        "NZBdate": nzbdate,
                        "AuxInfo": newdatish,
                        "Status": "Skipped",
                        "NZBsize": nzbsize,
                        "NZBmode": nzbmode
                    }
                    myDB.upsert("wanted", newValueDict, controlValueDict)

                if control_date is None:  # we haven't got any copies of this magazine yet
                    # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                    # could perhaps calc differently for weekly, biweekly etc
                    start_time = time.time()
                    start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                    control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

                # only grab a copy if it's newer than the most recent we have,
                # or newer than a month ago if we have none
                comp_date = formatter.datecompare(newdatish, control_date)
                if comp_date > 0:
                    # Should probably only upsert when downloaded and processed in case snatch fails
                    # keep track of what we're going to download so we don't download dupes
                    new_date = new_date + 1
                    issue = bookid + ',' + newdatish
                    if issue not in issues:
                        maglist.append({
                            'bookid': bookid,
                            'nzbprov': nzbprov,
                            'nzbtitle': nzbtitle,
                            'nzburl': nzburl,
                            'nzbmode': nzbmode
                        })
                        logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                        to_snatch = to_snatch + 1
                        issues.add(issue)

                        controlValueDict = {"NZBurl": nzburl}
                        newValueDict = {
                            "NZBdate": formatter.now(),  # when we asked for it
                            "Status": "Wanted"
                        }
                        myDB.upsert("wanted", newValueDict, controlValueDict)
                    else:
                        logger.debug('This issue of %s is already flagged for download' % issue)
                elif newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                    old_date = old_date + 1

        logger.info('Found %i results for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % (
            total_nzbs, bookid, new_date, old_date, bad_date, bad_regex, to_snatch))

        for items in maglist:
            # torznab and plain torrent results both go through the torrent downloader
            if items['nzbmode'] in ("torznab", "torrent"):
                snatch = TORDownloadMethod(items['bookid'], items['nzbprov'],
                                           items['nzbtitle'], items['nzburl'])
            else:
                snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'],
                                           items['nzbtitle'], items['nzburl'])
            if snatch:
                notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')

    if reset:
        common.schedule_job(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
def processResultList(resultlist, author, title, book):
    """Choose the best RSS result for one wanted book and try to snatch it.

    Each entry of ``resultlist`` has its title cleaned and fuzzy-matched
    against ``author`` and ``title``.  Entries containing a reject word,
    exceeding REJECT_MAXSIZE, or scoring below MATCH_RATIO on either match
    are dropped.  The highest scoring survivor is stored in the "wanted"
    table and, unless the book is already marked "Snatched", handed to the
    NZB downloader (url contains '.nzb') or the torrent downloader.

    resultlist -- iterable of mappings read via the 'tor_title',
                  'tor_size', 'tor_url' and 'tor_prov' keys
    author     -- cleaned author name to match against
    title      -- cleaned book title to match against
    book       -- mapping for the wanted book, read via the 'bookid',
                  'authorName' and 'bookName' keys

    Returns True when a download was started, otherwise False.
    """
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\\s\\s': ' '}
    # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
    # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
    author_lower = author.lower()
    title_lower = title.lower()

    matches = []
    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, torTitle)
        tor_Title_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle))

        # A reject word only counts when it is not part of the author or
        # title we are looking for.  (The title is compared here; the book
        # mapping itself has no lower() method.)
        rejected = False
        torTitle_lower = torTitle.lower()
        for word in reject_list:
            if word in torTitle_lower and word not in author_lower and word not in title_lower:
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)  # bytes -> MB
        if maxsize and tor_size > maxsize:
            rejected = True
            logger.debug("Rejecting %s, too large" % torTitle)

        if tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected:
            # logger.debug(u'Found RSS: %s' % tor['tor_title'])
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor['tor_url']}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (tor_Title_match + tor_Author_match)/2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(torTitle))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        # No search type exists for RSS, so none is reported here.
        logger.info(u'Best match RSS (%s%%): %s' % (score, nzb_Title))

        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                    [newValueDict["BookID"]]).fetchone()

        if not snatchedbooks:  # check if one of the other downloaders got there first
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                # Disabled experiment: private trackers such as
                #   http://baconbits.org/torrents.php?action=download&authkey=...&torrent_pass=...&id=185398
                # carry authkey / hashed-password placeholders in the url that
                # would have to be filled in from lazylibrarian.RSS_PROV[feed]
                # ('AUTH' and 'PASS') for non-magnet links.  The required
                # password hash is unknown (sha1 was the guess).  Re-enabling
                # this needs tor['tor_feed'] carried through to this point.
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip())
    return False
def search_rss_book(books=None, reset=False):
    """Search the configured RSS feeds for wanted books.

    books -- None for a backlog search over every book with status
             "Wanted", otherwise an iterable of mappings whose 'bookid'
             entry names the books to look for
    reset -- when true, the 'search_rss_book' scheduled job is restarted
             after a completed search

    Returns None.  The feeds are fetched once and the same result list is
    matched against every wanted book.
    """
    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        common.schedule_job(action='Stop', target='search_rss_book')
        return

    # rename this thread
    threading.currentThread().name = "SEARCHRSSBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname, BookAdded from books '
                                  'WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbooks.extend(myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                [book['bookid']]))

    if len(searchbooks) == 0:
        logger.debug("RSS search requested for no books or invalid BookID")
        return
    elif len(searchbooks) == 1:
        logger.info('RSS Searching for one book')
    else:
        logger.info('RSS Searching for %i books' % len(searchbooks))

    resultlist, nproviders = providers.IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}

    rss_count = 0
    for book in searchbooks:
        author = formatter.latinToAscii(formatter.replace_all(book['AuthorName'], dic))
        title = formatter.latinToAscii(formatter.replace_all(book['BookName'], dic))

        found = processResultList(resultlist, author, title, book)

        # if you can't find the book, try author without initials,
        # and title without any "(extended details, series etc)"
        if not found:
            shortened = False
            # Strip leading initials one at a time.  The length check keeps a
            # one-character (or empty) name from raising IndexError, both on
            # entry and after the last initial has been removed.
            while len(author) > 1 and author[1] in '. ':
                author = author[2:].strip()  # and leading whitespace
                shortened = True
            if '(' in title:
                title = title.split('(')[0]
                shortened = True
            if shortened:  # only search again if something changed
                found = processResultList(resultlist, author, title, book)

        if not found:
            logger.debug("Searches returned no results. Adding book %s - %s to queue." % (author, title))
        else:
            rss_count = rss_count + 1

    plural = "" if rss_count == 1 else "s"
    logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural))

    if reset:
        common.schedule_job(action='Restart', target='search_rss_book')
def processResultList(resultlist, book, searchtype):
    """Snatch the first NZB/torznab result that matches the wanted book.

    Result titles are cleaned and fuzzy-compared with the book; what is
    compared depends on ``searchtype``:
      'author'  -- the author name, then (if that scores above
                   MATCH_RATIO) the book name
      otherwise -- the book's 'searchterm'
    The first result scoring above MATCH_RATIO is written to the "wanted"
    table with status "Skipped" and, unless the book is already marked
    "Snatched", sent to the torrent or NZB downloader.

    Returns True once a download has been started, False otherwise.
    """
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': '', '\s\s': ' ',
                ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}
    threshold = int(lazylibrarian.MATCH_RATIO)

    for nzb in resultlist:
        cleaned = formatter.replace_all(nzb['nzbtitle'], dictrepl)
        cleaned = formatter.latinToAscii(cleaned).strip()
        cleaned = re.sub(r"\s\s+", " ", cleaned)  # remove extra whitespace

        if searchtype == 'author':
            ratio = fuzz.token_set_ratio(book['authorName'].encode('utf-8'), cleaned)
            logger.debug(u"NZB author Match %: " + str(ratio) + " for " + cleaned)
            if ratio > threshold:
                # author is good enough, now the title has to match as well
                ratio = fuzz.token_set_ratio(book['bookName'].encode('utf-8'), cleaned)
                logger.debug(u"NZB book Match %: " + str(ratio) + " for " + cleaned)
        else:
            ratio = fuzz.token_set_ratio(book['searchterm'], cleaned)
            if searchtype in ('book', 'shortbook'):
                label = u"NZB token set Match %: "
            else:  # searchtype == 'general'
                label = u"NZB Title general Match %: "
            logger.debug(label + str(ratio) + " for " + cleaned)

        if ratio <= threshold:
            continue  # not close enough, look at the next result

        logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
        bookid = book['bookid']
        wanted_title = (book["authorName"] + ' - ' + book['bookName'] +
                        ' LL.(' + book['bookid'] + ')').strip()
        nzburl = nzb['nzburl']
        nzbprov = nzb['nzbprov']
        raw_date = nzb['nzbdate']
        raw_size = nzb['nzbsize']
        if raw_size is None:  # Need to cater for when this is NONE (Issue 35)
            raw_size = 1000
        nzbsize = str(round(float(raw_size) / 1048576, 2)) + ' MB'
        nzbdate = formatter.nzbdate2format(raw_date)
        nzbmode = nzb['nzbmode']

        controlValueDict = {"NZBurl": nzburl}
        newValueDict = {
            "NZBprov": nzbprov,
            "BookID": bookid,
            "NZBdate": nzbdate,
            "NZBsize": nzbsize,
            "NZBtitle": wanted_title,
            "NZBmode": nzbmode,
            "Status": "Skipped"
        }
        myDB.upsert("wanted", newValueDict, controlValueDict)

        already = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % bookid).fetchone()
        if already:
            continue  # some other result/downloader got there first

        if nzbmode == "torznab":
            downloader = TORDownloadMethod
        else:
            downloader = NZBDownloadMethod
        snatch = downloader(bookid, nzbprov, wanted_title, nzburl)
        if snatch:
            notifiers.notify_snatch(formatter.latinToAscii(wanted_title) + ' at ' + formatter.now())
            postprocess.schedule_processor(action='Start')
            return True

    logger.debug("No nzb's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
def searchbook(bookid=None):
    """Search the NewzNab provider for wanted books and queue the results.

    bookid -- when given, only that book is searched (provided its status
              is "Wanted"); when omitted, every wanted book is searched.

    For each result the "wanted" table is checked for an already snatched
    entry of the same book: if there is one the result is stored with
    status "Skipped", otherwise it is passed to DownloadMethod.

    Returns None.
    """
    myDB = database.DBConnection()

    # BookID is selected in both cases so that a backlog search (no bookid
    # argument) still files every result under the book it belongs to.
    if bookid:
        wanted = myDB.select(
            'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
            [bookid])
    else:
        wanted = myDB.select(
            'SELECT BookID, AuthorName, BookName from books WHERE Status="Wanted"')

    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No downloadmethod is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB:
        logger.info('No providers are set.')

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': ''}

    for row in wanted:
        book_id = row[0]
        author = row[1]
        book = row[2]

        logger.info('Searching for %s - %s.' % (author, book))

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        book = formatter.latinToAscii(formatter.replace_all(book, dic))

        searchterm = author + ' ' + book
        searchterm = re.sub(r'[\.\-\/]', ' ', searchterm).encode('utf-8')

        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.info('Searching NZB at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(searchterm, resultlist)

        # FUTURE-CODE
        # if lazylibrarian.NEWZBIN:
        #     logger.info('Searching NZB at provider %s ...' % lazylibrarian.NEWZBIN)
        #     resultlist = providers.Newzbin(searchterm, resultlist)
        # if lazylibrarian.NZBMATRIX:
        #     logger.info('Searching NZB at provider %s ...' % lazylibrarian.NZBMATRIX)
        #     resultlist = providers.NZBMatrix(searchterm, resultlist)
        # if lazylibrarian.NZBSORG:
        #     logger.info('Searching NZB at provider %s ...' % lazylibrarian.NZBSORG)
        #     resultlist = providers.NZBsorg(searchterm, resultlist)

        # covers both "provider returned None" and "provider returned nothing"
        if not resultlist:
            logger.info("Search didn't have results. Adding book %s - %s to queue." % (author, book))
            continue

        for nzb in resultlist:
            nzbtitle = nzb['nzbtitle']
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']

            # save nzb's to database for later use
            snatchedbooks = myDB.action(
                'SELECT * from wanted WHERE BookID=? and Status="Snatched"', [book_id]).fetchone()
            if snatchedbooks:
                logger.info("Book with BookID %s already snatched, skipped this NZB." % book_id)
                # NOTE(review): this upsert is keyed on BookID, so it appears
                # to overwrite the snatched row itself -- confirm intent.
                controlValueDict = {"BookID": book_id}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "NZBdate": formatter.today(),
                    "NZBurl": nzburl,
                    "NZBtitle": nzbtitle,
                    "Status": "Skipped"
                }
                myDB.upsert("wanted", newValueDict, controlValueDict)
            else:
                DownloadMethod(book_id, nzbprov, nzbtitle, nzburl)
def search_nzb_book(books=None, reset=False):
    """Search the NewzNab/TorzNab providers for wanted books.

    books -- None for a backlog search of everything with status "Wanted",
             otherwise an iterable of mappings carrying a 'bookid' entry
    reset -- when true, restart the 'search_nzb_book' scheduled job once
             the search has run to completion

    For every book up to four searches are tried in turn ("book",
    "shortbook", "general", "author") until one of them snatches something.
    """
    if not lazylibrarian.USE_NZB():
        logger.warn('No NEWZNAB/TORZNAB providers set, check config')
        return

    # rename this thread
    threading.currentThread().name = "SEARCHNZBBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # backlog search: everything that is still wanted, newest first
        wanted = myDB.select('SELECT BookID, AuthorName, Bookname, BookAdded from books WHERE Status="Wanted" order by BookAdded desc')
    else:
        # the user has added one or more new books
        wanted = []
        for entry in books:
            rows = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % entry['bookid'])
            for row in rows:
                wanted.append(row)

    total = len(wanted)
    if total == 0:
        logger.debug("NZB search requested for no books or invalid BookID")
        return
    if total == 1:
        logger.info('NZB Searching for one book')
    else:
        logger.info('NZB Searching for %i books' % total)

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    searchlist = []
    for row in wanted:
        bookid, raw_author, raw_title = row[0], row[1], row[2]

        author = formatter.latinToAscii(formatter.replace_all(raw_author, dic))
        title = formatter.latinToAscii(formatter.replace_all(raw_title, dic))
        if '(' in title:  # may have title (series/extended info)
            title = title.split('(')[0]

        # search term is just author name and book title
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        term = author + ' ' + title
        term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
        term = re.sub(r'\(.*?\)', '', term).encode('utf-8')
        term = re.sub(r"\s\s+", " ", term)  # strip any double white space
        searchlist.append({"bookid": bookid, "bookName": row[2], "authorName": row[1],
                           "searchterm": term.strip()})

    no_downloader = not (lazylibrarian.SAB_HOST or lazylibrarian.NZB_DOWNLOADER_BLACKHOLE or
                         lazylibrarian.NZBGET_HOST)
    if no_downloader:
        logger.warn('No download method is set, use SABnzbd/NZBGet or blackhole, check config')

    nzb_count = 0
    for book in searchlist:
        # first attempt, try author/title in category "book"
        resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'book')
        if not nproviders:
            logger.warn('No NewzNab or TorzNab providers are set, check config')
            return  # no point in continuing
        found = processResultList(resultlist, book, "book")

        # Fallbacks, tried in this order until something is found:
        #   shortbook - author/title without "(extended details, series etc)"
        #   general   - not restricted to the "books" category
        #   author    - author only
        for searchtype in ('shortbook', 'general', 'author'):
            if found:
                break
            if searchtype == 'shortbook' and '(' not in book['bookName']:
                continue  # nothing to shorten
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, searchtype)
            found = processResultList(resultlist, book, searchtype)

        if found:
            nzb_count = nzb_count + 1
        else:
            logger.debug("NZB Searches returned no results. Adding book %s to queue." % book['searchterm'])

    if nzb_count == 1:
        summary = "NZBSearch for Wanted items complete, found %s book"
    else:
        summary = "NZBSearch for Wanted items complete, found %s books"
    logger.info(summary % nzb_count)

    if reset:
        common.schedule_job(action='Restart', target='search_nzb_book')
def searchbook(books=None):
    """Search the NewzNab providers for wanted books and snatch matches.

    books -- None for a backlog search over every book with status
             "Wanted" (this also empties the ".ProviderCache" directory and
             clears the throttling timeouts), otherwise an iterable of
             mappings carrying a 'bookid' entry

    Each provider is queried with the cleaned author name.  The first
    result whose cleaned title has a partial-ratio match above 80 against
    the cleaned book name is stored in the "wanted" table and, unless the
    book is already marked "Snatched", handed to DownloadMethod.

    Returns None.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink(os.path.join(".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbooks.extend(myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                [book['bookid']]))

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    for row in searchbooks:
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm1 = author  # + ' ' + lazylibrarian.EBOOK_TYPE
        searchterm1 = re.sub(r'[\.\-\/]', ' ', searchterm1).encode('utf-8')
        searchterm1 = re.sub(r'\(.*?\)', '', searchterm1).encode('utf-8')
        searchterm1 = re.sub(r"\s\s+", " ", searchterm1)  # strip any double white space
        searchlist.append({
            "bookid": row[0],
            "bookName": row[2],
            "authorName": row[1],
            "searchterm": searchterm1.strip()
        })

    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No download method is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2:
        logger.info('No providers are set. use NEWZNAB.')

    # title clean-up table, the same for every book so built only once
    dictrepl = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': '', '\\s\\s': ' ',
                ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    for book in searchlist:
        # Collect into our own list; "or []" tolerates a provider that
        # returns None instead of an empty list.
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist.extend(providers.NewzNab(book, "1") or [])

        if lazylibrarian.NEWZNAB2:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST2)
            resultlist.extend(providers.NewzNab(book, "2") or [])

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        bookID = book['bookid']
        bookName = re.sub(r'[\.\-\/]', ' ', book['bookName'])
        bookName = re.sub(r'\(.*?\)', '', bookName)
        bookName = formatter.latinToAscii(
            formatter.replace_all(bookName.lower(), dictrepl)).strip()
        logger.debug(u'bookName %s' % bookName)

        matched = False
        for nzb in resultlist:
            nzbTitle = formatter.latinToAscii(
                formatter.replace_all(str(nzb['nzbtitle']).lower(), dictrepl)).strip()
            logger.debug(u'nzbName %s' % nzbTitle)
            ratio = fuzz.partial_ratio(bookName, nzbTitle)
            logger.debug("NZB Match %: " + str(ratio))

            if ratio > 80:
                logger.debug(u'FOUND %s' % nzbTitle.lower())
                matched = True
                bookid = nzb['bookid']
                nzbTitle = (book["authorName"] + ' - ' + book['bookName'] +
                            ' LL.(' + bookID + ')').strip()
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']

                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": formatter.today(),
                    "NZBtitle": nzbTitle,
                    "Status": "Skipped"
                }
                myDB.upsert("wanted", newValueDict, controlValueDict)

                snatchedbooks = myDB.action(
                    'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
                if not snatchedbooks:
                    DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                break  # only the first acceptable result is used

        if not matched:
            logger.info("No nzb's found for " + (book["authorName"] + ' ' + bookName).strip() +
                        ". Adding book to queue.")
def search_tor_book(books=None, reset=False):
    """Search the torrent providers for wanted books.

    books -- None for a backlog search of everything with status "Wanted",
             otherwise an iterable of mappings carrying a 'bookid' entry
    reset -- when true, restart the 'search_tor_book' scheduled job once
             the search has run to completion

    For every book up to four searches are tried in turn ("book",
    "shortbook", "general", "author") until one of them snatches something.
    """
    if not lazylibrarian.USE_TOR():
        logger.warn('No Torrent providers set, check config')
        return

    # rename this thread
    threading.currentThread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # backlog search: everything that is still wanted, newest first
        wanted = myDB.select('SELECT BookID, AuthorName, Bookname, BookAdded from books WHERE Status="Wanted" order by BookAdded desc')
    else:
        # the user has added one or more new books
        wanted = []
        for entry in books:
            rows = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % entry['bookid'])
            for row in rows:
                wanted.append(row)

    total = len(wanted)
    if total == 0:
        logger.debug("TOR search requested for no books or invalid BookID")
        return
    if total == 1:
        logger.info('TOR Searching for one book')
    else:
        logger.info('TOR Searching for %i books' % total)

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    searchlist = []
    for row in wanted:
        bookid = row['BookID']
        author = row['AuthorName']
        title = row['BookName']

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        title = formatter.latinToAscii(formatter.replace_all(title, dic))

        # search term is just author name and book title
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        term = author + ' ' + title  # + ' ' + lazylibrarian.EBOOK_TYPE
        term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
        term = re.sub(r'\(.*?\)', '', term).encode('utf-8')
        term = re.sub(r"\s\s+", " ", term)  # strip any double white space
        searchlist.append({"bookid": bookid, "bookName": row[2], "authorName": row[1],
                           "searchterm": term.strip()})

    tor_count = 0
    for book in searchlist:
        # first attempt, try author/title in category "book"
        resultlist, nproviders = providers.IterateOverTorrentSites(book, 'book')
        if not nproviders:
            logger.warn('No torrent providers are set, check config')
            return  # No point in continuing
        found = processResultList(resultlist, book, "book")

        # Fallbacks, tried in this order until something is found:
        #   shortbook - author/title without "(extended details, series etc)"
        #   general   - not restricted to the "books" category
        #   author    - author only
        for searchtype in ('shortbook', 'general', 'author'):
            if found:
                break
            if searchtype == 'shortbook' and '(' not in book['bookName']:
                continue  # nothing to shorten
            resultlist, nproviders = providers.IterateOverTorrentSites(book, searchtype)
            found = processResultList(resultlist, book, searchtype)

        if found:
            tor_count = tor_count + 1
        else:
            logger.debug("Searches returned no results. Adding book %s to queue." % book['searchterm'])

    if tor_count == 1:
        summary = "TORSearch for Wanted items complete, found %s book"
    else:
        summary = "TORSearch for Wanted items complete, found %s books"
    logger.info(summary % tor_count)

    if reset:
        common.schedule_job(action='Restart', target='search_tor_book')
def processDir():
    """Post-process finished downloads found in DOWNLOAD_DIR.

    Two passes are made over the download directory:
      1. every "wanted" row with status "Snatched" whose NZBtitle is present
         in the directory is processed as a book (BookID found in "books")
         or as a magazine issue (BookID found as a Title in "magazines");
      2. any remaining entry whose name contains "LL.(bookID)" is handed
         to import_book().

    Returns False when the download directory cannot be listed, otherwise
    None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    # TODO - try exception on os.listdir - it throws debug level
    # exception if dir doesn't exist - bloody hard to catch
    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    # Characters we don't want in a destination folder name.  They are
    # removed from the configured folder pattern BEFORE it is joined to
    # DESTINATION_DIR (as import_book does), so the base directory and a
    # windows drive identifier such as "c:" are left alone.
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
    elif downloads is None:
        logger.info('No downloads are found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            if book['NZBtitle'] not in downloads:
                logger.debug("Snatched NZB %s is not in download directory" % (book['NZBtitle']))
                continue

            pp_path = os.path.join(processpath, book['NZBtitle'])
            logger.debug('Found book/mag folder %s.' % pp_path)

            mostrecentissue = None
            data = myDB.select('SELECT * from books WHERE BookID=?', [book['BookID']])
            if data:
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']

                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                    '$Author', authorname).replace('$Title', bookname)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                         dest_path).encode(lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.select('SELECT * from magazines WHERE Title=?', [book['BookID']])
                if not data:
                    logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue

                # AuxInfo was added for magazine release date, normally housed in 'magazines'
                # but if multiple files are downloading, there will be an error in
                # post-processing, trying to go to the same directory.
                mostrecentissue = data[0]['IssueDate']  # keep for issues arriving out of order
                dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                    '$IssueDate', book['AuxInfo']).replace('$Title', book['BookID'])
                # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                if lazylibrarian.MAG_RELATIVE:
                    if dest_path[0] not in '._':
                        dest_path = '_' + dest_path
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                             dest_path).encode(lazylibrarian.SYS_ENCODING)
                else:
                    # absolute folder from the config: clean everything
                    # except a leading drive identifier
                    drive, tail = os.path.splitdrive(dest_path)
                    dest_path = drive + formatter.latinToAscii(formatter.replace_all(tail, dic))
                    dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                authorname = None
                bookname = None
                global_name = lazylibrarian.MAG_DEST_FILE.replace(
                    '$IssueDate', book['AuxInfo']).replace('$Title', book['BookID'])
                # global_name = book['AuxInfo']+' - '+title

            try:
                os.chmod(dest_path, 0o777)
            except Exception as e:
                # best effort only, the directory may not exist yet
                logger.debug("Could not chmod post-process directory: " + str(dest_path) +
                             " (" + str(e) + ")")

            processBook = processDestination(pp_path, dest_path, authorname, bookname,
                                             global_name, book['BookID'])

            if processBook:
                ppcount = ppcount + 1

                # update nzbs
                controlValueDict = {"NZBurl": book['NZBurl']}
                newValueDict = {"Status": "Processed",
                                "NZBDate": formatter.today()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:
                        # an older issue arrived out of order, keep the newer IssueDate
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'],
                                        "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't
                    # have the full filename so look for any "book" in that directory
                    dest_file = book_file(dest_path)
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(), "IssueFile": dest_file}
                    myDB.upsert("issues", newValueDict, controlValueDict)

                logger.info('Successfully processed: %s' % global_name)
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s' % pp_path)

        #
        # TODO Seems to be duplication here. Can we just scan once for snatched books
        # instead of scan for snatched and then scan for directories with "LL.(bookID)" in?
        # Should there be any directories with "LL.(bookID)" that aren't in snatched?
        # Maybe this was put in for manually downloaded books?
        #
        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory:
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    # a loose file: process the download directory itself
                    pp_path = os.path.join(processpath)

                if os.path.isdir(pp_path):
                    logger.debug('Found LL folder %s.' % pp_path)
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
def processResultList(resultlist, book, searchtype):
    """Pick the best-matching NZB result for a wanted book and try to snatch it.

    Every entry of ``resultlist`` is fuzzy-matched against the cleaned author
    and title of ``book``. Entries that contain a reject word (one that is not
    part of the author or title), that exceed REJECT_MAXSIZE, or that score
    below MATCH_RATIO for author or title are discarded. The best-scoring
    survivor is written to the "wanted" table and, unless the book is already
    marked "Snatched", handed to the downloader.

    resultlist -- iterable of mappings with the keys "nzbtitle", "nzburl",
                  "nzbprov", "nzbsize" and "nzbmode"
    book       -- mapping with the keys "bookid", "authorName" and "bookName"
    searchtype -- label that is only used in log messages

    Returns True if a download was started, otherwise False.
    """
    myDB = database.DBConnection()

    # Replacements applied to result titles before matching
    dictrepl = {
        "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ",
        '"': "", ",": " ", "*": "", "(": "", ")": "", "[": "", "]": "", "#": "",
        "0": "", "1": "", "2": "", "3": "", "4": "", "5": "", "6": "", "7": "",
        "8": "", "9": "", "'": "", ":": "", "!": "", "-": " ", "\\s\\s": " ",
    }
    # Replacements applied to the wanted author and title
    dic = {
        "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ",
        '"': "", ",": "", "*": "", ":": "", ";": "", "'": "",
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)

    author = formatter.latinToAscii(formatter.replace_all(book["authorName"], dic))
    title = formatter.latinToAscii(formatter.replace_all(book["bookName"], dic))
    author_lower = author.lower()
    title_lower = title.lower()

    matches = []
    for nzb in resultlist:
        nzb_Title = formatter.latinToAscii(formatter.replace_all(nzb["nzbtitle"], dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title))

        rejected = False
        nzb_lower = nzb_Title.lower()
        for word in reject_list:
            # A reject word only counts when it is not part of what we asked for
            if word in nzb_lower and word not in author_lower and word not in title_lower:
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzb_Title, word))
                break

        nzbsize_temp = nzb["nzbsize"]
        # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)

        if maxsize and nzbsize > maxsize:
            rejected = True
            logger.debug("Rejecting %s, too large" % nzb_Title)

        if rejected or nzbAuthor_match < match_ratio or nzbBook_match < match_ratio:
            continue

        controlValueDict = {"NZBurl": nzb["nzburl"]}
        newValueDict = {
            "NZBprov": nzb["nzbprov"],
            "BookID": book["bookid"],
            "NZBdate": formatter.now(),  # when we asked for it
            "NZBsize": nzbsize,
            "NZBtitle": (author + " - " + title + " LL.(" + book["bookid"] + ")").strip(),
            "NZBmode": nzb["nzbmode"],
            "Status": "Skipped",
        }

        score = (nzbBook_match + nzbAuthor_match) / 2  # as a percentage
        # lose a point for each extra word in the title so we get the closest match
        words = len(formatter.getList(nzb_Title))
        words -= len(formatter.getList(author))
        words -= len(formatter.getList(title))
        score -= abs(words)
        matches.append([score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        score, nzb_Title, newValueDict, controlValueDict = max(matches, key=lambda x: x[0])
        logger.info(u"Best match NZB (%s%%): %s using %s search" % (score, nzb_Title, searchtype))
        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action(
            'SELECT * from books WHERE BookID=? and Status="Snatched"',
            [newValueDict["BookID"]],
        ).fetchone()
        if not snatchedbooks:
            # Use the mode stored with the winning match, not whatever the
            # last entry examined by the loop above happened to carry.
            if newValueDict["NZBmode"] == "torznab":
                downloader = TORDownloadMethod
            else:
                downloader = NZBDownloadMethod
            snatch = downloader(
                newValueDict["BookID"],
                newValueDict["NZBprov"],
                newValueDict["NZBtitle"],
                controlValueDict["NZBurl"],
            )
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + " at " + formatter.now())
                common.schedule_job(action="Start", target="processDir")
                return True

    logger.debug(
        "No nzb's found for "
        + (book["authorName"] + " " + book["bookName"]).strip()
        + " using searchtype "
        + searchtype
    )
    return False
def processDir():
    """Post-process snatched downloads found in the download directory.

    Each row of the "wanted" table with Status="Snatched" whose NZBtitle
    appears in lazylibrarian.DOWNLOAD_DIR is passed to processDestination().
    On success the "wanted" row is marked "Processed". A book additionally
    gets its image and OPF written, its status set to "Open" and its author's
    HaveBooks count refreshed; a magazine gets IssueStatus "Open".

    Returns None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR
    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        # Keep the name bound so the "no downloads" check below can run
        downloads = None

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
        return
    if downloads is None:
        logger.info('No downloads are found. Nothing to process.')
        return

    # Characters removed from (or replaced in) the destination folder name
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}

    ppcount = 0
    for book in snatched:
        if book['NZBtitle'] not in downloads:
            logger.info("Snatched NZB %s is not in download directory" % (book['NZBtitle']))
            continue

        pp_path = os.path.join(processpath, book['NZBtitle'])
        logger.info('Found folder %s.' % pp_path)

        data = myDB.select('SELECT * from books WHERE BookID=?', [book['BookID']])
        if data:
            # If several rows come back the last one wins
            metadata = data[-1]
            authorname = metadata['AuthorName']
            bookname = metadata['BookName']
            bookdesc = metadata['BookDesc']
            bookisbn = metadata['BookIsbn']
            bookimg = metadata['BookImg']
            bookdate = metadata['BookDate']
            booklang = metadata['BookLang']
            bookpub = metadata['BookPub']

            # Default destination path, should be allowed change per config file.
            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
        else:
            # Not a book: for magazines the wanted row's BookID is looked up as the magazine title
            mags = myDB.select('SELECT * from magazines WHERE Title=?', [book['BookID']])
            if not mags:
                # Skip instead of reusing the previous item's title and destination
                logger.error('No book or magazine found for snatched item %s' % book['NZBtitle'])
                continue
            title = mags[-1]['Title']

            # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
            # files are downloading, there will be an error in post-processing, trying to go to the
            # same directory.
            dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', title)
            authorname = None
            bookname = None
            global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace('$Title', title)

        dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

        # Best effort only; done on the sanitised path, which is the one actually used
        try:
            os.chmod(dest_path, 0o777)
        except Exception:
            logger.debug("Could not chmod post-process directory")

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)
        if not processBook:
            logger.error('Postprocessing for %s has failed. Warning - AutoAdd will be repeated' % global_name)
            continue

        ppcount = ppcount + 1

        # If you use auto add by Calibre you need the book in a single directory, not nested
        # So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder.
        if lazylibrarian.IMP_AUTOADD:
            processAutoAdd(dest_path)

        # update nzbs
        myDB.upsert("wanted", {"Status": "Processed"}, {"NZBurl": book['NZBurl']})

        if bookname is not None:
            # try image
            processIMG(dest_path, bookimg, global_name)

            # try metadata
            processOPF(dest_path, authorname, bookname, bookisbn, book['BookID'],
                       bookpub, bookdate, bookdesc, booklang, global_name)

            # update books
            myDB.upsert("books", {"Status": "Open"}, {"BookID": book['BookID']})

            # update authors
            query = 'SELECT COUNT(*) FROM books WHERE AuthorName=? AND (Status="Have" OR Status="Open")'
            countbooks = myDB.action(query, [authorname]).fetchone()
            havebooks = int(countbooks[0])
            countauthor = myDB.action('SELECT * FROM authors WHERE AuthorName=?', [authorname]).fetchone()
            if countauthor:
                myDB.upsert("authors", {"HaveBooks": havebooks}, {"AuthorName": authorname})
        else:
            # update mags
            myDB.upsert("magazines", {"IssueStatus": "Open"}, {"Title": book['BookID']})

        logger.info('Successfully processed: %s' % (global_name))
        notifiers.notify_download(global_name + ' at ' + formatter.now())

    if ppcount:
        logger.debug('%s books are downloaded and processed.' % ppcount)
    else:
        logger.debug('No snatched books have been found')
def processResultList(resultlist, author, title, book):
    """Snatch the first RSS result that matches the wanted book.

    Each entry of ``resultlist`` is fuzzy-matched against ``author`` and
    ``title``. The first entry that reaches MATCH_RATIO for both and contains
    no reject word is written to the "wanted" table. Unless the book is
    already marked "Snatched" it is then passed to the NZB or torrent
    downloader, chosen by whether the URL contains '.nzb'.

    resultlist -- iterable of mappings with the keys 'tor_title', 'tor_url',
                  'tor_prov', 'tor_feed' and 'tor_size'
    author     -- author string to match against
    title      -- title string to match against
    book       -- mapping with the keys 'bookid', 'authorName' and 'bookName'

    Returns True as soon as a download was started, otherwise False.
    """
    myDB = database.DBConnection()

    # Replacements applied to result titles before matching
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\\s\\s': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        tor_Title = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, tor_Title)
        tor_Title_match = fuzz.token_set_ratio(title, tor_Title)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, tor_Title))

        rejected = False
        for word in reject_list:
            # Compare against the title string; ``book`` is a mapping and has
            # no lower() method.
            if word in tor_Title.lower() and word not in author.lower() and word not in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (tor_Title, word))
                break

        if rejected or tor_Title_match < match_ratio or tor_Author_match < match_ratio:
            continue

        logger.debug(u'Found RSS: %s' % tor['tor_title'])
        bookid = book['bookid']
        tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip()
        tor_url = tor['tor_url']
        tor_prov = tor['tor_prov']
        tor_feed = tor['tor_feed']

        tor_size_temp = tor['tor_size']
        # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'

        controlValueDict = {"NZBurl": tor_url}
        newValueDict = {
            "NZBprov": tor_prov,
            "BookID": bookid,
            "NZBdate": formatter.now(),  # when we asked for it
            "NZBsize": tor_size,
            "NZBtitle": tor_Title,
            "NZBmode": "torrent",
            "Status": "Skipped"
        }
        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action(
            'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
        if snatchedbooks:
            # one of the other downloaders got there first
            continue

        if '.nzb' in tor_url:
            snatch = NZBDownloadMethod(bookid, tor_prov, tor_Title, tor_url)
        else:
            # http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
            if not tor_url.startswith('magnet'):  # magnets don't use auth
                pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                # don't know what form of password hash is required, try sha1
                # NOTE(review): assumes sha1() here returns a string usable by
                # str.replace (hashlib.sha1 would need .hexdigest()) - confirm
                tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
            snatch = TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url)

        if snatch:
            notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now())
            common.schedule_job(action='Start', target='processDir')
            return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip())
    return False
def search_nzb_book(books=None, mags=None):
    """Search the configured newznab providers for wanted books.

    Does nothing when lazylibrarian.USE_NZB is false. With ``books`` left as
    None every book whose Status is "Wanted" is searched (a backlog search)
    and the provider cache and throttling timeouts are cleared first;
    otherwise only the given books that are still "Wanted" are searched.
    For each book the first result whose title matches better than
    MATCH_RATIO is recorded in the "wanted" table and, unless the book is
    already "Snatched", sent to the downloader. When ``books`` is empty or
    None a magazine search is run afterwards.

    books -- None, False, or an iterable of mappings with the key "bookid"
    mags  -- passed through unchanged to searchmag.searchmagazines()

    Returns None.
    """
    if not lazylibrarian.USE_NZB:
        return

    # rename this thread
    threading.currentThread().name = "SEARCHNZBBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink(os.path.join(".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        if books:
            for book in books:
                searchbooks.extend(myDB.select(
                    'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                    [book["bookid"]],
                ))

    # Replacement tables are constant, so build them once instead of per book
    dic = {
        "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ",
        '"': "", ",": "", "*": "", ":": "", ";": "",
    }
    dicSearchFormatting = {".": " +", " + ": " "}

    for searchbook in searchbooks:
        bookid = searchbook[0]
        author = formatter.latinToAscii(formatter.replace_all(searchbook[1], dic))
        book = formatter.latinToAscii(formatter.replace_all(searchbook[2], dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + " " + book  # + ' ' + lazylibrarian.EBOOK_TYPE
        searchterm = re.sub(r"[\.\-\/]", " ", searchterm)
        searchterm = re.sub(r"\(.*?\)", "", searchterm)
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        # Encode once, after all text clean-up is done
        searchterm = searchterm.encode("utf-8")
        searchlist.append(
            {"bookid": bookid, "bookName": searchbook[2], "authorName": searchbook[1], "searchterm": searchterm.strip()}
        )

    if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        logger.info("No download method is set, use SABnzbd or blackhole")

    # TODO - Move the newznab test to providers.py
    if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2 and not lazylibrarian.USENETCRAWLER:
        logger.info("No providers are set. try use NEWZNAB.")

    # Replacements applied to result titles before matching
    dictrepl = {
        "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ",
        '"': "", ",": "", "*": "", "(": "", ")": "", "[": "", "]": "", "#": "",
        "0": "", "1": "", "2": "", "3": "", "4": "", "5": "", "6": "", "7": "",
        "8": "", "9": "", "'": "", ":": "", "!": "", "-": "", "\\s\\s": " ",
        " the ": " ", " a ": " ", " and ": " ", " to ": " ", " of ": " ",
        " for ": " ", " my ": " ", " in ": " ", " at ": " ", " with ": " ",
    }
    match_ratio = int(lazylibrarian.MATCH_RATIO)

    for book in searchlist:
        resultlist = providers.IterateOverNewzNabSites(book, "book")

        # if you can't find the book specifically, you might find under general search
        if not resultlist:
            logger.info("Searching for type book failed to find any books...moving to general search")
            resultlist = providers.IterateOverNewzNabSites(book, "general")

        if not resultlist:
            logger.debug("Adding book %s to queue." % book["searchterm"])
            continue

        logger.debug(u"searchterm %s" % book["searchterm"])
        found = False
        for nzb in resultlist:
            nzbTitle = formatter.latinToAscii(formatter.replace_all(str(nzb["nzbtitle"]).lower(), dictrepl)).strip()
            nzbTitle = re.sub(r"\s\s+", " ", nzbTitle)  # remove extra whitespace
            logger.debug(u"nzbName %s" % nzbTitle)

            nzbTitle_match = fuzz.token_sort_ratio(book["searchterm"].lower(), nzbTitle)
            logger.debug("NZB Title Match %: " + str(nzbTitle_match))
            if nzbTitle_match <= match_ratio:
                continue

            logger.info(u"Found NZB: %s" % nzb["nzbtitle"])
            found = True
            bookid = book["bookid"]
            nzbTitle = (book["authorName"] + " - " + book["bookName"] + " LL.(" + book["bookid"] + ")").strip()
            nzburl = nzb["nzburl"]
            nzbprov = nzb["nzbprov"]

            nzbsize_temp = nzb["nzbsize"]
            # Need to cater for when this is NONE (Issue 35)
            if nzbsize_temp is None:
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + " MB"
            nzbdate = formatter.nzbdate2format(nzb["nzbdate"])

            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": nzbdate,
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "Status": "Skipped",
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action(
                'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]
            ).fetchone()
            if not snatchedbooks:
                DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                notifiers.notify_snatch(nzbTitle + " at " + formatter.now())
            # Only the first acceptable result is used for each book
            break

        if not found:
            logger.info(
                "No nzb's found for "
                + (book["authorName"] + " " + book["bookName"]).strip()
                + ". Adding book to queue."
            )

    if not books:
        snatched = searchmag.searchmagazines(mags)
        for items in snatched:
            DownloadMethod(items["bookid"], items["nzbprov"], items["nzbtitle"], items["nzburl"])
            notifiers.notify_snatch(items["nzbtitle"] + " at " + formatter.now())

    logger.info("Search for Wanted items complete")
def processDir():
    """Post-process snatched downloads found in the download directory.

    Each row of the "wanted" table with Status="Snatched" whose NZBtitle
    appears in lazylibrarian.DOWNLOAD_DIR is passed to processDestination().
    On success the "wanted" row is marked "Processed". A book additionally
    gets its image and OPF written, its status set to "Open" and its author's
    HaveBooks count refreshed; a magazine gets IssueStatus "Open".

    Returns None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR
    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        # Keep the name bound so the "no downloads" check below can run
        downloads = None

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
        return
    if downloads is None:
        logger.info('No downloads are found. Nothing to process.')
        return

    # Characters removed from (or replaced in) the destination folder name
    dic = {
        '<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
        ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''
    }

    ppcount = 0
    for book in snatched:
        if book['NZBtitle'] not in downloads:
            logger.info("Snatched NZB %s is not in download directory" % (book['NZBtitle']))
            continue

        pp_path = os.path.join(processpath, book['NZBtitle'])
        logger.info('Found folder %s.' % pp_path)

        data = myDB.select('SELECT * from books WHERE BookID=?', [book['BookID']])
        if data:
            # If several rows come back the last one wins
            metadata = data[-1]
            authorname = metadata['AuthorName']
            bookname = metadata['BookName']
            bookdesc = metadata['BookDesc']
            bookisbn = metadata['BookIsbn']
            bookimg = metadata['BookImg']
            bookdate = metadata['BookDate']
            booklang = metadata['BookLang']
            bookpub = metadata['BookPub']

            # Default destination path, should be allowed change per config file.
            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                '$Author', authorname).replace('$Title', bookname)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                '$Author', authorname).replace('$Title', bookname)
        else:
            # Not a book: for magazines the wanted row's BookID is looked up as the magazine title
            mags = myDB.select('SELECT * from magazines WHERE Title=?', [book['BookID']])
            if not mags:
                # Skip instead of reusing the previous item's title and destination
                logger.error('No book or magazine found for snatched item %s' % book['NZBtitle'])
                continue
            title = mags[-1]['Title']

            # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
            # files are downloading, there will be an error in post-processing, trying to go to the
            # same directory.
            dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                '$IssueDate', book['AuxInfo']).replace('$Title', title)
            authorname = None
            bookname = None
            global_name = lazylibrarian.MAG_DEST_FILE.replace(
                '$IssueDate', book['AuxInfo']).replace('$Title', title)

        dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
        dest_path = os.path.join(
            lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

        # Best effort only; done on the sanitised path, which is the one actually used
        try:
            os.chmod(dest_path, 0o777)
        except Exception:
            logger.debug("Could not chmod post-process directory")

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)
        if not processBook:
            logger.error(
                'Postprocessing for %s has failed. Warning - AutoAdd will be repeated' % global_name)
            continue

        ppcount = ppcount + 1

        # If you use auto add by Calibre you need the book in a single directory, not nested
        # So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder.
        if lazylibrarian.IMP_AUTOADD:
            processAutoAdd(dest_path)

        # update nzbs
        myDB.upsert("wanted", {"Status": "Processed"}, {"NZBurl": book['NZBurl']})

        if bookname is not None:
            # try image
            processIMG(dest_path, bookimg, global_name)

            # try metadata
            processOPF(dest_path, authorname, bookname, bookisbn, book['BookID'],
                       bookpub, bookdate, bookdesc, booklang, global_name)

            # update books
            myDB.upsert("books", {"Status": "Open"}, {"BookID": book['BookID']})

            # update authors
            query = 'SELECT COUNT(*) FROM books WHERE AuthorName=? AND (Status="Have" OR Status="Open")'
            countbooks = myDB.action(query, [authorname]).fetchone()
            havebooks = int(countbooks[0])
            countauthor = myDB.action(
                'SELECT * FROM authors WHERE AuthorName=?', [authorname]).fetchone()
            if countauthor:
                myDB.upsert("authors", {"HaveBooks": havebooks}, {"AuthorName": authorname})
        else:
            # update mags
            myDB.upsert("magazines", {"IssueStatus": "Open"}, {"Title": book['BookID']})

        logger.info('Successfully processed: %s' % (global_name))
        notifiers.notify_download(global_name + ' at ' + formatter.now())

    if ppcount:
        logger.debug('%s books are downloaded and processed.' % ppcount)
    else:
        logger.debug('No snatched books have been found')
def processDir():
    """Post-process snatched book downloads found in the download directory.

    Each row of the "wanted" table with Status="Snatched" whose NZBtitle
    appears in lazylibrarian.DOWNLOAD_DIR is moved to
    DESTINATION_DIR/<author>/<book> via processDestination(). On success the
    book is passed to processAutoAdd(), its image and OPF are written, the
    "wanted" row is marked "Success", the book is marked "Have" and the
    author's HaveBooks count is refreshed.

    Returns None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR
    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        # Keep the name bound so the "no downloads" branch below can run
        downloads = None

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
    elif downloads is None:
        logger.info('No downloads are found. Nothing to process.')
    else:
        # Characters removed from the destination folder name
        dic = {
            '<': '', '>': '', '=': '', '?': '', '"': '',
            ',': '', '*': '', ':': '', ';': ''
        }

        ppcount = 0
        for book in snatched:
            if book['NZBtitle'] not in downloads:
                continue

            pp_path = os.path.join(processpath, book['NZBtitle'])
            logger.info('Found folder %s.' % pp_path)

            data = myDB.select('SELECT * from books WHERE BookID=?', [book['BookID']])
            if not data:
                # Skip instead of reusing the previous book's metadata
                logger.error('No book found for snatched item %s' % book['NZBtitle'])
                continue

            # If several rows come back the last one wins
            metadata = data[-1]
            authorname = metadata['AuthorName']
            bookname = metadata['BookName']
            bookdesc = metadata['BookDesc']
            bookisbn = metadata['BookIsbn']
            bookimg = metadata['BookImg']
            bookdate = metadata['BookDate']
            booklang = metadata['BookLang']
            bookpub = metadata['BookPub']

            dest_path = authorname + '/' + bookname
            dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
            dest_path = os.path.join(
                lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

            processBook = processDestination(pp_path, dest_path, authorname, bookname)
            if not processBook:
                logger.error(
                    'Postprocessing for %s has failed. Warning - AutoAdd will be repeated' % bookname)
                continue

            ppcount = ppcount + 1

            # If you use auto add by Calibre you need the book in a single directory, not nested
            # So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder.
            processAutoAdd(dest_path)

            # try image
            processIMG(dest_path, bookimg)

            # try metadata
            processOPF(dest_path, authorname, bookname, bookisbn, book['BookID'],
                       bookpub, bookdate, bookdesc, booklang)

            # update nzbs
            myDB.upsert("wanted", {"Status": "Success"}, {"NZBurl": book['NZBurl']})

            # update books
            myDB.upsert("books", {"Status": "Have"}, {"BookID": book['BookID']})

            # update authors
            query = 'SELECT COUNT(*) FROM books WHERE AuthorName=? AND Status="Have"'
            countbooks = myDB.action(query, [authorname]).fetchone()
            havebooks = int(countbooks[0])
            myDB.upsert("authors", {"HaveBooks": havebooks}, {"AuthorName": authorname})

            logger.info('Successfully processed: %s - %s' % (authorname, bookname))

        if ppcount:
            logger.info('%s books are downloaded and processed.' % ppcount)

    logger.debug(' - Completed all snatched/downloaded files')
def processResultList(resultlist, book, searchtype):
    """Pick the best-matching torrent result for a wanted book and try to snatch it.

    Every entry of ``resultlist`` is fuzzy-matched against the cleaned author
    and title of ``book``. Entries that contain a reject word (one that is not
    part of the author or title) or that score below MATCH_RATIO for author or
    title are discarded. The best-scoring survivor is written to the "wanted"
    table and, unless the book is already marked "Snatched", handed to
    TORDownloadMethod.

    resultlist -- iterable of mappings with the keys 'tor_title', 'tor_url',
                  'tor_prov' and 'tor_size'
    book       -- mapping with the keys 'bookid', 'authorName' and 'bookName'
    searchtype -- label that is only used in log messages

    Returns True if a download was started, otherwise False.
    """
    myDB = database.DBConnection()

    # Replacements applied to result titles before matching
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\\s\\s': ' '}
    # Replacements applied to the wanted author and title
    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    # The wanted author/title do not change per result, so clean them once
    author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic))
    title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic))
    author_lower = author.lower()
    title_lower = title.lower()

    matches = []
    for tor in resultlist:
        torTitle = formatter.latinToAscii(formatter.replace_all(str(tor['tor_title']), dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))

        rejected = False
        tor_lower = torTitle.lower()
        for word in reject_list:
            # A reject word only counts when it is not part of what we asked for
            if word in tor_lower and word not in author_lower and word not in title_lower:
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                break

        if rejected or torAuthor_match < match_ratio or torBook_match < match_ratio:
            continue

        tor_size_temp = tor['tor_size']
        # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'

        controlValueDict = {"NZBurl": tor['tor_url']}
        newValueDict = {
            "NZBprov": tor['tor_prov'],
            "BookID": book['bookid'],
            "NZBdate": formatter.now(),  # when we asked for it
            "NZBsize": tor_size,
            "NZBtitle": (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip(),
            "NZBmode": "torrent",
            "Status": "Skipped"
        }

        score = (torBook_match + torAuthor_match) / 2  # as a percentage
        # lose a point for each extra word in the title so we get the closest match
        words = len(formatter.getList(torTitle))
        words -= len(formatter.getList(author))
        words -= len(formatter.getList(title))
        score -= abs(words)
        matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        score, nzb_Title, newValueDict, controlValueDict = max(matches, key=lambda x: x[0])
        logger.info(u'Best match TOR (%s%%): %s using %s search' % (score, nzb_Title, searchtype))
        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action(
            'SELECT * from books WHERE BookID=? and Status="Snatched"',
            [newValueDict["BookID"]]).fetchone()
        if not snatchedbooks:
            snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                       newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No torrent's found for " + (book["authorName"] + ' ' +
                 book['bookName']).strip() + " using searchtype " + searchtype)
    return False
def searchbook(books=None):
    """Search the usenet providers for wanted books and queue what they return.

    With ``books`` left as None every book whose Status is "Wanted" is
    searched; otherwise only the given books (mappings with a 'bookid' key)
    that are still "Wanted". Every provider result is written to the "wanted"
    table with Status "Skipped" and, unless the book is already "Snatched",
    passed to DownloadMethod.

    Returns None.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # No book was passed in, so take every wanted book
        wanted_rows = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')
    else:
        # Only the requested books, and only while they are still wanted
        wanted_rows = []
        for requested in books:
            rows = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"', [requested['bookid']])
            for row in rows:
                wanted_rows.append(row)

    # Build one search term per wanted book
    searchlist = []
    for row in wanted_rows:
        # Strip illegal chars
        dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':''}

        # Convert author and book to ASCII
        clean_author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        clean_book = formatter.latinToAscii(formatter.replace_all(row[2], dic))

        term = re.sub('[\.\-\/]', ' ', clean_author + ' ' + clean_book).encode('utf-8')
        searchlist.append({"bookid": row[0], "searchterm": term})

    if not (lazylibrarian.SAB_HOST or lazylibrarian.BLACKHOLE):
        logger.info('No downloadmethod is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB:
        logger.info('No providers are set.')

    # Contact all usenet providers and search
    for entry in searchlist:
        resultlist = []

        if lazylibrarian.NEWZNAB:
            logger.info('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(entry)

        # Only fall back to NZBMatrix when nothing has been found so far
        if lazylibrarian.NZBMATRIX and not resultlist:
            logger.info('Searching NZB at provider NZBMatrix ...')
            resultlist = providers.NZBMatrix(entry)

        if not resultlist:
            logger.info("Search didn't have results. Adding book %s to queue." % entry['searchterm'])
            continue

        for nzb in resultlist:
            bookid = nzb['bookid']
            nzbtitle = nzb['nzbtitle']
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']

            myDB.upsert(
                "wanted",
                {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": formatter.today(),
                    "NZBtitle": nzbtitle,
                    "Status": "Skipped"
                },
                {"NZBurl": nzburl},
            )

            already_snatched = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
            if not already_snatched:
                DownloadMethod(bookid, nzbprov, nzbtitle, nzburl)
            time.sleep(1)
def processDir():
    """Post-process snatched books whose download folder has arrived.

    For every row in "wanted" with status "Snatched" whose NZBtitle is the
    name of an entry in lazylibrarian.DOWNLOAD_DIR, the download is handed to
    processDestination().  On success the cover image and OPF metadata are
    written, the "wanted" row is marked "Success", the book is marked "Have"
    and the author's HaveBooks count is refreshed.

    Returns None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR
    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        # a missing/unreadable download dir should not kill the scheduler thread
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    # both values are lists, so test for emptiness rather than for None
    if not snatched:
        logger.info('No books are snatched. Nothing to process.')
        return
    if not downloads:
        logger.info('No downloads are found. Nothing to process.')
        return

    # characters that must not appear in the destination path
    dic = {'<':'', '>':'', '=':'', '?':'', '"':'', ',':'', '*':'', ':':'', ';':''}

    ppcount = 0
    for book in snatched:
        if book['NZBtitle'] not in downloads:
            continue

        pp_path = os.path.join(processpath, book['NZBtitle'])
        logger.info('Found folder %s.' % pp_path)

        # bound parameter: ids containing quotes must not break the query
        data = myDB.select('SELECT * from books WHERE BookID=?', [book['BookID']])
        if not data:
            # without a books row there is no author/title to build a destination
            # from; skipping avoids reusing the previous book's metadata
            logger.info('Postprocessing for %s has failed.' % book['NZBtitle'])
            continue

        metadata = data[0]
        authorname = metadata['AuthorName']
        bookname = metadata['BookName']
        bookdesc = metadata['BookDesc']
        bookisbn = metadata['BookIsbn']
        bookimg = metadata['BookImg']
        bookdate = metadata['BookDate']
        booklang = metadata['BookLang']
        bookpub = metadata['BookPub']

        dest_path = authorname + '/' + bookname
        dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

        if not processDestination(pp_path, dest_path, authorname, bookname):
            logger.info('Postprocessing for %s has failed.' % bookname)
            continue

        ppcount = ppcount + 1

        # try image
        processIMG(dest_path, bookimg)

        # try metadata
        processOPF(dest_path, authorname, bookname, bookisbn, book['BookID'], bookpub, bookdate, bookdesc, booklang)

        # update nzbs
        myDB.upsert("wanted", {"Status": "Success"}, {"NZBurl": book['NZBurl']})

        # update books
        myDB.upsert("books", {"Status": "Have"}, {"BookID": book['BookID']})

        # update authors (author names may contain quotes, so bind the value)
        countbooks = myDB.action('SELECT COUNT(*) FROM books WHERE AuthorName=? AND Status="Have"',
                                 [authorname]).fetchone()
        havebooks = int(countbooks[0])
        myDB.upsert("authors", {"HaveBooks": havebooks}, {"AuthorName": authorname})

        logger.info('Successfully processed: %s - %s' % (authorname, bookname))

    if ppcount:
        logger.info('%s books are downloaded and processed.' % ppcount)
def searchmagazines(mags=None):
    """Search the newznab providers for new issues of active magazines.

    mags -- optional list of dicts whose 'bookid' key holds a magazine title;
            only those magazines are searched (if "Active").  When None,
            every magazine with status "Active" is searched.

    Each NZB whose title starts with the magazine name and ends with a
    recognisable date is recorded in the "wanted" table.  Issues dated later
    than the magazine's IssueDate (or later than roughly a month ago when no
    issue is recorded yet) also update the "magazines" row and are added to
    the returned list.

    Returns a list of dicts with keys bookid, nzbprov, nzbtitle, nzburl.
    """
    maglist = []
    myDB = database.DBConnection()
    searchlist = []

    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:
        searchmags = myDB.select(
            'SELECT Title, Frequency, LastAcquired, IssueDate from magazines WHERE Status="Active"'
        )
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select(
                'SELECT Title, Frequency, LastAcquired, IssueDate from magazines WHERE Title=? AND Status="Active"',
                [magazine["bookid"]],
            )
            for terms in searchmags_temp:
                searchmags.append(terms)

    # characters stripped from the title before it is used as a search term
    dic = {"...": "", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "", ",": "", "*": ""}

    for searchmag in searchmags:
        # the magazine title doubles as its id
        bookid = searchmag[0]
        searchterm = formatter.latinToAscii(formatter.replace_all(searchmag[0], dic))
        searchterm = re.sub(r"[\.\-\/]", " ", searchterm).encode("utf-8")
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if (
        not lazylibrarian.SAB_HOST
        and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE
        and not lazylibrarian.NZB_DOWNLOADER_NZBGET
    ):
        logger.info("No download method is set, use SABnzbd/NZBGet or blackhole")

    if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2 and not lazylibrarian.USENETCRAWLER:
        logger.info("No providers are set. try use NEWZNAB.")

    if searchlist == []:
        logger.info("There is nothing to search for.  Mark some magazines as active.")

    for book in searchlist:
        resultlist = providers.IterateOverNewzNabSites(book, "mag")

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book["searchterm"])
            continue

        bad_regex = 0
        old_date = 0
        total_nzbs = 0
        new_date = 0
        for nzb in resultlist:
            total_nzbs = total_nzbs + 1
            bookid = nzb["bookid"]
            nzbtitle = nzb["nzbtitle"]
            nzburl = nzb["nzburl"]
            nzbprov = nzb["nzbprov"]
            nzbdate_temp = nzb["nzbdate"]
            nzbsize_temp = nzb["nzbsize"]
            if nzbsize_temp is None:
                # some providers report no size; float(None) would abort the whole search
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + " MB"
            nzbdate = formatter.nzbdate2format(nzbdate_temp)

            checkifmag = myDB.select("SELECT * from magazines WHERE Title=?", [bookid])
            if not checkifmag:
                continue

            for results in checkifmag:
                control_date = results["IssueDate"]
                frequency = results["Frequency"]

            nzbtitle_formatted = (
                nzb["nzbtitle"]
                .replace(".", " ")
                .replace("-", " ")
                .replace("/", " ")
                .replace("+", " ")
                .replace("_", " ")
                .replace("(", "")
                .replace(")", "")
            )
            # NOTE(review): "-" is replaced above, so the YYYY-MM fallback below can
            # only see a last word without dashes - confirm whether that is intended.

            # remove extra spaces if they're in a row
            nzbtitle_exploded = " ".join(nzbtitle_formatted.split()).split(" ")
            bookid_exploded = bookid.split(" ")

            # check nzb starts with magazine title, and ends with a date
            # eg The MagPI Issue 22 - July 2015
            # The title must be longer than the magazine name because it also has
            # to carry a date; a title that is too short cannot be a match.
            name_match = len(nzbtitle_exploded) > len(bookid_exploded)
            if name_match:
                for title_word, name_word in zip(nzbtitle_exploded, bookid_exploded):
                    if title_word.lower() != name_word.lower():
                        name_match = False  # name match failed
                        break

            if not name_match:
                logger.debug("NZB [%s] does not completely match search term [%s]." % (nzbtitle, bookid))
                bad_regex = bad_regex + 1
                continue

            # name_match guarantees at least two words here, so the indexes below
            # never leave the list (len - 3 may wrap to the last word, as before)

            # regexA = DD MonthName YYYY OR MonthName YYYY
            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
            regexA_month = formatter.month2num(regexA_month_temp)

            if frequency == "Weekly" or frequency == "BiWeekly":
                regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].zfill(2)
            else:
                regexA_day = "01"
            newdatish_regexA = regexA_year + regexA_month + regexA_day

            # int() is only used as an "all digits" check on the candidate date
            try:
                int(newdatish_regexA)
                newdatish = regexA_year + "-" + regexA_month + "-" + regexA_day
            except ValueError:
                # regexB = MonthName DD YYYY
                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                regexB_month = formatter.month2num(regexB_month_temp)
                newdatish_regexB = regexB_year + regexB_month + regexB_day

                try:
                    int(newdatish_regexB)
                    newdatish = regexB_year + "-" + regexB_month + "-" + regexB_day
                except ValueError:
                    # regexC = YYYY-MM
                    regexC_last = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                    regexC_exploded = regexC_last.split("-")
                    if len(regexC_exploded) == 2:
                        regexC_year = regexC_exploded[0]
                        regexC_month = regexC_exploded[1].zfill(2)
                        regexC_day = "01"
                        newdatish_regexC = regexC_year + regexC_month + regexC_day
                    elif len(regexC_exploded) == 3:
                        regexC_year = regexC_exploded[0]
                        regexC_month = regexC_exploded[1].zfill(2)
                        regexC_day = regexC_exploded[2].zfill(2)
                        newdatish_regexC = regexC_year + regexC_month + regexC_day
                    else:
                        newdatish_regexC = "Invalid"

                    try:
                        int(newdatish_regexC)
                        newdatish = regexC_year + "-" + regexC_month + "-" + regexC_day
                    except ValueError:
                        logger.debug("NZB %s not in proper date format." % nzbtitle_formatted)
                        bad_regex = bad_regex + 1
                        continue

            # Don't want to overwrite status = Skipped for NZBs that have been previously found
            wanted_status = myDB.select("SELECT * from wanted WHERE NZBtitle=?", [nzbtitle])
            if wanted_status:
                for results in wanted_status:
                    status = results["Status"]
            else:
                status = "Skipped"

            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": nzbdate,
                "NZBtitle": nzbtitle,
                "AuxInfo": newdatish,
                "Status": status,
                "NZBsize": nzbsize,
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if control_date is None:  # we haven't got any copies of this magazine yet
                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                # could perhaps calc differently for weekly, biweekly etc
                start_time = time.time()
                start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

            # only grab a copy if it's newer than the most recent we have, or newer than a month ago if we have none
            comp_date = formatter.datecompare(newdatish, control_date)
            if comp_date > 0:
                myDB.upsert(
                    "magazines", {"LastAcquired": nzbdate, "IssueDate": newdatish}, {"Title": bookid}
                )
                maglist.append(
                    {"bookid": bookid, "nzbprov": nzbprov, "nzbtitle": nzbtitle, "nzburl": nzburl}
                )
                logger.debug("This issue of %s is new, downloading" % nzbtitle_formatted)
                new_date = new_date + 1
            else:
                logger.debug("This issue of %s is old; skipping." % nzbtitle_formatted)
                old_date = old_date + 1

        logger.info(
            "Found %s NZBs for %s.  %s are new, %s are old, and %s fail name or date matching"
            % (total_nzbs, bookid, new_date, old_date, bad_regex)
        )
    return maglist
def searchbook(books=None):
    """Look for NZBs of wanted books and hand new finds to the downloader.

    books -- None to search every book whose status is "Wanted", or a list of
             dicts with a 'bookid' key to restrict the search to those books
             (still only while they are "Wanted").

    Provider results are written to the "wanted" table as "Skipped"; each one
    is passed to DownloadMethod unless its book is already "Snatched".
    Returns None.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()

    if books is not None:
        candidates = []
        for wanted in books:
            matches = myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                [wanted['bookid']])
            for match in matches:
                candidates.append(match)
    else:
        candidates = myDB.select(
            'SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"'
        )

    # text fragments removed from (or replaced in) author and title
    replacements = {
        '...': '',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': '',
        '*': ''
    }

    def _ascii(text):
        # apply the replacement table, then fold to ASCII
        return formatter.latinToAscii(formatter.replace_all(text, replacements))

    searchlist = []
    for candidate in candidates:
        book_id, raw_author, raw_title = candidate[0], candidate[1], candidate[2]
        combined = _ascii(raw_author) + ' ' + _ascii(raw_title)
        searchlist.append({
            "bookid": book_id,
            "searchterm": re.sub('[\.\-\/]', ' ', combined).encode('utf-8'),
        })

    no_downloader = not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE
    if no_downloader:
        logger.info('No downloadmethod is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB:
        logger.info('No providers are set.')

    for item in searchlist:
        hits = []

        # first provider that returns anything wins
        if lazylibrarian.NEWZNAB:
            logger.info('Searching NZB\'s at provider %s ...' %
                        lazylibrarian.NEWZNAB_HOST)
            hits = providers.NewzNab(item)

        if lazylibrarian.NZBMATRIX and not hits:
            logger.info('Searching NZB at provider NZBMatrix ...')
            hits = providers.NZBMatrix(item)

        if hits:
            for hit in hits:
                hit_id = hit['bookid']
                hit_title = hit['nzbtitle']
                hit_url = hit['nzburl']
                hit_prov = hit['nzbprov']

                record = {
                    "NZBprov": hit_prov,
                    "BookID": hit_id,
                    "NZBdate": formatter.today(),
                    "NZBtitle": hit_title,
                    "Status": "Skipped"
                }
                myDB.upsert("wanted", record, {"NZBurl": hit_url})

                snatched_row = myDB.action(
                    'SELECT * from books WHERE BookID=? and Status="Snatched"',
                    [hit_id]).fetchone()
                if not snatched_row:
                    DownloadMethod(hit_id, hit_prov, hit_title, hit_url)
                time.sleep(1)
        else:
            logger.info(
                "Search didn't have results. Adding book %s to queue." %
                item['searchterm'])
def processDir(force=False, reset=False):
    """Post-process snatched books and magazines found in the download directory.

    force -- when False and nothing is marked "Snatched", the scheduled
             processDir job is stopped instead of scanning the directory.
    reset -- when true, the processDir job is restarted before returning.

    Two passes are made over the download directory (lazylibrarian.DOWNLOAD_DIR,
    or the current working directory when that is unset or not a directory):

    1. every "Snatched" row of the "wanted" table is fuzzy-matched against the
       directory entries; a match is moved into the library with
       processDestination() and the wanted/books/magazines/issues tables are
       updated;
    2. any remaining entry whose name contains "LL.(<bookID>)" is imported
       through import_book().

    Returns False when the download directory cannot be listed, otherwise None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched)))
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round differences in torrent filenames.
                    # Torrents aren't always returned with the name we searched for
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    # compare without the " LL.(bookid)" suffix on either side
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    matchtitle = book['NZBtitle']
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match >= 95:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here...
                            # wrap the file in a folder of the same name so the rest of
                            # the code only ever deals with directories
                            if formatter.is_valid_booktype(fname, booktype="book") \
                                    or formatter.is_valid_booktype(fname, booktype="mag"):
                                dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                                if not os.path.exists(dirname):
                                    try:
                                        os.makedirs(dirname)
                                    except OSError as why:
                                        logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                if os.path.exists(dirname):
                                    try:
                                        shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                        fname = os.path.splitext(fname)[0]
                                    except Exception as why:
                                        logger.debug("Failed to move file %s to %s, %s" %
                                                     (fname, dirname, str(why)))
                        if os.path.isdir(os.path.join(processpath, fname)):
                            pp_path = os.path.join(processpath, fname)
                            logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                            found = True
                            break
                    else:
                        logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
                else:
                    logger.debug('Skipping %s' % fname)

            if found:
                # bound parameter: ids/titles containing quotes must not break the query
                data = myDB.select('SELECT * from books WHERE BookID=?', [book['BookID']])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = common.remove_accents(global_name)
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    # not a book: for magazines the wanted BookID holds the magazine title
                    data = myDB.select('SELECT * from magazines WHERE Title=?', [book['BookID']])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        # None marks this item as a magazine further down
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = common.remove_accents(global_name)
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as why:
                    # best effort only, but say why so the user can clean up by hand
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(why)))

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    # a single file: import from the download directory itself
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset:
        common.schedule_job(action='Restart', target='processDir')
def processResultList(resultlist, book, searchtype):
    """Pick the first acceptable search result for a book and snatch it.

    resultlist -- iterable of result dicts with keys nzbtitle, nzburl,
                  nzbprov, nzbdate, nzbsize and nzbmode
    book       -- dict with keys bookid, authorName and bookName
    searchtype -- label of the search that produced the results (logging only)

    A result is acceptable when both the author and the title fuzzy-match its
    cleaned title with at least lazylibrarian.MATCH_RATIO, and it contains no
    word from lazylibrarian.REJECT_WORDS that is not also part of the author
    or title.  Acceptable results are stored in the "wanted" table and, unless
    the book is already "Snatched", sent to the torrent or NZB downloader.

    Returns True as soon as one download was started, otherwise False.
    """
    myDB = database.DBConnection()
    # cleanup applied to provider titles: punctuation and all digits are dropped
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\\s\\s': ' '}
    # lighter cleanup applied to our own author and title
    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    # author and title do not change between results, so clean them once
    author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic))
    title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic))
    author_lower = author.lower()
    title_lower = title.lower()

    for nzb in resultlist:
        nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzbTitle = re.sub(r"\s\s+", " ", nzbTitle)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzbTitle)
        nzbBook_match = fuzz.token_set_ratio(title, nzbTitle)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzbTitle))

        rejected = False
        nzbTitle_lower = nzbTitle.lower()
        for word in reject_list:
            # a reject word is fine when it is genuinely part of the author or title
            if word in nzbTitle_lower and word not in author_lower and word not in title_lower:
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzbTitle, word))
                break

        if rejected or nzbAuthor_match < match_ratio or nzbBook_match < match_ratio:
            continue

        logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
        bookid = book['bookid']
        # the " LL.(bookid)" suffix lets the postprocessor recognise the download
        nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
        nzburl = nzb['nzburl']
        nzbprov = nzb['nzbprov']
        nzbdate_temp = nzb['nzbdate']
        nzbsize_temp = nzb['nzbsize']  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
        nzbdate = formatter.nzbdate2format(nzbdate_temp)
        nzbmode = nzb['nzbmode']

        controlValueDict = {"NZBurl": nzburl}
        newValueDict = {
            "NZBprov": nzbprov,
            "BookID": bookid,
            "NZBdate": formatter.now(),  # when we asked for it
            "NZBsize": nzbsize,
            "NZBtitle": nzbTitle,
            "NZBmode": nzbmode,
            "Status": "Skipped"
        }
        myDB.upsert("wanted", newValueDict, controlValueDict)

        # bound parameter instead of string interpolation
        snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                    [bookid]).fetchone()
        if not snatchedbooks:
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
            else:
                snatch = NZBDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
            if snatch:
                notifiers.notify_snatch(nzbTitle + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No nzb's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
def processCSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found, and marking the books
        as "Wanted"

        The first row of the file is the header row and must contain 'Author'
        and 'Title' columns; 'ISBN' and 'ISBN13' columns are used when present.
        The first column of every other row is used as that row's key.

        Returns False when search_dir is empty or not a directory, otherwise
        None (also when no usable csv file is found).
    """
    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Alternate Directory must not be empty")
        return False

    csvFile = csv_file(search_dir)
    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
        return

    headers = None
    content = {}

    logger.debug(u'Reading file %s' % csvFile)
    # the with-block makes sure the file handle is closed again
    with open(csvFile) as csv_handle:
        for row in csv.reader(csv_handle):
            if not row:
                continue  # blank line, nothing to index
            if headers is None:
                # First row: create the headers list by taking a slice from
                # item 1 as we don't need the very first header.
                headers = row[1:]
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                content[row[0]] = dict(zip(headers, row[1:]))

    # an empty file leaves headers as None, which is just as invalid as missing columns
    if not headers or 'Author' not in headers or 'Title' not in headers:
        logger.warn(u'Invalid CSV file found %s' % csvFile)
        return

    myDB = database.DBConnection()
    bookcount = 0
    authcount = 0
    skipcount = 0
    logger.debug(u"CSV: Found %s entries in csv file" % len(content))
    for entry in content.values():
        authorname = formatter.latinToAscii(entry['Author'])
        # bound parameters throughout: names may contain quotes, and ISBNs are
        # text (leading zeros, trailing X) that must not be pasted in unquoted
        authmatch = myDB.action('SELECT * FROM authors where AuthorName=?', [authorname]).fetchone()

        if authmatch:
            logger.debug(u"CSV: Author %s found in database" % (authorname))
        else:
            logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
            importer.addAuthorToDB(authorname)
            authcount = authcount + 1

        bookmatch = 0
        isbn10 = ""
        isbn13 = ""
        bookname = formatter.latinToAscii(entry['Title'])
        if 'ISBN' in headers:
            isbn10 = entry['ISBN']
        if 'ISBN13' in headers:
            isbn13 = entry['ISBN13']

        # try to find book in our database using isbn, or if that fails, name matching
        if formatter.is_valid_isbn(isbn10):
            bookmatch = myDB.action('SELECT * FROM books where BookIsbn=?', [isbn10]).fetchone()
        if not bookmatch and formatter.is_valid_isbn(isbn13):
            bookmatch = myDB.action('SELECT * FROM books where BookIsbn=?', [isbn13]).fetchone()
        if not bookmatch:
            bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
            if bookid:
                bookmatch = myDB.action('SELECT * FROM books where BookID=?', [bookid]).fetchone()

        if bookmatch:
            authorname = bookmatch['AuthorName']
            bookname = bookmatch['BookName']
            bookid = bookmatch['BookID']
            bookstatus = bookmatch['Status']
            if bookstatus in ('Open', 'Wanted', 'Have'):
                logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
            else:  # skipped/ignored
                logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Status": "Wanted"}
                myDB.upsert("books", newValueDict, controlValueDict)
                bookcount = bookcount + 1
        else:
            logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname))
            skipcount = skipcount + 1

    logger.info(u"Added %i new authors, marked %i books as 'Wanted', %i books not found" %
                (authcount, bookcount, skipcount))
def searchmagazines(mags=None):
    """Search NewzNab / UsenetCrawler for new issues of active magazines.

    mags -- optional list of dicts whose 'bookid' key holds a magazine title;
            only those magazines are searched (if "Active").  When None,
            every magazine with status "Active" is searched.

    An NZB is considered when its title contains the magazine title, starts
    with the same first word and ends with a recognisable date.  Such NZBs are
    recorded in the "wanted" table; issues newer than the magazine's IssueDate
    (or any issue when none is recorded yet) also update the "magazines" row
    and are added to the returned list.

    Returns a list of dicts with keys bookid, nzbprov, nzbtitle, nzburl.
    """
    maglist = []
    myDB = database.DBConnection()
    searchlist = []

    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:
        searchmags = myDB.select(
            'SELECT Title, Frequency, LastAcquired, IssueDate from magazines WHERE Status="Active"'
        )
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select(
                'SELECT Title, Frequency, LastAcquired, IssueDate from magazines WHERE Title=? AND Status="Active"',
                [magazine['bookid']])
            for terms in searchmags_temp:
                searchmags.append(terms)

    # characters stripped from the title before it is used as a search term
    dic = {
        '...': '',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': '',
        '*': ''
    }

    for searchmag in searchmags:
        # the magazine title doubles as its id
        bookid = searchmag[0]
        searchterm = formatter.latinToAscii(
            formatter.replace_all(searchmag[0], dic))
        searchterm = re.sub(r'[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No download method is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2 and not lazylibrarian.USENETCRAWLER:
        logger.info('No providers are set. try use NEWZNAB.')

    if searchlist == []:
        logger.info(
            'There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:
        # results of all enabled providers are combined
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.debug('Searching NZB\'s at provider %s ...' %
                         lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book, "1")

        if lazylibrarian.NEWZNAB2:
            logger.debug('Searching NZB\'s at provider %s ...' %
                         lazylibrarian.NEWZNAB_HOST2)
            resultlist += providers.NewzNab(book, "2")

        if lazylibrarian.USENETCRAWLER:
            logger.info('Searching NZB\'s at provider UsenetCrawler ...')
            resultlist += providers.UsenetCrawler(book, 'mag')

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        bad_regex = 0
        old_date = 0
        total_nzbs = 0
        new_date = 0
        for nzb in resultlist:
            total_nzbs = total_nzbs + 1
            bookid = nzb['bookid']
            nzbtitle = nzb['nzbtitle']
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbdate_temp = nzb['nzbdate']
            nzbsize_temp = nzb['nzbsize']
            if nzbsize_temp is None:
                # some providers report no size; float(None) would abort the whole search
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
            nzbdate = formatter.nzbdate2format(nzbdate_temp)

            checkifmag = myDB.select(
                'SELECT * from magazines WHERE Title=?', [bookid])
            if not checkifmag:
                continue

            for results in checkifmag:
                control_date = results['IssueDate']
                frequency = results['Frequency']

            nzbtitle_formatted = nzb['nzbtitle'].replace(
                '.', ' ').replace('/', ' ').replace('+', ' ').replace(
                    '_', ' ').replace('(', '').replace(')', '')
            # Need to make sure that substrings of magazine titles don't get found
            # (e.g. Maxim USA will find Maximum PC USA): removing the exact title
            # only changes the string when the title is really contained in it
            keyword_check = nzbtitle_formatted.replace(bookid, '')
            # remove extra spaces if they're in a row
            nzbtitle_exploded = " ".join(nzbtitle_formatted.split()).split(' ')
            bookid_exploded = bookid.split(' ')

            # Make sure that NZB contains exact magazine phrase, and that NZB title begins with magazine title
            contains_title = keyword_check.lower() != nzbtitle_formatted.lower()
            same_first_word = nzbtitle_exploded[0].lower() == bookid_exploded[0].lower()
            if not (contains_title and same_first_word):
                logger.debug(
                    'NZB [%s] does not completely match search term [%s].'
                    % (nzbtitle, bookid))
                bad_regex = bad_regex + 1
                continue

            if len(nzbtitle_exploded) <= 1:
                # a single word cannot hold a title and a date
                continue

            # regexA = DD MonthName YYYY OR MonthName YYYY
            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
            regexA_month = formatter.month2num(regexA_month_temp)

            if frequency == "Weekly" or frequency == "BiWeekly":
                regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].zfill(2)
            else:
                regexA_day = '01'
            newdatish_regexA = regexA_year + regexA_month + regexA_day

            # int() is only used as an "all digits" check on the candidate date
            try:
                int(newdatish_regexA)
                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
            except ValueError:
                # regexB = MonthName DD YYYY
                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                regexB_month = formatter.month2num(regexB_month_temp)
                newdatish_regexB = regexB_year + regexB_month + regexB_day

                try:
                    int(newdatish_regexB)
                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                except ValueError:
                    # regexC = YYYY-MM
                    regexC_last = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                    regexC_exploded = regexC_last.split('-')
                    if len(regexC_exploded) == 2:
                        regexC_year = regexC_exploded[0]
                        regexC_month = regexC_exploded[1].zfill(2)
                        regexC_day = '01'
                        newdatish_regexC = regexC_year + regexC_month + regexC_day
                    elif len(regexC_exploded) == 3:
                        regexC_year = regexC_exploded[0]
                        regexC_month = regexC_exploded[1].zfill(2)
                        regexC_day = regexC_exploded[2].zfill(2)
                        newdatish_regexC = regexC_year + regexC_month + regexC_day
                    else:
                        newdatish_regexC = 'Invalid'

                    try:
                        int(newdatish_regexC)
                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                    except ValueError:
                        logger.debug(
                            'NZB %s not in proper date format.'
                            % nzbtitle_formatted)
                        bad_regex = bad_regex + 1
                        continue

            # Don't want to overwrite status = Skipped for NZBs that have been previously found
            wanted_status = myDB.select(
                'SELECT * from wanted WHERE NZBtitle=?', [nzbtitle])
            if wanted_status:
                for results in wanted_status:
                    status = results['Status']
            else:
                status = "Skipped"

            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": nzbdate,
                "NZBtitle": nzbtitle,
                "AuxInfo": newdatish,
                "Status": status,
                "NZBsize": nzbsize
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            # with no issue recorded yet every dated NZB counts as new; otherwise
            # it has to be newer than the most recent issue we know about
            if control_date is None or formatter.datecompare(newdatish, control_date) > 0:
                myDB.upsert("magazines", {
                    "LastAcquired": nzbdate,
                    "IssueDate": newdatish
                }, {"Title": bookid})
                maglist.append({
                    'bookid': bookid,
                    'nzbprov': nzbprov,
                    'nzbtitle': nzbtitle,
                    'nzburl': nzburl
                })
                new_date = new_date + 1
            else:
                logger.debug(
                    'This issue of %s is old; skipping.'
                    % nzbtitle_formatted)
                old_date = old_date + 1

        logger.info(
            'Found %s NZBs for %s.  %s are new, %s are old, and %s have bad date formatting'
            % (total_nzbs, bookid, new_date, old_date, bad_regex))
    return maglist
def search_tor_book(books=None, reset=False):
    """Search the configured torrent providers for books marked "Wanted".

    books: optional iterable of dicts, each carrying a 'bookid' key. When
        None, every row of the books table whose Status is "Wanted" is
        searched (backlog search, ordered by BookAdded descending).
    reset: when true, the 'search_tor_book' job is restarted through
        common.schedule_job once the search loop has run to completion.
        The early-return paths (no torrent providers, nothing to search)
        do not reschedule.

    For each book the searches are tried in order until one succeeds:
    'book', then 'shortbook' (only if the stored title contains '('),
    then 'general', then 'author'.

    Returns None.
    """
    if not lazylibrarian.USE_TOR():
        logger.warn('No Torrent providers set, check config')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname, BookAdded from books WHERE Status="Wanted" order by BookAdded desc'
        )
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            # Bind the id as a parameter instead of interpolating it into
            # the SQL text. It is stringified first so the comparison stays
            # text-to-text, as it was with the original quoted literal.
            searchbooks.extend(myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                ['%s' % book['bookid']]))

    if not searchbooks:
        logger.debug("TOR search requested for no books or invalid BookID")
        return
    elif len(searchbooks) == 1:
        logger.info('TOR Searching for one book')
    else:
        logger.info('TOR Searching for %i books' % len(searchbooks))

    # Replacement tables are loop-invariant, so build them once.
    dic = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': '',
        '*': '',
        ':': '',
        ';': ''
    }
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    searchlist = []
    for searchbook in searchbooks:
        bookid = searchbook['BookID']
        raw_author = searchbook['AuthorName']
        raw_title = searchbook['BookName']

        author = formatter.latinToAscii(formatter.replace_all(raw_author, dic))
        title = formatter.latinToAscii(formatter.replace_all(raw_title, dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(
            formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + ' ' + title  # + ' ' + lazylibrarian.EBOOK_TYPE

        # Do all the regex clean-up on text and encode exactly once at the
        # end; encoding between passes re-encoded an already-encoded value.
        searchterm = re.sub(r'[\.\-\/]', ' ', searchterm)
        searchterm = re.sub(r'\(.*?\)', '', searchterm)
        # strip any double white space
        searchterm = re.sub(r"\s\s+", " ", searchterm)
        searchterm = searchterm.encode('utf-8')

        searchlist.append({
            "bookid": bookid,
            "bookName": raw_title,
            "authorName": raw_author,
            "searchterm": searchterm.strip()
        })

    tor_count = 0
    for book in searchlist:
        resultlist, nproviders = providers.IterateOverTorrentSites(
            book, 'book')
        if not nproviders:
            logger.warn('No torrent providers are set, check config')
            return  # No point in continuing

        found = processResultList(resultlist, book, "book")

        # if you can't find the book, try author/title without any
        # "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = providers.IterateOverTorrentSites(
                book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")

        # if you can't find the book under "books", you might find it
        # under general search
        if not found:
            resultlist, nproviders = providers.IterateOverTorrentSites(
                book, 'general')
            found = processResultList(resultlist, book, "general")

        # if you still can't find the book, try with author only
        if not found:
            resultlist, nproviders = providers.IterateOverTorrentSites(
                book, 'author')
            found = processResultList(resultlist, book, "author")

        if not found:
            logger.debug(
                "Searches returned no results. Adding book %s to queue." %
                book['searchterm'])
        else:
            tor_count = tor_count + 1

    if tor_count == 1:
        logger.info("TORSearch for Wanted items complete, found %s book" %
                    tor_count)
    else:
        logger.info("TORSearch for Wanted items complete, found %s books" %
                    tor_count)

    if reset:
        common.schedule_job(action='Restart', target='search_tor_book')