def checkRunningJobs():
    # make sure the relevant jobs are running
    # search jobs start when something gets marked "wanted" but are
    # not aware of any config changes that happen later, ie enable or disable providers,
    # so we check whenever config is saved
    # processdir is started when something gets marked "snatched"
    # and cancels itself once everything is processed so should be ok
    # but check anyway for completeness...
    myDB = database.DBConnection()
    snatched = myDB.match("SELECT count('Status') as counter from wanted WHERE Status = 'Snatched'")
    wanted = myDB.match("SELECT count('Status') as counter FROM books WHERE Status = 'Wanted'")
    if snatched:
        ensureRunning('processDir')
    if wanted:
        if lazylibrarian.USE_NZB() or lazylibrarian.USE_TOR() or lazylibrarian.USE_DIRECT():
            ensureRunning('search_book')
        if lazylibrarian.USE_RSS():
            ensureRunning('search_rss_book')
    else:
        scheduleJob('Stop', 'search_book')
        scheduleJob('Stop', 'search_rss_book')
    if lazylibrarian.USE_NZB() or lazylibrarian.USE_TOR() or lazylibrarian.USE_RSS() or lazylibrarian.USE_DIRECT():
        ensureRunning('search_magazines')
    else:
        scheduleJob('Stop', 'search_magazines')
    ensureRunning('authorUpdate')
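# `ensureRunning` is called above but not defined in this section. A minimal
# sketch, assuming it simply delegates to scheduleJob (which is already a
# no-op when the named job is on lazylibrarian.SCHED):
def ensureRunning(jobname):
    # hypothetical helper; the real implementation may differ
    scheduleJob(action='Start', target=jobname)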
def _forceBookSearch(self, **kwargs):
    if lazylibrarian.USE_NZB():
        if 'wait' in kwargs:
            search_nzb_book()
        else:
            threading.Thread(target=search_nzb_book, name='API-SEARCHNZB', args=[]).start()
    if lazylibrarian.USE_TOR():
        if 'wait' in kwargs:
            search_tor_book()
        else:
            threading.Thread(target=search_tor_book, name='API-SEARCHTOR', args=[]).start()
    if lazylibrarian.USE_RSS():
        if 'wait' in kwargs:
            search_rss_book()
        else:
            threading.Thread(target=search_rss_book, name='API-SEARCHRSS', args=[]).start()
    if not lazylibrarian.USE_RSS() and not lazylibrarian.USE_NZB() and not lazylibrarian.USE_TOR():
        self.data = "No search methods set, check config"
def _searchBook(self, **kwargs):
    if 'id' not in kwargs:
        self.data = 'Missing parameter: id'
        return
    else:
        self.id = kwargs['id']

    books = [{"bookid": kwargs['id']}]  # was [{"bookid": id}], which referenced the builtin id()
    if lazylibrarian.USE_RSS():
        if 'wait' in kwargs:
            search_rss_book(books)  # pass the requested book, not a full backlog search
        else:
            threading.Thread(target=search_rss_book, args=[books]).start()
    if lazylibrarian.USE_NZB():
        if 'wait' in kwargs:
            search_nzb_book(books)
        else:
            threading.Thread(target=search_nzb_book, args=[books]).start()
    if lazylibrarian.USE_TOR():
        if 'wait' in kwargs:
            search_tor_book(books)
        else:
            threading.Thread(target=search_tor_book, args=[books]).start()
    if not lazylibrarian.USE_RSS() and not lazylibrarian.USE_NZB() and not lazylibrarian.USE_TOR():
        self.data = "No search methods set, check config"
def scheduleJob(action='Start', target=None):
    """ Start or stop or restart a cron job by name eg
        target=search_magazines, target=processDir, target=search_tor_book
    """
    if target is None:
        return

    if action == 'Stop' or action == 'Restart':
        for job in lazylibrarian.SCHED.get_jobs():
            if target in str(job):
                lazylibrarian.SCHED.unschedule_job(job)
                logger.debug("Stop %s job" % target)

    if action == 'Start' or action == 'Restart':
        for job in lazylibrarian.SCHED.get_jobs():
            if target in str(job):
                logger.debug("%s %s job, already scheduled" % (action, target))
                return  # return if already running, if not, start a new one
        if 'processDir' in target and int(lazylibrarian.SCAN_INTERVAL):
            lazylibrarian.SCHED.add_interval_job(
                lazylibrarian.postprocess.cron_processDir,
                minutes=int(lazylibrarian.SCAN_INTERVAL))
            logger.debug("%s %s job" % (action, target))
        elif 'search_magazines' in target and int(lazylibrarian.SEARCH_INTERVAL):
            if lazylibrarian.USE_TOR() or lazylibrarian.USE_NZB() or lazylibrarian.USE_RSS():
                lazylibrarian.SCHED.add_interval_job(
                    lazylibrarian.searchmag.cron_search_magazines,
                    minutes=int(lazylibrarian.SEARCH_INTERVAL))
                logger.debug("%s %s job" % (action, target))
        elif 'search_nzb_book' in target and int(lazylibrarian.SEARCH_INTERVAL):
            if lazylibrarian.USE_NZB():
                lazylibrarian.SCHED.add_interval_job(
                    lazylibrarian.searchnzb.cron_search_nzb_book,
                    minutes=int(lazylibrarian.SEARCH_INTERVAL))
                logger.debug("%s %s job" % (action, target))
        elif 'search_tor_book' in target and int(lazylibrarian.SEARCH_INTERVAL):
            if lazylibrarian.USE_TOR():
                lazylibrarian.SCHED.add_interval_job(
                    lazylibrarian.searchtorrents.cron_search_tor_book,
                    minutes=int(lazylibrarian.SEARCH_INTERVAL))
                logger.debug("%s %s job" % (action, target))
        elif 'search_rss_book' in target and int(lazylibrarian.SEARCHRSS_INTERVAL):
            if lazylibrarian.USE_RSS():
                lazylibrarian.SCHED.add_interval_job(
                    lazylibrarian.searchrss.search_rss_book,
                    minutes=int(lazylibrarian.SEARCHRSS_INTERVAL))
                logger.debug("%s %s job" % (action, target))
        elif 'checkForUpdates' in target and int(lazylibrarian.VERSIONCHECK_INTERVAL):
            lazylibrarian.SCHED.add_interval_job(
                lazylibrarian.versioncheck.checkForUpdates,
                hours=int(lazylibrarian.VERSIONCHECK_INTERVAL))
            logger.debug("%s %s job" % (action, target))
def _forceRSSSearch(self, **kwargs):
    if lazylibrarian.USE_RSS():
        if 'wait' in kwargs:
            search_rss_book()
        else:
            threading.Thread(target=search_rss_book, name='API-SEARCHRSS', args=[]).start()
    else:
        self.data = 'No rss wishlists set, check config'
def _forceMagSearch(self, **kwargs):
    if lazylibrarian.USE_NZB() or lazylibrarian.USE_TOR() or lazylibrarian.USE_RSS():
        if 'wait' in kwargs:
            search_magazines(None, True)
        else:
            threading.Thread(target=search_magazines, name='API-SEARCHMAGS', args=[None, True]).start()
    else:
        self.data = 'No search methods set, check config'
def _forceBookSearch(self, **kwargs):
    if 'type' in kwargs:
        library = kwargs['type']
    else:
        library = None
    if lazylibrarian.USE_NZB() or lazylibrarian.USE_TOR() or lazylibrarian.USE_RSS() or lazylibrarian.USE_DIRECT():
        if 'wait' in kwargs:
            search_book(library=library)
        else:
            threading.Thread(target=search_book, name='API-SEARCHBOOK', args=[None, library]).start()
    else:
        self.data = "No search methods set, check config"
def _searchBook(self, **kwargs):
    if 'id' not in kwargs:
        self.data = 'Missing parameter: id'
        return
    books = [{"bookid": kwargs['id']}]
    if 'type' in kwargs:
        library = kwargs['type']
    else:
        library = None
    if lazylibrarian.USE_NZB() or lazylibrarian.USE_TOR() or lazylibrarian.USE_RSS() or lazylibrarian.USE_DIRECT():
        if 'wait' in kwargs:
            search_book(books=books, library=library)
        else:
            threading.Thread(target=search_book, name='API-SEARCHBOOK', args=[books, library]).start()
    else:
        self.data = "No search methods set, check config"
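# Hypothetical usage of the API handlers above. The handler names and the
# 'id'/'type'/'wait' kwargs come from this section; the wrapper object and
# the id value are assumptions for illustration only:
# api = ApiCmd()                                   # hypothetical API wrapper
# api._searchBook(id='GR12345', type='AudioBook')  # fire-and-forget background thread
# api._searchBook(id='GR12345', wait=1)            # run in-line and block until done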
def scheduleJob(action='Start', target=None):
    """ Start or stop or restart a cron job by name eg
        target=search_magazines, target=processDir, target=search_book
    """
    if target is None:
        return

    if action == 'Stop' or action == 'Restart':
        for job in lazylibrarian.SCHED.get_jobs():
            if target in str(job):
                lazylibrarian.SCHED.unschedule_job(job)
                logger.debug("Stop %s job" % target)

    if action == 'Start' or action == 'Restart':
        for job in lazylibrarian.SCHED.get_jobs():
            if target in str(job):
                logger.debug("%s %s job, already scheduled" % (action, target))
                return  # return if already running, if not, start a new one
        if 'processDir' in target and check_int(lazylibrarian.CONFIG['SCAN_INTERVAL'], 0):
            minutes = check_int(lazylibrarian.CONFIG['SCAN_INTERVAL'], 0)
            lazylibrarian.SCHED.add_interval_job(
                lazylibrarian.postprocess.cron_processDir, minutes=minutes)
            logger.debug("%s %s job in %s minute%s" % (action, target, minutes, plural(minutes)))
        elif 'search_magazines' in target and check_int(lazylibrarian.CONFIG['SEARCH_INTERVAL'], 0):
            minutes = check_int(lazylibrarian.CONFIG['SEARCH_INTERVAL'], 0)
            if lazylibrarian.USE_TOR() or lazylibrarian.USE_NZB() \
                    or lazylibrarian.USE_RSS() or lazylibrarian.USE_DIRECT():
                lazylibrarian.SCHED.add_interval_job(
                    lazylibrarian.searchmag.cron_search_magazines, minutes=minutes)
                logger.debug("%s %s job in %s minute%s" % (action, target, minutes, plural(minutes)))
        elif 'search_book' in target and check_int(lazylibrarian.CONFIG['SEARCH_INTERVAL'], 0):
            minutes = check_int(lazylibrarian.CONFIG['SEARCH_INTERVAL'], 0)
            if lazylibrarian.USE_NZB() or lazylibrarian.USE_TOR() or lazylibrarian.USE_DIRECT():
                lazylibrarian.SCHED.add_interval_job(
                    lazylibrarian.searchbook.cron_search_book, minutes=minutes)
                logger.debug("%s %s job in %s minute%s" % (action, target, minutes, plural(minutes)))
        elif 'search_rss_book' in target and check_int(lazylibrarian.CONFIG['SEARCHRSS_INTERVAL'], 0):
            if lazylibrarian.USE_RSS():
                minutes = check_int(lazylibrarian.CONFIG['SEARCHRSS_INTERVAL'], 0)
                lazylibrarian.SCHED.add_interval_job(
                    lazylibrarian.searchrss.search_rss_book, minutes=minutes)
                logger.debug("%s %s job in %s minute%s" % (action, target, minutes, plural(minutes)))
        elif 'checkForUpdates' in target and check_int(lazylibrarian.CONFIG['VERSIONCHECK_INTERVAL'], 0):
            hours = check_int(lazylibrarian.CONFIG['VERSIONCHECK_INTERVAL'], 0)
            lazylibrarian.SCHED.add_interval_job(
                lazylibrarian.versioncheck.checkForUpdates, hours=hours)
            logger.debug("%s %s job in %s hour%s" % (action, target, hours, plural(hours)))
        elif 'syncToGoodreads' in target and lazylibrarian.CONFIG['GR_SYNC']:
            if check_int(lazylibrarian.CONFIG['GOODREADS_INTERVAL'], 0):
                hours = check_int(lazylibrarian.CONFIG['GOODREADS_INTERVAL'], 0)
                lazylibrarian.SCHED.add_interval_job(
                    lazylibrarian.grsync.cron_sync_to_gr, hours=hours)
                logger.debug("%s %s job in %s hour%s" % (action, target, hours, plural(hours)))
        elif 'authorUpdate' in target and check_int(lazylibrarian.CONFIG['CACHE_AGE'], 0):
            # Try to get all authors scanned evenly inside the cache age
            minutes = check_int(lazylibrarian.CONFIG['CACHE_AGE'], 0) * 24 * 60
            myDB = database.DBConnection()
            cmd = "select count('AuthorID') as counter from Authors where Status='Active' or Status='Wanted'"
            cmd += " or Status='Loading'"
            authors = myDB.match(cmd)
            authcount = authors['counter']
            if not authcount:
                minutes = 60
            else:
                minutes = int(minutes / authcount)
            if minutes < 10:  # set a minimum interval of 10 minutes so we don't upset goodreads/librarything api
                minutes = 10
            if minutes <= 600:  # for bigger intervals switch to hours
                lazylibrarian.SCHED.add_interval_job(authorUpdate, minutes=minutes)
                logger.debug("%s %s job in %s minute%s" % (action, target, minutes, plural(minutes)))
            else:
                hours = int(minutes / 60)
                lazylibrarian.SCHED.add_interval_job(authorUpdate, hours=hours)
                logger.debug("%s %s job in %s hour%s" % (action, target, hours, plural(hours)))
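# `check_int` and `plural` are used throughout this section but defined
# elsewhere in the project. Minimal sketches consistent with the call sites
# here (assumptions, not the project's actual implementations):
def check_int(var, default):
    # return var as an int, or default if var isn't a valid integer
    try:
        return int(var)
    except (ValueError, TypeError):
        return default

def plural(count):
    # "1 minute" / "10 minutes" - callers append the result to a singular noun
    return '' if check_int(count, 0) == 1 else 's'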
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)
    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov = IterateOverRSSSites()
        if nprov:
            results += resultlist

    # reprocess to get consistent results
    searchresults = []
    for item in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'nzbtitle' in item:
            title = item['nzbtitle']
        if 'nzburl' in item:
            url = item['nzburl']
        if 'nzbprov' in item:
            provider = item['nzbprov']
        if 'nzbsize' in item:
            size = item['nzbsize']
        if 'nzbdate' in item:
            date = item['nzbdate']
        if 'nzbmode' in item:
            mode = item['nzbmode']
        if 'tor_title' in item:
            title = item['tor_title']
        if 'tor_url' in item:
            url = item['tor_url']
        if 'tor_prov' in item:
            provider = item['tor_prov']
        if 'tor_size' in item:
            size = item['tor_size']
        if 'tor_date' in item:
            date = item['tor_date']
        if 'tor_type' in item:
            mode = item['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'
            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                result = {'score': score, 'title': title, 'provider': provider, 'size': size,
                          'date': date, 'url': quote_plus(url), 'mode': mode}
                searchresults.append(result)

    # from operator import itemgetter
    # searchresults = sorted(searchresults, key=itemgetter('score'), reverse=True)

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
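# The commented-out sort in searchItem suggests ranking is left to the caller.
# A minimal usage sketch (the search term is made up for illustration):
# from operator import itemgetter
# hits = searchItem('linux format', cat='general')
# for hit in sorted(hits, key=itemgetter('score'), reverse=True)[:5]:
#     print("%3d%% %s [%s]" % (hit['score'], hit['title'], hit['provider']))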
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)
                searchterm = re.sub('[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn('There is nothing to search for. Mark some magazines as active.')

        for book in searchlist:
            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow
                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow
                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow
                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': ''}
                            nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015
                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(u"Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(u"Magazine matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"

                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos - 1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(nzbtitle_exploded[pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(nzbtitle_exploded):
                                        month = check_int(nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(nzbtitle_exploded):
                                                day = check_int(nzbtitle_exploded[pos + 2], 1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in ["issue", "no", "nr", "vol"]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(issue)  # 4 == 04 == 004
                                                if pos + 2 < len(nzbtitle_exploded):
                                                    year = check_year(nzbtitle_exploded[pos + 2])
                                                    if year and year < int(datetime.date.today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today().year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" % (regex_pass, nzbtitle_formatted, newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            # the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero
                                if str(newdatish).isdigit():
                                    logger.debug('Magazine comparing issue numbers (%s)' % newdatish)
                                    control_date = 0
                                elif re.match('\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                    logger.debug('Magazine date comparing to %s' % control_date)
                                else:
                                    logger.debug('Magazine unable to find comparison type [%s]' % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(4)  # pad so we sort correctly
                            elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match('\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match('\d+-\d\d-\d\d', str(control_date)):
                                    logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                else:
                                    logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            # Don't add a new entry if this issue has been found on an earlier search
                            # and status has been user-set ( we only delete the "Skipped" ones )
                            # In "wanted" table it might be already snatched/downloading/processing
                            mag_entry = myDB.match('SELECT * from %s WHERE NZBtitle=? and NZBprov=?' % insert_table,
                                                   (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'], magazine['nzbtitle'],
                                                   magazine['nzburl'], 'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'], magazine['nzbtitle'],
                                                   magazine['nzburl'], 'magazine')
                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
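# `datecompare` decides whether a parsed issue date is newer than the most
# recent issue we already have. A minimal sketch, assuming it returns the
# (positive) number of days by which the first yyyy-mm-dd date is newer than
# the second:
def datecompare(nzbdate, control_date):
    # hypothetical implementation; the real helper may differ
    y1, m1, d1 = (int(x) for x in nzbdate.split('-'))
    y2, m2, d2 = (int(x) for x in control_date.split('-'))
    return (datetime.date(y1, m1, d1) - datetime.date(y2, m2, d2)).days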
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not lazylibrarian.USE_RSS():
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                    bookmatch = myDB.match('select Status,BookName from books where bookid=?',
                                           (book['rss_bookid'],))
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s, already marked as "%s"' % (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' % bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:
                        import_book(book['rss_bookid'])
                        new_books += 1
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s by %s, already marked as "%s"' %
                                        (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]  # type: dict
                            if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%%) %s: %s" %
                                            (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]  # type: dict
                            if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                    and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%% %s%%) %s: %s" %
                                            (result['author_fuzz'], result['book_fuzz'],
                                             result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                        if not bookmatch:
                            msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                            if not results:
                                msg += ', No results returned'
                                logger.warn(msg)
                            else:
                                msg += ', No match found'
                                logger.warn(msg)
                                msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                            result['authorname'], result['bookname'])
                                logger.warn(msg)

        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders and not wishproviders:
            logger.warn('No rss providers are available')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']
            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "eBook",
                         "searchterm": searchterm})
            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "AudioBook",
                         "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("RSS Searches for %s %s returned no results." %
                            (book['library'], book['searchterm']))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
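# Hypothetical call sites for search_rss_book, matching the `books` shape the
# API handlers earlier in this section build (the bookid value is made up):
# search_rss_book()                                                # backlog: every Wanted book
# search_rss_book(books=[{'bookid': 'GR12345'}], library='eBook')  # a single newly added book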
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHMAG"

    myDB = database.DBConnection()
    searchlist = []

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, LastAcquired, \
                             IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select('SELECT Title, LastAcquired, IssueDate from magazines \
                                      WHERE Title="%s" AND Status="Active"' % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 0:
        return

    # should clear old search results as might not be available any more
    # ie torrent not available, changed providers, out of news server retention etc.
    # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
    logger.debug(u"Removing old magazine search results")
    myDB.action('DELETE from pastissues WHERE Status="Skipped"')

    logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

    for searchmag in searchmags:
        bookid = searchmag['Title']
        searchterm = searchmag['Title']
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}

        searchterm = unaccented_str(replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode(lazylibrarian.SYS_ENCODING)
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn('There is nothing to search for. Mark some magazines as active.')

    for book in searchlist:
        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')
            if tor_resultlist:
                for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                    resultlist.append({
                        'bookid': item['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                        'nzbsize': item['tor_size'],
                        'nzbmode': 'torrent'
                    })

        if lazylibrarian.USE_RSS():
            rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag')
            if not nproviders:
                logger.warn('No rss providers are set. Check config for RSS providers')
            if rss_resultlist:
                for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                    resultlist.append({
                        'bookid': book['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                        'nzbsize': item['tor_size'],
                        'nzbmode': item['tor_type']
                    })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])
        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            maglist = []
            issues = []
            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = unaccented_str(nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                results = myDB.match('SELECT * from magazines WHERE Title="%s"' % bookid)
                if not results:
                    logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                    bad_regex = bad_regex + 1
                else:
                    control_date = results['IssueDate']
                    reject_list = getList(results['Regex'])

                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': ''}
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date
                    rejected = False
                    if len(nzbtitle_exploded) > len(bookid_exploded):
                        # needs to be longer as it has to include a date
                        # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                        mag_title_match = fuzz.token_set_ratio(unaccented(bookid),
                                                               unaccented(nzbtitle_formatted))
                        if mag_title_match < lazylibrarian.MATCH_RATIO:
                            logger.debug(u"Magazine token set Match failed: " + str(mag_title_match) +
                                         "% for " + nzbtitle_formatted)
                            rejected = True
                    else:
                        rejected = True

                    if not rejected:
                        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"'
                                                    % nzburl)
                        if already_failed:
                            logger.debug("Rejecting %s, blacklisted at %s" %
                                         (nzbtitle_formatted, already_failed['NZBprov']))
                            rejected = True

                    if not rejected:
                        lower_title = unaccented(nzbtitle_formatted).lower()
                        lower_bookid = unaccented(bookid).lower()
                        for word in reject_list:
                            if word in lower_title and word not in lower_bookid:
                                rejected = True
                                logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                break

                    # maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
                    # if maxsize and nzbsize > maxsize:
                    #     rejected = True
                    #     logger.debug("Rejecting %s, too large" % nzbtitle_formatted)

                    if not rejected:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() in ['pdf', 'true', 'truepdf']:
                            nzbtitle_exploded.pop()  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = month2num(unaccented(regexA_month_temp))
                            if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            #     regexA_day = '01'  # monthly, or less frequent

                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = month2num(unaccented(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                    regexB_year = 'fail'

                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try YYYY MM
                                    regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                        regexC_month = 0
                                        regexC_day = 0
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                            regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        else:
                                            regexC_year = 'fail'

                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))
                                    except Exception:
                                        # regexD Issue/No/Vol nn, YYYY or Issue/No/Vol nn
                                        try:
                                            IssueLabel = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                            if IssueLabel.lower() in ["issue", "no", "vol"]:
                                                # issue nn
                                                regexD_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                if regexD_issue.isdigit():
                                                    newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                            else:
                                                IssueLabel = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                                if IssueLabel.lower() in ["issue", "no", "vol"]:
                                                    # issue nn, YYYY
                                                    regexD_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                                    regexD_issue = regexD_issue.strip(',')
                                                    if regexD_issue.isdigit():
                                                        newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                                    else:
                                                        raise ValueError
                                                    regexD_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                    if regexD_year.isdigit():
                                                        if int(regexD_year) < int(datetime.date.today().year):
                                                            newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                        except Exception:
                                            logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                                            bad_date = bad_date + 1
                                            # allow issues with good name but bad date to be included
                                            # so user can manually select them, incl those with issue numbers
                                            newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                            # continue
                        else:
                            logger.debug('Magazine [%s] does not match the search term [%s].' %
                                         (nzbtitle_formatted, bookid))
                            bad_regex = bad_regex + 1
                            continue

                        # wanted issues go into wanted table marked "Wanted"
                        # the rest into pastissues table marked "Skipped"
                        insert_table = "pastissues"
                        insert_status = "Skipped"

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            # or for magazines with only an issue number, use zero
                            if '-' in str(newdatish):
                                start_time = time.time()
                                start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                                control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                            else:
                                control_date = 0

                        if '-' in str(control_date) and '-' in str(newdatish):
                            # only grab a copy if it's newer than the most recent we have,
                            # or newer than a month ago if we have none
                            comp_date = datecompare(newdatish, control_date)
                        elif '-' not in str(control_date) and '-' not in str(newdatish):
                            # for issue numbers, check if later than last one we have
                            comp_date = int(newdatish) - int(control_date)
                            newdatish = "%s" % newdatish
                            newdatish = newdatish.zfill(4)  # pad so we sort correctly
                        else:
                            # invalid comparison of date and issue number
                            logger.debug('Magazine %s incorrect date or issue format.' % nzbtitle_formatted)
                            bad_date = bad_date + 1
                            newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                            comp_date = 0

                        if comp_date > 0:
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                issues.append(issue)
                                insert_table = "wanted"
                                insert_status = "Wanted"
                                nzbdate = now()  # when we asked for it
                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1

                        # store only the _new_ matching results
                        # Don't add a new entry if this issue has been found on an earlier search
                        # and status has been user-set ( we only delete the "Skipped" ones )
                        # In "wanted" table it might be already snatched/downloading/processing
                        mag_entry = myDB.select('SELECT * from %s WHERE NZBtitle="%s" and NZBprov="%s"' %
                                                (insert_table, nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": insert_status,
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert(insert_table, newValueDict, controlValueDict)
                    else:
                        # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted)
                        bad_regex = bad_regex + 1

            logger.info('Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download' %
                        (total_nzbs, plural(total_nzbs), bookid, new_date, old_date, bad_date,
                         bad_regex, len(maglist)))

            for magazine in maglist:
                if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                    snatch = TORDownloadMethod(magazine['bookid'], magazine['nzbprov'],
                                               magazine['nzbtitle'], magazine['nzburl'])
                else:
                    snatch = NZBDownloadMethod(magazine['bookid'], magazine['nzbprov'],
                                               magazine['nzbtitle'], magazine['nzburl'])
                if snatch:
                    logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                    notify_snatch("%s from %s at %s" %
                                  (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                    scheduleJob(action='Start', target='processDir')

            maglist = []

    if reset:
        scheduleJob(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not mags:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if not mags:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)
                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for. Mark some magazines as active.')

        for book in searchlist:
            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow
                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'direct'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow
                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders, dltypes = IterateOverRSSSites()
                if not nproviders or 'M' not in dltypes:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow
                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        if 'M' in item['types']:
                            resultlist.append({
                                'bookid': book['bookid'],
                                'nzbprov': item['tor_prov'],
                                'nzbtitle': item['tor_title'],
                                'nzburl': item['tor_url'],
                                'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents
                                'nzbsize': item['tor_size'],
                                'nzbmode': item['tor_type']
                            })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic)
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split()

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            bookid_exploded = replace_all(bookid, dic).split()
                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015
                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug("Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug("Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)
                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10, 12,
                                                                                13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            # the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age
                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.'
% nzbtitle_formatted) elif comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date += 1 issue = bookid + ',' + issuedate if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) logger.debug('Magazine request number %s' % len(issues)) if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug(str(issues)) insert_table = "wanted" nzbdate = now() # when we asked for it else: logger.debug('This issue of %s is already flagged for download' % issue) else: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted) old_date += 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' % insert_table, (nzbtitle, nzbprov)) if mag_entry: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('%s is already in %s marked %s' % (nzbtitle, insert_table, mag_entry['Status'])) else: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } if insert_table == 'pastissues': # try to mark ones we've already got match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?", (bookid, issuedate)) if match: insert_status = "Have" else: insert_status = "Skipped" else: insert_status = "Wanted" newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": issuedate, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status)) msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date) msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name) msg += ' %i rejected: %i to download' % (rejects, len(maglist)) logger.info(msg) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch, res = TORDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') elif magazine['nzbmode'] == 'direct': snatch, res = DirectDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') elif magazine['nzbmode'] == 'nzb': snatch, res = NZBDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') else: res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"]) logger.error(res) snatch = 0 if snatch: logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl'])) notify_snatch("Magazine %s from %s at %s" % (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now())) scheduleJob(action='Start', target='PostProcessor') else: myDB.action('UPDATE wanted SET status="Failed",DLResult=? 
WHERE NZBurl=?', (res, magazine["nzburl"])) if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete") except Exception: logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
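
# Illustrative sketch (not part of the module): the word-level title check
# used in search_magazines() above, with the project's unaccented()/replace_all()
# helpers simplified away to plain str.split(). Splitting into words is what
# stops substring false-positives such as "Maxim USA" matching "Maximum PC USA".
def title_words_match(mag_title, result_title):
    """True if every word of mag_title appears in result_title and
    result_title has at least one extra word (the date/issue part)."""
    mag_words = mag_title.lower().split()
    result_words = result_title.lower().split()
    if len(result_words) <= len(mag_words):
        return False  # too short to also contain a date or issue number
    return all(word in result_words for word in mag_words)

assert title_words_match("Maxim USA", "Maxim USA July 2015")
assert not title_words_match("Maxim USA", "Maximum PC USA July 2015")
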
def search_wishlist():
    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_wishlist')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()
        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
            # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                bookmatch = myDB.match('select Status,BookName from books where bookid=?',
                                       (book['rss_bookid'],))
                if bookmatch:
                    bookstatus = bookmatch['Status']
                    bookname = bookmatch['BookName']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked as "%s"' % (bookname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:
                    import_book(book['rss_bookid'])
                    new_books += 1
            else:
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s by %s, already marked as "%s"' %
                                    (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]  # type: dict
                            if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%%) %s: %s" %
                                            (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                        results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]  # type: dict
                            if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                    and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%% %s%%) %s: %s" %
                                            (result['author_fuzz'], result['book_fuzz'],
                                             result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (
                                result['author_fuzz'], result['book_fuzz'],
                                result['authorname'], result['bookname'])
                            logger.warn(msg)

        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
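
# Illustrative sketch (not part of the module): the MATCH_RATIO gate used by
# search_wishlist() above. A candidate from search_for() is only imported when
# both fuzzy scores beat the configured ratio, which check_int() defaults to 90
# when the setting is missing or invalid.
def good_enough(result, match_ratio=90):
    return result['author_fuzz'] > match_ratio and result['book_fuzz'] > match_ratio

print(good_enough({'author_fuzz': 95, 'book_fuzz': 92}))  # True
print(good_enough({'author_fuzz': 95, 'book_fuzz': 88}))  # False, one score too low
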
def search_rss_book(books=None, reset=False):
    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        common.schedule_job(action='Stop', target='search_rss_book')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHRSSBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID="%s" \
                                      AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("RSS search requested for no books")
        return
    elif len(searchbooks) == 1:
        logger.info('RSS Searching for one book')
    else:
        logger.info('RSS Searching for %i books' % len(searchbooks))

    resultlist, nproviders = providers.IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''}

    rss_count = 0
    for book in searchbooks:
        bookid = book['BookID']
        author = book['AuthorName']
        title = book['BookName']

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        title = formatter.latinToAscii(formatter.replace_all(title, dic))

        found = processResultList(resultlist, author, title, book)

        # if you can't find the book, try author without initials,
        # and title without any "(extended details, series etc)"
        if not found:
            if author[1] in '. ' or '(' in title:  # anything to shorten?
                while author[1] in '. ':  # strip any initials
                    author = author[2:].strip()  # and leading whitespace
                if '(' in title:
                    title = title.split('(')[0]
                found = processResultList(resultlist, author, title, book)

        if not found:
            logger.debug("Searches returned no results. Adding book %s - %s to queue." % (author, title))
        else:
            rss_count = rss_count + 1

    if rss_count == 1:
        logger.info("RSS Search for Wanted items complete, found %s book" % rss_count)
    else:
        logger.info("RSS Search for Wanted items complete, found %s books" % rss_count)

    if reset:
        common.schedule_job(action='Restart', target='search_rss_book')
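
# Illustrative sketch (not part of the module): the retry above strips leading
# initials from the author and any bracketed suffix from the title before
# searching again. Standalone demonstration with hypothetical values:
author = "J. K. Rowling"
title = "Harry Potter and the Philosopher's Stone (Book 1)"
while author[1] in '. ':  # second char is '.' or space: drop the initial
    author = author[2:].strip()
if '(' in title:
    title = title.split('(')[0]
print(author)  # Rowling
print(title)   # Harry Potter and the Philosopher's Stone (trailing space kept)
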
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not books:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        myDB = database.DBConnection()
        searchbooks = []
        if not books:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders, _ = IterateOverRSSSites()
        if not nproviders:
            logger.warn('No rss providers are available')
            scheduleJob(action='Stop', target='search_rss_book')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('eBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({"bookid": searchbook['BookID'],
                                           "bookName": searchbook['BookName'],
                                           "bookSub": searchbook['BookSub'],
                                           "authorName": searchbook['AuthorName'],
                                           "library": "eBook",
                                           "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('AudioBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({"bookid": searchbook['BookID'],
                                           "bookName": searchbook['BookName'],
                                           "bookSub": searchbook['BookSub'],
                                           "authorName": searchbook['AuthorName'],
                                           "library": "AudioBook",
                                           "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("RSS Searches for %s %s returned no results." %
                            (book['library'], book['searchterm']))
            if found > 1:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
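
# Hypothetical call patterns for search_rss_book() above; the bookid value is
# a placeholder, not a real database entry:
search_rss_book()  # backlog: every book with Status or AudioStatus "Wanted"
search_rss_book(books=[{'bookid': '12345'}], library='AudioBook')  # one new audiobook
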
def search_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLBOOKS"
            else:
                threading.currentThread().name = "SEARCHBOOKS"

        myDB = database.DBConnection()
        searchlist = []
        searchbooks = []

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                if not book['bookid'] in ['booklang', 'library', 'ignored']:
                    cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                    cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                    results = myDB.select(cmd, (book['bookid'],))
                    if results:
                        for terms in results:
                            searchbooks.append(terms)
                    else:
                        logger.debug("SearchBooks - BookID %s is not in the database" % book['bookid'])

        if len(searchbooks) == 0:
            logger.debug("SearchBooks - No books to search for")
            return

        nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + \
            lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
        if nprov == 0:
            logger.debug("SearchBooks - No providers to search")
            return

        modelist = []
        if lazylibrarian.USE_NZB():
            modelist.append('nzb')
        if lazylibrarian.USE_TOR():
            modelist.append('tor')
        if lazylibrarian.USE_DIRECT():
            modelist.append('direct')
        if lazylibrarian.USE_RSS():
            modelist.append('rss')

        logger.info('Searching %s provider%s %s for %i book%s' %
                    (nprov, plural(nprov), str(modelist), len(searchbooks), plural(len(searchbooks))))

        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":  # not just audiobook wanted
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('eBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({"bookid": searchbook['BookID'],
                                           "bookName": searchbook['BookName'],
                                           "bookSub": searchbook['BookSub'],
                                           "authorName": searchbook['AuthorName'],
                                           "library": "eBook",
                                           "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":  # in case we just wanted eBook
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('AudioBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({"bookid": searchbook['BookID'],
                                           "bookName": searchbook['BookName'],
                                           "bookSub": searchbook['BookSub'],
                                           "authorName": searchbook['AuthorName'],
                                           "library": "AudioBook",
                                           "searchterm": searchterm})

        # only get rss results once per run, as they are not search specific
        rss_resultlist = None
        if 'rss' in modelist:
            rss_resultlist, nprov = IterateOverRSSSites()
            if not nprov:
                # don't nag. Show warning message no more than every 20 mins
                timenow = int(time.time())
                if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                    logger.warn('No rss providers are available. Check config and blocklist')
                    lazylibrarian.NO_RSS_MSG = timenow
                modelist.remove('rss')

        book_count = 0
        for book in searchlist:
            matches = []
            for mode in modelist:
                # first attempt, try author/title in category "book"
                if book['library'] == 'AudioBook':
                    searchtype = 'audio'
                else:
                    searchtype = 'book'

                resultlist = None
                if mode == 'nzb' and 'nzb' in modelist:
                    resultlist, nprov = IterateOverNewzNabSites(book, searchtype)
                    if not nprov:
                        # don't nag. Show warning message no more than every 20 mins
                        timenow = int(time.time())
                        if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                            logger.warn('No nzb providers are available. Check config and blocklist')
                            lazylibrarian.NO_NZB_MSG = timenow
                        modelist.remove('nzb')
                elif mode == 'tor' and 'tor' in modelist:
                    resultlist, nprov = IterateOverTorrentSites(book, searchtype)
                    if not nprov:
                        # don't nag. Show warning message no more than every 20 mins
                        timenow = int(time.time())
                        if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                            logger.warn('No tor providers are available. Check config and blocklist')
                            lazylibrarian.NO_TOR_MSG = timenow
                        modelist.remove('tor')
                elif mode == 'direct' and 'direct' in modelist:
                    resultlist, nprov = IterateOverDirectSites(book, searchtype)
                    if not nprov:
                        # don't nag. Show warning message no more than every 20 mins
                        timenow = int(time.time())
                        if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                            logger.warn('No direct providers are available. Check config and blocklist')
                            lazylibrarian.NO_DIRECT_MSG = timenow
                        modelist.remove('direct')
                elif mode == 'rss' and 'rss' in modelist:
                    if rss_resultlist:
                        resultlist = rss_resultlist
                    else:
                        logger.debug("No active rss providers found")
                        modelist.remove('rss')

                if resultlist:
                    match = findBestResult(resultlist, book, searchtype, mode)
                else:
                    match = None

                # if you can't find the book, try author/title without any "(extended details, series etc)"
                if not goodEnough(match) and '(' in book['bookName']:
                    searchtype = 'short' + searchtype
                    if mode == 'nzb' and 'nzb' in modelist:
                        resultlist, nprov = IterateOverNewzNabSites(book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                                logger.warn('No nzb providers are available. Check config and blocklist')
                                lazylibrarian.NO_NZB_MSG = timenow
                            modelist.remove('nzb')
                    elif mode == 'tor' and 'tor' in modelist:
                        resultlist, nprov = IterateOverTorrentSites(book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                                logger.warn('No tor providers are available. Check config and blocklist')
                                lazylibrarian.NO_TOR_MSG = timenow
                            modelist.remove('tor')
                    elif mode == 'direct' and 'direct' in modelist:
                        resultlist, nprov = IterateOverDirectSites(book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                                logger.warn('No direct providers are available. Check config and blocklist')
                                lazylibrarian.NO_DIRECT_MSG = timenow
                            modelist.remove('direct')
                    elif mode == 'rss' and 'rss' in modelist:
                        resultlist = rss_resultlist

                    if resultlist:
                        match = findBestResult(resultlist, book, searchtype, mode)
                    else:
                        match = None

                # if you can't find the book under "books", you might find under general search
                # general search is the same as booksearch for torrents and rss, no need to check again
                if not goodEnough(match):
                    searchtype = 'general'
                    if mode == 'nzb' and 'nzb' in modelist:
                        resultlist, nprov = IterateOverNewzNabSites(book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                                logger.warn('No nzb providers are available. Check config and blocklist')
                                lazylibrarian.NO_NZB_MSG = timenow
                            modelist.remove('nzb')
                        if resultlist:
                            match = findBestResult(resultlist, book, searchtype, mode)
                        else:
                            match = None

                # if still not found, try general search again without any "(extended details, series etc)"
                if not goodEnough(match) and '(' in book['searchterm']:
                    searchtype = 'shortgeneral'
                    if mode == 'nzb' and 'nzb' in modelist:
                        resultlist, nprov = IterateOverNewzNabSites(book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                                logger.warn('No nzb providers are available. Check config and blocklist')
                                lazylibrarian.NO_NZB_MSG = timenow
                            modelist.remove('nzb')
                        if resultlist:
                            match = findBestResult(resultlist, book, searchtype, mode)
                        else:
                            match = None

                if not goodEnough(match):
                    logger.info("%s Searches for %s %s returned no results." %
                                (mode.upper(), book['library'], book['searchterm']))
                else:
                    logger.info("Found %s result: %s %s%%, %s priority %s" %
                                (mode.upper(), searchtype, match[0], match[2]['NZBprov'], match[4]))
                    matches.append(match)

            if matches:
                highest = max(matches, key=lambda s: (s[0], s[4]))  # sort on percentage and priority
                logger.info("Requesting %s download: %s%% %s: %s" %
                            (book['library'], highest[0], highest[2]['NZBprov'], highest[1]))
                if downloadResult(highest, book) > True:
                    book_count += 1  # we found it

        logger.info("Search for Wanted items complete, found %s book%s" % (book_count, plural(book_count)))

    except Exception:
        logger.error('Unhandled exception in search_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
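
# Illustrative sketch (not part of the module): how search_book() picks a
# download from the per-mode matches. Each match is assumed to be a tuple of
# (score, title, result_dict, ..., priority) as used above; max() with a
# two-element key prefers the higher score and breaks ties on priority.
matches = [
    (90, 'Title A', {'NZBprov': 'provA'}, None, 0),
    (95, 'Title B', {'NZBprov': 'provB'}, None, 1),
    (95, 'Title C', {'NZBprov': 'provC'}, None, 2),
]
highest = max(matches, key=lambda s: (s[0], s[4]))
print(highest[2]['NZBprov'])  # provC: ties with provB on score, wins on priority
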
def searchItem(item=None, bookid=None):
    """
    Call all active search providers asking for a "general" search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    """
    results = []

    if not item:
        return results

    if not internet():
        logger.debug('Search Item: No internet connection')
        return results

    book = {}
    searchterm = unaccented_str(item)
    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    nproviders = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS()
    logger.debug('Searching %s providers for %s' % (nproviders, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nproviders = IterateOverNewzNabSites(book, 'general')
        if nproviders:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nproviders = IterateOverTorrentSites(book, 'general')
        if nproviders:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nproviders = IterateOverRSSSites()
        if nproviders:
            results += resultlist

    # reprocess to get consistent results
    searchresults = []
    for item in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'nzbtitle' in item:
            title = item['nzbtitle']
        if 'nzburl' in item:
            url = item['nzburl']
        if 'nzbprov' in item:
            provider = item['nzbprov']
        if 'nzbsize' in item:
            size = item['nzbsize']
        if 'nzbdate' in item:
            date = item['nzbdate']
        if 'nzbmode' in item:
            mode = item['nzbmode']
        if 'tor_title' in item:
            title = item['tor_title']
        if 'tor_url' in item:
            url = item['tor_url']
        if 'tor_prov' in item:
            provider = item['tor_prov']
        if 'tor_size' in item:
            size = item['tor_size']
        if 'tor_date' in item:
            date = item['tor_date']
        if 'tor_type' in item:
            mode = item['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            # calculate match percentage
            score = fuzz.token_set_ratio(searchterm, title)
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                url = url.split('?')[0]
                result = {'score': score, 'title': title, 'provider': provider, 'size': size,
                          'date': date, 'url': urllib.quote_plus(url), 'mode': mode}
                searchresults.append(result)

    # from operator import itemgetter
    # searchresults = sorted(searchresults, key=itemgetter('score'), reverse=True)

    logger.debug('Found %s results for %s' % (len(searchresults), searchterm))
    return searchresults
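
# Illustrative sketch (not part of the module) of searchItem()'s scoring,
# assuming fuzzywuzzy's fuzz and using plain str.split() in place of the
# project's getList(): start from token_set_ratio, then subtract a point per
# extra word so the shortest sufficient title scores highest.
from fuzzywuzzy import fuzz

def score_result(searchterm, title):
    score = fuzz.token_set_ratio(searchterm, title)
    score -= abs(len(searchterm.split()) - len(title.split()))
    return score

print(score_result("war and peace", "War and Peace"))             # 100
print(score_result("war and peace", "War and Peace Unabridged"))  # 99: set ratio still 100, one extra word
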
def search_rss_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHRSS"

    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return

    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books '
            'WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select('SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                      AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        return

    logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

    resultlist, nproviders = IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''}

    rss_count = 0
    for book in searchbooks:
        authorname, bookname = get_searchterm(book, "book")
        found = processResultList(resultlist, authorname, bookname, book, 'book')

        # if you can't find the book, try title without any "(extended details, series etc)"
        if not found:
            if '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname, book, 'shortbook')

        if not found:
            logger.debug("Searches returned no results. Adding book %s - %s to queue." % (authorname, bookname))
        if found > True:
            rss_count = rss_count + 1

    logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    if reset:
        scheduleJob(action='Restart', target='search_rss_book')
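
# Illustrative sketch (not part of the module): how the dic substitution table
# above normalises search terms, with a simplified stand-in for the project's
# replace_all() helper (each key replaced in turn):
dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
       ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''}

def replace_all(text, substitutions):
    for key, value in substitutions.items():
        text = text.replace(key, value)
    return text

print(replace_all('Dungeons & Dragons: Vol. 1?', dic))
# -> 'Dungeons Dragons Vol  1' (callers collapse the doubled spaces later)
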
def search_rss_book(books=None, reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not lazylibrarian.USE_RSS():
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        if not internet():
            logger.warn('Search RSS Book: No internet connection')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverGoodReads()
        if not wishproviders:
            logger.debug('No rss wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if book['rss_bookid'] and lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                    bookmatch = myDB.match('select Status,BookName from books where bookid="%s"' %
                                           book['rss_bookid'])
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s, already marked as "%s"' % (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' % bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    else:
                        import_book(book['rss_bookid'])
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s by %s, already marked as "%s"' %
                                        (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                            if results:
                                result = results[0]
                                if result['isbn_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                                    logger.info("Found (%s%%) %s: %s" %
                                                (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                    import_book(result['bookid'])
                                    bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]
                                if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                        and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                                    logger.info("Found (%s%% %s%%) %s: %s" %
                                                (result['author_fuzz'], result['book_fuzz'],
                                                 result['authorname'], result['bookname']))
                                    import_book(result['bookid'])
                                    bookmatch = True
                        if not bookmatch:
                            msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                            # noinspection PyUnboundLocalVariable
                            if not results:
                                msg += ', No results returned'
                                logger.warn(msg)
                            else:
                                msg += ', No match found'
                                logger.warn(msg)
                                msg = "Closest match (%s%% %s%%) %s: %s" % (
                                    result['author_fuzz'], result['book_fuzz'],
                                    result['authorname'], result['bookname'])
                                logger.warn(msg)

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books,authors '
            cmd += 'WHERE books.AuthorID = authors.AuthorID and books.Status="Wanted" order by BookAdded desc'
            searchbooks = myDB.select(cmd)
        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub from books,authors '
                cmd += 'WHERE books.AuthorID = authors.AuthorID and BookID="%s" ' % book['bookid']
                cmd += 'AND books.Status="Wanted"'
                searchbook = myDB.select(cmd)
                for terms in searchbook:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            if not wishproviders:
                logger.warn('No rss providers are set, check config')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        rss_count = 0
        for book in searchbooks:
            authorname, bookname = get_searchterm(book, "book")
            found = processResultList(resultlist, authorname, bookname, book, 'book')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname, book, 'shortbook')

            if not found:
                logger.debug("Searches returned no results. Adding book %s - %s to queue." %
                             (authorname, bookname))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

        if reset:
            scheduleJob(action='Restart', target='search_rss_book')

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())