Exemplo n.º 1
0
def import_book(pp_path=None, bookID=None):

    # Separated this into a function so we can more easily import books from an alternate directory
    # and move them into LL folder structure given just the bookID, returns True or False
    # eg if import_book(source_directory, bookID):
    #         ppcount = ppcount + 1
    #
    myDB = database.DBConnection()
    data = myDB.select('SELECT * from books WHERE BookID="%s"' % bookID)
    if data:
        authorname = data[0]['AuthorName']
        bookname = data[0]['BookName']

        # try:
        #    auth_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorname).encode(lazylibrarian.SYS_ENCODING)
        #    os.chmod(auth_dir, 0777)
        # except Exception, e:
        #    logger.debug("Could not chmod author directory: " + str(auth_dir))

        if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
            logger.warn('Please check your EBOOK_DEST_FOLDER setting')
            lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

        dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
        global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
        global_name = common.remove_accents(global_name)
        # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
        dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

        if processBook:
            # update nzbs
            controlValueDict = {"BookID": bookID}
            newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)
            processExtras(myDB, dest_path, global_name, data)
            logger.info('Successfully processed: %s' % global_name)
            notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            return True
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)
            return False
Exemplo n.º 2
0
def import_book(pp_path=None, bookID=None):

    # Separated this into a function so we can more easily import books from an alternate directory
    # and move them into LL folder structure given just the bookID, returns True or False
    # eg if import_book(source_directory, bookID):
    #         ppcount = ppcount + 1
    #
    myDB = database.DBConnection()
    data = myDB.select('SELECT * from books WHERE BookID="%s"' % bookID)
    if data:
        authorname = data[0]['AuthorName']
        bookname = data[0]['BookName']

        # try:
        #    auth_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorname).encode(lazylibrarian.SYS_ENCODING)
        #    os.chmod(auth_dir, 0777)
        # except Exception, e:
        #    logger.debug("Could not chmod author directory: " + str(auth_dir))

        if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
            logger.warn('Please check your EBOOK_DEST_FOLDER setting')
            lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

        dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
        global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
        global_name = common.remove_accents(global_name)
        # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
        dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

        if processBook:
            # update nzbs
            controlValueDict = {"BookID": bookID}
            newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)
            processExtras(myDB, dest_path, global_name, data)
            logger.info('Successfully processed: %s' % global_name)
            notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            return True
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)
            return False
Exemplo n.º 3
0
def export_CSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn("Please check Alternate Directory setting")
        return False

    csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-')))

    myDB = database.DBConnection()

    find_status = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status)

    if not find_status:
        logger.warn(u"No books marked as %s" % status)
    else:
        count = 0
        with open(csvFile, 'wb') as csvfile:
            csvwrite = csv.writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=csv.QUOTE_MINIMAL)

            # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10)
            csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

            for resulted in find_status:
                logger.debug(u"Exported CSV for book %s" % resulted['BookName'])
                row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                        resulted['BookIsbn'], resulted['AuthorID']])
                csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                count = count + 1
        logger.info(u"CSV exported %s book%s to %s" % (count, plural(count), csvFile))
Exemplo n.º 4
0
    def markMags(self, AuthorName=None, action=None, redirect=None, **args):
        myDB = database.DBConnection()
        if not redirect:
            redirect = "magazines"
        authorcheck = None
	maglist = []
        for nzburl in args:
            # ouch dirty workaround...
            if not nzburl == 'book_table_length':
                    controlValueDict = {'NZBurl': nzburl}
                    newValueDict = {'Status': action}
                    myDB.upsert("wanted", newValueDict, controlValueDict)
                    title = myDB.select("SELECT * from wanted WHERE NZBurl = ?", [nzburl])
                    for item in title:
			bookid = item['BookID']    
			nzbprov = item['NZBprov']                    
			nzbtitle = item['NZBtitle']
			nzburl = item['NZBurl']
			maglist.append({
				'bookid': bookid,
				'nzbprov': nzbprov,
				'nzbtitle': nzbtitle,
				'nzburl': nzburl
				})
                    logger.info('Status set to %s for %s' % (action, nzbtitle))

        # start searchthreads
        if action == 'Wanted':
          	for items in maglist:
			logger.debug('Snatching %s' % items['nzbtitle'])
            		snatch = DownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
            		notifiers.notify_snatch(items['nzbtitle']+' at '+formatter.now()) 
      		raise cherrypy.HTTPRedirect("magazines")
Exemplo n.º 5
0
    def log(self, message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
            # Limit the size of the "in-memory" log, as gets slow if too long
            # Set a fairly arbitrary 500 message limit for now
            # TODO make this configurable?
            # Ensure messages are ascii as some author names contain accents and the web page doesnt like them
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), formatter.latinToAscii(message), level, threadname))
            if len(lazylibrarian.LOGLIST) > 500:
                del lazylibrarian.LOGLIST[-1]

        message = threadname + ' : ' + message

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warn(message)
        else:
            logger.error(message)
Exemplo n.º 6
0
    def log(self, message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        # Ensure messages are correctly encoded as some author names contain accents and the web page doesnt like them
        message = formatter.safe_unicode(message).encode(
            lazylibrarian.SYS_ENCODING)

        if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
            # Limit the size of the "in-memory" log, as gets slow if too long
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message))
            if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT:
                del lazylibrarian.LOGLIST[-1]

        message = threadname + ' : ' + message

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warn(message)
        else:
            logger.error(message)
Exemplo n.º 7
0
def exportCSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
     
    if not search_dir:
        logger.warn("Alternate Directory must not be empty")
        return False
    
    csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, formatter.now()))  
    
    myDB = database.DBConnection() 
    
    find_status = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status)
    
    if not find_status:
        logger.warn("No books marked as %s" % status)
    else:
        with open(csvFile, 'wb') as csvfile:
            csvwrite = csv.writer(csvfile, delimiter=',',
                quotechar='"', quoting=csv.QUOTE_MINIMAL)
                
            # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10)
            csvwrite.writerow([
                'BookID', 'Author', 'Title', 
                'ISBN', 'AuthorID'
                ])
        
            for resulted in find_status:
                logger.debug("Exported CSV for book %s" % resulted['BookName'].encode('utf-8'))
                row = ([
                    resulted['BookID'], resulted['AuthorName'], resulted['BookName'], 
                    resulted['BookIsbn'], resulted['AuthorID']       
                    ])
                csvwrite.writerow([("%s" % s).encode('utf-8') for s in row])
        logger.info("CSV exported to %s" % csvFile)
Exemplo n.º 8
0
    def log(self, message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        # Ensure messages are utf-8 as some author names contain accents and the web page doesnt like them
        message = formatter.safe_unicode(message).encode('utf-8')

        if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
            # Limit the size of the "in-memory" log, as gets slow if too long
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message))
            if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT:
                del lazylibrarian.LOGLIST[-1]

        message = threadname + ' : ' + message

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warn(message)
        else:
            logger.error(message)
Exemplo n.º 9
0
def export_CSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn("Please check Alternate Directory setting")
        return False

    csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-')))

    myDB = database.DBConnection()

    find_status = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status)

    if not find_status:
        logger.warn(u"No books marked as %s" % status)
    else:
        count = 0
        with open(csvFile, 'wb') as csvfile:
            csvwrite = csv.writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=csv.QUOTE_MINIMAL)

            # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10)
            csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

            for resulted in find_status:
                logger.debug(u"Exported CSV for book %s" % resulted['BookName'])
                row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                        resulted['BookIsbn'], resulted['AuthorID']])
                csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                count = count + 1
        logger.info(u"CSV exported %s book%s to %s" % (count, plural(count), csvFile))
Exemplo n.º 10
0
def export_CSV(search_dir=None, status="Wanted", library='eBook'):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg
        elif not os.access(search_dir, os.W_OK | os.X_OK):
            msg = "Alternate Directory [%s] not writable" % search_dir
            logger.warn(msg)
            return msg

        csvFile = os.path.join(search_dir, "%s %s - %s.csv" % (status, library, now().replace(':', '-')))

        myDB = database.DBConnection()

        cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors '
        if library == 'eBook':
            cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID'
        else:
            cmd += 'WHERE AudioStatus=? and books.AuthorID = authors.AuthorID'
        find_status = myDB.select(cmd, (status,))

        if not find_status:
            msg = "No %s marked as %s" % (library, status)
            logger.warn(msg)
        else:
            count = 0
            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=QUOTE_MINIMAL)

                # write headers, change AuthorName BookName BookIsbn to match import csv names
                csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

                for resulted in find_status:
                    logger.debug("Exported CSV for %s %s" % (library, resulted['BookName']))
                    row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                            resulted['BookIsbn'], resulted['AuthorID']])
                    if PY2:
                        csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                    else:
                        csvwrite.writerow([("%s" % s) for s in row])
                    count += 1
            msg = "CSV exported %s %s%s to %s" % (count, library, plural(count), csvFile)
            logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Exemplo n.º 11
0
def export_CSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg
        elif not os.access(search_dir, os.W_OK | os.X_OK):
            msg = "Alternate Directory [%s] not writable" % search_dir
            logger.warn(msg)
            return msg

        csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-')))

        myDB = database.DBConnection()

        cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors '
        cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID'
        find_status = myDB.select(cmd, (status,))

        if not find_status:
            msg = "No books marked as %s" % status
            logger.warn(msg)
        else:
            count = 0
            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=QUOTE_MINIMAL)

                # write headers, change AuthorName BookName BookIsbn to match import csv names
                csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

                for resulted in find_status:
                    logger.debug("Exported CSV for book %s" % resulted['BookName'])
                    row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                            resulted['BookIsbn'], resulted['AuthorID']])
                    if PY2:
                        csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                    else:
                        csvwrite.writerow([("%s" % s) for s in row])
                    count += 1
            msg = "CSV exported %s book%s to %s" % (count, plural(count), csvFile)
            logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Exemplo n.º 12
0
    def log(self, message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        # Weblog: Same behaviour as the file log
        # Todo: Seperate option in settings/log page
        if lazylibrarian.LOGLEVEL <= 1:
            if level != 'DEBUG':
                lazylibrarian.LOGLIST.insert(0, (formatter.now(), message, level, threadname))
        else:
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), message, level, threadname))

        message = threadname + ' : ' + message

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warn(message)
        else:
            logger.error(message)
Exemplo n.º 13
0
    def log(message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        # Get the frame data of the method that made the original logger call
        if len(inspect.stack()) > 2:
            frame = inspect.getframeinfo(inspect.stack()[2][0])
            program = os.path.basename(frame.filename)
            method = frame.function
            lineno = frame.lineno
        else:
            program = ""
            method = ""
            lineno = ""

        if 'windows' in platform.system().lower(
        ):  # windows cp1252 can't handle some accents
            message = formatter.unaccented(message)
        elif PY2:
            message = formatter.safe_unicode(message)
            message = message.encode(lazylibrarian.SYS_ENCODING)

        if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
            # Limit the size of the "in-memory" log, as gets slow if too long
            lazylibrarian.LOGLIST.insert(0,
                                         (formatter.now(), level, threadname,
                                          program, method, lineno, message))
            if len(lazylibrarian.LOGLIST) > formatter.check_int(
                    lazylibrarian.CONFIG['LOGLIMIT'], 500):
                del lazylibrarian.LOGLIST[-1]

        message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno,
                                          message)

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warning(message)
        else:
            logger.error(message)
Exemplo n.º 14
0
    def log(self, message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        if level != 'DEBUG':
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), message, level, threadname))

        message = threadname + ' : ' + message

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warn(message)
        else:
            logger.error(message)
Exemplo n.º 15
0
    def log(self, message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        if level != 'DEBUG':
            lazylibrarian.LOGLIST.insert(
                0, (formatter.now(), message, level, threadname))

        message = threadname + ' : ' + message

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warn(message)
        else:
            logger.error(message)
Exemplo n.º 16
0
    def log(message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        # Get the frame data of the method that made the original logger call
        if len(inspect.stack()) > 2:
            frame = inspect.getframeinfo(inspect.stack()[2][0])
            program = os.path.basename(frame.filename)
            method = frame.function
            lineno = frame.lineno
        else:
            program = ""
            method = ""
            lineno = ""

        # Ensure messages are correctly encoded as some author names contain accents and the web page doesnt like them
        message = formatter.safe_unicode(message)
        message = message.encode(lazylibrarian.SYS_ENCODING)
        if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
            # Limit the size of the "in-memory" log, as gets slow if too long
            lazylibrarian.LOGLIST.insert(0,
                                         (formatter.now(), level, threadname,
                                          program, method, lineno, message))
            if len(lazylibrarian.LOGLIST) > lazylibrarian.CONFIG['LOGLIMIT']:
                del lazylibrarian.LOGLIST[-1]

        message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno,
                                          message)

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warning(message)
        else:
            logger.error(message)
Exemplo n.º 17
0
    def log(message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        # Get the frame data of the method that made the original logger call
        if len(inspect.stack()) > 2:
            frame = inspect.getframeinfo(inspect.stack()[2][0])
            program = os.path.basename(frame.filename)
            method = frame.function
            lineno = frame.lineno
        else:
            program = ""
            method = ""
            lineno = ""

        if 'windows' in platform.system().lower():  # windows cp1252 can't handle some accents
            message = formatter.unaccented(message)
        elif PY2:
            message = formatter.safe_unicode(message)
            message = message.encode(lazylibrarian.SYS_ENCODING)

        if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
            # Limit the size of the "in-memory" log, as gets slow if too long
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname, program, method, lineno, message))
            if len(lazylibrarian.LOGLIST) > formatter.check_int(lazylibrarian.CONFIG['LOGLIMIT'], 500):
                del lazylibrarian.LOGLIST[-1]

        message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message)

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warning(message)
        else:
            logger.error(message)
Exemplo n.º 18
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not mags:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if not mags:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for.  Mark some magazines as active.')

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'direct'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders, dltypes = IterateOverRSSSites()
                if not nproviders or 'M' not in dltypes:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        if 'M' in item['types']:
                            resultlist.append({
                                'bookid': book['bookid'],
                                'nzbprov': item['tor_prov'],
                                'nzbtitle': item['tor_title'],
                                'nzburl': item['tor_url'],
                                'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents
                                'nzbsize': item['tor_size'],
                                'nzbmode': item['tor_type']
                            })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic)
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split()

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            bookid_exploded = replace_all(bookid, dic).split()

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        "Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10,
                                                                                12, 13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' %
                                                   insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?",
                                                       (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='PostProcessor')
                    else:
                        myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                    (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Exemplo n.º 19
0
def import_book(pp_path=None, bookID=None):
    try:
        # Move a book into LL folder structure given just the folder and bookID, returns True or False
        # Called from "import_alternate" or if we find an "ll.(xxx)" folder that doesn't match a snatched book/mag
        #
        myDB = database.DBConnection()
        data = myDB.match('SELECT * from books WHERE BookID="%s"' % bookID)
        if data:
            authorname = data['AuthorName']
            bookname = data['BookName']
            processpath = lazylibrarian.DIRECTORY('Destination')

            if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
            global_name = unaccented(global_name)
            # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
            # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
            dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                   ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
            dest_path = unaccented_str(replace_all(dest_path, dic))
            dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING)

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                # update nzbs
                was_snatched = myDB.match('SELECT BookID, NZBprov FROM wanted WHERE BookID="%s"' % bookID)
                if was_snatched:
                    controlValueDict = {"BookID": bookID}
                    newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
                    myDB.upsert("wanted", newValueDict, controlValueDict)
                    if bookname:
                        if len(lazylibrarian.IMP_CALIBREDB):
                            logger.debug('Calibre should have created the extras')
                        else:
                            processExtras(myDB, dest_path, global_name, data)

                    if not lazylibrarian.DESTINATION_COPY and pp_path != processpath:
                        if os.path.isdir(pp_path):
                            # calibre might have already deleted it?
                            try:
                                shutil.rmtree(pp_path)
                            except Exception as why:
                                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))

                logger.info('Successfully processed: %s' % global_name)
                notify_download("%s from %s at %s" % (global_name, was_snatched['NZBprov'], now()))
                return True
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                was_snatched = len(myDB.select('SELECT BookID FROM wanted WHERE BookID="%s"' % bookID))
                if was_snatched:
                    controlValueDict = {"BookID": bookID}
                    newValueDict = {"Status": "Failed", "NZBDate": now()}
                    myDB.upsert("wanted", newValueDict, controlValueDict)
                # reset status so we try for a different version
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % bookID)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as e:
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))
        return False
    except Exception as e:
        logger.error('Unhandled exception in importBook: %s' % traceback.format_exc())
Exemplo n.º 20
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab

    myDB = database.DBConnection()
    searchlist = []
    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select('SELECT Title, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"' % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 1:
        logger.info('Searching for one magazine')
    else:
        logger.info('Searching for %i magazines' % len(searchmags))

    for searchmag in searchmags:
        bookid = searchmag[0]
        searchterm = searchmag[0]
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}

        searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn('There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                resultlist.append({
                    'bookid': item['bookid'],
                    'nzbprov': item['tor_prov'],
                    'nzbtitle': item['tor_title'],
                    'nzburl': item['tor_url'],
                    'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                    'nzbsize': item['tor_size'],
                    'nzbmode': 'torrent'
                })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            to_snatch = 0
            maglist = []
            issues = []
            reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = (u'%s' % nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
                nzbdate = formatter.nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']
                checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid)
                if checkifmag:
                    for results in checkifmag:
                        control_date = results['IssueDate']
                        # frequency = results['Frequency']
                        # regex = results['Regex']

                    nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace(
                        '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip()
                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                    # keyword_check = nzbtitle_formatted.replace(bookid, '')
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date

                    name_match = 1  # assume name matches for now
                    if len(nzbtitle_exploded) > len(bookid_exploded):  # needs to be longer as it has to include a date
                        # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                        mag_title_match = fuzz.token_set_ratio(common.remove_accents(bookid), common.remove_accents(nzbtitle_formatted))
                        if mag_title_match < lazylibrarian.MATCH_RATIO:
                            logger.debug(u"Magazine token set Match failed: " + str(mag_title_match) + "% for " + nzbtitle_formatted)
                            name_match = 0
                    
                    lower_title = common.remove_accents(nzbtitle_formatted).lower()
                    lower_bookid = common.remove_accents(bookid).lower()
                    for word in reject_list:
                        if word in lower_title and not word in lower_bookid:
                            name_match = 0
                            logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                            break

                    if name_match:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() == 'pdf':
                                nzbtitle_exploded.pop()  # gotta love the function names
                        
                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = formatter.month2num(common.remove_accents(regexA_month_temp))
                            if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure
                            
                            #if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            #else:
                            #    regexA_day = '01'  # monthly, or less frequent

                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = formatter.month2num(common.remove_accents(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                    regexB_year = 'fail'

                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                            regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))
                                    except:
                                        logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                                        bad_date = bad_date + 1
                                        # allow issues with good name but bad date to be included
                                        # so user can manually select them, incl those with issue numbers
                                        newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                        # continue
                        else:
                            continue

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            start_time = time.time()
                            start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                            control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

                        # only grab a copy if it's newer than the most recent we have,
                        # or newer than a month ago if we have none
                        comp_date = formatter.datecompare(newdatish, control_date)
                        if comp_date > 0:
                            # Should probably only upsert when downloaded and processed in case snatch fails
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                to_snatch = to_snatch + 1
                                issues.append(issue)
                                
                                controlValueDict = {"NZBurl": nzburl}
                                newValueDict = {
                                    "NZBprov": nzbprov,
                                    "BookID": bookid,
                                    "NZBdate": formatter.now(),  # when we asked for it
                                    "NZBtitle": nzbtitle,
                                    "AuxInfo": newdatish,
                                    "Status": "Wanted",
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert("wanted", newValueDict, controlValueDict)
                                
                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1
                    else:
                        logger.debug('Magazine [%s] does not completely match search term [%s].' % (
                                     nzbtitle_formatted, bookid))
                        bad_regex = bad_regex + 1

            logger.info('Found %s results for %s.  %s are new, %s are old, %s fail date, %s fail name matching' % (
                        total_nzbs, bookid, new_date, old_date, bad_date, bad_regex))
            logger.info("%s, %s issues to download" % (bookid, to_snatch))

            for items in maglist:
                if items['nzbmode'] == "torznab":
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                elif items['nzbmode'] == "torrent":
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                else:
                    snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
            maglist = []

    if reset == True:
        common.schedule_job(action='Restart', target='search_magazines')
        
    logger.info("Search for magazines complete")    
Exemplo n.º 21
0
 def test_notify(self):
     return self._notifyTwitter(
         "This is a test notification from LazyLibrarian / " +
         formatter.now(),
         force=True)
Exemplo n.º 22
0
def processResultList(resultlist, author, title, book):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        tor_Title = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, tor_Title)
        tor_Title_match = fuzz.token_set_ratio(title, tor_Title)
        logger.debug("RSS Author/Title Match: %s/%s for %s" %(tor_Author_match, tor_Title_match, tor_Title))

        rejected = False
        for word in reject_list:
            if word in tor_Title.lower() and not word in author.lower() and not word in book.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (tor_Title, word))
                break

        if (tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected):
            logger.debug(u'Found RSS: %s' % tor['tor_title'])
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']

            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'
            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)
            snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                        bookid).fetchone()
            if not snatchedbooks:  # check if one of the other downloaders got there first
                if '.nzb' in tor_url:
                    snatch = NZBDownloadMethod(bookid, tor_prov, tor_Title, tor_url)
                else:    
                    #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                    if not tor_url.startswith('magnet'):  # magnets don't use auth
                        pwd  = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                        auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                        # don't know what form of password hash is required, try sha1    
                        tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                    snatch = TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url)

                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
                    return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' +
                 book['bookName']).strip())
    return False
Exemplo n.º 23
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        "...": "",
        ".": " ",
        " & ": " ",
        " = ": " ",
        "?": "",
        "$": "s",
        " + ": " ",
        '"': "",
        ",": " ",
        "*": "",
        "(": "",
        ")": "",
        "[": "",
        "]": "",
        "#": "",
        "0": "",
        "1": "",
        "2": "",
        "3": "",
        "4": "",
        "5": "",
        "6": "",
        "7": "",
        "8": "",
        "9": "",
        "'": "",
        ":": "",
        "!": "",
        "-": " ",
        "\s\s": " ",
    }
    # ' the ': ' ', ' a ': ' ', ' and ': ' ',
    # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {
        "...": "",
        ".": " ",
        " & ": " ",
        " = ": " ",
        "?": "",
        "$": "s",
        " + ": " ",
        '"': "",
        ",": "",
        "*": "",
        ":": "",
        ";": "",
        "'": "",
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    author = formatter.latinToAscii(formatter.replace_all(book["authorName"], dic))
    title = formatter.latinToAscii(formatter.replace_all(book["bookName"], dic))

    matches = []
    for nzb in resultlist:
        nzb_Title = formatter.latinToAscii(formatter.replace_all(nzb["nzbtitle"], dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title))

        rejected = False
        for word in reject_list:
            if word in nzb_Title.lower() and not word in author.lower() and not word in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzb_Title, word))
                break

        nzbsize_temp = nzb["nzbsize"]  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)

        maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if maxsize and nzbsize > maxsize:
            rejected = True
            logger.debug("Rejecting %s, too large" % nzb_Title)

        if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected:
            # logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
            bookid = book["bookid"]
            nzbTitle = (author + " - " + title + " LL.(" + book["bookid"] + ")").strip()
            nzburl = nzb["nzburl"]
            nzbprov = nzb["nzbprov"]
            nzbdate_temp = nzb["nzbdate"]
            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb["nzbmode"]
            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "NZBmode": nzbmode,
                "Status": "Skipped",
            }

            score = (nzbBook_match + nzbAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(nzb_Title))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u"Best match NZB (%s%%): %s using %s search" % (score, nzb_Title, searchtype))

        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"]
        ).fetchone()
        if not snatchedbooks:
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(
                    newValueDict["BookID"],
                    newValueDict["NZBprov"],
                    newValueDict["NZBtitle"],
                    controlValueDict["NZBurl"],
                )
            else:
                snatch = NZBDownloadMethod(
                    newValueDict["BookID"],
                    newValueDict["NZBprov"],
                    newValueDict["NZBtitle"],
                    controlValueDict["NZBurl"],
                )
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + " at " + formatter.now())
                common.schedule_job(action="Start", target="processDir")
                return True

    logger.debug(
        "No nzb's found for "
        + (book["authorName"] + " " + book["bookName"]).strip()
        + " using searchtype "
        + searchtype
    )
    return False
Exemplo n.º 24
0
 def test_notify(self):
     return self._notifyTwitter("This is a test notification from LazyLibrarian / " + formatter.now(), force=True)
Exemplo n.º 25
0
def downloadResult(match, book):
    """ match:  best result from search providers
        book:   book we are downloading (needed for reporting author name)
        return: 0 if failed to snatch
                1 if already snatched
                2 if we snatched it
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        newValueDict = match[1]
        controlValueDict = match[2]

        # It's possible to get book and wanted tables "Snatched" status out of sync
        # for example if a user marks a book as "Wanted" after a search task snatches it and before postprocessor runs
        # so check status in both tables here
        snatched = myDB.match(
            'SELECT BookID from wanted WHERE BookID=? and AuxInfo=? and Status="Snatched"',
            (newValueDict["BookID"], newValueDict["AuxInfo"]))
        if snatched:
            logger.debug('%s %s %s already marked snatched in wanted table' %
                         (newValueDict["AuxInfo"], book['authorName'],
                          book['bookName']))
            return 1  # someone else already found it

        if newValueDict["AuxInfo"] == 'eBook':
            snatched = myDB.match(
                'SELECT BookID from books WHERE BookID=? and Status="Snatched"',
                (newValueDict["BookID"], ))
        else:
            snatched = myDB.match(
                'SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"',
                (newValueDict["BookID"], ))
        if snatched:
            logger.debug('%s %s %s already marked snatched in book table' %
                         (newValueDict["AuxInfo"], book['authorName'],
                          book['bookName']))
            return 1  # someone else already found it

        myDB.upsert("wanted", newValueDict, controlValueDict)
        if newValueDict['NZBmode'] == 'direct':
            snatch = DirectDownloadMethod(newValueDict["BookID"],
                                          newValueDict["NZBtitle"],
                                          controlValueDict["NZBurl"],
                                          newValueDict["AuxInfo"])
        elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]:
            snatch = TORDownloadMethod(newValueDict["BookID"],
                                       newValueDict["NZBtitle"],
                                       controlValueDict["NZBurl"],
                                       newValueDict["AuxInfo"])
        elif newValueDict['NZBmode'] == 'nzb':
            snatch = NZBDownloadMethod(newValueDict["BookID"],
                                       newValueDict["NZBtitle"],
                                       controlValueDict["NZBurl"],
                                       newValueDict["AuxInfo"])
        else:
            logger.error('Unhandled NZBmode [%s] for %s' %
                         (newValueDict['NZBmode'], controlValueDict["NZBurl"]))
            snatch = 0

        if snatch:
            logger.info('Downloading %s %s from %s' %
                        (newValueDict["AuxInfo"], newValueDict["NZBtitle"],
                         newValueDict["NZBprov"]))
            custom_notify_snatch(
                "%s %s" % (newValueDict["BookID"], newValueDict['AuxInfo']))
            notify_snatch("%s %s from %s at %s" %
                          (newValueDict["AuxInfo"], newValueDict["NZBtitle"],
                           newValueDict["NZBprov"], now()))
            # at this point we could add NZBprov to the blocklist with a short timeout, a second or two?
            # This would implement a round-robin search system. Blocklist with an incremental counter.
            # If number of active providers == number blocklisted, so no unblocked providers are left,
            # either sleep for a while, or unblock the one with the lowest counter.
            scheduleJob(action='Start', target='processDir')
            return 2  # we found it
        return 0
    except Exception:
        logger.error('Unhandled exception in downloadResult: %s' %
                     traceback.format_exc())
        return 0
Exemplo n.º 26
0
def processDir():
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    #TODO - try exception on os.listdir - it throws debug level
    #exception if dir doesn't exist - bloody hard to catch
    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
    elif downloads is None:
        logger.info('No downloads are found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            if book['NZBtitle'] in downloads:
                pp_path = os.path.join(processpath, book['NZBtitle'])
                logger.info('Found folder %s.' % pp_path)

                data = myDB.select("SELECT * from books WHERE BookID='%s'" %
                                   book['BookID'])
                if data:
                    for metadata in data:
                        authorname = metadata['AuthorName']
                        authorimg = metadata['AuthorLink']
                        bookname = metadata['BookName']
                        bookdesc = metadata['BookDesc']
                        bookisbn = metadata['BookIsbn']
                        bookrate = metadata['BookRate']
                        bookimg = metadata['BookImg']
                        bookpage = metadata['BookPages']
                        booklink = metadata['BookLink']
                        bookdate = metadata['BookDate']
                        booklang = metadata['BookLang']
                        bookpub = metadata['BookPub']

                    #Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                        '$Author', authorname).replace('$Title', bookname)
                    #dest_path = authorname+'/'+bookname
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                        '$Author', authorname).replace('$Title', bookname)
                    #global_name = bookname + ' - ' + authorname
                else:
                    data = myDB.select(
                        "SELECT * from magazines WHERE Title='%s'" %
                        book['BookID'])
                    for metadata in data:
                        title = metadata['Title']
                    #AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    #files are downloading, there will be an error in post-processing, trying to go to the
                    #same directory.
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', title)
                    #dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', title)
                    #global_name = book['AuxInfo']+' - '+title
            else:
                logger.info("Snatched NZB %s is not in download directory" %
                            (book['NZBtitle']))
                continue

            try:
                os.chmod(
                    os.path.join(lazylibrarian.DESTINATION_DIR,
                                 dest_path).encode(lazylibrarian.SYS_ENCODING),
                    0777)
            except Exception, e:
                logger.debug("Could not chmod post-process directory")

            dic = {
                '<': '',
                '>': '',
                '...': '',
                ' & ': ' ',
                ' = ': ' ',
                '?': '',
                '$': 's',
                ' + ': ' ',
                '"': '',
                ',': '',
                '*': '',
                ':': '',
                ';': '',
                '\'': ''
            }
            dest_path = formatter.latinToAscii(
                formatter.replace_all(dest_path, dic))
            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                     dest_path).encode(
                                         lazylibrarian.SYS_ENCODING)

            processBook = processDestination(pp_path, dest_path, authorname,
                                             bookname, global_name)

            if processBook:

                ppcount = ppcount + 1

                # If you use auto add by Calibre you need the book in a single directory, not nested
                #So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder.
                if lazylibrarian.IMP_AUTOADD:
                    processAutoAdd(dest_path)

                #update nzbs
                controlValueDict = {"NZBurl": book['NZBurl']}
                newValueDict = {"Status": "Processed"}
                myDB.upsert("wanted", newValueDict, controlValueDict)

                # try image
                if bookname is not None:
                    processIMG(dest_path, bookimg, global_name)

                    # try metadata
                    processOPF(dest_path, authorname, bookname, bookisbn,
                               book['BookID'], bookpub, bookdate, bookdesc,
                               booklang, global_name)

                    #update books
                    controlValueDict = {"BookID": book['BookID']}
                    newValueDict = {"Status": "Open"}
                    myDB.upsert("books", newValueDict, controlValueDict)

                    #update authors
                    query = 'SELECT COUNT(*) FROM books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % authorname
                    countbooks = myDB.action(query).fetchone()
                    havebooks = int(countbooks[0])
                    controlValueDict = {"AuthorName": authorname}
                    newValueDict = {"HaveBooks": havebooks}
                    author_query = 'SELECT * FROM authors WHERE AuthorName="%s"' % authorname
                    countauthor = myDB.action(author_query).fetchone()
                    if countauthor:
                        myDB.upsert("authors", newValueDict, controlValueDict)

                else:
                    #update mags
                    controlValueDict = {"Title": book['BookID']}
                    newValueDict = {"IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)

                logger.info('Successfully processed: %s' % (global_name))
                notifiers.notify_download(global_name + ' at ' +
                                          formatter.now())
            else:
                logger.error(
                    'Postprocessing for %s has failed. Warning - AutoAdd will be repeated'
                    % global_name)
        if ppcount:
            logger.debug('%s books are downloaded and processed.' % ppcount)
        else:
            logger.debug('No snatched books have been found')
Exemplo n.º 27
0
def search_magazines(mags=None):
    # produce a list of magazines to search for, tor, nzb, torznab

    myDB = database.DBConnection()
    searchlist = []
    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, Frequency, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select('SELECT Title, Frequency, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"' % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 1:
        logger.info('Searching for one magazine')
    else:
        logger.info('Searching for %i magazines' % len(searchmags))

    for searchmag in searchmags:
        bookid = searchmag[0]
        searchterm = searchmag[0]
        frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}

        searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn('There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR:
            tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                resultlist.append({
                    'bookid': item['bookid'],
                    'nzbprov': item['tor_prov'],
                    'nzbtitle': item['tor_title'],
                    'nzburl': item['tor_url'],
                    'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                    'nzbsize': item['tor_size'],
                    'nzbmode': 'torrent'
                })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            to_snatch = 0
            maglist = []
            issues = []
            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = (u'%s' % nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
                nzbdate = formatter.nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']
                checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid)
                if checkifmag:
                    for results in checkifmag:
                        control_date = results['IssueDate']
                        frequency = results['Frequency']
                        # regex = results['Regex']

                    nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace(
                                            '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip()
                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA)
                    # keyword_check = nzbtitle_formatted.replace(bookid, '')
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date

                    name_match = 1  # assume name matches for now
                    name_len = len(bookid_exploded)
                    if len(nzbtitle_exploded) > name_len:  # needs to be longer as it should include a date
                        while name_len:
                            name_len = name_len - 1
                            # fuzzy check on each word in the magazine name with any accents stripped
                            # fuzz.ratio doesn't lowercase for us
                            ratio = fuzz.ratio(common.remove_accents(nzbtitle_exploded[name_len].lower()),
                                               common.remove_accents(bookid_exploded[name_len].lower()))
                            if ratio < 80:  # hard coded fuzz ratio for now, works for close matches
                                logger.debug("Magazine fuzz ratio failed [%d] [%s] [%s]" % (
                                             ratio, bookid, nzbtitle_formatted))
                                name_match = 0  # name match failed
                    if name_match:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                            nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() == 'pdf':
                                nzbtitle_exploded.pop()  # gotta love the function names
                        
                        if len(nzbtitle_exploded) > 1:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or nn MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            if regexA_year.isdigit():
                                if int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                    regexA_year = 'Invalid'
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = formatter.month2num(common.remove_accents(regexA_month_temp))

                            if frequency == "Weekly" or frequency == "BiWeekly":
                                regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].zfill(2)
                                if regexA_day.isdigit():
                                    if int(regexA_day) > 31:  # probably issue number nn
                                        regexA_day = '01'
                                else:
                                    regexA_day = '01'  # just MonthName YYYY
                            else:
                                regexA_day = '01'  # monthly, or less frequent
                            
                            newdatish_regexA = regexA_year + regexA_month + regexA_day

                            try:
                                int(newdatish_regexA)
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                            except:
                                # regexB = MonthName DD YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = formatter.month2num(common.remove_accents(regexB_month_temp))
                                newdatish_regexB = regexB_year + regexB_month + regexB_day

                                try:
                                    int(newdatish_regexB)
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                except:
                                    # regexC = YYYY MM or YYYY MM DD or Issue nn YYYY
                                    # (can't get MM/DD if named Issue nn)
                                    newdatish_regexC = 'Invalid'  # invalid unless works out otherwise
                                    regexC_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_temp.isdigit():
                                        if int(regexC_temp) > 1900 and int(regexC_temp) < 2100:  # YYYY MM  or YYYY nn
                                            regexC_year = regexC_temp
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                            regexC_day = '01'
                                            if regexC_month.isdigit():  # could be YYYY nn where nn is issue number
                                                if int(regexC_month) < 13:
                                                    # if issue number > 12 date matching will fail
                                                    newdatish_regexC = regexC_year + regexC_month + regexC_day
                                        else:
                                            regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                            if regexC_year.isdigit():
                                                if int(regexC_year) > 1900 and int(regexC_year) < 2100:  # YYYY MM DD or YYYY nn-nn
                                                    regexC_month = regexC_temp.zfill(2)
                                                    if int(regexC_month) < 13:
                                                        # if issue number > 12 date matching will fail
                                                        regexC_day = nzbtitle_exploded[len(
                                                                nzbtitle_exploded) - 1].zfill(2)
                                                        newdatish_regexC = regexC_year + regexC_month + regexC_day

                                    try:
                                        int(newdatish_regexC)
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                    except:
                                        logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                                        bad_date = bad_date + 1
                                        # allow issues with good name but bad date to be included
                                        # so user can manually select them
                                        newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                        # continue

                        else:
                            continue

                        # Don't want to overwrite status = Skipped for NZBs that have been previously found
                        wanted_status = myDB.select('SELECT * from wanted WHERE NZBtitle="%s"' % nzbtitle)
                        if wanted_status:
                            for results in wanted_status:
                                status = results['Status']
                        else:
                            status = "Skipped"

                        controlValueDict = {"NZBurl": nzburl}
                        newValueDict = {
                            "NZBprov": nzbprov,
                            "BookID": bookid,
                            "NZBdate": nzbdate,
                            "NZBtitle": nzbtitle,
                            "AuxInfo": newdatish,
                            "Status": status,
                            "NZBsize": nzbsize,
                            "NZBmode": nzbmode
                        }
                        myDB.upsert("wanted", newValueDict, controlValueDict)

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            start_time = time.time()
                            start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                            control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

                        # only grab a copy if it's newer than the most recent we have,
                        # or newer than a month ago if we have none
                        comp_date = formatter.datecompare(newdatish, control_date)
                        if comp_date > 0:
                            # Should probably only upsert when downloaded and processed in case snatch fails
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                to_snatch = to_snatch + 1
                                issues.append(issue)
                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1
                    else:
                        logger.debug('Magazine [%s] does not completely match search term [%s].' % (
                                     nzbtitle_formatted, bookid))
                        bad_regex = bad_regex + 1

            logger.info('Found %s results for %s.  %s are new, %s are old, %s fail date, %s fail name matching' % (
                        total_nzbs, bookid, new_date, old_date, bad_date, bad_regex))
            logger.info("%s, %s issues to download" % (bookid, to_snatch))

            for items in maglist:
                if items['nzbmode'] == "torznab":
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                elif items['nzbmode'] == "torrent":
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                else:
                    snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now())
                    postprocess.schedule_processor(action='Start')
            maglist = []
    logger.info("Search for magazines complete")
Exemplo n.º 28
0
def downloadResult(match, book):
    """ match:  best result from search providers
        book:   book we are downloading (needed for reporting author name)
        return: 0 if failed to snatch
                1 if already snatched
                2 if we snatched it
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        newValueDict = match[1]
        controlValueDict = match[2]

        # It's possible to get book and wanted tables "Snatched" status out of sync
        # for example if a user marks a book as "Wanted" after a search task snatches it and before postprocessor runs
        # so check status in both tables here
        snatched = myDB.match('SELECT BookID from wanted WHERE BookID=? and AuxInfo=? and Status="Snatched"',
                              (newValueDict["BookID"], newValueDict["AuxInfo"]))
        if snatched:
            logger.debug('%s %s %s already marked snatched in wanted table' %
                         (newValueDict["AuxInfo"], book['authorName'], book['bookName']))
            return 1  # someone else already found it

        if newValueDict["AuxInfo"] == 'eBook':
            snatched = myDB.match('SELECT BookID from books WHERE BookID=? and Status="Snatched"',
                                  (newValueDict["BookID"],))
        else:
            snatched = myDB.match('SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"',
                                  (newValueDict["BookID"],))
        if snatched:
            logger.debug('%s %s %s already marked snatched in book table' %
                         (newValueDict["AuxInfo"], book['authorName'], book['bookName']))
            return 1  # someone else already found it

        myDB.upsert("wanted", newValueDict, controlValueDict)
        if newValueDict['NZBmode'] == 'direct':
            snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                          controlValueDict["NZBurl"], newValueDict["AuxInfo"])
        elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]:
            snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                       controlValueDict["NZBurl"], newValueDict["AuxInfo"])
        elif newValueDict['NZBmode'] == 'nzb':
            snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                       controlValueDict["NZBurl"], newValueDict["AuxInfo"])
        else:
            logger.error('Unhandled NZBmode [%s] for %s' % (newValueDict['NZBmode'], controlValueDict["NZBurl"]))
            snatch = 0

        if snatch:
            logger.info('Downloading %s %s from %s' %
                        (newValueDict["AuxInfo"], newValueDict["NZBtitle"], newValueDict["NZBprov"]))
            custom_notify_snatch("%s %s" % (newValueDict["BookID"], newValueDict['AuxInfo']))
            notify_snatch("%s %s from %s at %s" %
                          (newValueDict["AuxInfo"], newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
            # at this point we could add NZBprov to the blocklist with a short timeout, a second or two?
            # This would implement a round-robin search system. Blocklist with an incremental counter.
            # If number of active providers == number blocklisted, so no unblocked providers are left,
            # either sleep for a while, or unblock the one with the lowest counter.
            scheduleJob(action='Start', target='processDir')
            return 2  # we found it
        return 0
    except Exception:
        logger.error('Unhandled exception in downloadResult: %s' % traceback.format_exc())
        return 0
Exemplo n.º 29
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHMAG"

    myDB = database.DBConnection()
    searchlist = []

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                 )
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select(
                'SELECT Title, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"'
                % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 0:
        return

    # should clear old search results as might not be available any more
    # ie torrent not available, changed providers, out of news server retention etc.
    # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
    logger.debug(u"Removing old magazine search results")
    myDB.action('DELETE from pastissues WHERE Status="Skipped"')

    logger.info('Searching for %i magazine%s' %
                (len(searchmags), plural(len(searchmags))))

    for searchmag in searchmags:
        bookid = searchmag['Title']
        searchterm = searchmag['Title']
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {
            '...': '',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': ''
        }

        searchterm = unaccented_str(replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ',
                            searchterm).encode(lazylibrarian.SYS_ENCODING)
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn(
            'There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No nzb providers are set. Check config for NEWZNAB or TORZNAB providers'
                )

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No torrent providers are set. Check config for TORRENT providers'
                )

            if tor_resultlist:
                for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                    resultlist.append({
                        'bookid': item['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate':
                        'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                        'nzbsize': item['tor_size'],
                        'nzbmode': 'torrent'
                    })

        if lazylibrarian.USE_RSS():
            rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No rss providers are set. Check config for RSS providers')

            if rss_resultlist:
                for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                    resultlist.append({
                        'bookid': book['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate': item[
                            'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                        'nzbsize': item['tor_size'],
                        'nzbmode': item['tor_type']
                    })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            maglist = []
            issues = []

            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = unaccented_str(nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace(
                    "'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                results = myDB.match(
                    'SELECT * from magazines WHERE Title="%s"' % bookid)
                if not results:
                    logger.debug(
                        'Magazine [%s] does not match search term [%s].' %
                        (nzbtitle, bookid))
                    bad_regex = bad_regex + 1
                else:
                    control_date = results['IssueDate']
                    reject_list = getList(results['Regex'])

                    dic = {
                        '.': ' ',
                        '-': ' ',
                        '/': ' ',
                        '+': ' ',
                        '_': ' ',
                        '(': '',
                        ')': ''
                    }
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(
                        nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date

                    rejected = False
                    if len(nzbtitle_exploded) > len(
                            bookid_exploded
                    ):  # needs to be longer as it has to include a date
                        # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                        mag_title_match = fuzz.token_set_ratio(
                            unaccented(bookid), unaccented(nzbtitle_formatted))

                        if mag_title_match < lazylibrarian.MATCH_RATIO:
                            logger.debug(u"Magazine token set Match failed: " +
                                         str(mag_title_match) + "% for " +
                                         nzbtitle_formatted)
                            rejected = True
                    else:
                        rejected = True

                    if not rejected:
                        already_failed = myDB.match(
                            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"'
                            % nzburl)
                        if already_failed:
                            logger.debug("Rejecting %s, blacklisted at %s" %
                                         (nzbtitle_formatted,
                                          already_failed['NZBprov']))
                            rejected = True

                    if not rejected:
                        lower_title = unaccented(nzbtitle_formatted).lower()
                        lower_bookid = unaccented(bookid).lower()
                        for word in reject_list:
                            if word in lower_title and word not in lower_bookid:
                                rejected = True
                                logger.debug("Rejecting %s, contains %s" %
                                             (nzbtitle_formatted, word))
                                break

                    # maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
                    # if maxsize and nzbsize > maxsize:
                    #    rejected = True
                    #    logger.debug("Rejecting %s, too large" % nzbtitle_formatted)

                    if not rejected:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() in ['pdf', 'true', 'truepdf']:
                            nzbtitle_exploded.pop(
                            )  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[
                                len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[
                                len(nzbtitle_exploded) - 2]
                            regexA_month = month2num(
                                unaccented(regexA_month_temp))
                            if not regexA_year.isdigit() or int(
                                    regexA_year) < 1900 or int(
                                        regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[
                                len(nzbtitle_exploded) -
                                3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day
                                       ) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            # regexA_day = '01'  # monthly, or less frequent
                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year),
                                                      int(regexA_month),
                                                      int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[
                                    len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[
                                    len(nzbtitle_exploded) - 3]
                                regexB_month = month2num(
                                    unaccented(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[
                                    len(nzbtitle_exploded) -
                                    2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(
                                        regexB_year) < 1900 or int(
                                            regexB_year) > 2100:
                                    regexB_year = 'fail'
                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(
                                        int(regexB_year), int(regexB_month),
                                        int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[
                                        len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit(
                                    ) and int(regexC_year) > 1900 and int(
                                            regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[
                                            len(nzbtitle_exploded) -
                                            1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[
                                            len(nzbtitle_exploded) - 3]
                                        regexC_month = 0
                                        regexC_day = 0
                                        if regexC_year.isdigit(
                                        ) and int(regexC_year) > 1900 and int(
                                                regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[
                                                len(nzbtitle_exploded) -
                                                2].zfill(2)
                                            regexC_day = nzbtitle_exploded[
                                                len(nzbtitle_exploded) -
                                                1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(
                                            int(regexC_year),
                                            int(regexC_month), int(regexC_day))
                                    except Exception:
                                        # regexD Issue/No/Vol nn, YYYY or Issue/No/Vol nn
                                        try:
                                            IssueLabel = nzbtitle_exploded[
                                                len(nzbtitle_exploded) - 2]
                                            if IssueLabel.lower() in [
                                                    "issue", "no", "vol"
                                            ]:
                                                # issue nn
                                                regexD_issue = nzbtitle_exploded[
                                                    len(nzbtitle_exploded) - 1]
                                                if regexD_issue.isdigit():
                                                    newdatish = str(
                                                        int(regexD_issue)
                                                    )  # 4 == 04 == 004
                                            else:
                                                IssueLabel = nzbtitle_exploded[
                                                    len(nzbtitle_exploded) - 3]
                                                if IssueLabel.lower() in [
                                                        "issue", "no", "vol"
                                                ]:
                                                    # issue nn, YYYY
                                                    regexD_issue = nzbtitle_exploded[
                                                        len(nzbtitle_exploded)
                                                        - 2]
                                                    regexD_issue = regexD_issue.strip(
                                                        ',')
                                                    if regexD_issue.isdigit():
                                                        newdatish = str(
                                                            int(regexD_issue)
                                                        )  # 4 == 04 == 004
                                                    else:
                                                        raise ValueError
                                                    regexD_year = nzbtitle_exploded[
                                                        len(nzbtitle_exploded)
                                                        - 1]
                                                    if regexD_year.isdigit():
                                                        if int(
                                                                regexD_year
                                                        ) < int(datetime.date.
                                                                today().year):
                                                            newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                        except Exception:
                                            logger.debug(
                                                'Magazine %s not in proper date format.'
                                                % nzbtitle_formatted)
                                            bad_date = bad_date + 1
                                            # allow issues with good name but bad date to be included
                                            # so user can manually select them, incl those with issue numbers
                                            newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                            # continue
                        else:
                            logger.debug(
                                'Magazine [%s] does not match the search term [%s].'
                                % (nzbtitle_formatted, bookid))
                            bad_regex = bad_regex + 1
                            continue

                        #  wanted issues go into wanted table marked "Wanted"
                        #  the rest into pastissues table marked "Skipped"
                        insert_table = "pastissues"
                        insert_status = "Skipped"

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            # or for magazines with only an issue number, use zero

                            if '-' in str(newdatish):
                                start_time = time.time()
                                start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                                control_date = time.strftime(
                                    "%Y-%m-%d", time.localtime(start_time))
                            else:
                                control_date = 0

                        if '-' in str(control_date) and '-' in str(newdatish):
                            # only grab a copy if it's newer than the most recent we have,
                            # or newer than a month ago if we have none
                            comp_date = datecompare(newdatish, control_date)
                        elif '-' not in str(control_date) and '-' not in str(
                                newdatish):
                            # for issue numbers, check if later than last one we have
                            comp_date = int(newdatish) - int(control_date)
                            newdatish = "%s" % newdatish
                            newdatish = newdatish.zfill(
                                4)  # pad so we sort correctly
                        else:
                            # invalid comparison of date and issue number
                            logger.debug(
                                'Magazine %s incorrect date or issue format.' %
                                nzbtitle_formatted)
                            bad_date = bad_date + 1
                            newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                            comp_date = 0

                        if comp_date > 0:
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug(
                                    'This issue of %s is new, downloading' %
                                    nzbtitle_formatted)
                                issues.append(issue)
                                insert_table = "wanted"
                                insert_status = "Wanted"
                                nzbdate = now()  # when we asked for it
                            else:
                                logger.debug(
                                    'This issue of %s is already flagged for download'
                                    % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug(
                                    'This issue of %s is old; skipping.' %
                                    nzbtitle_formatted)
                                old_date = old_date + 1

                        #  store only the _new_ matching results
                        #  Don't add a new entry if this issue has been found on an earlier search
                        #  and status has been user-set ( we only delete the "Skipped" ones )
                        #  In "wanted" table it might be already snatched/downloading/processing

                        mag_entry = myDB.select(
                            'SELECT * from %s WHERE NZBtitle="%s" and NZBprov="%s"'
                            % (insert_table, nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": insert_status,
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert(insert_table, newValueDict,
                                        controlValueDict)

                    else:
                        # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted)
                        bad_regex = bad_regex + 1

            logger.info(
                'Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download'
                % (total_nzbs, plural(total_nzbs), bookid, new_date, old_date,
                   bad_date, bad_regex, len(maglist)))

            for magazine in maglist:
                if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                    snatch = TORDownloadMethod(magazine['bookid'],
                                               magazine['nzbprov'],
                                               magazine['nzbtitle'],
                                               magazine['nzburl'])
                else:
                    snatch = NZBDownloadMethod(magazine['bookid'],
                                               magazine['nzbprov'],
                                               magazine['nzbtitle'],
                                               magazine['nzburl'])
                if snatch:
                    logger.info('Downloading %s from %s' %
                                (magazine['nzbtitle'], magazine["nzbprov"]))
                    notify_snatch("%s from %s at %s" % (unaccented(
                        magazine['nzbtitle']), magazine["nzbprov"], now()))
                    scheduleJob(action='Start', target='processDir')
            maglist = []

    if reset:
        scheduleJob(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
Exemplo n.º 30
0
def findBestResult(resultlist, book, searchtype, source):
    """ resultlist: collated results from search providers
        book:       the book we want to find
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match, or None if no match
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                    ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                    '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                    ':': '', '!': '', '-': ' ', '\s\s': ' '}

        dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
               ',': '', '*': '', ':': '.', ';': '', '\'': ''}

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'

        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ", resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=?', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith('http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url))

            if not rejected:
                for word in reject_list:
                    if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \
                            and word not in getList(title.lower()):
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" % (resultTitle, word))
                        break

            size_temp = check_int(res[prefix + 'size'], 1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if not rejected:
                bookid = book['bookid']
                # newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
                # newTitle = resultTitle + ' LL.(' + book['bookid'] + ')'

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res['tor_type']  # torrent, magnet, nzb(from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": resultTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Skipped"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())
                words = [x for x in wordlist if x not in getList(author.lower())]
                words = [x for x in words if x not in getList(title.lower())]
                typelist = ''

                if newValueDict['AuxInfo'] == 'eBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                elif newValueDict['AuxInfo'] == 'AudioBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                score -= len(words)
                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf  add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    typelist = list(reversed(typelist))
                    for item in booktypes:
                        for i in [i for i, x in enumerate(typelist) if x == item]:
                            score += i + 1

                matches.append([score, newValueDict, controlValueDict, res['priority']])

        if matches:
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            # controlValueDict = highest[2]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info('Nearest match (%s%%): %s using %s search for %s %s' %
                            (score, newValueDict['NZBtitle'], searchtype, book['authorName'], book['bookName']))
            else:
                logger.info('Best match (%s%%): %s using %s search, %s priority %s' %
                            (score, newValueDict['NZBtitle'], searchtype, newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" % (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())
Exemplo n.º 31
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for tor in resultlist:
        torTitle = unaccented_str(tor['tor_title'])
        torTitle = replace_all(torTitle, dictrepl).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        tor_size_temp = check_int(tor_size_temp, 1000)
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(author))
            words -= len(getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' %
                        (score, nzb_Title, searchtype, author, title))
            return False

        logger.info(u'Best TOR match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                   newValueDict["BookID"])
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it, not us
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if newValueDict["NZBprov"] == 'libgen':
                # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                              newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title)
            else:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s from %s at %s" %
                              (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False
Exemplo n.º 32
0
def search_nzb_book(books=None, mags=None):
    if not (lazylibrarian.USE_NZB):
        return
    # rename this thread
    threading.currentThread().name = "SEARCHNZBBOOKS"
    myDB = database.DBConnection()
    searchlist = []
    searchlist1 = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink("%s/%s" % (".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        if books != False:
            for book in books:
                searchbook = myDB.select(
                    'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                    [book["bookid"]],
                )
                for terms in searchbook:
                    searchbooks.append(terms)

    for searchbook in searchbooks:
        bookid = searchbook[0]
        author = searchbook[1]
        book = searchbook[2]

        dic = {
            "...": "",
            ".": " ",
            " & ": " ",
            " = ": " ",
            "?": "",
            "$": "s",
            " + ": " ",
            '"': "",
            ",": "",
            "*": "",
            ":": "",
            ";": "",
        }
        dicSearchFormatting = {".": " +", " + ": " "}

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        book = formatter.latinToAscii(formatter.replace_all(book, dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + " " + book  # + ' ' + lazylibrarian.EBOOK_TYPE
        searchterm = re.sub("[\.\-\/]", " ", searchterm).encode("utf-8")
        searchterm = re.sub(r"\(.*?\)", "", searchterm).encode("utf-8")
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        searchlist.append(
            {"bookid": bookid, "bookName": searchbook[2], "authorName": searchbook[1], "searchterm": searchterm.strip()}
        )

    if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        logger.info("No download method is set, use SABnzbd or blackhole")

    # TODO - Move the newznab test to providers.py
    if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2 and not lazylibrarian.USENETCRAWLER:
        logger.info("No providers are set. try use NEWZNAB.")

    counter = 0
    for book in searchlist:
        # print book.keys()
        resultlist = providers.IterateOverNewzNabSites(book, "book")

        # if you can't find teh book specifically, you might find under general search
        if not resultlist:
            logger.info("Searching for type book failed to find any books...moving to general search")
            resultlist = providers.IterateOverNewzNabSites(book, "general")

        if not resultlist:
            logger.debug("Adding book %s to queue." % book["searchterm"])

        else:
            dictrepl = {
                "...": "",
                ".": " ",
                " & ": " ",
                " = ": " ",
                "?": "",
                "$": "s",
                " + ": " ",
                '"': "",
                ",": "",
                "*": "",
                "(": "",
                ")": "",
                "[": "",
                "]": "",
                "#": "",
                "0": "",
                "1": "",
                "2": "",
                "3": "",
                "4": "",
                "5": "",
                "6": "",
                "7": "",
                "8": "",
                "9": "",
                "'": "",
                ":": "",
                "!": "",
                "-": "",
                "\s\s": " ",
                " the ": " ",
                " a ": " ",
                " and ": " ",
                " to ": " ",
                " of ": " ",
                " for ": " ",
                " my ": " ",
                " in ": " ",
                " at ": " ",
                " with ": " ",
            }
            logger.debug(u"searchterm %s" % book["searchterm"])
            addedCounter = 0

            for nzb in resultlist:
                nzbTitle = formatter.latinToAscii(formatter.replace_all(str(nzb["nzbtitle"]).lower(), dictrepl)).strip()
                nzbTitle = re.sub(r"\s\s+", " ", nzbTitle)  # remove extra whitespace
                logger.debug(u"nzbName %s" % nzbTitle)

                match_ratio = int(lazylibrarian.MATCH_RATIO)
                nzbTitle_match = fuzz.token_sort_ratio(book["searchterm"].lower(), nzbTitle)
                logger.debug("NZB Title Match %: " + str(nzbTitle_match))

                if nzbTitle_match > match_ratio:
                    logger.info(u"Found NZB: %s" % nzb["nzbtitle"])
                    addedCounter = addedCounter + 1
                    bookid = book["bookid"]
                    nzbTitle = (book["authorName"] + " - " + book["bookName"] + " LL.(" + book["bookid"] + ")").strip()
                    nzburl = nzb["nzburl"]
                    nzbprov = nzb["nzbprov"]
                    nzbdate_temp = nzb["nzbdate"]
                    nzbsize_temp = nzb["nzbsize"]  # Need to cater for when this is NONE (Issue 35)
                    if nzbsize_temp is None:
                        nzbsize_temp = 1000
                    nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + " MB"
                    nzbdate = formatter.nzbdate2format(nzbdate_temp)

                    controlValueDict = {"NZBurl": nzburl}
                    newValueDict = {
                        "NZBprov": nzbprov,
                        "BookID": bookid,
                        "NZBdate": nzbdate,
                        "NZBsize": nzbsize,
                        "NZBtitle": nzbTitle,
                        "Status": "Skipped",
                    }
                    myDB.upsert("wanted", newValueDict, controlValueDict)

                    snatchedbooks = myDB.action(
                        'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]
                    ).fetchone()
                    if not snatchedbooks:
                        snatch = DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                        notifiers.notify_snatch(nzbTitle + " at " + formatter.now())
                    break
            if addedCounter == 0:
                logger.info(
                    "No nzb's found for "
                    + (book["authorName"] + " " + book["bookName"]).strip()
                    + ". Adding book to queue."
                )
        counter = counter + 1

    if not books or books == False:
        snatched = searchmag.searchmagazines(mags)
        for items in snatched:
            snatch = DownloadMethod(items["bookid"], items["nzbprov"], items["nzbtitle"], items["nzburl"])
            notifiers.notify_snatch(items["nzbtitle"] + " at " + formatter.now())
    logger.info("Search for Wanted items complete")
Exemplo n.º 33
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for.  Mark some magazines as active.')

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                                item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split(' ')

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        "Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10,
                                                                                12, 13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' %
                                                   insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?",
                                                       (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='PostProcessor')
                    else:
                        myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                    (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Exemplo n.º 34
0
                    controlValueDict = {"NZBurl": tor_url}
                    newValueDict = {
                        "NZBprov": tor_prov,
                        "BookID": bookid,
                        "NZBsize": tor_size,
                        "NZBtitle": tor_Title,
                        "NZBmode": "torrent",
                        "Status": "Skipped"
                    }
                    myDB.upsert("wanted", newValueDict, controlValueDict)

                    snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % bookid).fetchone()
                    if not snatchedbooks:
                        TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url)
                        notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now())
                    break
            if addedCounter == 0:
                logger.debug("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() + ". Adding book to queue.")
        counter = counter + 1
    logger.info("TORSearch for Wanted items complete")


def TORDownloadMethod(bookid=None, tor_prov=None, tor_title=None, tor_url=None):
    myDB = database.DBConnection()
    download = False
    full_url = tor_url # keep the url as stored in "wanted" table
    if (lazylibrarian.USE_TOR) and (lazylibrarian.TOR_DOWNLOADER_DELUGE or lazylibrarian.TOR_DOWNLOADER_UTORRENT
                                    or lazylibrarian.TOR_DOWNLOADER_BLACKHOLE or lazylibrarian.TOR_DOWNLOADER_TRANSMISSION):

        if tor_url.startswith('magnet'):
Exemplo n.º 35
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                     )
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"',
                    (magazine['bookid'], ))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace(
                        "'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle,
                                                             dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[
                                    0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(
                                nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(
                                ' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        u"Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(u"Magazine matched " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos -
                                                                        1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(
                                                nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (
                                            year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(
                                            nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(
                                                nzbtitle_exploded[
                                                    pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(
                                            nzbtitle_exploded):
                                        month = check_int(
                                            nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(
                                                    nzbtitle_exploded):
                                                day = check_int(
                                                    nzbtitle_exploded[pos + 2],
                                                    1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in [
                                            "issue", "no", "nr", "vol"
                                    ]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(
                                                nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(
                                                    issue)  # 4 == 04 == 004
                                                if pos + 2 < len(
                                                        nzbtitle_exploded):
                                                    year = check_year(
                                                        nzbtitle_exploded[pos +
                                                                          2])
                                                    if year and year < int(
                                                            datetime.date.
                                                            today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(
                                            nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(
                                                issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today(
                                            ).year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(
                                            issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(
                                                datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match('\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(
                                        lazylibrarian.CONFIG['MAG_AGE']
                                    ) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(
                                    newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(
                                    4)  # pad so we sort correctly
                            elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match('\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match('\d+-\d\d-\d\d',
                                            str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Exemplo n.º 36
0
def processDir(force=False, reset=False):
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched)))
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round differences in torrent filenames.
                    # Torrents aren't always returned with the name we searched for
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    matchtitle = book['NZBtitle']
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match >= 95:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here...
                            if formatter.is_valid_booktype(fname, booktype="book") \
                                or formatter.is_valid_booktype(fname, booktype="mag"):
                                dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                                if not os.path.exists(dirname):
                                    try:
                                        os.makedirs(dirname)
                                    except OSError as why:
                                        logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                if os.path.exists(dirname):
                                    try:
                                        shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                        fname = os.path.splitext(fname)[0]
                                    except Exception as why:
                                        logger.debug("Failed to move file %s to %s, %s" % 
                                            (fname, dirname, str(why)))                                         
                        if os.path.isdir(os.path.join(processpath, fname)): 
                            pp_path = os.path.join(processpath, fname)
                            logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                            found = True
                            break
                    else:
                        logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
                else:
                    logger.debug('Skipping %s' % fname)
            if found:
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = common.remove_accents(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate',
                            book['AuxInfo']).replace('$Title',
                                                     mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = common.remove_accents(global_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #    os.chmod(dest_path, 0777)
            # except Exception, e:
            #    logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except:
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset:
        common.schedule_job(action='Restart', target='processDir')
Exemplo n.º 37
0
def downloadResult(match, book):
    """ match:  best result from search providers
        book:   book we are downloading
        return: True if already snatched, False if failed to snatch, >True if we snatched it
    """
    try:
        myDB = database.DBConnection()

        resultTitle = match[1]
        newValueDict = match[2]
        controlValueDict = match[3]

        if book['library'] == 'AudioBook':
            auxinfo = 'AudioBook'
        else:  # elif book['library'] == 'eBook':
            auxinfo = 'eBook'

        if auxinfo == 'eBook':
            snatchedbooks = myDB.match(
                'SELECT BookID from books WHERE BookID=? and Status="Snatched"',
                (newValueDict["BookID"], ))
        else:
            snatchedbooks = myDB.match(
                'SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"',
                (newValueDict["BookID"], ))

        if snatchedbooks:
            logger.debug('%s %s already marked snatched' %
                         (book['authorName'], book['bookName']))
            return True  # someone else already found it
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if 'libgen' in newValueDict[
                    "NZBprov"]:  # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"],
                                              newValueDict["NZBtitle"],
                                              controlValueDict["NZBurl"],
                                              resultTitle, auxinfo)
            elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]:
                snatch = TORDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"], auxinfo)
            elif newValueDict['NZBmode'] == 'nzb':
                snatch = NZBDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"], auxinfo)
            else:
                logger.error(
                    'Unhandled NZBmode [%s] for %s' %
                    (newValueDict['NZBmode'], controlValueDict["NZBurl"]))
                snatch = False

            if snatch:
                logger.info('Downloading %s %s from %s' %
                            (auxinfo, newValueDict["NZBtitle"],
                             newValueDict["NZBprov"]))
                notify_snatch("%s %s from %s at %s" %
                              (auxinfo, newValueDict["NZBtitle"],
                               newValueDict["NZBprov"], now()))
                custom_notify_snatch(newValueDict["BookID"])
                # at this point we could add NZBprov to the blocklist with a short timeout, a second or two?
                # This would implement a round-robin search system. Blocklist with an incremental counter.
                # If number of active providers == number blocklisted, so no unblocked providers are left,
                # either sleep for a while, or unblock the one with the lowest counter.
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
        return False
    except Exception:
        logger.error('Unhandled exception in downloadResult: %s' %
                     traceback.format_exc())
Exemplo n.º 38
0
            f.close()
            logger.info('NZB file saved to: ' + nzbpath)
            download = True
        except Exception, e:
            logger.error('%s not writable, NZB not saved. Error: %s' %
                         (nzbpath, e))
            download = False

    if download:
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID=?',
                    [bookid])

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "NZBprov": nzbprov,
            "NZBdate": formatter.now(),
            "NZBurl": nzburl,
            "NZBtitle": nzbtitle,
            "Status": "Snatched"
        }

        myDB.upsert("wanted", newValueDict, controlValueDict)

    else:
        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "NZBprov": nzbprov,
            "NZBdate": formatter.now(),
            "NZBurl": nzburl,
            "NZBtitle": nzbtitle,
            "Status": "Failed"
Exemplo n.º 39
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    matches = []
    for tor in resultlist:
        torTitle = formatter.latinToAscii(formatter.replace_all(str(tor['tor_title']), dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic))
        title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic))

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))

        rejected = False

        for word in reject_list:
            if word in torTitle.lower() and not word in author.lower() and not word in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                break

        if (torAuthor_match >= match_ratio and torBook_match >= match_ratio and not rejected):
            #logger.debug(u'Found Torrent: %s using %s search' % (tor['tor_title'], searchtype))
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']

            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'
            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match)/2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(torTitle))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match TOR (%s%%): %s using %s search' % 
            (score, nzb_Title, searchtype))
                  
        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()
        if not snatchedbooks:
            snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No torrent's found for " + (book["authorName"] + ' ' +
                 book['bookName']).strip() + " using searchtype " + searchtype)
    return False
Exemplo n.º 40
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': ' ',
        '*': '',
        '(': '',
        ')': '',
        '[': '',
        ']': '',
        '#': '',
        '0': '',
        '1': '',
        '2': '',
        '3': '',
        '4': '',
        '5': '',
        '6': '',
        '7': '',
        '8': '',
        '9': '',
        '\'': '',
        ':': '',
        '!': '',
        '-': ' ',
        '\s\s': ' '
    }
    # ' the ': ' ', ' a ': ' ', ' and ': ' ',
    # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': '',
        '*': '',
        ':': '',
        ';': '',
        '\'': ''
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for nzb in resultlist:
        nzb_Title = unaccented_str(replace_all(nzb['nzbtitle'],
                                               dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" %
                     (nzbAuthor_match, nzbBook_match, nzb_Title))
        nzburl = nzb['nzburl']

        rejected = False

        already_failed = myDB.action(
            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
            nzburl).fetchone()
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" %
                         (nzb_Title, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in nzb_Title.lower(
                ) and not word in author.lower() and not word in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" %
                                 (nzb_Title, word))
                    break

        nzbsize_temp = nzb[
            'nzbsize']  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and nzbsize > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % nzb_Title)

        if not rejected:
            if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio:
                bookid = book['bookid']
                nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] +
                            ')').strip()
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']
                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": nzbsize,
                    "NZBtitle": nzbTitle,
                    "NZBmode": nzbmode,
                    "Status": "Skipped"
                }

                score = (nzbBook_match +
                         nzbAuthor_match) / 2  # as a percentage
                # lose a point for each extra word in the title so we get the closest match
                words = len(getList(nzb_Title))
                words -= len(getList(author))
                words -= len(getList(title))
                score -= abs(words)
                matches.append(
                    [score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match NZB (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.action(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
            newValueDict["BookID"]).fetchone()
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it
        else:
            logger.debug('%s adding to wanted' % nzb_Title)
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            else:
                snatch = NZBDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' +
                                        now())
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No nzb's found for [%s] using searchtype %s" %
                     (book["searchterm"], searchtype))
    return False
Exemplo n.º 41
0
def processDir():
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    # TODO - try exception on os.listdir - it throws debug level
    # exception if dir doesn't exist - bloody hard to catch
    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        return False
        
    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
    elif downloads is None:
        logger.info('No downloads are found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            if book['NZBtitle'] in downloads:
                pp_path = os.path.join(processpath, book['NZBtitle'])
                logger.debug('Found book/mag folder %s.' % pp_path)

                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    
                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate'] # keep this for processing issues arriving out of order
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', book['BookID'])
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace('$Title', book['BookID'])
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue                    
            else:
                logger.debug("Snatched NZB %s is not in download directory" % (book['NZBtitle']))
                continue

            dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
            dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
            try:
                os.chmod(dest_path, 0777)
            except Exception, e:
                logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, book['BookID'])

            if processBook:

                ppcount = ppcount + 1

                # update nzbs
                controlValueDict = {"NZBurl": book['NZBurl']}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.today()} # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)
                    
                if bookname is not None: # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else: 
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']: # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:    
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path)
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(), "IssueFile": dest_file}
                    myDB.upsert("issues", newValueDict, controlValueDict)
                                    
                logger.info('Successfully processed: %s' % global_name)
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s' % pp_path)
        #
        # TODO Seems to be duplication here. Can we just scan once for snatched books 
        # instead of scan for snatched and then scan for directories with "LL.(bookID)" in?
        # Should there be any directories with "LL.(bookID)" that aren't in snatched?
        # Maybe this was put in for manually downloaded books?
        #  
        downloads = os.listdir(processpath) # check in case we processed/deleted some above      
        for directory in downloads:
            if "LL.(" in directory:
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath) 

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1
        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
Exemplo n.º 42
0
def processDir(reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "POSTPROCESS"
        processpath = lazylibrarian.DIRECTORY('Download')

        logger.debug('Checking [%s] for files to post process' % processpath)

        try:
            downloads = os.listdir(processpath)
        except OSError as why:
            logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
            return

        myDB = database.DBConnection()
        snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

        if len(snatched) == 0:
            logger.info('Nothing marked as snatched.')
            scheduleJob(action='Stop', target='processDir')
            return

        if len(downloads) == 0:
            logger.info('No downloads are found. Nothing to process yet.')
            return

        logger.info("Checking %s download%s for %s snatched file%s" %
                    (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
        ppcount = 0
        for book in snatched:
            # if torrent, see if we can get current status from the downloader as the name
            # may have been changed once magnet resolved, or download started or completed
            # depending on torrent downloader. Usenet doesn't change the name. We like usenet.
            torrentname = ''
            try:
                logger.debug("%s was sent to %s" % (book['NZBtitle'], book['Source']))
                if book['Source'] == 'TRANSMISSION':
                    torrentname = transmission.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'UTORRENT':
                    torrentname = utorrent.nameTorrent(book['DownloadID'])
                elif book['Source'] == 'RTORRENT':
                    torrentname = rtorrent.getName(book['DownloadID'])
                elif book['Source'] == 'QBITTORRENT':
                    torrentname = qbittorrent.getName(book['DownloadID'])
                elif book['Source'] == 'SYNOLOGY_TOR':
                    torrentname = synology.getName(book['DownloadID'])
                elif book['Source'] == 'DELUGEWEBUI':
                    torrentname = deluge.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'DELUGERPC':
                    client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                             int(lazylibrarian.DELUGE_PORT),
                                             lazylibrarian.DELUGE_USER,
                                             lazylibrarian.DELUGE_PASS)
                    try:
                        client.connect()
                        result = client.call('core.get_torrent_status', book['DownloadID'], {})
                        #    for item in result:
                        #        logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                        if 'name' in result:
                            torrentname = unaccented_str(result['name'])
                    except Exception as e:
                        logger.debug('DelugeRPC failed %s' % str(e))
            except Exception as e:
                logger.debug("Failed to get updated torrent name from %s for %s: %s" %
                            (book['Source'], book['DownloadID'], str(e)))

            matchtitle = unaccented_str(book['NZBtitle'])
            if torrentname and torrentname != matchtitle:
                logger.debug("%s Changing [%s] to [%s]" % (book['Source'], matchtitle, torrentname))
                myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl = "%s"' % (torrentname, book['NZBurl']))
                matchtitle = torrentname

            # here we could also check percentage downloaded or eta or status?
            # If downloader says it hasn't completed, no need to look for it.

            matches = []
            logger.info('Looking for %s in %s' % (matchtitle, processpath))
            for fname in downloads:
                # skip if failed before or incomplete torrents, or incomplete btsync
                extn = os.path.splitext(fname)[1]
                if extn not in ['.fail', '.part', '.bts', '.!ut']:
                    # This is to get round differences in torrent filenames.
                    # Usenet is ok, but Torrents aren't always returned with the name we searched for
                    # We ask the torrent downloader for the torrent name, but don't always get an answer
                    # so we try to do a "best match" on the name, there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]

                    match = 0
                    if matchtitle:
                        if ' LL.(' in matchtitle:
                            matchtitle = matchtitle.split(' LL.(')[0]
                        match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match and match >= lazylibrarian.DLOAD_RATIO:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here. Book/mag file in download root.
                            # move the file into it's own subdirectory so we don't move/delete things that aren't ours
                            logger.debug('filename [%s] is a file' % os.path.join(processpath, fname))
                            if is_valid_booktype(fname, booktype="book") \
                                    or is_valid_booktype(fname, booktype="mag"):
                                logger.debug('filename [%s] is a valid book/mag' % os.path.join(processpath, fname))
                                if bts_file(processpath):
                                    logger.debug("Skipping %s, found a .bts file" % processpath)
                                else:
                                    fname = os.path.splitext(fname)[0]
                                    dirname = os.path.join(processpath, fname)
                                    if not os.path.exists(dirname):
                                        try:
                                            os.makedirs(dirname)
                                            setperm(dirname)
                                        except OSError as why:
                                            logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                    if os.path.exists(dirname):
                                        # move the book and any related files too
                                        # ie other book formats, or opf, jpg with same title
                                        # can't move metadata.opf or cover.jpg or similar
                                        # as can't be sure they are ours
                                        # not sure if we need a new listdir here, or whether we can use the old one
                                        list_dir = os.listdir(processpath)
                                        for ourfile in list_dir:
                                            if ourfile.startswith(fname):
                                                if is_valid_booktype(ourfile, booktype="book") \
                                                    or is_valid_booktype(ourfile, booktype="mag") \
                                                        or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                                    try:
                                                        if lazylibrarian.DESTINATION_COPY:
                                                            shutil.copyfile(os.path.join(processpath, ourfile),
                                                                            os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                        else:
                                                            shutil.move(os.path.join(processpath, ourfile),
                                                                        os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                    except Exception as why:
                                                        logger.debug("Failed to copy/move file %s to %s, %s" %
                                                                    (ourfile, dirname, str(why)))

                        pp_path = os.path.join(processpath, fname)
                        if os.path.isdir(pp_path):
                            logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, matchtitle))
                            if not os.listdir(pp_path):
                                logger.debug("Skipping %s, folder is empty" % pp_path)
                            elif bts_file(pp_path):
                                logger.debug("Skipping %s, found a .bts file" % pp_path)
                            else:
                                matches.append([match, pp_path, book])
                    else:
                        pp_path = os.path.join(processpath, fname)
                        matches.append([match, pp_path, book])  # so we can report closest match
                else:
                    logger.debug('Skipping %s' % fname)

            match = 0
            if matches:
                highest = max(matches, key=lambda x: x[0])
                match = highest[0]
                pp_path = highest[1]
                book = highest[2]
            if match and match >= lazylibrarian.DLOAD_RATIO:
                logger.debug(u'Found match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))
                data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:  # it's a book
                    logger.debug(u'Processing book %s' % book['BookID'])
                    authorname = data['AuthorName']
                    bookname = data['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')
                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = unaccented(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = unaccented_str(replace_all(dest_path, dic))
                    dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:  # it's a magazine
                        logger.debug(u'Processing magazine %s' % book['BookID'])
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = unaccented_str(replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)

                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(processpath, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = unaccented(global_name)
                    else:  # not recognised
                        logger.debug('Nothing in database matching "%s"' % book['BookID'])
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                if match:
                    logger.debug(u'Closest match (%s%%): %s' % (match, pp_path))
                    #for match in matches:
                    #    logger.info('Match: %s%%  %s' % (match[0], match[1]))
                continue

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"BookID": book['BookID'], "NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname:
                    # it's a book, if None it's a magazine
                    if len(lazylibrarian.IMP_CALIBREDB):
                        logger.debug('Calibre should have created the extras for us')
                    else:
                        processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue:
                        if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                            older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                        else:
                            older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                    else:
                        older = False
                    if older:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": today(),
                                    "IssueFile": dest_file,
                                    "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    create_cover(dest_file)

                # calibre or ll copied/moved the files we want, now delete source files

                to_delete = True
                if book['NZBmode'] in ['torrent', 'magnet']:
                    # Only delete torrents if we don't want to keep seeding
                    if lazylibrarian.KEEP_SEEDING:
                        logger.warn('%s is seeding %s %s' % (book['Source'], book['NZBmode'], book['NZBtitle']))
                        to_delete = False
                    else:
                        # ask downloader to delete the torrent, but not the files
                        # we may delete them later, depending on other settings
                        if book['DownloadID'] != "unknown":
                            logger.debug('Removing %s from %s' % (book['NZBtitle'], book['Source'].lower()))
                            delete_task(book['Source'], book['DownloadID'], False)
                        else:
                            logger.warn("Unable to remove %s from %s, no DownloadID" %
                                (book['NZBtitle'], book['Source'].lower()))

                if to_delete:
                    # only delete the files if not in download root dir and if DESTINATION_COPY not set
                    if not lazylibrarian.DESTINATION_COPY and (pp_path != processpath):
                        if os.path.isdir(pp_path):
                            # calibre might have already deleted it?
                            try:
                                shutil.rmtree(pp_path)
                            except Exception as why:
                                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now()))
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Failed", "NZBDate": now()}
                myDB.upsert("wanted", newValueDict, controlValueDict)
                # if it's a book, reset status so we try for a different version
                # if it's a magazine, user can select a different one from pastissues table
                if bookname:
                    myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])

                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as e:
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            dname, extn = os.path.splitext(directory)
            if "LL.(" in dname and extn not in ['.fail', '.part', '.bts', '.!ut']:
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " found in download directory")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount == 0:
            logger.info('No snatched books/mags have been found')
        else:
            logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

        # Now check for any that are still marked snatched...
        if lazylibrarian.TASK_AGE:
            snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')
            if len(snatched) > 0:
                for snatch in snatched:
                    # FUTURE: we could check percentage downloaded or eta?
                    # if percentage is increasing, it's just slow
                    try:
                        when_snatched = time.strptime(snatch['NZBdate'], '%Y-%m-%d %H:%M:%S')
                        when_snatched = time.mktime(when_snatched)
                        diff = time.time() - when_snatched  # time difference in seconds
                    except:
                        diff = 0
                    hours = int(diff / 3600)
                    if hours >= lazylibrarian.TASK_AGE:
                        logger.warn('%s was sent to %s %s hours ago, deleting failed task' %
                                    (snatch['NZBtitle'], snatch['Source'].lower(), hours))
                        # change status to "Failed", and ask downloader to delete task and files
                        if snatch['BookID'] != 'unknown':
                            myDB.action('UPDATE wanted SET Status="Failed" WHERE BookID="%s"' % snatch['BookID'])
                            myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % snatch['BookID'])
                            delete_task(snatch['Source'], snatch['DownloadID'], True)
        if reset:
            scheduleJob(action='Restart', target='processDir')

    except Exception as e:
        logger.error('Unhandled exception in processDir: %s' % traceback.format_exc())
Exemplo n.º 43
0
def processDir():
	# rename this thread
	threading.currentThread().name = "POSTPROCESS"

	processpath = lazylibrarian.DOWNLOAD_DIR
	
	logger.debug(' Checking [%s] for files to post process' % processpath)
	
	#TODO - try exception on os.listdir - it throws debug level 
	#exception if dir doesn't exist - bloody hard to catch
	try :
		downloads = os.listdir(processpath)
	except OSError:
			logger.error('Could not access [%s] directory ' % processpath)
			
	myDB = database.DBConnection()
	snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

	if snatched is None:
		logger.info('No books are snatched. Nothing to process.')
	elif downloads is None:
		logger.info('No downloads are found. Nothing to process.')
	else:
		ppcount=0
		for book in snatched:
			if book['NZBtitle'] in downloads:
				pp_path = os.path.join(processpath, book['NZBtitle'])
				logger.info('Found folder %s.' % pp_path)

				data = myDB.select("SELECT * from books WHERE BookID='%s'" % book['BookID'])
				if data:
					for metadata in data:
						authorname = metadata['AuthorName']
						authorimg = metadata['AuthorLink']
						bookname = metadata['BookName']
						bookdesc = metadata['BookDesc']
						bookisbn = metadata['BookIsbn']
						bookrate = metadata['BookRate']
						bookimg = metadata['BookImg']
						bookpage = metadata['BookPages']
						booklink = metadata['BookLink']
						bookdate = metadata['BookDate']
						booklang = metadata['BookLang']
						bookpub = metadata['BookPub']

					#Default destination path, should be allowed change per config file.
					dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
					#dest_path = authorname+'/'+bookname
					global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
					#global_name = bookname + ' - ' + authorname
				else:
					data = myDB.select("SELECT * from magazines WHERE Title='%s'" % book['BookID'])
					for metadata in data:
						title = metadata['Title']
					#AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
					#files are downloading, there will be an error in post-processing, trying to go to the 
					#same directory.
					dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', title)
					#dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
					authorname = None
					bookname = None
					global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace('$Title', title)
					#global_name = book['AuxInfo']+' - '+title
			else:
				logger.info("Snatched NZB %s is not in download directory" % (book['NZBtitle']))
				continue

			try:
				os.chmod(os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING), 0777);
			except Exception, e:
				logger.debug("Could not chmod post-process directory");

			dic = {'<':'', '>':'', '...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', ':':'', ';':'', '\'':''}
			dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
			dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

			processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

			if processBook:

				ppcount = ppcount+1
				
				# If you use auto add by Calibre you need the book in a single directory, not nested
				#So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder.
				if lazylibrarian.IMP_AUTOADD:
					processAutoAdd(dest_path)

				#update nzbs
				controlValueDict = {"NZBurl": book['NZBurl']}
				newValueDict = {"Status": "Processed"}
				myDB.upsert("wanted", newValueDict, controlValueDict)

				# try image
				if bookname is not None:
					processIMG(dest_path, bookimg, global_name)

					# try metadata
					processOPF(dest_path, authorname, bookname, bookisbn, book['BookID'], bookpub, bookdate, bookdesc, booklang, global_name)

					#update books
					controlValueDict = {"BookID": book['BookID']}
					newValueDict = {"Status": "Open"}
					myDB.upsert("books", newValueDict, controlValueDict)

					#update authors
					query = 'SELECT COUNT(*) FROM books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % authorname
					countbooks = myDB.action(query).fetchone()
					havebooks = int(countbooks[0])
					controlValueDict = {"AuthorName": authorname}
					newValueDict = {"HaveBooks": havebooks}
					author_query = 'SELECT * FROM authors WHERE AuthorName="%s"' % authorname
					countauthor = myDB.action(author_query).fetchone()
					if countauthor:
						myDB.upsert("authors", newValueDict, controlValueDict)

				else:
					#update mags
					controlValueDict = {"Title": book['BookID']}
					newValueDict = {"IssueStatus": "Open"}
					myDB.upsert("magazines", newValueDict, controlValueDict)

				logger.info('Successfully processed: %s' % (global_name))
				notifiers.notify_download(global_name+' at '+formatter.now())
			else:
				logger.error('Postprocessing for %s has failed. Warning - AutoAdd will be repeated' % global_name)
		if ppcount:
			logger.debug('%s books are downloaded and processed.' % ppcount)
		else:
			logger.debug('No snatched books have been found')
Exemplo n.º 44
0
def processDir(reset=False):

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
        (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # handle single file downloads here...
                        if is_valid_booktype(fname, booktype="book") \
                            or is_valid_booktype(fname, booktype="mag"):
                            dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                            if os.path.exists(dirname):
                                try:
                                    shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                    fname = os.path.splitext(fname)[0]
                                except Exception as why:
                                    logger.debug("Failed to move file %s to %s, %s" %
                                        (fname, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.info(u'Best match (%s%%): %s for %s' %
                    (match, pp_path, book['NZBtitle']))

            data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
            if data:
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                       ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                    lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                if data:
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                            lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                        '$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug('Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(book['AuxInfo']) # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                else:
                    newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                    "IssueStatus": "Open"}
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                newValueDict = {"IssueAcquired": today(),
                                "IssueFile": dest_file,
                                "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s at %s" % (global_name, now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)

    downloads = os.listdir(processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
Exemplo n.º 45
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '.', ';': ''}

    match_ratio = int(lazylibrarian.CONFIG['MATCH_RATIO'])
    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for tor in resultlist:
        torTitle = unaccented_str(tor['tor_title'])
        torTitle = replace_all(torTitle, dictrepl).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        tor_size_temp = check_int(tor_size_temp, 1000)
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
        if not rejected:
            if minsize and tor_size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(author))
            words -= len(getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' %
                        (score, nzb_Title, searchtype, author, title))
            return False

        logger.info(u'Best TOR match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match('SELECT BookID from books WHERE BookID="%s" and Status="Snatched"' %
                                   newValueDict["BookID"])
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it, not us
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if newValueDict["NZBprov"] == 'libgen':
                # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title)
            else:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s from %s at %s" %
                              (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                custom_notify_snatch(newValueDict["BookID"])
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False
Exemplo n.º 46
0
def search_tor_book(books=None, mags=None):
    if not(lazylibrarian.USE_TOR):
        return
    # rename this thread
    threading.currentThread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()
    searchlist = []
    searchlist1 = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink("%s/%s" % (".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        if books != False:
            for book in books:
                searchbook = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"', [book['bookid']])
                for terms in searchbook:
                    searchbooks.append(terms)

    for searchbook in searchbooks:
        bookid = searchbook[0]
        author = searchbook[1]
        book = searchbook[2]

        dic = {'...':'', '.':' ', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', ':':'', ';':''}
        dicSearchFormatting = {'.':' +', ' + ':' '}

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        book = formatter.latinToAscii(formatter.replace_all(book, dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + ' ' + book # + ' ' + lazylibrarian.EBOOK_TYPE 
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8')
        searchterm = re.sub(r"\s\s+" , " ", searchterm) # strip any double white space
        searchlist.append({"bookid": bookid, "bookName":searchbook[2], "authorName":searchbook[1], "searchterm": searchterm.strip()})
    
    if not lazylibrarian.KAT:
        logger.info('No download method is set, use SABnzbd or blackhole')


    counter = 0
    for book in searchlist: 
        #print book.keys()
        resultlist = providers.IterateOverTorrentSites(book,'book')

        #if you can't find teh book specifically, you might find under general search
        if not resultlist:
            logger.info("Searching for type book failed to find any books...moving to general search")
            resultlist = providers.IterateOverTorrentSites(book,'general')

        if not resultlist:
            logger.debug("No result found, Adding book %s to queue " % book['searchterm'])

        else:
            dictrepl = {'...':'', '.':' ', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '(':'', ')':'', '[':'', ']':'', '#':'', '0':'', '1':'', '2':'', '3':'', '4':'', '5':'', '6':'', '7':'', '8':'' , '9':'', '\'':'', ':':'', '!':'', '-':'', '\s\s':' ', ' the ':' ', ' a ':' ', ' and ':' ', ' to ':' ', ' of ':' ', ' for ':' ', ' my ':' ', ' in ':' ', ' at ':' ', ' with ':' ' }
            logger.debug(u'searchterm %s' % book['searchterm'])
            addedCounter = 0

            for tor in resultlist:
                tor_Title = formatter.latinToAscii(formatter.replace_all(str(tor['tor_title']).lower(), dictrepl)).strip()
                tor_Title = re.sub(r"\s\s+" , " ", tor_Title) #remove extra whitespace
                logger.debug(u'torName %s' % tor_Title)          

                match_ratio = int(lazylibrarian.MATCH_RATIO)
                tor_Title_match = fuzz.token_sort_ratio(book['searchterm'].lower(), tor_Title)
                logger.debug("Torrent Title Match %: " + str(tor_Title_match))
                
                if (tor_Title_match > match_ratio):
                    logger.info(u'Found Torrent: %s' % tor['tor_title'])
                    addedCounter = addedCounter + 1
                    bookid = book['bookid']
                    tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip()
                    tor_url = tor['tor_url']
                    tor_prov = tor['tor_prov']
                    
                    tor_size_temp = tor['tor_size']  #Need to cater for when this is NONE (Issue 35)
                    if tor_size_temp is None:
                        tor_size_temp = 1000
                    tor_size = str(round(float(tor_size_temp) / 1048576,2))+' MB'
                    
                    controlValueDict = {"NZBurl": tor_url}
                    newValueDict = {
                        "NZBprov": tor_prov,
                        "BookID": bookid,
                        "NZBsize": tor_size,
                        "NZBtitle": tor_Title,
                        "Status": "Skipped"
                    }
                    myDB.upsert("wanted", newValueDict, controlValueDict)

                    snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
                    if not snatchedbooks:
                        snatch = DownloadMethod(bookid, tor_prov, tor_Title, tor_url)
                        notifiers.notify_snatch(tor_Title+' at '+formatter.now()) 
                    break;
            if addedCounter == 0:
                logger.info("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() + ". Adding book to queue.")
        counter = counter + 1

   # if not books or books==False:
   #     snatched = searchmag.searchmagazines(mags)
   #     for items in snatched:
   #         snatch = DownloadMethod(items['bookid'], items['tor_prov'], items['tor_title'], items['tor_url'])
   #         notifiers.notify_snatch(items['tor_title']+' at '+formatter.now()) 
    logger.info("Search for Wanted items complete")
Exemplo n.º 47
0
def processDir(force=False, reset=False):
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round unicode differences in torrent filenames.
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode('utf-8')
                    else:
                        matchname = fname
                    if 'LL.(' in matchname:
                        matchname = matchname.split('LL.(')[0]
                    match = fuzz.token_set_ratio(matchname, book['NZBtitle'])
                    if match >= 95:
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                        found = True
                        break
            if found:
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #    os.chmod(dest_path, 0777)
            # except Exception, e:
            #    logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(), 
                                    "IssueFile": dest_file,
                                    "IssueID" : magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                   }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed 
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except:
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset == True:
        common.schedule_job(action='Restart', target='processDir')
Exemplo n.º 48
0
def findBestResult(resultlist, book, searchtype, source):
    """ resultlist: collated results from search providers
        book:       the book we want to find
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match, or None if no match
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        dictrepl = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': ' ',
            '*': '',
            '(': '',
            ')': '',
            '[': '',
            ']': '',
            '#': '',
            '0': '',
            '1': '',
            '2': '',
            '3': '',
            '4': '',
            '5': '',
            '6': '',
            '7': '',
            '8': '',
            '9': '',
            '\'': '',
            ':': '',
            '!': '',
            '-': ' ',
            '\s\s': ' '
        }

        dic = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': '',
            ':': '.',
            ';': '',
            '\'': ''
        }

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'

        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' %
                     (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(
                replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ",
                                 resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match,
                              resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                already_failed = myDB.match(
                    'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                    (url, ))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" %
                                 (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                already_failed = myDB.match(
                    'SELECT * from wanted WHERE NZBurl=?', (url, ))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" %
                                 (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith(
                    'http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" %
                             (resultTitle, url))

            if not rejected:
                for word in reject_list:
                    if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \
                            and word not in getList(title.lower()):
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" %
                                     (resultTitle, word))
                        break

            size_temp = check_int(
                res[prefix + 'size'],
                1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if not rejected:
                bookid = book['bookid']
                # newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
                # newTitle = resultTitle + ' LL.(' + book['bookid'] + ')'

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res[
                        'tor_type']  # torrent, magnet, nzb(from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": resultTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Skipped"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())
                words = [
                    x for x in wordlist if x not in getList(author.lower())
                ]
                words = [x for x in words if x not in getList(title.lower())]
                typelist = ''

                if newValueDict['AuxInfo'] == 'eBook':
                    words = [
                        x for x in words
                        if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                    ]
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                elif newValueDict['AuxInfo'] == 'AudioBook':
                    words = [
                        x for x in words if x not in getList(
                            lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                    ]
                    typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                score -= len(words)
                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf  add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    typelist = list(reversed(typelist))
                    for item in booktypes:
                        for i in [
                                i for i, x in enumerate(typelist) if x == item
                        ]:
                            score += i + 1

                matches.append(
                    [score, newValueDict, controlValueDict, res['priority']])

        if matches:
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            # controlValueDict = highest[2]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info(
                    'Nearest match (%s%%): %s using %s search for %s %s' %
                    (score, newValueDict['NZBtitle'], searchtype,
                     book['authorName'], book['bookName']))
            else:
                logger.info(
                    'Best match (%s%%): %s using %s search, %s priority %s' %
                    (score, newValueDict['NZBtitle'], searchtype,
                     newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" %
                         (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' %
                     traceback.format_exc())
Exemplo n.º 49
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': '', '\s\s': ' ', ' the ': ' ', ' a ': ' ', ' and ': ' ',
                ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
        
    for nzb in resultlist:
        nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzbTitle = re.sub(r"\s\s+", " ", nzbTitle)  # remove extra whitespace
        
        #nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle)
        #logger.debug(u"NZB Title sort Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
        if searchtype == 'book' or searchtype == 'shortbook':
            nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle)
            logger.debug(u"NZB token set Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
        elif searchtype == 'author':
            nzbTitle_match = fuzz.token_set_ratio(book['authorName'].encode('utf-8'), nzbTitle)
            logger.debug(u"NZB author Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
            if nzbTitle_match > match_ratio:
                nzbTitle_match = fuzz.token_set_ratio(book['bookName'].encode('utf-8'), nzbTitle)
                logger.debug(u"NZB book Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
        else:  # searchtype == 'general':
            nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle)
            logger.debug(u"NZB Title general Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
        
        if (nzbTitle_match > match_ratio):
            logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
            bookid = book['bookid']
            nzbTitle = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip()
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbdate_temp = nzb['nzbdate']
            nzbsize_temp = nzb['nzbsize']  # Need to cater for when this is NONE (Issue 35)
            if nzbsize_temp is None:
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb['nzbmode']
            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": nzbdate,
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "NZBmode": nzbmode,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                        bookid).fetchone()
            if not snatchedbooks:
                if nzbmode == "torznab":
                    snatch = TORDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                else:
                    snatch = NZBDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(nzbTitle) + ' at ' + formatter.now())
                    postprocess.schedule_processor(action='Start')
                    return True
            
    logger.debug("No nzb's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                     " using searchtype " + searchtype)
    return False
Exemplo n.º 50
0
def search_tor_book(books=None, mags=None):
    if not (lazylibrarian.USE_TOR):
        return
    # rename this thread
    threading.currentThread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()
    searchlist = []
    searchlist1 = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"'
        )

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink("%s/%s" % (".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        if books != False:
            for book in books:
                searchbook = myDB.select(
                    'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                    [book['bookid']])
                for terms in searchbook:
                    searchbooks.append(terms)

    for searchbook in searchbooks:
        bookid = searchbook[0]
        author = searchbook[1]
        book = searchbook[2]

        dic = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': '',
            ':': '',
            ';': ''
        }
        dicSearchFormatting = {'.': ' +', ' + ': ' '}

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        book = formatter.latinToAscii(formatter.replace_all(book, dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(
            formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + ' ' + book  # + ' ' + lazylibrarian.EBOOK_TYPE
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8')
        searchterm = re.sub(r"\s\s+", " ",
                            searchterm)  # strip any double white space
        searchlist.append({
            "bookid": bookid,
            "bookName": searchbook[2],
            "authorName": searchbook[1],
            "searchterm": searchterm.strip()
        })

    if not lazylibrarian.KAT:
        logger.info('No download method is set, use SABnzbd or blackhole')

    counter = 0
    for book in searchlist:
        #print book.keys()
        resultlist = providers.IterateOverTorrentSites(book, 'book')

        #if you can't find teh book specifically, you might find under general search
        if not resultlist:
            logger.info(
                "Searching for type book failed to find any books...moving to general search"
            )
            resultlist = providers.IterateOverTorrentSites(book, 'general')

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])

        else:
            dictrepl = {
                '...': '',
                '.': ' ',
                ' & ': ' ',
                ' = ': ' ',
                '?': '',
                '$': 's',
                ' + ': ' ',
                '"': '',
                ',': '',
                '*': '',
                '(': '',
                ')': '',
                '[': '',
                ']': '',
                '#': '',
                '0': '',
                '1': '',
                '2': '',
                '3': '',
                '4': '',
                '5': '',
                '6': '',
                '7': '',
                '8': '',
                '9': '',
                '\'': '',
                ':': '',
                '!': '',
                '-': '',
                '\s\s': ' ',
                ' the ': ' ',
                ' a ': ' ',
                ' and ': ' ',
                ' to ': ' ',
                ' of ': ' ',
                ' for ': ' ',
                ' my ': ' ',
                ' in ': ' ',
                ' at ': ' ',
                ' with ': ' '
            }
            logger.debug(u'searchterm %s' % book['searchterm'])
            addedCounter = 0

            for tor in resultlist:
                tor_Title = formatter.latinToAscii(
                    formatter.replace_all(
                        str(tor['tor_title']).lower(), dictrepl)).strip()
                tor_Title = re.sub(r"\s\s+", " ",
                                   tor_Title)  #remove extra whitespace
                logger.debug(u'torName %s' % tor_Title)

                match_ratio = int(lazylibrarian.MATCH_RATIO)
                tor_Title_match = fuzz.token_sort_ratio(
                    book['searchterm'].lower(), tor_Title)
                logger.debug("Torrent Title Match %: " + str(tor_Title_match))

                if (tor_Title_match > match_ratio):
                    logger.info(u'Found Torrent: %s' % tor['tor_title'])
                    addedCounter = addedCounter + 1
                    bookid = book['bookid']
                    tor_Title = (book["authorName"] + ' - ' +
                                 book['bookName'] + ' LL.(' + book['bookid'] +
                                 ')').strip()
                    tor_url = tor['tor_url']
                    tor_prov = tor['tor_prov']

                    tor_size_temp = tor[
                        'tor_size']  #Need to cater for when this is NONE (Issue 35)
                    if tor_size_temp is None:
                        tor_size_temp = 1000
                    tor_size = str(round(float(tor_size_temp) / 1048576,
                                         2)) + ' MB'

                    controlValueDict = {"NZBurl": tor_url}
                    newValueDict = {
                        "NZBprov": tor_prov,
                        "BookID": bookid,
                        "NZBsize": tor_size,
                        "NZBtitle": tor_Title,
                        "Status": "Skipped"
                    }
                    myDB.upsert("wanted", newValueDict, controlValueDict)

                    snatchedbooks = myDB.action(
                        'SELECT * from books WHERE BookID=? and Status="Snatched"',
                        [bookid]).fetchone()
                    if not snatchedbooks:
                        snatch = DownloadMethod(bookid, tor_prov, tor_Title,
                                                tor_url)
                        notifiers.notify_snatch(tor_Title + ' at ' +
                                                formatter.now())
                    break
            if addedCounter == 0:
                logger.info("No torrent's found for " +
                            (book["authorName"] + ' ' +
                             book['bookName']).strip() +
                            ". Adding book to queue.")
        counter = counter + 1

# if not books or books==False:
#     snatched = searchmag.searchmagazines(mags)
#     for items in snatched:
#         snatch = DownloadMethod(items['bookid'], items['tor_prov'], items['tor_title'], items['tor_url'])
#         notifiers.notify_snatch(items['tor_title']+' at '+formatter.now())
    logger.info("Search for Wanted items complete")
Exemplo n.º 51
0
def processResultList(resultlist, author, title, book):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    matches = []
    
    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, torTitle)
        tor_Title_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle))

        rejected = False
        for word in reject_list:
            if word in torTitle.lower() and not word in author.lower() and not word in book.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                break

            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = round(float(tor_size_temp) / 1048576, 2)

            maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if (tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected):
            #logger.debug(u'Found RSS: %s' % tor['tor_title'])
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }
            
            score = (tor_Title_match + tor_Author_match)/2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(torTitle))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match RSS (%s%%): %s using %s search' % 
            (score, nzb_Title, searchtype))
                  
        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()

        if not snatchedbooks:  # check if one of the other downloaders got there first
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                """
                #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                if not tor_url.startswith('magnet'):  # magnets don't use auth
                    pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                    auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                    # don't know what form of password hash is required, try sha1
                    tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                """  
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' +
                book['bookName']).strip())
    return False
Exemplo n.º 52
0
def processResultList(resultlist, authorname, bookname, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': ' ',
        '*': '',
        '(': '',
        ')': '',
        '[': '',
        ']': '',
        '#': '',
        '0': '',
        '1': '',
        '2': '',
        '3': '',
        '4': '',
        '5': '',
        '6': '',
        '7': '',
        '8': '',
        '9': '',
        '\'': '',
        ':': '',
        '!': '',
        '-': ' ',
        '\s\s': ' '
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)

    matches = []

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = unaccented_str(replace_all(tor['tor_title'],
                                              dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(authorname, torTitle)
        tor_Title_match = fuzz.token_set_ratio(bookname, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" %
                     (tor_Author_match, tor_Title_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match(
            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
            tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" %
                         (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in authorname.lower(
                ) and word not in bookname.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" %
                                 (torTitle, word))
                    break

        tor_size_temp = tor[
            'tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)
        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)

        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (tor_Title_match + tor_Author_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(authorname))
            words -= len(getList(bookname))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.debug(
                u'Nearest RSS match (%s%%): %s using %s search for %s %s' %
                (score, nzb_Title, searchtype, authorname, bookname))
            return False

        logger.info(u'Best RSS match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
            newValueDict["BookID"])

        if snatchedbooks:  # check if one of the other downloaders got there first
            logger.info('%s already marked snatched' % nzb_Title)
            return True
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            else:
                """
                #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                if not tor_url.startswith('magnet'):  # magnets don't use auth
                    pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                    auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                    # don't know what form of password hash is required, try sha1
                    tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                """
                snatch = TORDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                logger.info(
                    'Downloading %s from %s' %
                    (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch(
                    "%s from %s at %s" %
                    (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No RSS found for " +
                     (book["authorName"] + ' ' + book['bookName']).strip())
    return False
Exemplo n.º 53
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab

    myDB = database.DBConnection()
    searchlist = []
    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select('SELECT Title, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"' % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 1:
        logger.info('Searching for one magazine')
    else:
        logger.info('Searching for %i magazines' % len(searchmags))

    for searchmag in searchmags:
        bookid = searchmag[0]
        searchterm = searchmag[0]
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}

        searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn('There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                resultlist.append({
                    'bookid': item['bookid'],
                    'nzbprov': item['tor_prov'],
                    'nzbtitle': item['tor_title'],
                    'nzburl': item['tor_url'],
                    'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                    'nzbsize': item['tor_size'],
                    'nzbmode': 'torrent'
                })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            to_snatch = 0
            maglist = []
            issues = []
            reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = (u'%s' % nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
                nzbdate = formatter.nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid)
                if checkifmag:
                    for results in checkifmag:
                        control_date = results['IssueDate']
                        # frequency = results['Frequency']
                        # regex = results['Regex']

                    nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace(
                        '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip()
                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                    # keyword_check = nzbtitle_formatted.replace(bookid, '')
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date

                    name_match = 1  # assume name matches for now
                    if len(nzbtitle_exploded) > len(bookid_exploded):  # needs to be longer as it has to include a date
                        # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                        mag_title_match = fuzz.token_set_ratio(
                            common.remove_accents(bookid),
                            common.remove_accents(nzbtitle_formatted))
                        if mag_title_match < lazylibrarian.MATCH_RATIO:
                            logger.debug(
                                u"Magazine token set Match failed: " + str(
                                    mag_title_match) + "% for " + nzbtitle_formatted)
                            name_match = 0

                    lower_title = common.remove_accents(nzbtitle_formatted).lower()
                    lower_bookid = common.remove_accents(bookid).lower()
                    for word in reject_list:
                        if word in lower_title and not word in lower_bookid:
                            name_match = 0
                            logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                            break

                    if name_match:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() == 'pdf':
                                nzbtitle_exploded.pop()  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = formatter.month2num(common.remove_accents(regexA_month_temp))
                            if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            # regexA_day = '01'  # monthly, or less frequent

                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = formatter.month2num(common.remove_accents(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                    regexB_year = 'fail'

                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                            regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))
                                    except:
                                        logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                                        bad_date = bad_date + 1
                                        # allow issues with good name but bad date to be included
                                        # so user can manually select them, incl those with issue numbers
                                        newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                        # continue
                        else:
                            continue

                        #  store all the _new_ matching results, marking as "skipped" for now
                        #  we change the status to "wanted" on the ones we want to snatch later
                        #  don't add a new entry if this issue has been found on an earlier search
                        #  because status might have been user-set
                        mag_entry = myDB.select('SELECT * from wanted WHERE NZBtitle="%s" and NZBprov="%s"' % (nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": "Skipped",
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert("wanted", newValueDict, controlValueDict)

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            start_time = time.time()
                            start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                            control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

                        # only grab a copy if it's newer than the most recent we have,
                        # or newer than a month ago if we have none
                        comp_date = formatter.datecompare(newdatish, control_date)
                        if comp_date > 0:
                            # Should probably only upsert when downloaded and processed in case snatch fails
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                to_snatch = to_snatch + 1
                                issues.append(issue)

                                controlValueDict = {"NZBurl": nzburl}
                                newValueDict = {
                                    "NZBdate": formatter.now(),  # when we asked for it
                                    "Status": "Wanted"
                                }
                                myDB.upsert("wanted", newValueDict, controlValueDict)

                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1
                    else:
                        logger.debug('Magazine [%s] does not completely match search term [%s].' % (
                                     nzbtitle_formatted, bookid))
                        bad_regex = bad_regex + 1

            logger.info('Found %i results for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % (
                        total_nzbs, bookid, new_date, old_date, bad_date, bad_regex, to_snatch))

            for items in maglist:
                if items['nzbmode'] == "torznab":
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                elif items['nzbmode'] == "torrent":
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                else:
                    snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
            maglist = []

    if reset:
        common.schedule_job(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
Exemplo n.º 54
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ',
                # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    for nzb in resultlist:
        nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzbTitle = re.sub(r"\s\s+", " ", nzbTitle)  # remove extra whitespace

        author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic))
        title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic))
        # nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle)
        # logger.debug(u"NZB Title sort Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
        nzbAuthor_match = fuzz.token_set_ratio(author, nzbTitle)
        nzbBook_match = fuzz.token_set_ratio(title, nzbTitle)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzbTitle))

        rejected = False
        for word in reject_list:
            if word in nzbTitle.lower() and not word in author.lower() and not word in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzbTitle, word))
                break

        if (nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected):
            logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
            bookid = book['bookid']
            nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbdate_temp = nzb['nzbdate']
            nzbsize_temp = nzb['nzbsize']  # Need to cater for when this is NONE (Issue 35)
            if nzbsize_temp is None:
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb['nzbmode']
            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "NZBmode": nzbmode,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                        bookid).fetchone()
            if not snatchedbooks:
                if nzbmode == "torznab":
                    snatch = TORDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                else:
                    snatch = NZBDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                if snatch:
                    notifiers.notify_snatch(nzbTitle + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
                    return True

    logger.debug("No nzb's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
Exemplo n.º 55
0
def processDir(reset=False):

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(
            lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' %
                     (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(
                    len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # not a directory, handle single file downloads here. Book/mag file in download root.
                        # move the file into it's own subdirectory so we don't move/delete things that aren't ours
                        if is_valid_booktype(fname, booktype="book") \
                                or is_valid_booktype(fname, booktype="mag"):
                            fname = os.path.splitext(fname)[0]
                            dirname = os.path.join(processpath, fname)
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug(
                                        'Failed to create directory %s, %s' %
                                        (dirname, why.strerror))
                            if os.path.exists(dirname):
                                # move the book and any related files too
                                # ie other book formats, or opf, jpg with same title
                                # can't move metadata.opf or cover.jpg or similar
                                # as can't be sure they are ours
                                # not sure if we need a new listdir here, or whether we can use the old one
                                list_dir = os.listdir(processpath)
                                for ourfile in list_dir:
                                    if ourfile.startswith(fname):
                                        if is_valid_booktype(ourfile, booktype="book") \
                                            or is_valid_booktype(ourfile, booktype="mag") \
                                                or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                            try:
                                                shutil.move(
                                                    os.path.join(
                                                        processpath, ourfile),
                                                    os.path.join(
                                                        dirname, ourfile))
                                            except Exception as why:
                                                logger.debug(
                                                    "Failed to move file %s to %s, %s"
                                                    % (ourfile, dirname,
                                                       str(why)))

                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' %
                                     (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' %
                                 (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.debug(u'Best match (%s%%): %s for %s' %
                         (match, pp_path, book['NZBtitle']))

            data = myDB.match('SELECT * from books WHERE BookID="%s"' %
                              book['BookID'])
            if data:
                logger.debug(u'Processing book %s' % book['BookID'])
                authorname = data['AuthorName']
                bookname = data['BookName']
                if 'windows' in platform.system().lower(
                ) and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                        '/', '\\')
                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                dic = {
                    '<': '',
                    '>': '',
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': '',
                    ':': '',
                    ';': '',
                    '\'': ''
                }
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                         dest_path).encode(
                                             lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.match('SELECT * from magazines WHERE Title="%s"' %
                                  book['BookID'])
                if data:
                    logger.debug(u'Processing magazine %s' % book['BookID'])
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[
                        'IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {
                        '<': '',
                        '>': '',
                        '...': '',
                        ' & ': ' ',
                        ' = ': ' ',
                        '?': '',
                        '$': 's',
                        ' + ': ' ',
                        '"': '',
                        ',': '',
                        '*': '',
                        ':': '',
                        ';': '',
                        '\'': ''
                    }
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(
                            lazylibrarian.DESTINATION_DIR,
                            dest_path).encode(lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(
                            lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug(
                        "Snatched magazine %s is not in download directory" %
                        (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" %
                         (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname,
                                         bookname, global_name,
                                         book['NZBmode'])

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {
                "Status": "Processed",
                "NZBDate": now()
            }  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug(
                        'Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(
                            book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(
                            book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                else:
                    newValueDict = {
                        "IssueDate": book['AuxInfo'],
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {
                    "Title": book['BookID'],
                    "IssueDate": book['AuxInfo']
                }
                newValueDict = {
                    "IssueAcquired":
                    today(),
                    "IssueFile":
                    dest_file,
                    "IssueID":
                    create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s from %s at %s" %
                            (global_name, book['NZBprov'], now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' %
                         pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action(
                    'UPDATE books SET status = "Wanted" WHERE BookID="%s"' %
                    book['BookID'])

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except Exception as e:
                logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

    downloads = os.listdir(
        processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' %
                    (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')