Пример #1
0
def search_magazines(mags=None, reset=False):
    """Search the enabled provider types for new issues of active magazines.

    For every magazine to be searched a search term is built (the stored
    ``Regex`` column, or a cleaned-up version of the title), each enabled
    provider type (nzb, direct, torrent, rss) is queried, and every result
    is filtered on size, title words, blacklist and reject words before its
    issue date / issue number is extracted from the result title.

    Results that are newer than the magazine's ``IssueDate`` are stored in
    the ``wanted`` table with status "Wanted" and handed to the download
    method; all other accepted results go to ``pastissues`` as "Skipped".
    Existing "Skipped" rows in ``pastissues`` are deleted before searching.

    :param mags: ``None`` to search every magazine whose Status is "Active",
        otherwise an iterable of dicts whose ``'bookid'`` entry is looked up
        against the magazine ``Title`` column.
    :param reset: when true, the ``search_magazines`` scheduled job is
        restarted once the search has finished.  Note that this is skipped
        if there were no magazines to search for (early return).
    :return: ``None``.  Any exception is logged with a traceback rather
        than propagated, and the thread name is always reset to "WEBSERVER".
    """
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []
        # matches yyyy-mm-dd style dates, as opposed to bare issue numbers
        date_pattern = r'\d+-\d\d-\d\d'

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                     )
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"',
                    (magazine['bookid'], ))
                searchmags.extend(searchmags_temp)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                # no user-supplied search term, derive one from the title
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub(r'[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            # results from every provider type are normalised to the nzb
            # key names and collected here
            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            # rss results are not tied to a search, so tag
                            # them with the magazine currently being searched
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                # counters for the summary line logged after this magazine
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []  # results we are going to download
                issues = []  # "title,date" keys already queued, to avoid dupes
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace(
                        "'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    # bytes -> MiB, the unit the size limits are compared in
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle,
                                                             dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[
                                    0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(
                                nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(
                                ' ')

                            # split(' ') on a title without spaces already
                            # yields a one-element list
                            bookid_exploded = bookid.split(' ')

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = [
                                    unaccented(word).lower()
                                    for word in nzbtitle_exploded
                                ]
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        u"Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(u"Magazine matched " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                # a reject word that is part of the magazine's
                                # own title must not cause a rejection
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        # regex_pass records which date style matched (0 = none yet)
                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos -
                                                                        1])
                                    if month:
                                        if pos > 1:
                                            day = check_int(
                                                nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (
                                            year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    # need two words before the year; testing
                                    # "pos - 1" for truth let pos == 0 through
                                    # and the negative indexes below wrapped
                                    # round to the end of the title
                                    if year and pos > 1:
                                        month = month2num(
                                            nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(
                                                nzbtitle_exploded[
                                                    pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(
                                            nzbtitle_exploded):
                                        month = check_int(
                                            nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(
                                                    nzbtitle_exploded):
                                                day = check_int(
                                                    nzbtitle_exploded[pos + 2],
                                                    1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in [
                                            "issue", "no", "nr", "vol"
                                    ]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(
                                                nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(
                                                    issue)  # 4 == 04 == 004
                                                if pos + 2 < len(
                                                        nzbtitle_exploded):
                                                    year = check_year(
                                                        nzbtitle_exploded[pos +
                                                                          2])
                                                    if year and year < int(
                                                            datetime.date.
                                                            today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(
                                            nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(
                                                issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today(
                                            ).year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(
                                            issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(
                                                datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match(date_pattern, str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(
                                        lazylibrarian.CONFIG['MAG_AGE']
                                    ) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(
                                    newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(
                                    4)  # pad so we sort correctly
                            elif re.match(date_pattern, str(control_date)) and \
                                    re.match(date_pattern, str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match(date_pattern,
                                            str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                # bookid here is the one from the last result processed
                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Пример #2
0
def get_issue_date(nzbtitle_exploded):
    """Work out an issue date or issue number from a tokenised magazine title.

    nzbtitle_exploded is the title already split into a list of word tokens.
    Magazine names have many different styles of date, so several styles are
    tried in order and the first one that matches wins:

      1  DD MonthName YYYY, MonthName YYYY or Issue nn, MonthName YYYY
      2  MonthName DD YYYY or MonthName DD, YYYY
      3  YYYY MM or YYYY MM DD
      4  Issue/No/Nr/Vol nn with at least one more token after the number
      5  Issue/No/Nr/Vol nn as the last two tokens
      6  nn YYYY, an issue number without "Nr" before it
      7  issue and year as a single 6 digit string eg 222015
      8  issue as a 3 or more digit string with leading zero eg 0063

    Returns a tuple (regex_pass, issuedate).
    regex_pass is the number of the style that matched, 0 if none did.
    issuedate is "YYYY-MM-DD" for styles 1 to 3 and the issue number as a
    string for styles 4 to 8 ('0' if the year found with it is earlier than
    the current year). It is '' when nothing matched.
    """
    regex_pass = 0
    issuedate = ''
    tokens = nzbtitle_exploded
    this_year = datetime.date.today().year

    # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
    for pos, token in enumerate(tokens):
        year = check_year(token)
        if not (year and pos):
            continue
        month = month2num(tokens[pos - 1])
        if not month:
            continue
        day = 1
        if pos > 1:
            day = check_int(tokens[pos - 2], 1)
            if day > 31:  # probably issue number nn
                day = 1
        try:
            datetime.date(year, month, day)
        except ValueError:
            continue  # not a real calendar date, keep looking
        # only store the date once we know it is valid, so a rejected
        # candidate can never be returned alongside regex_pass 0
        issuedate = "%04d-%02d-%02d" % (year, month, day)
        regex_pass = 1
        break

    # MonthName DD YYYY or MonthName DD, YYYY
    if not regex_pass:
        for pos, token in enumerate(tokens):
            year = check_year(token)
            # Need two tokens in front of the year. Testing "pos > 1" rather
            # than "pos - 1" matters: at pos 0 the old test was true (-1) and
            # the negative indexes wrapped round to the end of the list, or
            # raised IndexError for a single token title.
            if not (year and pos > 1):
                continue
            month = month2num(tokens[pos - 2])
            if not month:
                continue
            day = check_int(tokens[pos - 1].rstrip(','), 1)
            try:
                datetime.date(year, month, day)
            except ValueError:
                continue
            issuedate = "%04d-%02d-%02d" % (year, month, day)
            regex_pass = 2
            break

    # YYYY MM or YYYY MM DD
    if not regex_pass:
        for pos, token in enumerate(tokens):
            year = check_year(token)
            if not year or pos + 1 >= len(tokens):
                continue
            month = check_int(tokens[pos + 1], 0)
            if not month:
                continue
            if pos + 2 < len(tokens):
                day = check_int(tokens[pos + 2], 1)
            else:
                day = 1
            try:
                datetime.date(year, month, day)
            except ValueError:
                continue
            issuedate = "%04d-%02d-%02d" % (year, month, day)
            regex_pass = 3
            break

    # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
    if not regex_pass:
        nouns = ["issue", "no", "nr", "vol", "volume"]
        for pos, token in enumerate(tokens):
            if token.lower().strip('.') not in nouns:
                continue
            if pos + 1 >= len(tokens):
                continue
            issue = check_int(tokens[pos + 1], 0)
            if not issue:
                continue
            issuedate = str(issue)  # 4 == 04 == 004
            if pos + 2 < len(tokens):
                year = check_year(tokens[pos + 2])
                if year and year < this_year:
                    issuedate = '0'  # it's old
                regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
            else:
                regex_pass = 5  # Issue/No/Nr/Vol nn
            break

    # nn YYYY issue number without "Nr" before it
    if not regex_pass:
        for previous, token in zip(tokens, tokens[1:]):
            year = check_year(token)
            if not year:
                continue
            issue = check_int(previous, 0)
            if not issue:
                continue
            issuedate = str(issue)  # 4 == 04 == 004
            regex_pass = 6
            if year < this_year:
                issuedate = '0'  # it's old
            break

    # issue and year as a single 6 digit string eg 222015
    if not regex_pass:
        for token in tokens:
            if token.isdigit() and len(token) == 6:
                year = int(token[2:])
                issuedate = str(int(token[:2]))  # 4 == 04 == 004
                regex_pass = 7
                if year < this_year:
                    issuedate = '0'  # it's old
                break

    # issue as a 3 or more digit string with leading zero eg 0063
    if not regex_pass:
        for token in tokens:
            if token.isdigit() and len(token) > 2 and token[0] == '0':
                issuedate = token
                regex_pass = 8
                break

    return regex_pass, issuedate
Пример #3
0
def setSeries(serieslist=None, bookid=None, authorid=None, workid=None):
    """ set series details in series/member tables from the supplied list
        and a displayable summary in book table

        serieslist is a sequence of (SeriesID, SeriesNum, SeriesName) tuples,
        None is treated as an empty list.
        bookid is the book to attach the series to, nothing is changed
        without one.
        authorid and workid are looked up from the books table if either
        is missing.

        Return a tuple of how many api hits were used and the original
        publication date if known (otherwise an empty string) """
    myDB = database.DBConnection()
    api_hits = 0
    originalpubdate = ''
    if not bookid:
        # always hand back the documented tuple so callers can unpack it
        return api_hits, originalpubdate

    serieslist = serieslist or []

    # delete any old series-member entries
    myDB.action('DELETE from member WHERE BookID=?', (bookid, ))
    for item in serieslist:
        match = myDB.match(
            'SELECT SeriesID from series where SeriesName=? COLLATE NOCASE',
            (item[2], ))
        if match:
            seriesid = match['SeriesID']
            members, _api_hits = getSeriesMembers(seriesid, item[2])
            api_hits += _api_hits
        else:
            # new series, need to set status and get SeriesID
            if item[0]:
                seriesid = item[0]
                members, _api_hits = getSeriesMembers(seriesid, item[2])
                api_hits += _api_hits
            else:
                # no seriesid so generate it (row count + 1)
                cnt = myDB.match("select count(*) as counter from series")
                res = check_int(cnt['counter'], 0)
                seriesid = str(res + 1)
                members = []
            myDB.action('INSERT into series VALUES (?, ?, ?, ?, ?)',
                        (seriesid, item[2], "Active", 0, 0),
                        suppress='UNIQUE')

        if not workid or not authorid:
            book = myDB.match(
                'SELECT AuthorID,WorkID from books where BookID=?',
                (bookid, ))
            if book:
                authorid = book['AuthorID']
                workid = book['WorkID']
        if seriesid and authorid and workid:
            # member[3] is compared with our workid and member[5] is used as
            # the publication date of that series member
            for member in members:
                if member[3] == workid:
                    if check_year(member[5], past=1800, future=0):
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {
                            "BookDate": member[5],
                            "OriginalPubDate": member[5]
                        }
                        myDB.upsert("books", newValueDict,
                                    controlValueDict)
                        originalpubdate = member[5]
                    break

            controlValueDict = {"BookID": bookid, "SeriesID": seriesid}
            newValueDict = {"SeriesNum": item[1], "WorkID": workid}
            myDB.upsert("member", newValueDict, controlValueDict)
            myDB.action(
                'INSERT INTO seriesauthors ("SeriesID", "AuthorID") VALUES (?, ?)',
                (seriesid, authorid),
                suppress='UNIQUE')
        else:
            if not authorid:
                logger.debug(
                    'Unable to set series for book %s, no authorid' %
                    bookid)
            elif not workid:
                logger.debug(
                    'Unable to set series for book %s, no workid' % bookid)
            elif not seriesid:
                logger.debug(
                    'Unable to set series for book %s, no seriesid' %
                    bookid)
            # give up on the first series we cannot link, the display
            # summary in the books table is left as it was
            return api_hits, originalpubdate

    # build the displayable summary, one "name number" entry per series.
    # str.strip() returns a new string, so the result has to be kept or the
    # trailing space from an empty series number would stay in the summary
    entries = []
    for item in serieslist:
        newseries = ("%s %s" % (item[2], item[1])).strip()
        if newseries:
            entries.append(newseries)
    series = '<br>'.join(entries)
    myDB.action('UPDATE books SET SeriesDisplay=? WHERE BookID=?',
                (series, bookid))
    return api_hits, originalpubdate
Пример #4
0
def magazineScan(title=None):
    """Scan the magazine folder on disk and bring the database in line with it.

    title: optional magazine title. When given, only the folder obtained by
    substituting it for $Title in MAG_DEST_FOLDER is scanned. When omitted
    the parent directory of MAG_DEST_FOLDER is scanned, and if FULL_SCAN is
    enabled issues whose file has gone from disk are removed first.

    For every file with a valid magazine extension the magazine title and
    issue date are worked out from the filename (using the MAG_DEST_FILE
    template, then the folder name and get_issue_date as fallbacks), the
    file is optionally renamed (MAG_RENAME), and the magazines and issues
    tables are updated. A cover and opf are requested for each issue found.

    lazylibrarian.MAG_UPDATE is set to 1 while the scan runs and is always
    reset to 0 afterwards. Nothing is returned, any exception is logged
    rather than raised.
    """
    lazylibrarian.MAG_UPDATE = 1

    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        onetitle = title
        if onetitle:
            mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER'].replace(
                '$Title', onetitle)
        else:
            mag_path = os.path.dirname(lazylibrarian.CONFIG['MAG_DEST_FOLDER'])

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?',
                                (issuefile, ))
                    logger.info('Issue %s - %s deleted as not found on disk' %
                                (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus":
                        "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            # now check the magazine titles and delete any with no issues
            # NOTE(review): this groups the issues table by title, so every
            # row returned has a counter of at least 1 and nothing is ever
            # deleted here. Left unchanged on purpose, confirm the intended
            # behaviour before making it effective as that would remove
            # magazines that simply have no downloaded issues yet.
            if lazylibrarian.CONFIG['MAG_DELFOLDER']:
                mags = myDB.select(
                    'SELECT Title,count(Title) as counter from issues group by Title'
                )
                for mag in mags:
                    title = mag['Title']
                    issues = mag['counter']
                    if not issues:
                        logger.debug('Magazine %s deleted as no issues found' %
                                     title)
                        myDB.action('DELETE from magazines WHERE Title=?',
                                    (title, ))

        logger.info(' Checking [%s] for magazines' % mag_path)

        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching.
        # re.escape only escapes what needs escaping. Putting a backslash in
        # front of every character turns plain letters in the template into
        # regex escapes (\d, \s, \b ...) or into "bad escape" errors.
        matchString = re.escape(lazylibrarian.CONFIG['MAG_DEST_FILE'])
        date_token = re.escape('$IssueDate')
        title_token = re.escape('$Title')
        # the extensions are alternatives, not a set of single characters
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        booktypes = '|'.join(
            re.escape(book_type) for book_type in booktype_list)
        extn_pattern = r'\.(?:' + booktypes + ')'

        match = matchString.replace(
            date_token, "(?P<issuedate>.*?)").replace(
                title_token, "(?P<title>.*?)") + extn_pattern
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace(
            date_token, "(?P<issuedate>.*?)").replace(
                title_token, "") + extn_pattern
        date_pattern = re.compile(match, re.VERBOSE)

        # punctuation to strip or space out before splitting a name into words
        dic = {
            '.': ' ',
            '-': ' ',
            '/': ' ',
            '+': ' ',
            '_': ' ',
            '(': '',
            ')': '',
            '[': ' ',
            ']': ' ',
            '#': '# '
        }

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # first choice, title and date both taken from the filename.
                    # The broad except also covers a template with no $Title,
                    # where the "title" group does not exist.
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            title = match.group("title")
                            issuedate = match.group("issuedate")
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Title pattern [%s][%s]" %
                                             (title, issuedate))
                            match = True
                        else:
                            logger.debug(
                                "Title pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False

                    if not match:
                        # second choice, date from the filename and title
                        # from the name of the containing folder
                        # noinspection PyBroadException
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                    logger.debug("Date pattern [%s][%s]" %
                                                 (title, issuedate))
                                match = True
                            else:
                                logger.debug(
                                    "Date pattern match failed for [%s]" %
                                    fname)
                        except Exception:
                            match = False

                    if not match:
                        title = os.path.basename(rootdir)
                        issuedate = ''

                    if issuedate:
                        exploded = replace_all(issuedate, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(
                            exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Date regex [%s][%s][%s]" %
                                         (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        # last resort, look for a date anywhere in the filename
                        exploded = replace_all(fname, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(
                            exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("File regex [%s][%s][%s]" %
                                         (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        logger.warn("Invalid name format for [%s]" % fname)
                        continue

                    issuefile = os.path.join(rootdir,
                                             fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(
                        datetime.date.fromtimestamp(mtime))

                    if lazylibrarian.CONFIG['MAG_RENAME']:
                        filedate = issuedate
                        if issuedate and issuedate.isdigit():
                            # all digit issuedates are the internal forms
                            # YYYYIIII, VVVVIIII or YYYYVVVVIIII
                            if len(issuedate) == 8:
                                if check_year(issuedate[:4]):
                                    filedate = 'Issue %d %s' % (int(
                                        issuedate[4:]), issuedate[:4])
                                else:
                                    filedate = 'Vol %d Iss %d' % (int(
                                        issuedate[:4]), int(issuedate[4:]))
                            elif len(issuedate) == 12:
                                filedate = 'Vol %d Iss %d %s' % (int(
                                    issuedate[4:8]), int(
                                        issuedate[8:]), issuedate[:4])
                            else:
                                filedate = str(issuedate).zfill(4)

                        extn = os.path.splitext(fname)[1]
                        newfname = lazylibrarian.CONFIG[
                            'MAG_DEST_FILE'].replace('$Title', title).replace(
                                '$IssueDate', filedate)
                        newfname = newfname + extn
                        if newfname and newfname != fname:
                            logger.debug("Rename %s -> %s" % (fname, newfname))
                            newissuefile = os.path.join(rootdir, newfname)
                            newissuefile = safe_move(issuefile, newissuefile)
                            # move the cover and opf along with the issue.
                            # Swap only the real extension, str.replace would
                            # also hit the same text elsewhere in the path
                            old_base = os.path.splitext(issuefile)[0]
                            new_base = os.path.splitext(newissuefile)[0]
                            for sidecar in ('.jpg', '.opf'):
                                if os.path.exists(old_base + sidecar):
                                    safe_move(old_base + sidecar,
                                              new_base + sidecar)
                            issuefile = newissuefile

                    logger.debug("Found %s Issue %s" % (title, issuedate))
                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired,IssueDate,MagazineAdded,CoverPage from magazines WHERE Title=?',
                        (title, ))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None,
                            "CoverPage": 1
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)
                        magissuedate = None
                        magazineadded = None
                        maglastacquired = None
                        magcoverpage = 1
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        # Only pad a real value. str(None) is 'None' which is
                        # truthy and sorts after any digit, so a magazine
                        # whose dates had been cleared never got them back.
                        if magissuedate:
                            magissuedate = str(magissuedate).zfill(4)
                        magcoverpage = mag_entry['CoverPage']

                    issuedate = str(issuedate).zfill(
                        4)  # for sorting issue numbers

                    # is this issue already in the database?
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match(
                        'SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                        (title, issuedate))
                    new_entry = False
                    if not iss_entry or iss_entry['IssueFile'] != issuefile:
                        new_entry = True  # new entry or name changed
                        if not iss_entry:
                            logger.debug("Adding issue %s %s" %
                                         (title, issuedate))
                        else:
                            logger.debug("Updating issue %s %s" %
                                         (title, issuedate))
                        controlValueDict = {
                            "Title": title,
                            "IssueDate": issuedate
                        }
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)

                    # create the marker file if missing and refresh its timestamp
                    ignorefile = os.path.join(os.path.dirname(issuefile),
                                              '.ll_ignore')
                    with open(ignorefile, 'a'):
                        os.utime(ignorefile, None)

                    createMagCover(issuefile,
                                   pagenum=magcoverpage,
                                   refresh=new_entry)
                    lazylibrarian.postprocess.processMAGOPF(
                        issuefile,
                        title,
                        issuedate,
                        issue_id,
                        overwrite=new_entry)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover":
                            os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(
                                issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info(
                "Magazine scan complete, found %s magazine%s, %s issue%s" %
                (magcount['count(*)'], plural(magcount['count(*)']),
                 isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")

    except Exception:
        logger.error('Unhandled exception in magazineScan: %s' %
                     traceback.format_exc())
    finally:
        # always clear the "scan running" flag, however we leave
        lazylibrarian.MAG_UPDATE = 0
Пример #5
0
def get_issue_date(nzbtitle_exploded):
    """Work out an issue date or issue number from a tokenised magazine title.

    nzbtitle_exploded is the title already split into a list of word tokens.
    Magazine names have many different styles of date.
    These are the ones we can currently match, tried roughly in this order,
    the number being the regex_pass value returned...

      1 MonthName MonthName YYYY (bi-monthly just use first month as date)
      2 nn, MonthName YYYY  where nn is an assumed issue number (just use month and year)
      3 DD MonthName YYYY (daily, weekly, bi-weekly, monthly)
      4 MonthName YYYY (monthly)
      5 MonthName DD YYYY or MonthName DD, YYYY (daily, weekly, bi-weekly, monthly)
      6 YYYY MM DD or YYYY MonthName DD (daily, weekly, bi-weekly, monthly)
      7 YYYY MM or YYYY MonthName (monthly)
      8 Volume x Issue y in either order, with year
      9 Volume x Issue y in either order, without year
      10 Issue/No/Nr/Vol/# nn, with a year somewhere in the title
      11 Issue/No/Nr/Vol/# nn (no year found, hopefully rolls on year on year)
      12 nn YYYY issue number without Issue/No/Nr/Vol/# in front (unsure, nn could be issue or month number)
      13 issue and year as a single 6 digit string eg 222015
      14 3 or more digit zero padded issue number eg 0063 (issue with no year)
      15 just a year (annual)
      16 to 18 internal issuedates used for filenames, YYYYIIII, VVVVIIII, YYYYVVVVIIII

    Returns a tuple (regex_pass, issuedate, year).
    regex_pass is 0 and issuedate is '' if nothing matched.
    issuedate is "YYYY-MM-DD" for the calendar styles, a digit string for
    the issue number styles.
    year is the year found in the title, or 0. For passes 16 and 18 it is
    the leading four characters of the token, as a string.
    """
    regex_pass = 0
    issuedate = ''
    year = 0
    issuenouns = ["issue", "iss", "no", "nr", '#']
    volumenouns = ["vol", "volume"]
    nouns = issuenouns + volumenouns

    # passes 1 to 4 (and 10 when an issue noun precedes the number)
    pos = 0
    while pos < len(nzbtitle_exploded):
        year = check_year(nzbtitle_exploded[pos])
        if year and pos:
            month = month2num(nzbtitle_exploded[pos - 1])
            if month:
                # what to report for this candidate if it is not a plain date
                candidate = ''
                if pos > 1:
                    month2 = month2num(nzbtitle_exploded[pos - 2])
                    if month2:
                        # bimonthly, for now just use first month
                        month = min(month, month2)
                        day = 1
                        regex_pass = 1
                    else:
                        day = check_int(re.sub(r"\D", "", nzbtitle_exploded[pos - 2]), 0)
                        if pos > 2 and nzbtitle_exploded[pos - 3].lower().strip('.') in nouns:
                            # definitely an issue number
                            candidate = str(day)  # 4 == 04 == 004
                            regex_pass = 10
                        elif day > 31:  # probably issue number nn
                            regex_pass = 2
                            day = 1
                        elif day:
                            regex_pass = 3
                        else:
                            regex_pass = 4
                            day = 1
                else:
                    regex_pass = 4
                    day = 1

                try:
                    _ = datetime.date(year, month, day)
                    # Only keep the result once it has validated. Storing it
                    # first left a rejected value behind, which then blocked
                    # the date of a later, valid, match in this loop.
                    issuedate = candidate or "%04d-%02d-%02d" % (year, month, day)
                    break
                except ValueError:
                    regex_pass = 0
        pos += 1

    # MonthName DD YYYY or MonthName DD, YYYY
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            year = check_year(nzbtitle_exploded[pos])
            if year and (pos > 1):
                month = month2num(nzbtitle_exploded[pos - 2])
                if month:
                    day = check_int(re.sub(r"\D", "", nzbtitle_exploded[pos - 1]), 0)
                    try:
                        _ = datetime.date(year, month, day)
                        issuedate = "%04d-%02d-%02d" % (year, month, day)
                        regex_pass = 5
                        break
                    except ValueError:
                        regex_pass = 0
            pos += 1

    # YYYY MM_or_MonthName or YYYY MM_or_MonthName DD
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            year = check_year(nzbtitle_exploded[pos])
            if year and pos + 1 < len(nzbtitle_exploded):
                month = month2num(nzbtitle_exploded[pos + 1])
                if not month:
                    month = check_int(nzbtitle_exploded[pos + 1], 0)
                if month:
                    if pos + 2 < len(nzbtitle_exploded):
                        day = check_int(re.sub(r"\D", "", nzbtitle_exploded[pos + 2]), 0)
                        if day:
                            regex_pass = 6
                        else:
                            regex_pass = 7
                            day = 1
                    else:
                        regex_pass = 7
                        day = 1
                    try:
                        _ = datetime.date(year, month, day)
                        issuedate = "%04d-%02d-%02d" % (year, month, day)
                        break
                    except ValueError:
                        regex_pass = 0
            pos += 1

    # scan for a year in the name
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            year = check_year(nzbtitle_exploded[pos])
            if year:
                break
            pos += 1

        # Volume x Issue y in either order, with/without year in any position
        vol = 0
        iss = 0
        pos = 0
        while pos + 1 < len(nzbtitle_exploded):
            res = check_int(nzbtitle_exploded[pos + 1], 0)
            if res:
                # normalise the same way as the single noun pass below so
                # that "Vol", "Iss." etc are recognised here too
                noun = nzbtitle_exploded[pos].lower().strip('.')
                if noun in issuenouns:
                    iss = res
                if noun in volumenouns:
                    vol = res
            if vol and iss:
                if year:
                    issuedate = "%s%04d%04d" % (year, vol, iss)
                    regex_pass = 8
                else:
                    issuedate = "%04d%04d" % (vol, iss)
                    regex_pass = 9
                break
            pos += 1

    # Issue/No/Nr/Vol/# nn with/without year in any position
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            if nzbtitle_exploded[pos].lower().strip('.') in nouns:
                if pos + 1 < len(nzbtitle_exploded):
                    issue = check_int(nzbtitle_exploded[pos + 1], 0)
                    if issue:
                        issuedate = str(issue)  # 4 == 04 == 004
                        # we searched for year prior to regex 8/9
                        if year:
                            regex_pass = 10  # Issue/No/Nr/Vol nn, YYYY
                        else:
                            regex_pass = 11  # Issue/No/Nr/Vol nn
                        break
            pos += 1

    # nn YYYY issue number without "Nr" before it
    if not regex_pass and year:
        pos = 1
        while pos < len(nzbtitle_exploded):
            # use a scratch name, the year found earlier is still needed by
            # the annual pass if nothing matches here
            found = check_year(nzbtitle_exploded[pos])
            if found:
                issue = check_int(nzbtitle_exploded[pos - 1], 0)
                if issue:
                    issuedate = str(issue)  # 4 == 04 == 004
                    year = found
                    regex_pass = 12
                    break
            pos += 1

    # issue and year as a single 6 digit string eg 222015
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            issue = nzbtitle_exploded[pos]
            if issue.isdigit() and len(issue) == 6:
                found = check_year(int(issue[2:]))
                if found:
                    year = found
                    issue = int(issue[:2])
                    issuedate = str(issue)  # 4 == 04 == 004
                    regex_pass = 13
                    break
            pos += 1

    # issue as a 3 or more digit string with leading zero eg 0063
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            issue = nzbtitle_exploded[pos]
            if issue.isdigit() and len(issue) > 2 and issue[0] == '0':
                issuedate = issue
                year = 0
                regex_pass = 14
                break
            pos += 1

    # Annual - only a year found, year was found prior to regex 8/9
    if not regex_pass and year:
        issuedate = "%s-01-01" % year
        regex_pass = 15

    # YYYYIIII internal issuedates for filenames
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            issue = nzbtitle_exploded[pos]
            if issue.isdigit():
                if len(issue) == 8:
                    if check_year(issue[:4]):  # YYYYIIII
                        year = issue[:4]
                        issuedate = issue
                        regex_pass = 16
                        break
                    else:
                        issuedate = issue  # VVVVIIII
                        regex_pass = 17
                        break
                # measure the token, issuedate is still empty at this point
                elif len(issue) == 12:  # YYYYVVVVIIII
                    year = issue[:4]
                    issuedate = issue
                    regex_pass = 18
                    break
            pos += 1
    return regex_pass, issuedate, year
Пример #6
0
def get_issue_date(nzbtitle_exploded):
    """Extract an issue date or issue number from the words of a magazine name.

    The words are tested against a series of known layouts, in priority
    order; the first layout that matches wins.

    :param nzbtitle_exploded: list of words (strings) from a title or filename
    :return: tuple (regex_pass, issuedate, year)
             regex_pass is the number of the layout that matched (see the
             list below), or 0 if nothing matched.
             issuedate is a "YYYY-MM-DD" string, an issue number as a string,
             or a packed YYYYIIII / VVVVIIII / YYYYVVVVIIII string.
             year is the year associated with the match, 0 if none.
    """
    regex_pass = 0
    issuedate = ''
    year = 0
    # Magazine names have many different styles of date
    # These are the ones we can currently match...
    # 1 MonthName MonthName YYYY (bi-monthly just use first month as date)
    # 2 nn, MonthName YYYY  where nn is an assumed issue number (just use month and year)
    # 3 DD MonthName YYYY (daily, weekly, bi-weekly, monthly)
    # 4 MonthName YYYY (monthly)
    # 5 MonthName DD YYYY or MonthName DD, YYYY (daily, weekly, bi-weekly, monthly)
    # 6 YYYY MM DD or YYYY MonthName DD (daily, weekly, bi-weekly, monthly)
    # 7 YYYY MM or YYYY MonthName (monthly)
    # 8 Volume x Issue y in either order, with year
    # 9 Volume x Issue y in either order, without year
    # 10 Issue/No/Nr/Vol/# nn, YYYY
    # 11 Issue/No/Nr/Vol/# nn (no year found, hopefully rolls on year on year)
    # 12 nn YYYY issue number without Issue/No/Nr/Vol/# in front (unsure, nn could be issue or month number)
    # 13 issue and year as a single 6 digit string eg 222015 (some uploaders use this, we keep the issue number)
    # 14 3 or more digit zero padded issue number eg 0063 (issue with no year)
    # 15 just a year (annual)
    # 16 to 18 internal issuedates used for filenames, YYYYIIII, VVVVIIII, YYYYVVVVIIII
    #
    issuenouns = ["issue", "iss", "no", "nr", '#']
    volumenouns = ["vol", "volume"]
    nouns = issuenouns + volumenouns

    pos = 0
    while pos < len(nzbtitle_exploded):
        year = check_year(nzbtitle_exploded[pos])
        if year and pos:
            month = month2num(nzbtitle_exploded[pos - 1])
            if month:
                if pos > 1:
                    month2 = month2num(nzbtitle_exploded[pos - 2])
                    if month2:
                        # bimonthly, for now just use first month
                        month = min(month, month2)
                        day = 1
                        regex_pass = 1
                    else:
                        day = check_int(re.sub(r"\D", "", nzbtitle_exploded[pos - 2]), 0)
                        if pos > 2 and nzbtitle_exploded[pos - 3].lower().strip('.') in nouns:
                            # definitely an issue number
                            issuedate = str(day)  # 4 == 04 == 004
                            regex_pass = 10
                        elif day > 31:  # probably issue number nn
                            regex_pass = 2
                            day = 1
                        elif day:
                            regex_pass = 3
                        else:
                            regex_pass = 4
                            day = 1
                else:
                    regex_pass = 4
                    day = 1

                if not issuedate:
                    issuedate = "%04d-%02d-%02d" % (year, month, day)
                try:
                    _ = datetime.date(year, month, day)
                    break
                except ValueError:
                    # not a real calendar date, forget this candidate entirely
                    # so a stale issuedate cannot leak into a later match
                    regex_pass = 0
                    issuedate = ''
        pos += 1

    # MonthName DD YYYY or MonthName DD, YYYY
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            year = check_year(nzbtitle_exploded[pos])
            if year and (pos > 1):
                month = month2num(nzbtitle_exploded[pos - 2])
                if month:
                    day = check_int(re.sub(r"\D", "", nzbtitle_exploded[pos - 1]), 0)
                    try:
                        _ = datetime.date(year, month, day)
                        issuedate = "%04d-%02d-%02d" % (year, month, day)
                        regex_pass = 5
                        break
                    except ValueError:
                        regex_pass = 0
            pos += 1

    # YYYY MM_or_MonthName or YYYY MM_or_MonthName DD
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            year = check_year(nzbtitle_exploded[pos])
            if year and pos + 1 < len(nzbtitle_exploded):
                month = month2num(nzbtitle_exploded[pos + 1])
                if not month:
                    month = check_int(nzbtitle_exploded[pos + 1], 0)
                if month:
                    if pos + 2 < len(nzbtitle_exploded):
                        day = check_int(re.sub(r"\D", "", nzbtitle_exploded[pos + 2]), 0)
                        if day:
                            regex_pass = 6
                        else:
                            regex_pass = 7
                            day = 1
                    else:
                        regex_pass = 7
                        day = 1
                    try:
                        _ = datetime.date(year, month, day)
                        issuedate = "%04d-%02d-%02d" % (year, month, day)
                        break
                    except ValueError:
                        regex_pass = 0
            pos += 1

    # scan for a year in the name
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            year = check_year(nzbtitle_exploded[pos])
            if year:
                break
            pos += 1

        # Volume x Issue y in either order, with/without year in any position
        vol = 0
        iss = 0
        pos = 0
        while pos + 1 < len(nzbtitle_exploded):
            res = check_int(nzbtitle_exploded[pos + 1], 0)
            if res:
                # normalise the same way as the other noun tests so that
                # "Vol", "Iss." etc are recognised, not just lowercase words
                noun = nzbtitle_exploded[pos].lower().strip('.')
                if noun in issuenouns:
                    iss = res
                if noun in volumenouns:
                    vol = res
            if vol and iss:
                if year:
                    issuedate = "%s%04d%04d" % (year, vol, iss)
                    regex_pass = 8
                else:
                    issuedate = "%04d%04d" % (vol, iss)
                    regex_pass = 9
                break
            pos += 1

    # Issue/No/Nr/Vol/# nn with/without year in any position
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            if nzbtitle_exploded[pos].lower().strip('.') in nouns:
                if pos + 1 < len(nzbtitle_exploded):
                    issue = check_int(nzbtitle_exploded[pos + 1], 0)
                    if issue:
                        issuedate = str(issue)  # 4 == 04 == 004
                        # we searched for year prior to regex 8/9
                        if year:
                            regex_pass = 10  # Issue/No/Nr/Vol nn, YYYY
                        else:
                            regex_pass = 11  # Issue/No/Nr/Vol nn
                        break
            pos += 1

    # nn YYYY issue number without "Nr" before it
    if not regex_pass and year:
        pos = 1
        while pos < len(nzbtitle_exploded):
            # use a separate name so a failed scan does not lose the year
            # we already found, the annual check below still needs it
            found_year = check_year(nzbtitle_exploded[pos])
            if found_year:
                issue = check_int(nzbtitle_exploded[pos - 1], 0)
                if issue:
                    year = found_year
                    issuedate = str(issue)  # 4 == 04 == 004
                    regex_pass = 12
                    break
            pos += 1

    # issue and year as a single 6 digit string eg 222015
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            issue = nzbtitle_exploded[pos]
            if issue.isdigit() and len(issue) == 6:
                found_year = check_year(int(issue[2:]))
                if found_year:
                    year = found_year
                    issue = int(issue[:2])
                    issuedate = str(issue)  # 4 == 04 == 004
                    regex_pass = 13
                    break
            pos += 1

    # issue as a 3 or more digit string with leading zero eg 0063
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            issue = nzbtitle_exploded[pos]
            if issue.isdigit() and len(issue) > 2 and issue[0] == '0':
                issuedate = issue
                year = 0
                regex_pass = 14
                break
            pos += 1

    # Annual - only a year found, year was found prior to regex 8/9
    if not regex_pass and year:
        issuedate = "%s-01-01" % year
        regex_pass = 15

    # YYYYIIII internal issuedates for filenames
    if not regex_pass:
        pos = 0
        while pos < len(nzbtitle_exploded):
            issue = nzbtitle_exploded[pos]
            if issue.isdigit():
                if len(issue) == 8:
                    if check_year(issue[:4]):  # YYYYIIII
                        year = issue[:4]
                        issuedate = issue
                        regex_pass = 16
                        break
                    else:
                        issuedate = issue  # VVVVIIII
                        regex_pass = 17
                        break
                elif len(issue) == 12:  # YYYYVVVVIIII
                    # test the length of the word being examined, issuedate
                    # is still empty at this point
                    year = issue[:4]
                    issuedate = issue
                    regex_pass = 18
                    break
            pos += 1
    return regex_pass, issuedate, year
Пример #7
0
def magazineScan(title=None):
    """Scan the magazine folder on disk and bring the database into line with it.

    lazylibrarian.MAG_UPDATE is set to 1 while the scan runs and is always
    reset to 0 on exit. Any exception is logged, not raised.

    For every file accepted by is_valid_booktype(..., booktype='mag') the
    title and issue date are worked out from the filename (or the folder
    name), the file is optionally renamed (MAG_RENAME), the issue and
    magazine rows are added or updated, and a cover and opf are generated.
    On a full scan (FULL_SCAN set and no title given) issues whose file
    has disappeared are removed from the database first.

    :param title: optional magazine title, if given only that magazine's
                  folder (MAG_DEST_FOLDER with $Title substituted) is scanned
    :return: None
    """
    lazylibrarian.MAG_UPDATE = 1

    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        onetitle = title
        if onetitle:
            mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER'].replace('$Title', onetitle)
        else:
            mag_path = os.path.dirname(lazylibrarian.CONFIG['MAG_DEST_FOLDER'])

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            # now check the magazine titles and delete any with no issues
            # NOTE(review): a "group by" over the issues table looks like it can
            # never return a zero counter, so this may never delete anything - confirm
            if lazylibrarian.CONFIG['MAG_DELFOLDER']:
                mags = myDB.select('SELECT Title,count(Title) as counter from issues group by Title')
                for mag in mags:
                    title = mag['Title']
                    issues = mag['counter']
                    if not issues:
                        logger.debug('Magazine %s deleted as no issues found' % title)
                        myDB.action('DELETE from magazines WHERE Title=?', (title,))

        logger.info(' Checking [%s] for magazines' % mag_path)

        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        # re.escape only protects characters that need it, blindly putting a
        # backslash before every character turns letters into regex escapes
        matchString = re.escape(lazylibrarian.CONFIG['MAG_DEST_FILE'])
        title_token = re.escape('$Title')
        date_token = re.escape('$IssueDate')
        # the extension is an alternation of whole types, not a character class
        booktypes = '|'.join(re.escape(item) for item in getList(lazylibrarian.CONFIG['MAG_TYPE']))
        extn_match = r'\.(?:' + booktypes + ')'
        title_pattern = re.compile(
            matchString.replace(date_token, "(?P<issuedate>.*?)").replace(
                title_token, "(?P<title>.*?)") + extn_match, re.VERBOSE)
        date_pattern = re.compile(
            matchString.replace(date_token, "(?P<issuedate>.*?)").replace(
                title_token, "") + extn_match, re.VERBOSE)

        # separators to turn into spaces (or drop) before splitting a name into words
        dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
               '#': '# '}

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            title = match.group("title")
                            issuedate = match.group("issuedate")
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Title pattern [%s][%s]" % (title, issuedate))
                            match = True
                        else:
                            logger.debug("Title pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False

                    if not match:
                        # noinspection PyBroadException
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                    logger.debug("Date pattern [%s][%s]" % (title, issuedate))
                                match = True
                            else:
                                logger.debug("Date pattern match failed for [%s]" % fname)
                        except Exception:
                            match = False

                    if not match:
                        # fall back to the folder name for the title
                        title = os.path.basename(rootdir)
                        issuedate = ''

                    if issuedate:
                        exploded = replace_all(issuedate, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Date regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        # nothing usable from the pattern, try the whole filename
                        exploded = replace_all(fname, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("File regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        logger.warn("Invalid name format for [%s]" % fname)
                        continue

                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                    if lazylibrarian.CONFIG['MAG_RENAME']:
                        filedate = issuedate
                        if issuedate and issuedate.isdigit():
                            if len(issuedate) == 8:
                                if check_year(issuedate[:4]):
                                    filedate = 'Issue %d %s' % (int(issuedate[4:]), issuedate[:4])
                                else:
                                    filedate = 'Vol %d Iss %d' % (int(issuedate[:4]), int(issuedate[4:]))
                            elif len(issuedate) == 12:
                                filedate = 'Vol %d Iss %d %s' % (int(issuedate[4:8]), int(issuedate[8:]),
                                                                 issuedate[:4])
                            else:
                                filedate = str(issuedate).zfill(4)

                        extn = os.path.splitext(fname)[1]
                        newfname = lazylibrarian.CONFIG['MAG_DEST_FILE'].replace('$Title', title).replace(
                                                                                 '$IssueDate', filedate)
                        newfname = newfname + extn
                        if newfname and newfname != fname:
                            logger.debug("Rename %s -> %s" % (fname, newfname))
                            newissuefile = os.path.join(rootdir, newfname)
                            newissuefile = safe_move(issuefile, newissuefile)
                            # swap only the final extension to find the cover/opf,
                            # str.replace would also hit the same text elsewhere in the path
                            old_base = os.path.splitext(issuefile)[0]
                            new_base = os.path.splitext(newissuefile)[0]
                            for sidecar in ('.jpg', '.opf'):
                                if os.path.exists(old_base + sidecar):
                                    safe_move(old_base + sidecar, new_base + sidecar)
                            issuefile = newissuefile

                    logger.debug("Found %s Issue %s" % (title, issuedate))
                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired,IssueDate,MagazineAdded,CoverPage from magazines WHERE Title=?', (title,))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None,
                            "CoverPage": 1
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                        maglastacquired = None
                        magcoverpage = 1
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        # only pad a real value, str(None) would give the truthy
                        # string "None" and stop a cleared date being refilled below
                        if magissuedate:
                            magissuedate = str(magissuedate).zfill(4)
                        magcoverpage = mag_entry['CoverPage']

                    issuedate = str(issuedate).zfill(4)  # for sorting issue numbers

                    # is this issue already in the database?
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match('SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                                           (title, issuedate))
                    new_entry = False
                    if not iss_entry or iss_entry['IssueFile'] != issuefile:
                        new_entry = True  # new entry or name changed
                        if not iss_entry:
                            logger.debug("Adding issue %s %s" % (title, issuedate))
                        else:
                            logger.debug("Updating issue %s %s" % (title, issuedate))
                        controlValueDict = {"Title": title, "IssueDate": issuedate}
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)

                    # create or touch a .ll_ignore marker file alongside the issue
                    ignorefile = os.path.join(os.path.dirname(issuefile), '.ll_ignore')
                    with open(ignorefile, 'a'):
                        os.utime(ignorefile, None)

                    createMagCover(issuefile,  pagenum=magcoverpage, refresh=new_entry)
                    lazylibrarian.postprocess.processMAGOPF(issuefile, title, issuedate, issue_id, overwrite=new_entry)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                        (magcount['count(*)'], plural(magcount['count(*)']),
                         isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")

    except Exception:
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
    finally:
        # always clear the "scan in progress" flag, however we leave
        lazylibrarian.MAG_UPDATE = 0
Пример #8
0
def setSeries(serieslist=None, bookid=None, authorid=None, workid=None):
    """ set series details in series/member tables from the supplied list
        and a displayable summary in book table
        serieslist is a list of tuples (SeriesID, SeriesNum, SeriesName)
        authorid and workid are looked up from the books table if not supplied
        Return how many api hits and the original publication date if known
        Always returns a tuple, (0, '') if no bookid was given """
    api_hits = 0
    originalpubdate = ''
    if not bookid:
        # nothing to attach a series to, but keep the documented return shape
        return api_hits, originalpubdate
    if serieslist is None:
        # treat "no list" like an empty list rather than failing in the loops
        serieslist = []

    myDB = database.DBConnection()
    # delete any old series-member entries
    myDB.action('DELETE from member WHERE BookID=?', (bookid,))
    for item in serieslist:
        match = myDB.match('SELECT SeriesID from series where SeriesName=? COLLATE NOCASE', (item[2],))
        if match:
            seriesid = match['SeriesID']
            members, _api_hits = getSeriesMembers(seriesid, item[2])
            api_hits += _api_hits
        else:
            # new series, need to set status and get SeriesID
            if item[0]:
                seriesid = item[0]
                members, _api_hits = getSeriesMembers(seriesid, item[2])
                api_hits += _api_hits
            else:
                # no seriesid so generate it (row count + 1)
                cnt = myDB.match("select count(*) as counter from series")
                res = check_int(cnt['counter'], 0)
                seriesid = str(res + 1)
                members = []
            myDB.action('INSERT into series VALUES (?, ?, ?, ?, ?)',
                        (seriesid, item[2], "Active", 0, 0), suppress='UNIQUE')

        if not workid or not authorid:
            book = myDB.match('SELECT AuthorID,WorkID from books where BookID=?', (bookid,))
            if book:
                authorid = book['AuthorID']
                workid = book['WorkID']
        if seriesid and authorid and workid:
            for member in members:
                if member[3] == workid:
                    # only the member matching this work is of interest,
                    # use its date if it holds a plausible year
                    if check_year(member[5], past=1800, future=0):
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"BookDate": member[5], "OriginalPubDate": member[5]}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        originalpubdate = member[5]
                    break

            controlValueDict = {"BookID": bookid, "SeriesID": seriesid}
            newValueDict = {"SeriesNum": item[1], "WorkID": workid}
            myDB.upsert("member", newValueDict, controlValueDict)
            myDB.action('INSERT INTO seriesauthors ("SeriesID", "AuthorID") VALUES (?, ?)',
                        (seriesid, authorid), suppress='UNIQUE')
        else:
            if not authorid:
                logger.debug('Unable to set series for book %s, no authorid' % bookid)
            elif not workid:
                logger.debug('Unable to set series for book %s, no workid' % bookid)
            elif not seriesid:
                logger.debug('Unable to set series for book %s, no seriesid' % bookid)
            # give up on the remaining series, SeriesDisplay is left untouched
            return api_hits, originalpubdate

    # build the display summary, one "name number" entry per series
    entries = []
    for item in serieslist:
        # str.strip returns a new string, the result has to be kept
        newseries = ("%s %s" % (item[2], item[1])).strip()
        if newseries:
            entries.append(newseries)
    series = '<br>'.join(entries)
    myDB.action('UPDATE books SET SeriesDisplay=? WHERE BookID=?', (series, bookid))
    return api_hits, originalpubdate