Example #1
File: auth32p.py  Project: csavage5/mylar
    def searchit(self):
        chk_id = None
        #logger.info('searchterm: %s' % self.searchterm)
        #self.searchterm is a dict containing the series name, issue number, volume and publisher.
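        #returns "no results" when nothing usable is found, otherwise a dict of {'entries': [...]} sorted by pack/title.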
        series_search = self.searchterm['series']
        issue_search = self.searchterm['issue']
        volume_search = self.searchterm['volume']

        if series_search.startswith('0-Day Comics Pack'):
            #issue = '21' = WED, #volume='2' = 2nd month
            torrentid = 22247  #2018
            publisher_search = None  #'2'  #2nd month
            comic_id = None
        elif all([
                self.searchterm['torrentid_32p'] is not None,
                self.searchterm['torrentid_32p'] != 'None'
        ]):
            torrentid = self.searchterm['torrentid_32p']
            comic_id = self.searchterm['id']
            publisher_search = self.searchterm['publisher']
        else:
            torrentid = None
            comic_id = self.searchterm['id']

            annualize = False
            if 'annual' in series_search.lower():
                series_search = re.sub(' annual', '',
                                       series_search.lower()).strip()
                annualize = True
            publisher_search = self.searchterm['publisher']
            spl = [x for x in self.publisher_list if x in publisher_search]
            for x in spl:
                publisher_search = re.sub(x, '', publisher_search).strip()
            #logger.info('publisher search set to : %s' % publisher_search)

            # lookup the ComicID in the 32p sqlite3 table to pull the series_id to use.
            if comic_id:
                chk_id = helpers.checkthe_id(comic_id)

            if any([chk_id is None, mylar.CONFIG.DEEP_SEARCH_32P is True]):
                #generate the dynamic name of the series here so we can match it up
                as_d = filechecker.FileChecker()
                as_dinfo = as_d.dynamic_replace(series_search)
                mod_series = re.sub('\|', '',
                                    as_dinfo['mod_seriesname']).strip()
                as_puinfo = as_d.dynamic_replace(publisher_search)
                pub_series = as_puinfo['mod_seriesname']

                logger.fdebug('series_search: %s' % series_search)

                if '/' in series_search:
                    series_search = series_search[:series_search.find('/')]
                if ':' in series_search:
                    series_search = series_search[:series_search.find(':')]
                if ',' in series_search:
                    series_search = series_search[:series_search.find(',')]

                logger.fdebug('config.search_32p: %s' %
                              mylar.CONFIG.SEARCH_32P)
                if mylar.CONFIG.SEARCH_32P is False:
                    url = 'https://walksoftly.itsaninja.party/serieslist.php'
                    params = {
                        'series': re.sub('\|', '', mod_series.lower()).strip()
                    }  #series_search}
                    logger.fdebug('search query: %s' %
                                  re.sub('\|', '', mod_series.lower()).strip())
                    try:
                        t = requests.get(
                            url,
                            params=params,
                            verify=True,
                            headers={
                                'USER-AGENT':
                                mylar.USER_AGENT[:mylar.USER_AGENT.find('/') +
                                                 7] +
                                mylar.USER_AGENT[mylar.USER_AGENT.find('(') +
                                                 1]
                            })
                    except requests.exceptions.RequestException as e:
                        logger.warn(e)
                        return "no results"

                    if t.status_code == 619:
                        logger.warn('[%s] Unable to retrieve data from site.' %
                                    t.status_code)
                        return "no results"
                    elif t.status_code == 999:
                        logger.warn(
                            '[%s] No series title was provided to the search query.'
                            % t.status_code)
                        return "no results"

                    try:
                        results = t.json()
                    except:
                        results = t.text

                    if len(results) == 0:
                        logger.warn('No results found for search on 32P.')
                        return "no results"

#        with cfscrape.create_scraper(delay=15) as s:
#            s.headers = self.headers
#            cj = LWPCookieJar(os.path.join(mylar.CONFIG.SECURE_DIR, ".32p_cookies.dat"))
#            cj.load()
#            s.cookies = cj
        data = []
        pdata = []
        pubmatch = False

        if any([
                series_search.startswith('0-Day Comics Pack'),
                torrentid is not None
        ]):
            data.append({"id": torrentid, "series": series_search})
        else:
            if any([not chk_id, mylar.CONFIG.DEEP_SEARCH_32P is True]):
                if mylar.CONFIG.SEARCH_32P is True:
                    url = 'https://32pag.es/torrents.php'  #?action=serieslist&filter=' + series_search #&filter=F
                    params = {'action': 'serieslist', 'filter': series_search}
                    time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                    t = self.session.get(url,
                                         params=params,
                                         verify=True,
                                         allow_redirects=True)
                    soup = BeautifulSoup(t.content, "html.parser")
                    results = soup.find_all("a", {"class": "object-qtip"},
                                            {"data-type": "torrentgroup"})

                for r in results:
                    if mylar.CONFIG.SEARCH_32P is True:
                        torrentid = r['data-id']
                        torrentname = r.findNext(text=True)
                        torrentname = torrentname.strip()
                    else:
                        torrentid = r['id']
                        torrentname = r['series']

                    as_d = filechecker.FileChecker()
                    as_dinfo = as_d.dynamic_replace(torrentname)
                    seriesresult = re.sub('\|', '',
                                          as_dinfo['mod_seriesname']).strip()
                    logger.fdebug('searchresult: %s --- %s [%s]' %
                                  (seriesresult, mod_series, publisher_search))
                    if seriesresult.lower() == mod_series.lower():
                        logger.fdebug('[MATCH] %s [%s]' %
                                      (torrentname, torrentid))
                        data.append({"id": torrentid, "series": torrentname})
                    elif publisher_search.lower() in seriesresult.lower():
                        logger.fdebug('[MATCH] Publisher match.')
                        tmp_torrentname = re.sub(publisher_search.lower(), '',
                                                 seriesresult.lower()).strip()
                        as_t = filechecker.FileChecker()
                        as_tinfo = as_t.dynamic_replace(tmp_torrentname)
                        if re.sub('\|', '', as_tinfo['mod_seriesname']).strip() == mod_series.lower():
                            logger.fdebug('[MATCH] %s [%s]' %
                                          (torrentname, torrentid))
                            pdata.append({"id": torrentid, "series": torrentname})
                            pubmatch = True

                logger.fdebug('%s series listed for searching that match.' %
                              len(data))
            else:
                logger.fdebug(
                    'Exact series ID already discovered previously. Setting to : %s [%s]'
                    % (chk_id['series'], chk_id['id']))
                pdata.append({"id": chk_id['id'], "series": chk_id['series']})
                pubmatch = True

        if all([len(data) == 0, len(pdata) == 0]):
            return "no results"
        else:
            dataset = []
            if len(data) > 0:
                dataset += data
            if len(pdata) > 0:
                dataset += pdata
            logger.fdebug(
                str(len(dataset)) +
                ' series match the title being searched for on 32P...')

        if all([
                chk_id is None,
                not series_search.startswith('0-Day Comics Pack'),
                self.searchterm['torrentid_32p'] is not None,
                self.searchterm['torrentid_32p'] != 'None'
        ]) and any([len(data) == 1, len(pdata) == 1]):
            #update the 32p_reference so we avoid doing a url lookup next time
            helpers.checkthe_id(comic_id, dataset)
        else:
            if all([
                    not series_search.startswith('0-Day Comics Pack'),
                    self.searchterm['torrentid_32p'] is not None,
                    self.searchterm['torrentid_32p'] != 'None'
            ]):
                pass
            else:
                logger.debug(
                    'Unable to properly verify reference on 32P - will update the 32P reference point once the issue has been successfully matched against.'
                )

        results32p = []
        resultlist = {}

        for x in dataset:
            #for 0-day packs, issue=week#, volume=month, id=0-day year pack (ie.issue=21&volume=2 for feb.21st)
            payload = {
                "action": "groupsearch",
                "id": x['id'],  #searchid,
                "issue": issue_search
            }
            #in order to match up against 0-day stuff, volume has to be none at this point
            #when doing other searches tho, this should be allowed to go through
            #if all([volume_search != 'None', volume_search is not None]):
            #    payload.update({'volume': re.sub('v', '', volume_search).strip()})
            if series_search.startswith('0-Day Comics Pack'):
                payload.update({"volume": volume_search})

            payload = json.dumps(payload)
            payload = json.loads(payload)

            logger.fdebug('payload: %s' % payload)
            url = 'https://32pag.es/ajax.php'
            time.sleep(1)  #just to make sure we don't hammer, 1s pause.
            try:
                d = self.session.get(url,
                                     params=payload,
                                     verify=True,
                                     allow_redirects=True)
            except Exception as e:
                logger.error('%s [%s] Could not GET URL %s' %
                             (self.module, e, url))
                continue

            try:
                searchResults = d.json()
            except Exception as e:
                searchResults = d.text
                logger.debug(
                    '[%s] %s Search Result did not return valid JSON, falling back on text: %s'
                    % (e, self.module, searchResults))
                return False

            if searchResults['status'] == 'success' and searchResults['count'] > 0:
                logger.fdebug('successfully retrieved %s search results' %
                              searchResults['count'])
                for a in searchResults['details']:
                    if series_search.startswith('0-Day Comics Pack'):
                        title = series_search
                    else:
                        title = self.searchterm['series'] + ' v' + a[
                            'volume'] + ' #' + a['issues']
                    results32p.append({
                        'link': a['id'],
                        'title': title,
                        'filesize': a['size'],
                        'issues': a['issues'],
                        'pack': a['pack'],
                        'format': a['format'],
                        'language': a['language'],
                        'seeders': a['seeders'],
                        'leechers': a['leechers'],
                        'scanner': a['scanner'],
                        'chkit': {'id': x['id'], 'series': x['series']},
                        'pubdate': datetime.datetime.fromtimestamp(
                            float(a['upload_time'])).strftime('%a, %d %b %Y %H:%M:%S'),
                        'int_pubdate': float(a['upload_time'])
                    })

            else:
                logger.fdebug('32P did not return any valid search results.')

        if len(results32p) > 0:
            resultlist['entries'] = sorted(results32p,
                                           key=itemgetter('pack', 'title'),
                                           reverse=False)
            logger.debug('%s Resultslist: %s' % (self.module, resultlist))
        else:
            resultlist = 'no results'

        return resultlist
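
For orientation, a minimal driver sketch for the method above. The class name (info32p) and the way searchterm, session and publisher_list get attached are assumptions; only the searchterm keys and the "no results" / {'entries': [...]} return shapes are taken from the code itself.

# --- usage sketch, not part of auth32p.py; class wiring below is assumed ---
searcher = auth32p.info32p()          # hypothetical: the real class also sets up
searcher.searchterm = {               # session/publisher_list internally
    'series': 'Saga',
    'issue': '54',
    'volume': 'v1',
    'publisher': 'Image Comics',
    'id': '56983',                    # ComicID used for the 32P reference lookup
    'torrentid_32p': None,            # None -> fall through to the series search
}
results = searcher.searchit()
if results != 'no results':
    for entry in results['entries']:
        print(entry['title'], entry['seeders'], entry['pubdate'])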
Example #2
def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None, queue=None):
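    # Walk the target directory for cbr/cbz files, parse each filename (and any
    # cbz metadata tags when IMP_METADATA is enabled) into series/issue details,
    # then hand back the collected import candidates along with the failure lists.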

    if cron and not mylar.LIBRARYSCAN:
        return

    if not dir:
        dir = mylar.CONFIG.COMIC_DIR

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir)
        return "Fail"


    logger.info('Scanning comic directory: %s' % dir)

    basedir = dir

    comic_list = []
    failure_list = []
    utter_failure_list = []
    comiccnt = 0
    extensions = ('cbr','cbz')
    cv_location = []
    cbz_retry = 0

    mylar.IMPORT_STATUS = 'Now attempting to parse files for additional information'
    myDB = db.DBConnection()
    #mylar.IMPORT_PARSED_COUNT #used to count what #/totalfiles the filename parser is currently on
    for r, d, f in os.walk(dir):
        for files in f:
            mylar.IMPORT_FILES +=1
            if any(files.lower().endswith('.' + x.lower()) for x in extensions):
                comicpath = os.path.join(r, files)
                if mylar.CONFIG.IMP_PATHS is True:
                    if myDB.select('SELECT * FROM comics JOIN issues WHERE issues.Status="Downloaded" AND ComicLocation=? AND issues.Location=?', [r, files]):
                        logger.info('Skipped known issue path: %s' % comicpath)
                        continue

                comic = files
                if not os.path.exists(comicpath):
                    logger.fdebug(f'''Comic: {comic} doesn't actually exist - assuming it is a symlink to a nonexistent path.''')
                    continue

                comicsize = os.path.getsize(comicpath)
                logger.fdebug('Comic: ' + comic + ' [' + comicpath + '] - ' + str(comicsize) + ' bytes')

                try:
                    t = filechecker.FileChecker(dir=r, file=comic)
                    results = t.listFiles()

                    #logger.info(results)
                    #'type':           re.sub('\.','', filetype).strip(),
                    #'sub':            path_list,
                    #'volume':         volume,
                    #'match_type':     match_type,
                    #'comicfilename':  filename,
                    #'comiclocation':  clocation,
                    #'series_name':    series_name,
                    #'series_volume':  issue_volume,
                    #'series_year':    issue_year,
                    #'justthedigits':  issue_number,
                    #'annualcomicid':  annual_comicid,
                    #'scangroup':      scangroup}


                    if results:
                        resultline = '[PARSE-' + results['parse_status'].upper() + ']'
                        resultline += '[SERIES: ' + results['series_name'] + ']'
                        if results['series_volume'] is not None:
                            resultline += '[VOLUME: ' + results['series_volume'] + ']'
                        if results['issue_year'] is not None:
                            resultline += '[ISSUE YEAR: ' + str(results['issue_year']) + ']'
                        if results['issue_number'] is not None:
                            resultline += '[ISSUE #: ' + results['issue_number'] + ']'
                        logger.fdebug(resultline)
                    else:
                        logger.fdebug('[PARSED] FAILURE.')
                        continue

                    # We need the unicode path to use for logging, inserting into database
                    unicode_comic_path = comicpath

                    if results['parse_status'] == 'success':
                        comic_list.append({'ComicFilename':           comic,
                                           'ComicLocation':           comicpath,
                                           'ComicSize':               comicsize,
                                           'Unicode_ComicLocation':   unicode_comic_path,
                                           'parsedinfo':              {'series_name':    results['series_name'],
                                                                       'series_volume':  results['series_volume'],
                                                                       'issue_year':     results['issue_year'],
                                                                       'issue_number':   results['issue_number']}
                                           })
                        comiccnt +=1
                        mylar.IMPORT_PARSED_COUNT +=1
                    else:
                        failure_list.append({'ComicFilename':           comic,
                                             'ComicLocation':           comicpath,
                                             'ComicSize':               comicsize,
                                             'Unicode_ComicLocation':   unicode_comic_path,
                                             'parsedinfo':              {'series_name':    results['series_name'],
                                                                         'series_volume':  results['series_volume'],
                                                                         'issue_year':     results['issue_year'],
                                                                         'issue_number':   results['issue_number']}
                                           })
                        mylar.IMPORT_FAILURE_COUNT +=1
                        if comic.endswith('.cbz'):
                            cbz_retry +=1

                except Exception as e:
                    logger.info('bang')
                    utter_failure_list.append({'ComicFilename':           comic,
                                               'ComicLocation':           comicpath,
                                               'ComicSize':               comicsize,
                                               'Unicode_ComicLocation':   unicode_comic_path,
                                               'parsedinfo':              None,
                                               'error':                   e
                                             })
                    logger.info('[' + str(e) + '] FAILURE encountered. Logging the error for ' + comic + ' and continuing...')
                    mylar.IMPORT_FAILURE_COUNT +=1
                    if comic.endswith('.cbz'):
                        cbz_retry +=1
                    continue

            if 'cvinfo' in files:
                cv_location.append(r)
                logger.fdebug('CVINFO found: ' + os.path.join(r))

    mylar.IMPORT_TOTALFILES = comiccnt
    logger.info('I have successfully discovered & parsed a total of ' + str(comiccnt) + ' files....analyzing now')
    logger.info('I have not been able to determine what ' + str(len(failure_list)) + ' files are')
    logger.info('However, ' + str(cbz_retry) + ' out of the ' + str(len(failure_list)) + ' files are in a cbz format, which may contain metadata.')
    logger.info('[ERRORS] I have encountered ' + str(len(utter_failure_list)) + ' file-scanning errors during the scan, but have recorded the necessary information.')
    mylar.IMPORT_STATUS = 'Successfully parsed ' + str(comiccnt) + ' files'
    #return queue.put(valreturn)

    if len(utter_failure_list) > 0:
        logger.fdebug('Failure list: %s' % utter_failure_list)

    #let's load in the watchlist to see if we have any matches.
    logger.info("loading in the watchlist to see if a series is being watched already...")
    watchlist = myDB.select("SELECT * from comics")
    ComicName = []
    DisplayName = []
    ComicYear = []
    ComicPublisher = []
    ComicTotal = []
    ComicID = []
    ComicLocation = []

    AltName = []
    watchcnt = 0

    watch_kchoice = []
    watchchoice = {}
    import_by_comicids = []
    import_comicids = {}

    for watch in watchlist:
        #use the comicname_filesafe to start
        watchdisplaycomic = watch['ComicName']
        # let's clean up the name, just in case for comparison purposes...
        watchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', '', watch['ComicName_Filesafe'])
        #watchcomic = re.sub('\s+', ' ', str(watchcomic)).strip()

        if ' the ' in watchcomic.lower():
            #drop the 'the' from the watchcomic title for proper comparisons.
            watchcomic = re.sub(r'\s+the\s+', ' ', watchcomic, flags=re.I).strip()

        alt_chk = "no" # alt-checker flag (default to no)

        # account for alternate names as well
        if watch['AlternateSearch'] is not None and watch['AlternateSearch'] != 'None':
            altcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', '', watch['AlternateSearch'])
            #altcomic = re.sub('\s+', ' ', str(altcomic)).strip()
            AltName.append(altcomic)
            alt_chk = "yes"  # alt-checker flag

        ComicName.append(watchcomic)
        DisplayName.append(watchdisplaycomic)
        ComicYear.append(watch['ComicYear'])
        ComicPublisher.append(watch['ComicPublisher'])
        ComicTotal.append(watch['Total'])
        ComicID.append(watch['ComicID'])
        ComicLocation.append(watch['ComicLocation'])
        watchcnt+=1

    logger.info("Successfully loaded " + str(watchcnt) + " series from your watchlist.")

    ripperlist=['digital-',
                'empire',
                'dcp']

    watchfound = 0

    datelist = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
#    datemonth = {'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,'eleven':$
#    #search for number as text, and change to numeric
#    for numbs in basnumbs:
#        #logger.fdebug("numbs:" + str(numbs))
#        if numbs in ComicName.lower():
#            numconv = basnumbs[numbs]
#            #logger.fdebug("numconv: " + str(numconv))

    issueid_list = []
    cvscanned_loc = None
    cvinfo_CID = None
    cnt = 0
    mylar.IMPORT_STATUS = '[0%] Now parsing individual filenames for metadata if available'

    for i in comic_list:
        mylar.IMPORT_STATUS = '[' + str(cnt) + '/' + str(comiccnt) + '] Now parsing individual filenames for metadata if available'
        logger.fdebug('Analyzing : ' + i['ComicFilename'])
        comfilename = i['ComicFilename']
        comlocation = i['ComicLocation']
        issueinfo = None
        #probably need to zero these issue-related metadata to None so we can pick the best option
        issuevolume = None

        #Make sure cvinfo is checked for FIRST (so that CID can be attached to all files properly thereafter as they're scanned in)
        if os.path.dirname(comlocation) in cv_location and os.path.dirname(comlocation) != cvscanned_loc:

        #if comfilename == 'cvinfo':
            logger.info('comfilename: ' + comfilename)
            logger.info('cv_location: ' + str(cv_location))
            logger.info('comlocation: ' + os.path.dirname(comlocation))
            #if cvscanned_loc != comlocation:
            try:
                with open(os.path.join(os.path.dirname(comlocation), 'cvinfo')) as f:
                    urllink = f.readline()

                if urllink:
                    cid = urllink.strip()
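                    # cvinfo files hold a ComicVine link/id; pull the digits after
                    # a 49- or 4050- prefix and use them as the ComicID for every
                    # file scanned in from this directory.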
                    pattern = re.compile(r"^.*?\b(49|4050)-(?P<num>\d{2,})\b.*$", re.I)
                    match = pattern.match(cid)
                    if match:
                        cvinfo_CID = match.group("num")
                        logger.info('CVINFO file located within directory. Attaching everything in directory that is valid to ComicID: ' + str(cvinfo_CID))
                        #store the location of the cvinfo so it's applied to the correct directory (since we're usually scanning multiple directories)
                        cvscanned_loc = os.path.dirname(comlocation)
                else:
                    logger.error("Could not read cvinfo file properly (or it does not contain any data)")
            except (OSError, IOError):
                logger.error("Could not read cvinfo file properly (or it does not contain any data)")
        #else:
        #    don't scan in it again if it's already been done initially
        #    continue

        if mylar.CONFIG.IMP_METADATA:
            #if read tags is enabled during import, check here.
            if i['ComicLocation'].endswith('.cbz'):
                logger.fdebug('[IMPORT-CBZ] Metatagging checking enabled.')
                logger.info('[IMPORT-CBZ] Attempting to read tags present in filename: ' + i['ComicLocation'])
                try:
                    issueinfo = helpers.IssueDetails(i['ComicLocation'], justinfo=True)
                except:
                    logger.fdebug('[IMPORT-CBZ] Unable to retrieve metadata - possibly doesn\'t exist. Ignoring meta-retrieval')
                    pass
                else:
                    logger.info('issueinfo: ' + str(issueinfo))

                    if issueinfo is None or issueinfo['metadata'] is None:
                        logger.fdebug('[IMPORT-CBZ] No valid metadata contained within filename. Dropping down to parsing the filename itself.')
                        pass
                    else:
                        issuenotes_id = None
                        logger.info('[IMPORT-CBZ] Successfully retrieved some tags. Lets see what I can figure out.')
                        comicname = issueinfo['metadata']['series']
                        if comicname is not None:
                            logger.fdebug('[IMPORT-CBZ] Series Name: ' + comicname)
                            as_d = filechecker.FileChecker()
                            as_dyninfo = as_d.dynamic_replace(comicname)
                            logger.fdebug('Dynamic-ComicName: ' + as_dyninfo['mod_seriesname'])
                        else:
                            logger.fdebug('[IMPORT-CBZ] No series name found within metadata. This is bunk - dropping down to file parsing for usable information.')
                            issueinfo = None
                            issue_number = None

                        if issueinfo is not None:
                            try:
                                issueyear = issueinfo['metadata']['year']
                            except:
                                issueyear = None

                            #if the issue number is a non-numeric unicode string, this will screw up along with impID
                            issue_number = issueinfo['metadata']['issue_number']
                            if issue_number is not None:
                                logger.fdebug('[IMPORT-CBZ] Issue Number: ' + issue_number)
                            else:
                                issue_number = i['parsedinfo']['issue_number']

                            if 'annual' in comicname.lower() or 'annual' in comfilename.lower():
                                if issue_number is None or issue_number == 'None':
                                    logger.info('Annual detected with no issue number present within metadata. Assuming year as issue.')
                                    try:
                                        issue_number = 'Annual ' + str(issueyear)
                                    except:
                                        issue_number = 'Annual ' + str(i['parsedinfo']['issue_year'])
                                else:
                                    logger.info('Annual detected with issue number present within metadata.')
                                    if 'annual' not in issue_number.lower():
                                        issue_number = 'Annual ' + issue_number
                                mod_series = re.sub('annual', '', comicname, flags=re.I).strip()
                            else:
                                mod_series = comicname

                            logger.fdebug('issue number SHOULD Be: ' + issue_number)

                            try:
                                issuetitle = issueinfo['metadata']['title']
                            except:
                                issuetitle = None
                            try:
                                issueyear = issueinfo['metadata']['year']
                            except:
                                issueyear = None
                            try:
                                issuevolume = str(issueinfo['metadata']['volume'])
                                if all([issuevolume is not None, issuevolume != 'None', not issuevolume.lower().startswith('v')]):
                                    issuevolume = 'v' + str(issuevolume)
                                if any([issuevolume is None, issuevolume == 'None']):
                                    logger.info('[EXCEPT] issue volume is NONE')
                                    issuevolume = None
                                else:
                                    logger.fdebug('[TRY]issue volume is: ' + str(issuevolume))
                            except:
                                logger.fdebug('[EXCEPT]issue volume is: ' + str(issuevolume))
                                issuevolume = None

                            if any([comicname is None, comicname == 'None', issue_number is None, issue_number == 'None']):
                                logger.fdebug('[IMPORT-CBZ] Improperly tagged file as the metatagging is invalid. Ignoring meta and just parsing the filename.')
                                issueinfo = None
                                pass
                            else:
                                # if used by ComicTagger, Notes field will have the IssueID.
                                issuenotes = issueinfo['metadata']['notes']
                                logger.fdebug('[IMPORT-CBZ] Notes: ' + issuenotes)
                                if issuenotes is not None and issuenotes != 'None':
                                    if 'Issue ID' in issuenotes:
                                        st_find = issuenotes.find('Issue ID')
                                        tmp_issuenotes_id = re.sub("[^0-9]", " ", issuenotes[st_find:]).strip()
                                        if tmp_issuenotes_id.isdigit():
                                            issuenotes_id = tmp_issuenotes_id
                                            logger.fdebug('[IMPORT-CBZ] Successfully retrieved CV IssueID for ' + comicname + ' #' + issue_number + ' [' + str(issuenotes_id) + ']')
                                    elif 'CVDB' in issuenotes:
                                        st_find = issuenotes.find('CVDB')
                                        tmp_issuenotes_id = re.sub("[^0-9]", " ", issuenotes[st_find:]).strip()
                                        if tmp_issuenotes_id.isdigit():
                                            issuenotes_id = tmp_issuenotes_id
                                            logger.fdebug('[IMPORT-CBZ] Successfully retrieved CV IssueID for ' + comicname + ' #' + issue_number + ' [' + str(issuenotes_id) + ']')
                                    else:
                                        logger.fdebug('[IMPORT-CBZ] Unable to retrieve IssueID from meta-tagging. If there is other metadata present I will use that.')

                                logger.fdebug('[IMPORT-CBZ] Adding ' + comicname + ' to the import-queue!')
                                #impid = comicname + '-' + str(issueyear) + '-' + str(issue_number) #com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
                                impid = str(random.randint(1000000,99999999))
                                logger.fdebug('[IMPORT-CBZ] impid: ' + str(impid))
                                #make sure we only add in those issueid's which don't already have a comicid attached via the cvinfo scan above (this is for reverse-lookup of issueids)
                                issuepopulated = False
                                if cvinfo_CID is None:
                                    if issuenotes_id is None:
                                        logger.info('[IMPORT-CBZ] No ComicID detected where it should be. Bypassing this metadata entry and going the parsing route [' + comfilename + ']')
                                    else:
                                        #we need to store the impid here as well so we can look it up.
                                        issueid_list.append({'issueid':    issuenotes_id,
                                                             'importinfo': {'impid':       impid,
                                                                            'comicid':     None,
                                                                            'comicname':   comicname,
                                                                            'dynamicname': as_dyninfo['mod_seriesname'],
                                                                            'comicyear':   issueyear,
                                                                            'issuenumber': issue_number,
                                                                            'volume':      issuevolume,
                                                                            'comfilename': comfilename,
                                                                            'comlocation': comlocation}
                                                           })
                                        mylar.IMPORT_CID_COUNT +=1
                                        issuepopulated = True

                                if issuepopulated == False:
                                    if cvscanned_loc == os.path.dirname(comlocation):
                                        cv_cid = cvinfo_CID
                                        logger.fdebug('[IMPORT-CBZ] CVINFO_COMICID attached : ' + str(cv_cid))
                                    else:
                                        cv_cid = None
                                    import_by_comicids.append({
                                        "impid": impid,
                                        "comicid": cv_cid,
                                        "watchmatch": None,
                                        "displayname": mod_series,
                                        "comicname": comicname,
                                        "dynamicname": as_dyninfo['mod_seriesname'],
                                        "comicyear": issueyear,
                                        "issuenumber": issue_number,
                                        "volume": issuevolume,
                                        "issueid": issuenotes_id,
                                        "comfilename": comfilename,
                                        "comlocation": comlocation
                                                       })

                                    mylar.IMPORT_CID_COUNT +=1
                        else:
                            pass
                            #logger.fdebug(i['ComicFilename'] + ' is not in a metatagged format (cbz). Bypassing reading of the metatags')

        if issueinfo is None:
            if i['parsedinfo']['issue_number'] is None:
                if 'annual' in i['parsedinfo']['series_name'].lower():
                    logger.fdebug('Annual detected with no issue number present. Assuming year as issue.')##1 issue')
                    if i['parsedinfo']['issue_year'] is not None:
                        issuenumber = 'Annual ' + str(i['parsedinfo']['issue_year'])
                    else:
                        issuenumber = 'Annual 1'
            else:
                issuenumber = i['parsedinfo']['issue_number']

            if 'annual' in i['parsedinfo']['series_name'].lower():
                mod_series = re.sub('annual', '', i['parsedinfo']['series_name'], flags=re.I).strip()
                logger.fdebug('Annual detected with no issue number present. Assuming year as issue.')##1 issue')
                if i['parsedinfo']['issue_number'] is not None:
                    issuenumber = 'Annual ' + str(i['parsedinfo']['issue_number'])
                else:
                    if i['parsedinfo']['issue_year'] is not None:
                        issuenumber = 'Annual ' + str(i['parsedinfo']['issue_year'])
                    else:
                        issuenumber = 'Annual 1'
            else:
                mod_series = i['parsedinfo']['series_name']
                issuenumber = i['parsedinfo']['issue_number']


            logger.fdebug('[' + mod_series + '] Adding to the import-queue!')
            isd = filechecker.FileChecker()
            is_dyninfo = isd.dynamic_replace(mod_series) #helpers.conversion(mod_series))
            logger.fdebug('Dynamic-ComicName: ' + is_dyninfo['mod_seriesname'])

            #impid = dispname + '-' + str(result_comyear) + '-' + str(comiss) #com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
            impid = str(random.randint(1000000,99999999))
            logger.fdebug("impid: " + str(impid))
            if cvscanned_loc == os.path.dirname(comlocation):
                cv_cid = cvinfo_CID
                logger.fdebug('CVINFO_COMICID attached : ' + str(cv_cid))
            else:
                cv_cid = None

            if issuevolume is None:
                logger.fdebug('issue volume is : ' + str(issuevolume))
                if i['parsedinfo']['series_volume'] is None:
                    issuevolume = None
                else:
                    if str(i['parsedinfo']['series_volume'].lower()).startswith('v'):
                        issuevolume = i['parsedinfo']['series_volume']
                    else:
                        issuevolume = 'v' + str(i['parsedinfo']['series_volume'])
            else:
                logger.fdebug('issue volume not none : ' + str(issuevolume))
                if issuevolume.lower().startswith('v'):
                    issuevolume = issuevolume
                else:
                    issuevolume = 'v' + str(issuevolume)

            logger.fdebug('IssueVolume is : ' + str(issuevolume))

            import_by_comicids.append({
                "impid": impid,
                "comicid": cv_cid,
                "issueid": None,
                "watchmatch": None, #watchmatch (should be true/false if it already exists on watchlist)
                "displayname": mod_series,
                "comicname": i['parsedinfo']['series_name'],
                "dynamicname": is_dyninfo['mod_seriesname'].lower(),
                "comicyear": i['parsedinfo']['issue_year'],
                "issuenumber": issuenumber, #issuenumber,
                "volume": issuevolume,
                "comfilename": comfilename,
                "comlocation": comlocation #helpers.conversion(comlocation)
                                      })
        cnt+=1
    #logger.fdebug('import_by_ids: ' + str(import_by_comicids))

    #reverse lookup all of the gathered IssueID's in order to get the related ComicID
    reverse_issueids = []
    for x in issueid_list:
        reverse_issueids.append(x['issueid'])

    vals = []
    if len(reverse_issueids) > 0:
        mylar.IMPORT_STATUS = 'Now Reverse looking up ' + str(len(reverse_issueids)) + ' IssueIDs to get the ComicIDs'
        vals = mylar.cv.getComic(None, 'import', comicidlist=reverse_issueids)
        #logger.fdebug('vals returned:' + str(vals))

    if len(watch_kchoice) > 0:
        watchchoice['watchlist'] = watch_kchoice
        #logger.fdebug("watchchoice: " + str(watchchoice))

        logger.info("I have found " + str(watchfound) + " out of " + str(comiccnt) + " comics for series that are being watched.")
        wat = 0
        comicids = []

        if watchfound > 0:
            if mylar.CONFIG.IMP_MOVE:
                logger.info('You checked off Move Files...so that\'s what I am going to do') 
                #check to see if Move Files is enabled.
                #if not being moved, set the archive bit.
                logger.fdebug('Moving files into appropriate directory')
                while (wat < watchfound): 
                    watch_the_list = watchchoice['watchlist'][wat]
                    watch_comlocation = watch_the_list['ComicLocation']
                    watch_comicid = watch_the_list['ComicID']
                    watch_comicname = watch_the_list['ComicName']
                    watch_comicyear = watch_the_list['ComicYear']
                    watch_comiciss = watch_the_list['ComicIssue']
                    logger.fdebug('ComicLocation: ' + watch_comlocation)
                    orig_comlocation = watch_the_list['OriginalLocation']
                    orig_filename = watch_the_list['OriginalFilename'] 
                    logger.fdebug('Orig. Location: ' + orig_comlocation)
                    logger.fdebug('Orig. Filename: ' + orig_filename)
                    #before moving check to see if Rename to Mylar structure is enabled.
                    if mylar.CONFIG.IMP_RENAME:
                        logger.fdebug('Renaming files according to configuration details : ' + str(mylar.CONFIG.FILE_FORMAT))
                        renameit = helpers.rename_param(watch_comicid, watch_comicname, watch_comicyear, watch_comiciss)
                        nfilename = renameit['nfilename']

                        dst_path = os.path.join(watch_comlocation, nfilename)
                        if str(watch_comicid) not in comicids:
                            comicids.append(watch_comicid)
                    else:
                        logger.fdebug('Renaming files not enabled, keeping original filename(s)')
                        dst_path = os.path.join(watch_comlocation, orig_filename)

                    #os.rename(os.path.join(self.nzb_folder, str(ofilename)), os.path.join(self.nzb_folder,str(nfilename + ext)))
                    #src = os.path.join(, str(nfilename + ext))
                    logger.fdebug('I am going to move ' + orig_comlocation + ' to ' + dst_path)
                    try:
                        shutil.move(orig_comlocation, dst_path)
                    except (OSError, IOError):
                        logger.info("Failed to move directory - check directories and manually re-run.")
                    wat+=1
            else:
                # if move files isn't enabled, let's set all found comics to Archive status :)
                while (wat < watchfound):
                    watch_the_list = watchchoice['watchlist'][wat]
                    watch_comicid = watch_the_list['ComicID']
                    watch_issue = watch_the_list['ComicIssue']
                    logger.fdebug('ComicID: ' + str(watch_comicid))
                    logger.fdebug('Issue#: ' + str(watch_issue))
                    issuechk = myDB.selectone("SELECT * from issues where ComicID=? AND INT_IssueNumber=?", [watch_comicid, watch_issue]).fetchone()
                    if issuechk is None:
                        logger.fdebug('No matching issues for this comic#')
                    else:
                        logger.fdebug('...Existing status: ' + str(issuechk['Status']))
                        control = {"IssueID":   issuechk['IssueID']}
                        values = {"Status":   "Archived"}
                        logger.fdebug('...changing status of ' + str(issuechk['Issue_Number']) + ' to Archived ')
                        myDB.upsert("issues", values, control)
                        if str(watch_comicid) not in comicids:
                            comicids.append(watch_comicid)
                    wat+=1
            if comicids is None: pass
            else:
                c_upd = len(comicids)
                c = 0
                while (c < c_upd):
                    logger.fdebug('Rescanning.. ' + str(c))
                    updater.forceRescan(comicids[c])
                    c+=1
        if not len(import_by_comicids):
            return "Completed"

    if len(import_by_comicids) > 0 or len(vals) > 0:
        #import_comicids['comic_info'] = import_by_comicids
        #if vals:
        #    import_comicids['issueid_info'] = vals
        #else:
        #    import_comicids['issueid_info'] = None
        if vals:
             cvimport_comicids = vals
             import_cv_ids = len(vals)
        else:
             cvimport_comicids = None
             import_cv_ids = 0
    else:
        import_cv_ids = 0
        cvimport_comicids = None
                    
    return {'import_by_comicids':  import_by_comicids, 
            'import_count':        len(import_by_comicids),
            'CV_import_comicids':  cvimport_comicids,
            'import_cv_ids':       import_cv_ids,
            'issueid_list':        issueid_list,
            'failure_list':        failure_list,
            'utter_failure_list':  utter_failure_list}
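
A rough sketch of calling libraryScan() directly; it assumes mylar's configuration and database globals have already been initialised, which the function relies on throughout.

# --- usage sketch; assumes mylar's config/DB globals are already initialised ---
scan = libraryScan(dir='/path/to/comics')
if isinstance(scan, dict):
    print('%s files queued for import' % scan['import_count'])
    print('%s filenames could not be parsed' % len(scan['failure_list']))
    for item in scan['import_by_comicids']:
        print(item['comicname'], item['issuenumber'], item['comfilename'])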
Example #3
    def update_db(self):
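        # Driven by mylar.MAINTENANCE_UPDATE; the branch below handles the
        # 'rss update' mode, re-parsing every rssdb Title into ComicName /
        # Issue_Number in batches and deleting unusable rows at the end.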

        # mylar.MAINTENANCE_UPDATE will indicate what's being updated in the db
        if mylar.MAINTENANCE_UPDATE:
            self.db_version_check(display=False)

            # backup mylar.db here
            self.backup_files(dbs=True)

            for dmode in mylar.MAINTENANCE_UPDATE:
                if dmode['mode'] == 'rss update':
                    logger.info(
                        '[MAINTENANCE-MODE][DB-CONVERSION] Updating dB due to RSS table conversion'
                    )
                    if dmode['resume'] > 0:
                        logger.info(
                            '[MAINTENANCE-MODE][DB-CONVERSION][DB-RECOVERY] Attempting to resume conversion from previous run (starting at record: %s)'
                            % dmode['resume'])

                #force set logging to warning level only so the progress indicator can be displayed in console
                prev_log_level = mylar.LOG_LEVEL
                self.toggle_logging(level=0)

                if dmode['mode'] == 'rss update':
                    self.sql_attachmylar()

                    row_cnt = self.dbmylar.execute(
                        "SELECT COUNT(rowid) as count FROM rssdb")
                    rowcnt = row_cnt.fetchone()[0]
                    mylar.MAINTENANCE_DB_TOTAL = rowcnt

                    if dmode['resume'] > 0:
                        xt = self.dbmylar.execute(
                            "SELECT rowid, Title FROM rssdb WHERE rowid >= ? ORDER BY rowid ASC",
                            [dmode['resume']])
                    else:
                        xt = self.dbmylar.execute(
                            "SELECT rowid, Title FROM rssdb ORDER BY rowid ASC"
                        )
                    xlist = xt.fetchall()

                    mylar.MAINTENANCE_DB_COUNT = 0

                    if xlist is None:
                        print('Nothing in the rssdb to update. Ignoring.')
                        return True

                    try:
                        if dmode['resume'] > 0 and xlist is not None:
                            logger.info('resume set at : %s' %
                                        (xlist[dmode['resume']], ))
                            #xlist[dmode['resume']:]
                            mylar.MAINTENANCE_DB_COUNT = dmode['resume']
                    except Exception as e:
                        print(
                            '[ERROR:%s] - table resume location is not accurate. Starting from the beginning, but this should go quickly.'
                            % e)
                        xt = self.dbmylar.execute(
                            "SELECT rowid, Title FROM rssdb ORDER BY rowid ASC"
                        )
                        xlist = xt.fetchall()
                        dmode['resume'] = 0

                    if xlist:
                        resultlist = []
                        delete_rows = []
                        for x in self.progressBar(xlist,
                                                  prefix='Progress',
                                                  suffix='Complete',
                                                  length=50,
                                                  resume=dmode['resume']):

                            #signal capture here since we can't do it as per normal
                            if any([
                                    mylar.SIGNAL == 'shutdown',
                                    mylar.SIGNAL == 'restart'
                            ]):
                                try:
                                    self.dbmylar.executemany(
                                        "UPDATE rssdb SET Issue_Number=?, ComicName=? WHERE rowid=?",
                                        (resultlist))
                                    self.sql_closemylar()
                                except Exception as e:
                                    print('error: %s' % e)
                                else:
                                    send_it = {
                                        'mode': dmode['mode'],
                                        'version': self.db_version,
                                        'status': 'incomplete',
                                        'total': mylar.MAINTENANCE_DB_TOTAL,
                                        'current': mylar.MAINTENANCE_DB_COUNT,
                                        'last_run': helpers.utctimestamp()
                                    }
                                    self.db_update_status(send_it)

                                #toggle back the logging level to what it was originally.
                                self.toggle_logging(level=prev_log_level)

                                if mylar.SIGNAL == 'shutdown':
                                    logger.info(
                                        '[MAINTENANCE-MODE][DB-CONVERSION][SHUTDOWN]Shutting Down...'
                                    )
                                    return False
                                else:
                                    logger.info(
                                        '[MAINTENANCE-MODE][DB-CONVERSION][RESTART]Restarting...'
                                    )
                                    return True

                            mylar.MAINTENANCE_DB_COUNT += 1
                            if not x[1]:
                                logger.fdebug(
                                    '[MAINTENANCE-MODE][DB-CONVERSION][JUNK-NAME] %s'
                                    % x[1])
                                delete_rows.append((x[0], ))
                                continue
                            try:
                                if any(ext in x[1] for ext in
                                       ['yenc', '.pdf', '.rar', '.mp4', '.avi']):
                                    logger.fdebug(
                                        '[MAINTENANCE-MODE][DB-CONVERSION][JUNK-NAME] %s'
                                        % x[1])
                                    delete_rows.append((x[0], ))
                                    continue
                                else:
                                    flc = filechecker.FileChecker(file=x[1])
                                    filelist = flc.listFiles()
                            except Exception as e:
                                logger.fdebug(
                                    '[MAINTENANCE-MODE][DB-CONVERSION][JUNK-NAME] %s'
                                    % x[1])
                                delete_rows.append((x[0], ))
                                continue
                            else:
                                if all([
                                        filelist['series_name'] != '',
                                        filelist['series_name'] is not None
                                ]) and filelist['issue_number'] != '-':
                                    issuenumber = filelist['issue_number']
                                    seriesname = re.sub(
                                        r'[\u2014|\u2013|\u2e3a|\u2e3b]', '-',
                                        filelist['series_name']).strip()
                                    if seriesname.endswith('-') and '#' in seriesname[-6:]:
                                        ck1 = seriesname.rfind('#')
                                        ck2 = seriesname.rfind('-')
                                        if seriesname[ck1 + 1:ck2 - 1].strip().isdigit():
                                            issuenumber = '%s %s' % (seriesname[ck1:].strip(), issuenumber)
                                            seriesname = seriesname[:ck1 - 1].strip()
                                            issuenumber = issuenumber.strip()
                                    resultlist.append(
                                        (issuenumber, seriesname.strip(), x[0]))

                                if len(resultlist) > 500:
                                    # write it out every 500 records.
                                    try:
                                        logger.fdebug('resultlist: %s' %
                                                      (resultlist, ))
                                        self.dbmylar.executemany(
                                            "UPDATE rssdb SET Issue_Number=?, ComicName=? WHERE rowid=?",
                                            (resultlist))
                                        self.sql_closemylar()
                                        # update the db status so a resume doesn't restart from the beginning or the wrong point (i.e. the last 500-record write).
                                        send_it = {
                                            'mode': dmode['mode'],
                                            'version': self.db_version,
                                            'status': 'incomplete',
                                            'total':
                                            mylar.MAINTENANCE_DB_TOTAL,
                                            'current':
                                            mylar.MAINTENANCE_DB_COUNT,
                                            'last_run': helpers.utctimestamp()
                                        }
                                        self.db_update_status(send_it)

                                    except Exception as e:
                                        print('error: %s' % e)
                                        return False
                                    else:
                                        logger.fdebug('reattaching')
                                        self.sql_attachmylar()
                                        resultlist = []

                        try:
                            if len(resultlist) > 0:
                                self.dbmylar.executemany(
                                    "UPDATE rssdb SET Issue_Number=?, ComicName=? WHERE rowid=?",
                                    (resultlist))
                                self.sql_closemylar()
                        except Exception as e:
                            print('error: %s' % e)
                            return False
                        else:
                            try:
                                send_it = {
                                    'mode': dmode['mode'],
                                    'version': 1,
                                    'status': 'complete',
                                    'total': mylar.MAINTENANCE_DB_TOTAL,
                                    'current': mylar.MAINTENANCE_DB_COUNT,
                                    'last_run': helpers.utctimestamp()
                                }
                            except Exception as e:
                                print('error_sendit: %s' % e)
                            else:
                                self.db_update_status(send_it)

                            if delete_rows:
                                # only do this on completion, or else the rowids will be different and it will mess up a rerun
                                try:
                                    self.sql_attachmylar()
                                    print(
                                        '[MAINTENANCE-MODE][DB-CONVERSION][CLEANUP] Removing %s invalid RSS entries from table...'
                                        % len(delete_rows))
                                    self.dbmylar.executemany(
                                        "DELETE FROM rssdb WHERE rowid=?",
                                        (delete_rows))
                                    self.sql_closemylar()
                                except Exception as e:
                                    print('error: %s' % e)
                                else:
                                    self.sql_attachmylar()
                                    print(
                                        '[MAINTENANCE-MODE][DB-CONVERSION][CLEANUP] Cleaning up...'
                                    )
                                    self.dbmylar.execute("VACUUM")
                            else:
                                print(
                                    '[MAINTENANCE-MODE][DB-CONVERSION][CLEANUP] Cleaning up...'
                                )
                                self.sql_attachmylar()
                                self.dbmylar.execute("VACUUM")

                            self.sql_closemylar()

                            #toggle back the logging level to what it was originally.
                            self.toggle_logging(level=prev_log_level)
                            logger.info(
                                '[MAINTENANCE-MODE][DB-CONVERSION] Updating dB complete! (%s / %s)'
                                % (mylar.MAINTENANCE_DB_COUNT,
                                   mylar.MAINTENANCE_DB_TOTAL))
                            mylar.MAINTENANCE_UPDATE[:] = [
                                x for x in mylar.MAINTENANCE_UPDATE
                                if not ('rss update' == x.get('mode'))
                            ]

        else:
            mylar.MAINTENANCE_DB_COUNT = 0
            logger.info(
                '[MAINTENANCE-MODE] Update DB set to start - but nothing was provided as to what. Returning to non-maintenance mode'
            )
        return True
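The conversion loop above batches its UPDATE statements: rows are collected into resultlist and flushed with executemany() every 500 records, with the progress counters persisted so an interrupted run can resume near where it stopped. A minimal standalone sketch of that pattern, assuming a plain sqlite3 database that already has an rssdb table and a hypothetical record_progress() callback standing in for db_update_status():

import sqlite3

BATCH_SIZE = 500  # flush pending updates every 500 rows, as in the loop above


def batched_rss_update(db_path, parsed_rows, record_progress):
    """parsed_rows yields (issue_number, series_name, rowid) tuples;
    record_progress is a hypothetical callback that persists resume state."""
    conn = sqlite3.connect(db_path)
    pending = []
    done = 0
    for issue_number, series_name, rowid in parsed_rows:
        pending.append((issue_number, series_name, rowid))
        if len(pending) >= BATCH_SIZE:
            conn.executemany(
                "UPDATE rssdb SET Issue_Number=?, ComicName=? WHERE rowid=?",
                pending)
            conn.commit()
            done += len(pending)
            pending = []
            # record how far we got so a rerun can resume from here
            record_progress(status='incomplete', current=done)
    if pending:  # final partial batch
        conn.executemany(
            "UPDATE rssdb SET Issue_Number=?, ComicName=? WHERE rowid=?",
            pending)
        conn.commit()
        done += len(pending)
    record_progress(status='complete', current=done)
    conn.close()

Flushing in fixed-size batches keeps memory bounded and limits how much work is lost if a write fails part-way through.
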
Example #4
0
File: filers.py Project: qubidt/mylar3
    def walk_the_walk(self):
        folder_location = mylar.CONFIG.FOLDER_CACHE_LOCATION
        if folder_location is None:
            return {'status': False}

        logger.info('checking locally...')
        filelist = None

        logger.info('check_folder_cache: %s' % (mylar.CHECK_FOLDER_CACHE))
        if mylar.CHECK_FOLDER_CACHE is not None:
            rd = mylar.CHECK_FOLDER_CACHE #datetime.datetime.utcfromtimestamp(mylar.CHECK_FOLDER_CACHE)
            rd_mins = rd + datetime.timedelta(seconds = 600)  #10 minute cache retention
            rd_now = datetime.datetime.utcfromtimestamp(time.time())
            if calendar.timegm(rd_mins.utctimetuple()) > calendar.timegm(rd_now.utctimetuple()):
                # if < 10 minutes since last check, use cached listing
                logger.info('using cached folder listing since < 10 minutes since last file check.')
                filelist = mylar.FOLDER_CACHE

        if filelist is None:
            logger.info('generating new directory listing for folder_cache')
            flc = filechecker.FileChecker(folder_location, justparse=True, pp_mode=True)
            mylar.FOLDER_CACHE = flc.listFiles()
            mylar.CHECK_FOLDER_CACHE = datetime.datetime.utcfromtimestamp(helpers.utctimestamp())

        local_status = False
        filepath = None
        filename = None
        for fl in mylar.FOLDER_CACHE['comiclist']:
            logger.info('fl: %s' % (fl,))
            if self.arc is not None:
                comicname = self.arc['ComicName']
                corrected_type = None
                alternatesearch = None
                booktype = self.arc['Type']
                publisher = self.arc['Publisher']
                issuenumber = self.arc['IssueNumber']
                issuedate = self.arc['IssueDate']
                issuename = self.arc['IssueName']
                issuestatus = self.arc['Status']
            elif self.comic is not None:
                comicname = self.comic['ComicName']
                booktype = self.comic['Type']
                corrected_type = self.comic['Corrected_Type']
                alternatesearch = self.comic['AlternateSearch']
                publisher = self.comic['ComicPublisher']
                issuenumber = self.issue['Issue_Number']
                issuedate = self.issue['IssueDate']
                issuename = self.issue['IssueName']
                issuestatus = self.issue['Status']
            else:
                # weekly - (one/off)
                comicname = self.weekly['COMIC']
                booktype = self.weekly['format']
                corrected_type = None
                alternatesearch = None
                publisher = self.weekly['PUBLISHER']
                issuenumber = self.weekly['ISSUE']
                issuedate = self.weekly['SHIPDATE']
                issuename = None
                issuestatus = self.weekly['STATUS']

            if booktype is not None:
                if (all([booktype != 'Print', booktype != 'Digital', booktype != 'None', booktype is not None]) and corrected_type != 'Print') or any([corrected_type == 'TPB', corrected_type == 'GN', corrected_type == 'HC']):
                    if booktype == 'One-Shot' and corrected_type is None:
                        booktype = 'One-Shot'
                    else:
                        if booktype == 'GN' and corrected_type is None:
                            booktype = 'GN'
                        elif booktype == 'HC' and corrected_type is None:
                            booktype = 'HC'
                        else:
                            booktype = 'TPB'

            wm = filechecker.FileChecker(watchcomic=comicname, Publisher=publisher, AlternateSearch=alternatesearch)
            watchmatch = wm.matchIT(fl)

            logger.info('watchmatch: %s' % (watchmatch,))

            # this is only a very general match - if it passes, the post-processing checks do the real work
            if watchmatch['process_status'] == 'fail':
                continue

            if watchmatch['justthedigits'] is not None:
                temploc = watchmatch['justthedigits'].replace('_', ' ')
                if "Director's Cut" not in temploc:
                    temploc = re.sub('[\#\']', '', temploc)
            else:
                if any([booktype == 'TPB', booktype == 'GN', booktype == 'HC', booktype == 'One-Shot']):
                    temploc = '1'
                else:
                    temploc = None
                    continue

            int_iss = helpers.issuedigits(issuenumber)
            issyear = issuedate[:4]
            old_status = issuestatus
            issname = issuename


            if temploc is not None:
                fcdigit = helpers.issuedigits(temploc)
            elif any([booktype == 'TPB', booktype == 'GN', booktype == 'HC', booktype == 'One-Shot']) and temploc is None:
                fcdigit = helpers.issuedigits('1')

            if int(fcdigit) == int_iss:
                logger.fdebug('[%s] Issue match - #%s' % (self.issueid, self.issue['Issue_Number']))
                local_status = True
                if watchmatch['sub'] is None:
                    filepath = watchmatch['comiclocation']
                    filename = watchmatch['comicfilename']
                else:
                    filepath = os.path.join(watchmatch['comiclocation'], watchmatch['sub'])
                    filename = watchmatch['comicfilename']
                break


        #if local_status is True:
            #try:
            #    copied_folder = os.path.join(mylar.CONFIG.CACHE_DIR, 'tmp_filer')
            #    if os.path.exists(copied_folder):
            #        shutil.rmtree(copied_folder)
            #    os.mkdir(copied_folder)
            #    logger.info('created temp directory: %s' % copied_folder)
            #    shutil.copy(os.path.join(filepath, filename), copied_folder)

            #except Exception as e:
            #    logger.error('[%s] error: %s' % (e, filepath))
            #    filepath = None
            #    local_status = False
            #else:
            #filepath = os.path.join(copied_folder, filename)
            #logger.info('Successfully copied file : %s' % filepath)

        return {'status': local_status,
                'filename': filename,
                'filepath': filepath}
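walk_the_walk() above only regenerates the folder-cache listing when the previous scan is more than ten minutes old; otherwise it reuses mylar.FOLDER_CACHE. A minimal sketch of that time-based cache on its own, where list_folder() is a hypothetical stand-in for filechecker.FileChecker(...).listFiles():

import datetime
import time

CACHE_SECONDS = 600  # 10 minute retention, matching the example above

_cache = {'listing': None, 'stamp': None}


def cached_listing(folder, list_folder):
    """Return a cached directory listing, regenerating it only when the last
    scan is older than CACHE_SECONDS. list_folder is a hypothetical callable
    that actually reads the folder."""
    now = datetime.datetime.utcfromtimestamp(time.time())
    stamp = _cache['stamp']
    if stamp is not None and (now - stamp).total_seconds() < CACHE_SECONDS:
        return _cache['listing']
    _cache['listing'] = list_folder(folder)
    _cache['stamp'] = now
    return _cache['listing']

Checking the timestamp on demand keeps the cache passive: nothing is refreshed until something actually asks for the listing again.
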
Example #5
0
def libraryScan(dir=None,
                append=False,
                ComicID=None,
                ComicName=None,
                cron=None,
                queue=None):

    if cron and not mylar.LIBRARYSCAN:
        return

    if not dir:
        dir = mylar.CONFIG.COMIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(mylar.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(mylar.SYS_ENCODING, 'replace'))
        return "Fail"

    logger.info('Scanning comic directory: %s' %
                dir.decode(mylar.SYS_ENCODING, 'replace'))

    basedir = dir

    comic_list = []
    failure_list = []
    utter_failure_list = []
    comiccnt = 0
    extensions = ('cbr', 'cbz')
    cv_location = []
    cbz_retry = 0

    mylar.IMPORT_STATUS = 'Now attempting to parse files for additional information'
    myDB = db.DBConnection()
    #mylar.IMPORT_PARSED_COUNT #used to count what #/totalfiles the filename parser is currently on
    for r, d, f in os.walk(dir):
        for files in f:
            mylar.IMPORT_FILES += 1
            if any(files.lower().endswith('.' + x.lower())
                   for x in extensions):
                comicpath = os.path.join(r, files)
                if myDB.select(
                        'SELECT * FROM comics JOIN issues WHERE issues.Status="Downloaded" AND ComicLocation=? AND issues.Location=?',
                    [
                        r.decode(mylar.SYS_ENCODING),
                        files.decode(mylar.SYS_ENCODING)
                    ]):
                    logger.info('Skipped known issue path: %r', comicpath)
                    continue

                comic = files
                comicsize = os.path.getsize(comicpath)
                logger.fdebug('Comic: ' + comic + ' [' + comicpath + '] - ' +
                              str(comicsize) + ' bytes')

                try:
                    t = filechecker.FileChecker(dir=r, file=comic)
                    results = t.listFiles()

                    #logger.info(results)
                    #'type':           re.sub('\.','', filetype).strip(),
                    #'sub':            path_list,
                    #'volume':         volume,
                    #'match_type':     match_type,
                    #'comicfilename':  filename,
                    #'comiclocation':  clocation,
                    #'series_name':    series_name,
                    #'series_volume':  issue_volume,
                    #'series_year':    issue_year,
                    #'justthedigits':  issue_number,
                    #'annualcomicid':  annual_comicid,
                    #'scangroup':      scangroup}

                    if results:
                        resultline = '[PARSE-' + results['parse_status'].upper(
                        ) + ']'
                        resultline += '[SERIES: ' + results['series_name'] + ']'
                        if results['series_volume'] is not None:
                            resultline += '[VOLUME: ' + results[
                                'series_volume'] + ']'
                        if results['issue_year'] is not None:
                            resultline += '[ISSUE YEAR: ' + str(
                                results['issue_year']) + ']'
                        if results['issue_number'] is not None:
                            resultline += '[ISSUE #: ' + results[
                                'issue_number'] + ']'
                        logger.fdebug(resultline)
                    else:
                        logger.fdebug('[PARSED] FAILURE.')
                        continue

                    # We need the unicode path to use for logging, inserting into database
                    unicode_comic_path = comicpath.decode(
                        mylar.SYS_ENCODING, 'replace')

                    if results['parse_status'] == 'success':
                        comic_list.append({
                            'ComicFilename': comic,
                            'ComicLocation': comicpath,
                            'ComicSize': comicsize,
                            'Unicode_ComicLocation': unicode_comic_path,
                            'parsedinfo': {
                                'series_name': results['series_name'],
                                'series_volume': results['series_volume'],
                                'issue_year': results['issue_year'],
                                'issue_number': results['issue_number']
                            }
                        })
                        comiccnt += 1
                        mylar.IMPORT_PARSED_COUNT += 1
                    else:
                        failure_list.append({
                            'ComicFilename': comic,
                            'ComicLocation': comicpath,
                            'ComicSize': comicsize,
                            'Unicode_ComicLocation': unicode_comic_path,
                            'parsedinfo': {
                                'series_name': results['series_name'],
                                'series_volume': results['series_volume'],
                                'issue_year': results['issue_year'],
                                'issue_number': results['issue_number']
                            }
                        })
                        mylar.IMPORT_FAILURE_COUNT += 1
                        if comic.endswith('.cbz'):
                            cbz_retry += 1

                except Exception as e:
                    logger.info('bang')
                    utter_failure_list.append({
                        'ComicFilename': comic,
                        'ComicLocation': comicpath,
                        'ComicSize': comicsize,
                        'Unicode_ComicLocation': unicode_comic_path,
                        'parsedinfo': None,
                        'error': e
                    })
                    logger.info(
                        '[' + str(e) +
                        '] FAILURE encountered. Logging the error for ' +
                        comic + ' and continuing...')
                    mylar.IMPORT_FAILURE_COUNT += 1
                    if comic.endswith('.cbz'):
                        cbz_retry += 1
                    continue

            if 'cvinfo' in files:
                cv_location.append(r)
                logger.fdebug('CVINFO found: ' + os.path.join(r))
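libraryScan() walks the configured comic directory, hands every .cbr/.cbz it finds to the filename parser, and remembers any directory that carries a cvinfo file. A stripped-down sketch of just that walk, with none of the database or parsing steps:

import os

EXTENSIONS = ('.cbr', '.cbz')


def scan_comic_dir(basedir):
    """Collect comic archives under basedir plus any directories that contain
    a cvinfo marker, mirroring the walk in libraryScan above."""
    comic_files = []
    cv_locations = []
    for dirpath, _dirnames, filenames in os.walk(basedir):
        for name in filenames:
            if name.lower().endswith(EXTENSIONS):
                comic_files.append(os.path.join(dirpath, name))
        # the original does a substring check on each filename;
        # an exact, case-insensitive match is used here for simplicity
        if any(f.lower() == 'cvinfo' for f in filenames):
            cv_locations.append(dirpath)
    return comic_files, cv_locations
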
Example #6
0
                    if issueinfo is None:
                        logger.fdebug(
                            '[IMPORT-CBZ] No valid metadata contained within filename. Dropping down to parsing the filename itself.'
                        )
                        pass
                    else:
                        issuenotes_id = None
                        logger.info(
                            '[IMPORT-CBZ] Successfully retrieved some tags. Let\'s see what I can figure out.'
                        )
                        comicname = issueinfo[0]['series']
                        if comicname is not None:
                            logger.fdebug('[IMPORT-CBZ] Series Name: ' +
                                          comicname)
                            as_d = filechecker.FileChecker()
                            as_dyninfo = as_d.dynamic_replace(comicname)
                            logger.fdebug('Dynamic-ComicName: ' +
                                          as_dyninfo['mod_seriesname'])
                        else:
                            logger.fdebug(
                                '[IMPORT-CBZ] No series name found within metadata. This is bunk - dropping down to file parsing for usable information.'
                            )
                            issueinfo = None
                            issue_number = None

                        if issueinfo is not None:
                            try:
                                issueyear = issueinfo[0]['year']
                            except:
                                issueyear = None
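Example #6 shows the import order for .cbz files: use the embedded tags when they contain a series name, and only drop down to parsing the filename when they do not. A minimal sketch of that fallback, where read_cbz_tags() and parse_filename() are hypothetical stand-ins for the real tag reader and filechecker.FileChecker:

def series_from_cbz(path, read_cbz_tags, parse_filename):
    """Prefer embedded metadata; drop down to filename parsing when the tags
    are missing or contain no series name. Both helpers are hypothetical."""
    tags = read_cbz_tags(path)
    if tags and tags[0].get('series'):
        return {'series': tags[0]['series'],
                'year': tags[0].get('year'),
                'source': 'metadata'}
    parsed = parse_filename(path)
    return {'series': parsed.get('series_name'),
            'year': parsed.get('issue_year'),
            'source': 'filename'}
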
Example #7
0
    def searchit(self):
        #self.searchterm is a tuple containing series name, issue number, volume and publisher.
        series_search = self.searchterm['series']
        comic_id = self.searchterm['id']
        if comic_id:
            chk_id = helpers.checkthe_id(comic_id)

        annualize = False
        if 'Annual' in series_search:
            series_search = re.sub(' Annual', '', series_search).strip()
            annualize = True
        issue_search = self.searchterm['issue']
        volume_search = self.searchterm['volume']
        publisher_search = self.searchterm['publisher']
        spl = [x for x in self.publisher_list if x in publisher_search]
        for x in spl:
            publisher_search = re.sub(x, '', publisher_search).strip()
        logger.info('publisher search set to : ' + publisher_search)
 
        chk_id = None
        # lookup the ComicID in the 32p sqlite3 table to pull the series_id to use.
        if comic_id:
            chk_id = helpers.checkthe_id(comic_id)
            
        if not chk_id:
            #generate the dynamic name of the series here so we can match it up
            as_d = filechecker.FileChecker()
            as_dinfo = as_d.dynamic_replace(series_search)
            mod_series = re.sub('\|','', as_dinfo['mod_seriesname']).strip()
            as_puinfo = as_d.dynamic_replace(publisher_search)
            pub_series = as_puinfo['mod_seriesname']

            logger.info('series_search: ' + series_search)

            if '/' in series_search:
                series_search = series_search[:series_search.find('/')]
            if ':' in series_search:
                series_search = series_search[:series_search.find(':')]
            if ',' in series_search:
                series_search = series_search[:series_search.find(',')]

            if not mylar.SEARCH_32P:
                url = 'https://walksoftly.itsaninja.party/serieslist.php'
                params = {'series': re.sub('\|','', mod_series.lower()).strip()} #series_search}
                try:
                    t = requests.get(url, params=params, verify=True, headers={'USER-AGENT': mylar.USER_AGENT[:mylar.USER_AGENT.find('/')+7] + mylar.USER_AGENT[mylar.USER_AGENT.find('(')+1]})
                except requests.exceptions.RequestException as e:
                    logger.warn(e)
                    return "no results"

                if t.status_code == 619:
                    logger.warn('[' + str(t.status_code) + '] Unable to retrieve data from site.')
                    return "no results"
                elif t.status_code == 999:
                    logger.warn('[' + str(t.status_code) + '] No series title was provided to the search query.')
                    return "no results"

                try:
                    results = t.json()
                except:
                    results = t.text

                if len(results) == 0:
                    logger.warn('No results found for search on 32P.')
                    return "no results"

        with cfscrape.create_scraper() as s:
            s.headers = self.headers
            cj = LWPCookieJar(os.path.join(mylar.CACHE_DIR, ".32p_cookies.dat"))
            cj.load()
            s.cookies = cj
            data = []
            pdata = []
            pubmatch = False

            if not chk_id:
                if mylar.SEARCH_32P:
                    url = 'https://32pag.es/torrents.php' #?action=serieslist&filter=' + series_search #&filter=F
                    params = {'action': 'serieslist', 'filter': series_search}
                    time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                    t = s.get(url, params=params, verify=True, allow_redirects=True)
                    soup = BeautifulSoup(t.content, "html.parser")
                    results = soup.find_all("a", {"class":"object-qtip"},{"data-type":"torrentgroup"})

                for r in results:
                    if mylar.SEARCH_32P:
                        torrentid = r['data-id']
                        torrentname = r.findNext(text=True)
                        torrentname = torrentname.strip()
                    else:
                        torrentid = r['id']
                        torrentname = r['series']

                    as_d = filechecker.FileChecker()
                    as_dinfo = as_d.dynamic_replace(torrentname)
                    seriesresult = re.sub('\|','', as_dinfo['mod_seriesname']).strip()
                    #seriesresult = as_dinfo['mod_seriesname']
                    logger.info('searchresult: ' + seriesresult + ' --- ' + mod_series + '[' + publisher_search + ']')
                    if seriesresult == mod_series:
                        logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                        data.append({"id":      torrentid,
                                     "series":  torrentname})
                    elif publisher_search in seriesresult:
                        logger.info('publisher match.')
                        tmp_torrentname = re.sub(publisher_search, '', seriesresult).strip()
                        as_t = filechecker.FileChecker()
                        as_tinfo = as_t.dynamic_replace(tmp_torrentname)
                        logger.info('tmp_torrentname:' + tmp_torrentname)
                        logger.info('as_tinfo:' + as_tinfo['mod_seriesname'])
                        if re.sub('\|', '', as_tinfo['mod_seriesname']).strip() == mod_series:
                            logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                            pdata.append({"id":      torrentid,
                                          "series":  torrentname})
                            pubmatch = True

                logger.info(str(len(data)) + ' series listed for searching that match.')
            else:
                logger.info('Exact series ID already discovered previously. Setting to :' + chk_id['series'] + '[' + str(chk_id['id']) + ']')
                pdata.append({"id":     chk_id['id'],
                              "series": chk_id['series']})
                pubmatch = True

            if all([len(data) == 0, len(pdata) == 0]):
                return "no results"

            if len(pdata) == 1:
                logger.info(str(len(pdata)) + ' series match the title being searched for')
                dataset = pdata
                searchid = pdata[0]['id']
            elif len(data) == 1:
                logger.info(str(len(data)) + ' series match the title being searched for')
                dataset = data
                searchid = data[0]['id']
            else:
                dataset = []
                if len(data) > 0:
                    dataset += data
                if len(pdata) > 0:
                    dataset += pdata
                
            if chk_id is None and any([len(data) == 1, len(pdata) == 1]):
                #update the 32p_reference so we avoid doing a url lookup next time
                helpers.checkthe_id(comic_id, dataset)
            else:
                logger.warn('More than one result - will update the 32p reference point once the issue has been successfully matched against.')

            results32p = []
            resultlist = {}

            for x in dataset:

                payload = {'action': 'groupsearch',
                           'id':     x['id'], #searchid,
                           'issue':  issue_search}
                #in order to match up against 0-day stuff, volume has to be none at this point
                #when doing other searches tho, this should be allowed to go through
                #if all([volume_search != 'None', volume_search is not None]):
                #    payload.update({'volume': re.sub('v', '', volume_search).strip()})

                logger.info('payload: ' + str(payload))
                url = 'https://32pag.es/ajax.php'
                time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                try:
                    d = s.post(url, params=payload, verify=True, allow_redirects=True)
                    #logger.debug(self.module + ' Reply from AJAX: \n %s', d.text)
                except Exception as e:
                    logger.info(self.module + ' Could not POST URL %s', url)
                    continue

                try:
                    searchResults = d.json()
                except:
                    searchResults = d.text
                    logger.debug(self.module + ' Search Result did not return valid JSON, falling back on text: %s', searchResults)
                    return False

                #logger.debug(self.module + " Search Result: %s", searchResults)
                    
                if searchResults['status'] == 'success' and searchResults['count'] > 0:
                    logger.info('successfully retrieved ' + str(searchResults['count']) + ' search results.')
                    for a in searchResults['details']:
                        results32p.append({'link':      a['id'],
                                           'title':     self.searchterm['series'] + ' v' + a['volume'] + ' #' + a['issues'],
                                           'filesize':  a['size'],
                                           'issues':     a['issues'],
                                           'pack':      a['pack'],
                                           'format':    a['format'],
                                           'language':  a['language'],
                                           'seeders':   a['seeders'],
                                           'leechers':  a['leechers'],
                                           'scanner':   a['scanner'],
                                           'chkit':     {'id': x['id'], 'series': x['series']},
                                           'pubdate':   datetime.datetime.fromtimestamp(float(a['upload_time'])).strftime('%c')})


            if len(results32p) > 0:
                resultlist['entries'] = sorted(results32p, key=itemgetter('pack','title'), reverse=False)
            else:
                resultlist = 'no results'

        return resultlist
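Before returning, the search above orders the collected hits with sorted(results32p, key=itemgetter('pack', 'title')), so single issues (pack is false) come before packs and ties fall back to the title. A small self-contained illustration of that ordering, with illustrative values only:

from operator import itemgetter

# illustrative entries - the real results carry many more fields
results32p 	= [
    {'pack': True,  'title': 'Series v1 #1-12'},
    {'pack': False, 'title': 'Series v1 #3'},
    {'pack': False, 'title': 'Series v1 #1'},
]

# single issues (pack=False) sort ahead of packs, ties broken by title,
# the same ordering produced by sorted(..., key=itemgetter('pack', 'title'))
for entry in sorted(results32p, key=itemgetter('pack', 'title')):
    print(entry['title'])
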
Example #8
0
File: auth32p.py Project: hjone72/mylar
    def searchit(self):
        with requests.Session() as s:
            #self.searchterm is a tuple containing series name, issue number and volume.
            series_search = self.searchterm['series']
            issue_search = self.searchterm['issue']
            volume_search = self.searchterm['volume']
            #generate the dynamic name of the series here so we can match it up
            as_d = filechecker.FileChecker()
            as_dinfo = as_d.dynamic_replace(series_search)
            mod_series = as_dinfo['mod_seriesname']

            if '/' in series_search:
                series_search = series_search[:series_search.find('/')]
            if ':' in series_search:
                series_search = series_search[:series_search.find(':')]

            url = 'https://32pag.es/torrents.php'  #?action=serieslist&filter=' + series_search #&filter=F
            params = {'action': 'serieslist', 'filter': series_search}
            s.headers = self.headers
            cj = LWPCookieJar(os.path.join(mylar.CACHE_DIR,
                                           ".32p_cookies.dat"))
            cj.load()
            s.cookies = cj
            time.sleep(1)  #just to make sure we don't hammer, 1s pause.
            t = s.get(url, params=params, verify=True)
            soup = BeautifulSoup(t.content, "html.parser")
            results = soup.find_all("a", {"class": "object-qtip"},
                                    {"data-type": "torrentgroup"})

            data = []

            for r in results:
                torrentid = r['data-id']
                torrentname = r.findNext(text=True)
                torrentname = torrentname.strip()
                as_d = filechecker.FileChecker()
                as_dinfo = as_d.dynamic_replace(torrentname)
                seriesresult = as_dinfo['mod_seriesname']
                logger.info('searchresult: ' + seriesresult + ' --- ' +
                            mod_series)
                if seriesresult == mod_series:
                    logger.info('[MATCH] ' + torrentname + ' [' +
                                str(torrentid) + ']')
                    data.append({"id": torrentid, "series": torrentname})

            logger.info(
                str(len(data)) + ' series listed for searching that match.')

            if len(data) == 1:
                logger.info(
                    str(len(data)) +
                    ' series match the title being searched for')
                payload = {
                    'action': 'groupsearch',
                    'id': data[0]['id'],
                    'issue': issue_search
                }
                #in order to match up against 0-day stuff, volume has to be none at this point
                #when doing other searches tho, this should be allowed to go through
                #if all([volume_search != 'None', volume_search is not None]):
                #    payload.update({'volume': re.sub('v', '', volume_search).strip()})

                logger.info('payload: ' + str(payload))
                url = 'https://32pag.es/ajax.php'

                time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                d = s.get(url, params=payload, verify=True)

                results32p = []
                results = {}
                try:
                    searchResults = d.json()
                except:
                    searchResults = d.text
                if searchResults[
                        'status'] == 'success' and searchResults['count'] > 0:
                    logger.info('successfully retrieved ' +
                                str(searchResults['count']) +
                                ' search results.')
                    for a in searchResults['details']:
                        results32p.append({
                            'link':
                            a['id'],
                            'title':
                            self.searchterm['series'] + ' v' + a['volume'] +
                            ' #' + a['issues'],
                            'filesize':
                            a['size'],
                            'pack':
                            a['pack'],
                            'format':
                            a['format'],
                            'language':
                            a['language'],
                            'seeders':
                            a['seeders'],
                            'leechers':
                            a['leechers'],
                            'scanner':
                            a['scanner'],
                            'pubdate':
                            datetime.datetime.fromtimestamp(
                                float(a['upload_time'])).strftime('%c')
                        })
                    results['entries'] = results32p
                else:
                    results = 'no results'
            else:
                results = 'no results'

        return results
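Both 32P examples wrap d.json() in a bare try/except and fall back to the raw response text when the body is not valid JSON. A slightly more explicit sketch of that guard, using only the requests library:

import requests


def json_or_text(response):
    """Return (parsed, True) when the body is valid JSON, otherwise the raw
    text and False so the caller can log it and bail out."""
    try:
        return response.json(), True
    except ValueError:  # requests raises a ValueError subclass on bad JSON
        return response.text, False

# usage sketch against a hypothetical endpoint
# r = requests.post('https://example.invalid/ajax.php', data={'action': 'groupsearch'})
# payload, ok = json_or_text(r)
# if not ok:
#     print('Search result did not return valid JSON: %s' % payload)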