Example #1
    def searchit(self):
        chk_id = None
        #logger.info('searchterm: %s' % self.searchterm)
        #self.searchterm is a dictionary containing the series name, issue number, volume and publisher.
        series_search = self.searchterm['series']
        issue_search = self.searchterm['issue']
        volume_search = self.searchterm['volume']

        if series_search.startswith('0-Day Comics Pack'):
            #issue = '21' = WED, #volume='2' = 2nd month
            torrentid = 22247  #2018
            publisher_search = None  #'2'  #2nd month
            comic_id = None
        elif all([
                self.searchterm['torrentid_32p'] is not None,
                self.searchterm['torrentid_32p'] != 'None'
        ]):
            torrentid = self.searchterm['torrentid_32p']
            comic_id = self.searchterm['id']
            publisher_search = self.searchterm['publisher']
        else:
            torrentid = None
            comic_id = self.searchterm['id']

            annualize = False
            if 'annual' in series_search.lower():
                series_search = re.sub(' annual', '',
                                       series_search.lower()).strip()
                annualize = True
            publisher_search = self.searchterm['publisher']
            spl = [x for x in self.publisher_list if x in publisher_search]
            for x in spl:
                publisher_search = re.sub(x, '', publisher_search).strip()
            #logger.info('publisher search set to : %s' % publisher_search)

            # lookup the ComicID in the 32p sqlite3 table to pull the series_id to use.
            if comic_id:
                chk_id = helpers.checkthe_id(comic_id)

            if any([chk_id is None, mylar.CONFIG.DEEP_SEARCH_32P is True]):
                #generate the dynamic name of the series here so we can match it up
                as_d = filechecker.FileChecker()
                as_dinfo = as_d.dynamic_replace(series_search)
                mod_series = re.sub('\|', '',
                                    as_dinfo['mod_seriesname']).strip()
                as_puinfo = as_d.dynamic_replace(publisher_search)
                pub_series = as_puinfo['mod_seriesname']

                logger.fdebug('series_search: %s' % series_search)

                if '/' in series_search:
                    series_search = series_search[:series_search.find('/')]
                if ':' in series_search:
                    series_search = series_search[:series_search.find(':')]
                if ',' in series_search:
                    series_search = series_search[:series_search.find(',')]

                logger.fdebug('config.search_32p: %s' %
                              mylar.CONFIG.SEARCH_32P)
                if mylar.CONFIG.SEARCH_32P is False:
                    url = 'https://walksoftly.itsaninja.party/serieslist.php'
                    params = {
                        'series': re.sub('\|', '', mod_series.lower()).strip()
                    }  #series_search}
                    logger.fdebug('search query: %s' %
                                  re.sub('\|', '', mod_series.lower()).strip())
                    try:
                        t = requests.get(
                            url,
                            params=params,
                            verify=True,
                            headers={
                                'USER-AGENT':
                                mylar.USER_AGENT[:mylar.USER_AGENT.find('/') +
                                                 7] +
                                mylar.USER_AGENT[mylar.USER_AGENT.find('(') +
                                                 1]
                            })
                    except requests.exceptions.RequestException as e:
                        logger.warn(e)
                        return "no results"

                    if t.status_code == 619:
                        logger.warn('[%s] Unable to retrieve data from site.' %
                                    t.status_code)
                        return "no results"
                    elif t.status_code == 999:
                        logger.warn(
                            '[%s] No series title was provided to the search query.'
                            % t.status_code)
                        return "no results"

                    try:
                        results = t.json()
                    except ValueError:
                        results = t.text

                    if len(results) == 0:
                        logger.warn('No results found for search on 32P.')
                        return "no results"

#        with cfscrape.create_scraper(delay=15) as s:
#            s.headers = self.headers
#            cj = LWPCookieJar(os.path.join(mylar.CONFIG.SECURE_DIR, ".32p_cookies.dat"))
#            cj.load()
#            s.cookies = cj
        data = []
        pdata = []
        pubmatch = False

        if any([series_search.startswith('0-Day Comics Pack'), torrentid is not None]):
            data.append({"id": torrentid, "series": series_search})
        else:
            if any([not chk_id, mylar.CONFIG.DEEP_SEARCH_32P is True]):
                if mylar.CONFIG.SEARCH_32P is True:
                    url = 'https://32pag.es/torrents.php'  #?action=serieslist&filter=' + series_search #&filter=F
                    params = {'action': 'serieslist', 'filter': series_search}
                    time.sleep(
                        1)  #just to make sure we don't hammer, 1s pause.
                    t = self.session.get(url,
                                         params=params,
                                         verify=True,
                                         allow_redirects=True)
                    soup = BeautifulSoup(t.content, "html.parser")
                    results = soup.find_all("a", {"class": "object-qtip"},
                                            {"data-type": "torrentgroup"})

                for r in results:
                    if mylar.CONFIG.SEARCH_32P is True:
                        torrentid = r['data-id']
                        torrentname = r.findNext(text=True)
                        torrentname = torrentname.strip()
                    else:
                        torrentid = r['id']
                        torrentname = r['series']

                    as_d = filechecker.FileChecker()
                    as_dinfo = as_d.dynamic_replace(torrentname)
                    seriesresult = re.sub('\|', '',
                                          as_dinfo['mod_seriesname']).strip()
                    logger.fdebug('searchresult: %s --- %s [%s]' %
                                  (seriesresult, mod_series, publisher_search))
                    if seriesresult.lower() == mod_series.lower():
                        logger.fdebug('[MATCH] %s [%s]' %
                                      (torrentname, torrentid))
                        data.append({"id": torrentid, "series": torrentname})
                    elif publisher_search.lower() in seriesresult.lower():
                        logger.fdebug('[MATCH] Publisher match.')
                        tmp_torrentname = re.sub(publisher_search.lower(), '',
                                                 seriesresult.lower()).strip()
                        as_t = filechecker.FileChecker()
                        as_tinfo = as_t.dynamic_replace(tmp_torrentname)
                        if re.sub('\|', '', as_tinfo['mod_seriesname']).strip(
                        ) == mod_series.lower():
                            logger.fdebug('[MATCH] %s [%s]' %
                                          (torrentname, torrentid))
                            pdata.append({
                                "id": torrentid,
                                "series": torrentname
                            })
                            pubmatch = True

                logger.fdebug('%s series listed for searching that match.' %
                              len(data))
            else:
                logger.fdebug(
                    'Exact series ID already discovered previously. Setting to : %s [%s]'
                    % (chk_id['series'], chk_id['id']))
                pdata.append({"id": chk_id['id'], "series": chk_id['series']})
                pubmatch = True

        if all([len(data) == 0, len(pdata) == 0]):
            return "no results"
        else:
            dataset = []
            if len(data) > 0:
                dataset += data
            if len(pdata) > 0:
                dataset += pdata
            logger.fdebug(
                str(len(dataset)) +
                ' series match the title being searched for on 32P...')

        if all([
                chk_id is None,
                not series_search.startswith('0-Day Comics Pack'),
                self.searchterm['torrentid_32p'] is not None,
                self.searchterm['torrentid_32p'] != 'None'
        ]) and any([len(data) == 1, len(pdata) == 1]):
            #update the 32p_reference so we avoid doing a url lookup next time
            helpers.checkthe_id(comic_id, dataset)
        else:
            if all([
                    not series_search.startswith('0-Day Comics Pack'),
                    self.searchterm['torrentid_32p'] is not None,
                    self.searchterm['torrentid_32p'] != 'None'
            ]):
                pass
            else:
                logger.debug(
                    'Unable to properly verify reference on 32P - will update the 32P reference point once the issue has been successfully matched against.'
                )

        results32p = []
        resultlist = {}

        for x in dataset:
            #for 0-day packs, issue=week#, volume=month, id=0-day year pack (ie.issue=21&volume=2 for feb.21st)
            payload = {
                "action": "groupsearch",
                "id": x['id'],  #searchid,
                "issue": issue_search
            }
            #in order to match up against 0-day stuff, volume has to be none at this point
            #when doing other searches tho, this should be allowed to go through
            #if all([volume_search != 'None', volume_search is not None]):
            #    payload.update({'volume': re.sub('v', '', volume_search).strip()})
            if series_search.startswith('0-Day Comics Pack'):
                payload.update({"volume": volume_search})

            payload = json.dumps(payload)
            payload = json.loads(payload)

            logger.fdebug('payload: %s' % payload)
            url = 'https://32pag.es/ajax.php'
            time.sleep(1)  #just to make sure we don't hammer, 1s pause.
            try:
                d = self.session.get(url,
                                     params=payload,
                                     verify=True,
                                     allow_redirects=True)
            except Exception as e:
                logger.error('%s [%s] Could not retrieve URL %s' %
                             (self.module, e, url))
                continue  # skip this entry; 'd' is undefined if the request failed

            try:
                searchResults = d.json()
            except Exception as e:
                searchResults = d.text
                logger.debug(
                    '[%s] %s Search Result did not return valid JSON, falling back on text: %s'
                    % (e, self.module, searchResults))
                return False

            if searchResults['status'] == 'success' and searchResults['count'] > 0:
                logger.fdebug('successfully retrieved %s search results' %
                              searchResults['count'])
                for a in searchResults['details']:
                    if series_search.startswith('0-Day Comics Pack'):
                        title = series_search
                    else:
                        title = self.searchterm['series'] + ' v' + a[
                            'volume'] + ' #' + a['issues']
                    results32p.append({'link': a['id'],
                                       'title': title,
                                       'filesize': a['size'],
                                       'issues': a['issues'],
                                       'pack': a['pack'],
                                       'format': a['format'],
                                       'language': a['language'],
                                       'seeders': a['seeders'],
                                       'leechers': a['leechers'],
                                       'scanner': a['scanner'],
                                       'chkit': {'id': x['id'], 'series': x['series']},
                                       'pubdate': datetime.datetime.fromtimestamp(float(a['upload_time'])).strftime('%a, %d %b %Y %H:%M:%S'),
                                       'int_pubdate': float(a['upload_time'])})

            else:
                logger.fdebug('32P did not return any valid search results.')

        if len(results32p) > 0:
            resultlist['entries'] = sorted(results32p,
                                           key=itemgetter('pack', 'title'),
                                           reverse=False)
            logger.debug('%s Resultslist: %s' % (self.module, resultlist))
        else:
            resultlist = 'no results'

        return resultlist
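
For orientation, here is a minimal, self-contained sketch (not taken from the Mylar source) of the searchterm keys this method reads and the two return shapes it produces; the helper function below is hypothetical.

searchterm = {
    'series': 'Saga',             # series name
    'issue': '54',                # issue number being searched
    'volume': '1',                # volume, or 'None' when unknown
    'publisher': 'Image Comics',  # used for the fallback publisher match
    'id': '12345',                # ComicID used for the cached 32P lookup
    'torrentid_32p': None,        # cached 32P series id, when already known
}

def summarize_32p_results(resultlist):
    """Handle both return shapes of searchit(): the literal string
    'no results', or a dict whose 'entries' list is sorted by pack/title."""
    if resultlist == 'no results':
        return []
    return ['%s [seeders: %s]' % (entry['title'], entry['seeders'])
            for entry in resultlist['entries']]
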
Example #2
    def searchit(self):
        #self.searchterm is a dictionary containing the series name, issue number, volume and publisher.
        series_search = self.searchterm['series']
        comic_id = self.searchterm['id']

        annualize = False
        if 'Annual' in series_search:
            series_search = re.sub(' Annual', '', series_search).strip()
            annualize = True
        issue_search = self.searchterm['issue']
        volume_search = self.searchterm['volume']
        publisher_search = self.searchterm['publisher']
        spl = [x for x in self.publisher_list if x in publisher_search]
        for x in spl:
            publisher_search = re.sub(x, '', publisher_search).strip()
        logger.info('publisher search set to : ' + publisher_search)
 
        chk_id = None
        # lookup the ComicID in the 32p sqlite3 table to pull the series_id to use.
        if comic_id:
            chk_id = helpers.checkthe_id(comic_id)
            
        if not chk_id:
            #generate the dynamic name of the series here so we can match it up
            as_d = filechecker.FileChecker()
            as_dinfo = as_d.dynamic_replace(series_search)
            mod_series = re.sub('\|','', as_dinfo['mod_seriesname']).strip()
            as_puinfo = as_d.dynamic_replace(publisher_search)
            pub_series = as_puinfo['mod_seriesname']

            logger.info('series_search: ' + series_search)

            if '/' in series_search:
                series_search = series_search[:series_search.find('/')]
            if ':' in series_search:
                series_search = series_search[:series_search.find(':')]
            if ',' in series_search:
                series_search = series_search[:series_search.find(',')]

            if not mylar.SEARCH_32P:
                url = 'https://walksoftly.itsaninja.party/serieslist.php'
                params = {'series': re.sub('\|','', mod_series.lower()).strip()} #series_search}
                try:
                    t = requests.get(url, params=params, verify=True, headers={'USER-AGENT': mylar.USER_AGENT[:mylar.USER_AGENT.find('/')+7] + mylar.USER_AGENT[mylar.USER_AGENT.find('(')+1]})
                except requests.exceptions.RequestException as e:
                    logger.warn(e)
                    return "no results"

                if t.status_code == 619:
                    logger.warn('[' + str(t.status_code) + '] Unable to retrieve data from site.')
                    return "no results"
                elif t.status_code == 999:
                    logger.warn('[' + str(t.status_code) + '] No series title was provided to the search query.')
                    return "no results"

                try:
                    results = t.json()
                except ValueError:
                    results = t.text

                if len(results) == 0:
                    logger.warn('No results found for search on 32P.')
                    return "no results"

        with cfscrape.create_scraper() as s:
            s.headers = self.headers
            cj = LWPCookieJar(os.path.join(mylar.CACHE_DIR, ".32p_cookies.dat"))
            cj.load()
            s.cookies = cj
            data = []
            pdata = []
            pubmatch = False

            if not chk_id:
                if mylar.SEARCH_32P:
                    url = 'https://32pag.es/torrents.php' #?action=serieslist&filter=' + series_search #&filter=F
                    params = {'action': 'serieslist', 'filter': series_search}
                    time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                    t = s.get(url, params=params, verify=True, allow_redirects=True)
                    soup = BeautifulSoup(t.content, "html.parser")
                    results = soup.find_all("a", {"class":"object-qtip"},{"data-type":"torrentgroup"})

                for r in results:
                    if mylar.SEARCH_32P:
                        torrentid = r['data-id']
                        torrentname = r.findNext(text=True)
                        torrentname = torrentname.strip()
                    else:
                        torrentid = r['id']
                        torrentname = r['series']

                    as_d = filechecker.FileChecker()
                    as_dinfo = as_d.dynamic_replace(torrentname)
                    seriesresult = re.sub('\|','', as_dinfo['mod_seriesname']).strip()
                    #seriesresult = as_dinfo['mod_seriesname']
                    logger.info('searchresult: ' + seriesresult + ' --- ' + mod_series + '[' + publisher_search + ']')
                    if seriesresult == mod_series:
                        logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                        data.append({"id":      torrentid,
                                     "series":  torrentname})
                    elif publisher_search in seriesresult:
                        logger.info('publisher match.')
                        tmp_torrentname = re.sub(publisher_search, '', seriesresult).strip()
                        as_t = filechecker.FileChecker()
                        as_tinfo = as_t.dynamic_replace(tmp_torrentname)
                        logger.info('tmp_torrentname:' + tmp_torrentname)
                        logger.info('as_tinfo:' + as_tinfo['mod_seriesname'])
                        if re.sub('\|', '', as_tinfo['mod_seriesname']).strip() == mod_series:
                            logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                            pdata.append({"id":      torrentid,
                                          "series":  torrentname})
                            pubmatch = True

                logger.info(str(len(data)) + ' series listed for searching that match.')
            else:
                logger.info('Exact series ID already discovered previously. Setting to :' + chk_id['series'] + '[' + str(chk_id['id']) + ']')
                pdata.append({"id":     chk_id['id'],
                              "series": chk_id['series']})
                pubmatch = True

            if all([len(data) == 0, len(pdata) == 0]):
                return "no results"

            if len(pdata) == 1:
                logger.info(str(len(pdata)) + ' series match the title being searched for')
                dataset = pdata
                searchid = pdata[0]['id']
            elif len(data) == 1:
                logger.info(str(len(data)) + ' series match the title being searched for')
                dataset = data
                searchid = data[0]['id']
            else:
                dataset = []
                if len(data) > 0:
                    dataset += data
                if len(pdata) > 0:
                    dataset += pdata
                
            if chk_id is None and any([len(data) == 1, len(pdata) == 1]):
                #update the 32p_reference so we avoid doing a url lookup next time
                helpers.checkthe_id(comic_id, dataset)
            else:
                logger.warn('More than one result - will update the 32p reference point once the issue has been successfully matched against.')

            results32p = []
            resultlist = {}

            for x in dataset:

                payload = {'action': 'groupsearch',
                           'id':     x['id'], #searchid,
                           'issue':  issue_search}
                #in order to match up against 0-day stuff, volume has to be none at this point
                #when doing other searches tho, this should be allowed to go through
                #if all([volume_search != 'None', volume_search is not None]):
                #    payload.update({'volume': re.sub('v', '', volume_search).strip()})

                logger.info('payload: ' + str(payload))
                url = 'https://32pag.es/ajax.php'
                time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                try:
                    d = s.post(url, params=payload, verify=True, allow_redirects=True)
                    #logger.debug(self.module + ' Reply from AJAX: \n %s', d.text)
                except Exception as e:
                    logger.info(self.module + ' Could not POST URL %s', url)
                    continue  # skip this entry; 'd' is undefined if the request failed
                try:
                    searchResults = d.json()
                except ValueError:
                    searchResults = d.text
                    logger.debug(self.module + ' Search Result did not return valid JSON, falling back on text: %s', searchResults)
                    return False

                #logger.debug(self.module + " Search Result: %s", searchResults)
                    
                if searchResults['status'] == 'success' and searchResults['count'] > 0:
                    logger.info('successfully retrieved ' + str(searchResults['count']) + ' search results.')
                    for a in searchResults['details']:
                        results32p.append({'link':      a['id'],
                                           'title':     self.searchterm['series'] + ' v' + a['volume'] + ' #' + a['issues'],
                                           'filesize':  a['size'],
                                           'issues':     a['issues'],
                                           'pack':      a['pack'],
                                           'format':    a['format'],
                                           'language':  a['language'],
                                           'seeders':   a['seeders'],
                                           'leechers':  a['leechers'],
                                           'scanner':   a['scanner'],
                                           'chkit':     {'id': x['id'], 'series': x['series']},
                                           'pubdate':   datetime.datetime.fromtimestamp(float(a['upload_time'])).strftime('%c')})


            if len(results32p) > 0:
                resultlist['entries'] = sorted(results32p, key=itemgetter('pack','title'), reverse=False)
            else:
                resultlist = 'no results'

        return resultlist
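
The publisher handling at the top of these examples is easy to miss: any entry of self.publisher_list found inside the publisher string is stripped before matching. A standalone sketch with illustrative values (the real publisher_list lives on the Mylar provider class):

import re

publisher_list = ['Comics', 'Entertainment', 'Press']   # illustrative values only
publisher_search = 'DC Comics'

spl = [x for x in publisher_list if x in publisher_search]
for x in spl:
    publisher_search = re.sub(x, '', publisher_search).strip()

print(publisher_search)   # -> 'DC'
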
Example #3
    def searchit(self):
        #self.searchterm is a dictionary containing the series name, issue number, volume and publisher.
        series_search = self.searchterm['series']
        comic_id = self.searchterm['id']

        annualize = False
        if 'Annual' in series_search:
            series_search = re.sub(' Annual', '', series_search).strip()
            annualize = True
        issue_search = self.searchterm['issue']
        volume_search = self.searchterm['volume']
        publisher_search = self.searchterm['publisher']
        spl = [x for x in self.publisher_list if x in publisher_search]
        for x in spl:
            publisher_search = re.sub(x, '', publisher_search).strip()
        logger.info('publisher search set to : ' + publisher_search)

        chk_id = None
        # lookup the ComicID in the 32p sqlite3 table to pull the series_id to use.
        if comic_id:
            chk_id = helpers.checkthe_id(comic_id)

        if any([not chk_id, mylar.DEEP_SEARCH_32P is True]):
            #generate the dynamic name of the series here so we can match it up
            as_d = filechecker.FileChecker()
            as_dinfo = as_d.dynamic_replace(series_search)
            mod_series = re.sub('\|','', as_dinfo['mod_seriesname']).strip()
            as_puinfo = as_d.dynamic_replace(publisher_search)
            pub_series = as_puinfo['mod_seriesname']

            logger.info('series_search: ' + series_search)

            if '/' in series_search:
                series_search = series_search[:series_search.find('/')]
            if ':' in series_search:
                series_search = series_search[:series_search.find(':')]
            if ',' in series_search:
                series_search = series_search[:series_search.find(',')]

            if not mylar.SEARCH_32P:
                url = 'https://walksoftly.itsaninja.party/serieslist.php'
                params = {'series': re.sub('\|','', mod_series.lower()).strip()} #series_search}
                try:
                    t = requests.get(url, params=params, verify=True, headers={'USER-AGENT': mylar.USER_AGENT[:mylar.USER_AGENT.find('/')+7] + mylar.USER_AGENT[mylar.USER_AGENT.find('(')+1]})
                except requests.exceptions.RequestException as e:
                    logger.warn(e)
                    return "no results"

                if t.status_code == 619:
                    logger.warn('[' + str(t.status_code) + '] Unable to retrieve data from site.')
                    return "no results"
                elif t.status_code == 999:
                    logger.warn('[' + str(t.status_code) + '] No series title was provided to the search query.')
                    return "no results"

                try:
                    results = t.json()
                except ValueError:
                    results = t.text

                if len(results) == 0:
                    logger.warn('No results found for search on 32P.')
                    return "no results"

        with cfscrape.create_scraper() as s:
            s.headers = self.headers
            cj = LWPCookieJar(os.path.join(mylar.CACHE_DIR, ".32p_cookies.dat"))
            cj.load()
            s.cookies = cj
            data = []
            pdata = []
            pubmatch = False

            if any([not chk_id, mylar.DEEP_SEARCH_32P is True]):
                if mylar.SEARCH_32P:
                    url = 'https://32pag.es/torrents.php' #?action=serieslist&filter=' + series_search #&filter=F
                    params = {'action': 'serieslist', 'filter': series_search}
                    time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                    t = s.get(url, params=params, verify=True, allow_redirects=True)
                    soup = BeautifulSoup(t.content, "html.parser")
                    results = soup.find_all("a", {"class":"object-qtip"},{"data-type":"torrentgroup"})

                for r in results:
                    if mylar.SEARCH_32P:
                        torrentid = r['data-id']
                        torrentname = r.findNext(text=True)
                        torrentname = torrentname.strip()
                    else:
                        torrentid = r['id']
                        torrentname = r['series']

                    as_d = filechecker.FileChecker()
                    as_dinfo = as_d.dynamic_replace(torrentname)
                    seriesresult = re.sub('\|','', as_dinfo['mod_seriesname']).strip()
                    #seriesresult = as_dinfo['mod_seriesname']
                    logger.info('searchresult: ' + seriesresult + ' --- ' + mod_series + '[' + publisher_search + ']')
                    if seriesresult == mod_series:
                        logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                        data.append({"id":      torrentid,
                                     "series":  torrentname})
                    elif publisher_search in seriesresult:
                        logger.info('publisher match.')
                        tmp_torrentname = re.sub(publisher_search, '', seriesresult).strip()
                        as_t = filechecker.FileChecker()
                        as_tinfo = as_t.dynamic_replace(tmp_torrentname)
                        logger.info('tmp_torrentname:' + tmp_torrentname)
                        logger.info('as_tinfo:' + as_tinfo['mod_seriesname'])
                        if re.sub('\|', '', as_tinfo['mod_seriesname']).strip() == mod_series:
                            logger.info('[MATCH] ' + torrentname + ' [' + str(torrentid) + ']')
                            pdata.append({"id":      torrentid,
                                          "series":  torrentname})
                            pubmatch = True

                logger.info(str(len(data)) + ' series listed for searching that match.')
            else:
                logger.info('Exact series ID already discovered previously. Setting to :' + chk_id['series'] + '[' + str(chk_id['id']) + ']')
                pdata.append({"id":     chk_id['id'],
                              "series": chk_id['series']})
                pubmatch = True

            if all([len(data) == 0, len(pdata) == 0]):
                return "no results"
            else:
                dataset = []
                if len(data) > 0:
                    dataset += data
                if len(pdata) > 0:
                    dataset += pdata
                logger.info('dataset: %s' % dataset)
                logger.info(str(len(dataset)) + ' series match the title being searched for on 32P...')

            if chk_id is None and any([len(data) == 1, len(pdata) == 1]):
                #update the 32p_reference so we avoid doing a url lookup next time
                helpers.checkthe_id(comic_id, dataset)
            else:
                logger.debug('Unable to properly verify reference on 32P - will update the 32P reference point once the issue has been successfully matched against.')

            results32p = []
            resultlist = {}

            for x in dataset:
                #for 0-day packs, issue=week#, volume=month, id=0-day year pack
                payload = {'action': 'groupsearch',
                           'id':     x['id'], #searchid,
                           'issue':  issue_search}
                #in order to match up against 0-day stuff, volume has to be none at this point
                #when doing other searches tho, this should be allowed to go through
                #if all([volume_search != 'None', volume_search is not None]):
                #    payload.update({'volume': re.sub('v', '', volume_search).strip()})

                logger.info('payload: ' + str(payload))
                url = 'https://32pag.es/ajax.php'
                time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                try:
                    d = s.post(url, params=payload, verify=True, allow_redirects=True)
                    #logger.debug(self.module + ' Reply from AJAX: \n %s', d.text)
                except Exception as e:
                    logger.info(self.module + ' Could not POST URL %s', url)
                    continue  # skip this entry; 'd' is undefined if the request failed

                try:
                    searchResults = d.json()
                except ValueError:
                    searchResults = d.text
                    logger.debug(self.module + ' Search Result did not return valid JSON, falling back on text: %s', searchResults)
                    return False

                #logger.debug(self.module + " Search Result: %s", searchResults)
                if searchResults['status'] == 'success' and searchResults['count'] > 0:
                    logger.info('successfully retrieved ' + str(searchResults['count']) + ' search results.')
                    for a in searchResults['details']:
                        results32p.append({'link':      a['id'],
                                           'title':     self.searchterm['series'] + ' v' + a['volume'] + ' #' + a['issues'],
                                           'filesize':  a['size'],
                                           'issues':     a['issues'],
                                           'pack':      a['pack'],
                                           'format':    a['format'],
                                           'language':  a['language'],
                                           'seeders':   a['seeders'],
                                           'leechers':  a['leechers'],
                                           'scanner':   a['scanner'],
                                           'chkit':     {'id': x['id'], 'series': x['series']},
                                           'pubdate':   datetime.datetime.fromtimestamp(float(a['upload_time'])).strftime('%a, %d %b %Y %H:%M:%S'),
                                           'int_pubdate': float(a['upload_time'])})


            if len(results32p) > 0:
                resultlist['entries'] = sorted(results32p, key=itemgetter('pack','title'), reverse=False)
            else:
                resultlist = 'no results'

        return resultlist
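
When SEARCH_32P is enabled, the series list is scraped from the torrents page rather than fetched as JSON. The following self-contained sketch uses fabricated HTML that only mimics the attributes the examples rely on (an anchor with class object-qtip, a data-id attribute, and the series name as its text):

from bs4 import BeautifulSoup

html = '''
<a class="object-qtip" data-type="torrentgroup" data-id="1234">Saga</a>
<a class="object-qtip" data-type="torrentgroup" data-id="5678">Paper Girls</a>
'''

soup = BeautifulSoup(html, 'html.parser')
# passing both attributes in one attrs dict avoids find_all() treating the
# second dict as its positional 'recursive' argument
for r in soup.find_all('a', attrs={'class': 'object-qtip', 'data-type': 'torrentgroup'}):
    torrentid = r['data-id']
    torrentname = r.get_text(strip=True)
    print(torrentid, torrentname)   # -> 1234 Saga / 5678 Paper Girls
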
Example #4
    def searchit(self):
        chk_id = None
        #logger.info('searchterm: %s' % self.searchterm)
        #self.searchterm is a dictionary containing the series name, issue number, volume and publisher.
        series_search = self.searchterm['series']
        issue_search = self.searchterm['issue']
        volume_search = self.searchterm['volume']

        if series_search.startswith('0-Day Comics Pack'):
            #issue = '21' = WED, #volume='2' = 2nd month
            torrentid = 22247 #2018
            publisher_search = None #'2'  #2nd month
            comic_id = None
        elif all([self.searchterm['torrentid_32p'] is not None, self.searchterm['torrentid_32p'] != 'None']):
            torrentid = self.searchterm['torrentid_32p']
            comic_id = self.searchterm['id']
            publisher_search = self.searchterm['publisher']
        else:
            torrentid = None
            comic_id = self.searchterm['id']

            annualize = False
            if 'annual' in series_search.lower():
                series_search = re.sub(' annual', '', series_search.lower()).strip()
                annualize = True
            publisher_search = self.searchterm['publisher']
            spl = [x for x in self.publisher_list if x in publisher_search]
            for x in spl:
                publisher_search = re.sub(x, '', publisher_search).strip()
            #logger.info('publisher search set to : %s' % publisher_search)

            # lookup the ComicID in the 32p sqlite3 table to pull the series_id to use.
            if comic_id:
                chk_id = helpers.checkthe_id(comic_id)

            if any([chk_id is None, mylar.CONFIG.DEEP_SEARCH_32P is True]):
                #generate the dynamic name of the series here so we can match it up
                as_d = filechecker.FileChecker()
                as_dinfo = as_d.dynamic_replace(series_search)
                mod_series = re.sub('\|','', as_dinfo['mod_seriesname']).strip()
                as_puinfo = as_d.dynamic_replace(publisher_search)
                pub_series = as_puinfo['mod_seriesname']

                logger.fdebug('series_search: %s' % series_search)

                if '/' in series_search:
                    series_search = series_search[:series_search.find('/')]
                if ':' in series_search:
                    series_search = series_search[:series_search.find(':')]
                if ',' in series_search:
                    series_search = series_search[:series_search.find(',')]

                logger.fdebug('config.search_32p: %s' % mylar.CONFIG.SEARCH_32P)
                if mylar.CONFIG.SEARCH_32P is False:
                    url = 'https://walksoftly.itsaninja.party/serieslist.php'
                    params = {'series': re.sub('\|','', mod_series.lower()).strip()} #series_search}
                    logger.fdebug('search query: %s' % re.sub('\|', '', mod_series.lower()).strip())
                    try:
                        t = requests.get(url, params=params, verify=True, headers={'USER-AGENT': mylar.USER_AGENT[:mylar.USER_AGENT.find('/')+7] + mylar.USER_AGENT[mylar.USER_AGENT.find('(')+1]})
                    except requests.exceptions.RequestException as e:
                        logger.warn(e)
                        return "no results"

                    if t.status_code == 619:
                        logger.warn('[%s] Unable to retrieve data from site.' % t.status_code)
                        return "no results"
                    elif t.status_code == 999:
                        logger.warn('[%s] No series title was provided to the search query.' % t.status_code)
                        return "no results"

                    try:
                        results = t.json()
                    except ValueError:
                        results = t.text

                    if len(results) == 0:
                        logger.warn('No results found for search on 32P.')
                        return "no results"

#        with cfscrape.create_scraper(delay=15) as s:
#            s.headers = self.headers
#            cj = LWPCookieJar(os.path.join(mylar.CONFIG.SECURE_DIR, ".32p_cookies.dat"))
#            cj.load()
#            s.cookies = cj
        data = []
        pdata = []
        pubmatch = False

        if any([series_search.startswith('0-Day Comics Pack'), torrentid is not None]):
            data.append({"id":      torrentid,
                         "series":  series_search})
        else:
            if any([not chk_id, mylar.CONFIG.DEEP_SEARCH_32P is True]):
                if mylar.CONFIG.SEARCH_32P is True:
                    url = 'https://32pag.es/torrents.php' #?action=serieslist&filter=' + series_search #&filter=F
                    params = {'action': 'serieslist', 'filter': series_search}
                    time.sleep(1)  #just to make sure we don't hammer, 1s pause.
                    t = self.session.get(url, params=params, verify=True, allow_redirects=True)
                    soup = BeautifulSoup(t.content, "html.parser")
                    results = soup.find_all("a", {"class":"object-qtip"},{"data-type":"torrentgroup"})

                for r in results:
                    if mylar.CONFIG.SEARCH_32P is True:
                        torrentid = r['data-id']
                        torrentname = r.findNext(text=True)
                        torrentname = torrentname.strip()
                    else:
                        torrentid = r['id']
                        torrentname = r['series']

                    as_d = filechecker.FileChecker()
                    as_dinfo = as_d.dynamic_replace(torrentname)
                    seriesresult = re.sub('\|','', as_dinfo['mod_seriesname']).strip()
                    logger.fdebug('searchresult: %s --- %s [%s]' % (seriesresult, mod_series, publisher_search))
                    if seriesresult.lower() == mod_series.lower():
                        logger.fdebug('[MATCH] %s [%s]' % (torrentname, torrentid))
                        data.append({"id":      torrentid,
                                     "series":  torrentname})
                    elif publisher_search.lower() in seriesresult.lower():
                        logger.fdebug('[MATCH] Publisher match.')
                        tmp_torrentname = re.sub(publisher_search.lower(), '', seriesresult.lower()).strip()
                        as_t = filechecker.FileChecker()
                        as_tinfo = as_t.dynamic_replace(tmp_torrentname)
                        if re.sub('\|', '', as_tinfo['mod_seriesname']).strip() == mod_series.lower():
                            logger.fdebug('[MATCH] %s [%s]' % (torrentname, torrentid))
                            pdata.append({"id":      torrentid,
                                          "series":  torrentname})
                            pubmatch = True

                logger.fdebug('%s series listed for searching that match.' % len(data))
            else:
                logger.fdebug('Exact series ID already discovered previously. Setting to : %s [%s]' % (chk_id['series'], chk_id['id']))
                pdata.append({"id":     chk_id['id'],
                              "series": chk_id['series']})
                pubmatch = True

        if all([len(data) == 0, len(pdata) == 0]):
            return "no results"
        else:
            dataset = []
            if len(data) > 0:
                dataset += data
            if len(pdata) > 0:
                dataset += pdata
            logger.fdebug(str(len(dataset)) + ' series match the title being searched for on 32P...')

        if all([chk_id is None, not series_search.startswith('0-Day Comics Pack'), self.searchterm['torrentid_32p'] is not None, self.searchterm['torrentid_32p'] != 'None']) and any([len(data) == 1, len(pdata) == 1]):
            #update the 32p_reference so we avoid doing a url lookup next time
            helpers.checkthe_id(comic_id, dataset)
        else:
            if all([not series_search.startswith('0-Day Comics Pack'), self.searchterm['torrentid_32p'] is not None, self.searchterm['torrentid_32p'] != 'None']):
                pass
            else:
                logger.debug('Unable to properly verify reference on 32P - will update the 32P reference point once the issue has been successfully matched against.')

        results32p = []
        resultlist = {}

        for x in dataset:
            #for 0-day packs, issue=week#, volume=month, id=0-day year pack (ie.issue=21&volume=2 for feb.21st)
            payload = {"action": "groupsearch",
                       "id":     x['id'], #searchid,
                       "issue":  issue_search}
            #in order to match up against 0-day stuff, volume has to be none at this point
            #when doing other searches tho, this should be allowed to go through
            #if all([volume_search != 'None', volume_search is not None]):
            #    payload.update({'volume': re.sub('v', '', volume_search).strip()})
            if series_search.startswith('0-Day Comics Pack'):
                payload.update({"volume": volume_search})

            payload = json.dumps(payload)
            payload = json.loads(payload)

            logger.fdebug('payload: %s' % payload)
            url = 'https://32pag.es/ajax.php'
            time.sleep(1)  #just to make sure we don't hammer, 1s pause.
            try:
                d = self.session.get(url, params=payload, verify=True, allow_redirects=True)
            except Exception as e:
                logger.error('%s [%s] Could not retrieve URL %s' % (self.module, e, url))
                continue  # skip this entry; 'd' is undefined if the request failed

            try:
                searchResults = d.json()
            except Exception as e:
                searchResults = d.text
                logger.debug('[%s] %s Search Result did not return valid JSON, falling back on text: %s' % (e, self.module, searchResults))
                return False

            if searchResults['status'] == 'success' and searchResults['count'] > 0:
                logger.fdebug('successfully retrieved %s search results' % searchResults['count'])
                for a in searchResults['details']:
                    if series_search.startswith('0-Day Comics Pack'):
                        title = series_search
                    else:
                        title = self.searchterm['series'] + ' v' + a['volume'] + ' #' + a['issues']
                    results32p.append({'link':      a['id'],
                                       'title':     title,
                                       'filesize':  a['size'],
                                       'issues':     a['issues'],
                                       'pack':      a['pack'],
                                       'format':    a['format'],
                                       'language':  a['language'],
                                       'seeders':   a['seeders'],
                                       'leechers':  a['leechers'],
                                       'scanner':   a['scanner'],
                                       'chkit':     {'id': x['id'], 'series': x['series']},
                                       'pubdate':   datetime.datetime.fromtimestamp(float(a['upload_time'])).strftime('%a, %d %b %Y %H:%M:%S'),
                                       'int_pubdate': float(a['upload_time'])})

            else:
                logger.fdebug('32P did not return any valid search results.')

        if len(results32p) > 0:
            resultlist['entries'] = sorted(results32p, key=itemgetter('pack','title'), reverse=False)
            logger.debug('%s Resultslist: %s' % (self.module, resultlist))
        else:
            resultlist = 'no results'

        return resultlist
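
Finally, a small sketch with fabricated entries showing the ordering step shared by all four examples: results are sorted by the 'pack' flag and then by title, and upload_time is kept both as a formatted string and as a raw float:

import datetime
from operator import itemgetter

entries = [
    {'pack': '1', 'title': 'Saga v1 #1-54', 'int_pubdate': 1525219200.0},
    {'pack': '0', 'title': 'Saga v1 #54', 'int_pubdate': 1525132800.0},
]

for entry in entries:
    entry['pubdate'] = datetime.datetime.fromtimestamp(
        entry['int_pubdate']).strftime('%a, %d %b %Y %H:%M:%S')

for entry in sorted(entries, key=itemgetter('pack', 'title')):
    print(entry['pack'], entry['title'], entry['pubdate'])
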