def process_query_result(self, xml, searchRequest, maxResults=None):
    """Parse the NZBClub RSS payload and filter its items.

    Every ``<item>`` element is converted via ``parseItem`` and then run
    through ``accept_result``; rejected entries are tallied per rejection
    reason. Returns an IndexerProcessingResult covering the whole feed.
    """
    self.debug("Started processing results")
    accepted_entries = []
    rejection_counts = self.getRejectedCountDict()
    try:
        root = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        self.debug(xml[:500])
        raise IndexerResultParsingException(
            "Error while parsing XML from NZBClub", self)
    for node in root.iter('item'):
        try:
            result = self.parseItem(node)
        except IndexerResultParsingRowException:
            # Row could not be parsed; skip it silently
            continue
        ok, reason, reason_id = self.accept_result(result, searchRequest,
                                                   self.supportedFilters)
        if not ok:
            rejection_counts[reason_id] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
        else:
            accepted_entries.append(result)
    self.debug("Finished processing results")
    # No paging with RSS. Might need/want to change to HTML and BS
    return IndexerProcessingResult(entries=accepted_entries,
                                   queries=[],
                                   total=len(accepted_entries),
                                   total_known=True,
                                   has_more=False,
                                   rejected=rejection_counts)
def checkCapsBruteForce(supportedTypes, toCheck, host, apikey, username=None, password=None):
    """Probe an indexer for supported search IDs by issuing test queries in parallel.

    For each entry in ``toCheck`` a ``_testId`` probe is submitted to a thread
    pool; IDs whose probe reports support are collected. ``supportedTypes`` is
    extended in place with the search types reported by successful probes.

    Returns a tuple ``(sorted unique supported IDs, sorted unique supported types)``.
    Raises IndexerResultParsingException if any probe fails with an exception.
    """
    supportedIds = []
    # BUGFIX: ThreadPoolExecutor raises ValueError for max_workers <= 0,
    # so bail out early when there is nothing to check.
    if not toCheck:
        return sorted(set(supportedIds)), sorted(set(supportedTypes))
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(toCheck)) as executor:
        futures_to_ids = {
            executor.submit(_testId, host, apikey, x["t"], x["id"], x["key"],
                            x["expected"], username=username, password=password): x["id"]
            for x in toCheck
        }
        for future in concurrent.futures.as_completed(futures_to_ids):
            checkedId = futures_to_ids[future]  # renamed from "id" (shadowed builtin)
            try:
                supported, t = future.result()
                if supported:
                    supportedIds.append(checkedId)
                    supportedTypes.append(t)
            except Exception as e:
                logger.error(
                    "An error occurred while trying to test the caps of host %s: %s"
                    % (host, e))
                raise IndexerResultParsingException(
                    "Unable to check caps: %s" % str(e), None)
    # Deduplicate because several probes may report the same type
    return sorted(set(supportedIds)), sorted(set(supportedTypes))
def parseXml(self, xmlResponse, maxResults=None):
    """Parse a newznab result page into entries plus paging information.

    Each ``<item>`` under the channel is handed to ``parseItem``; parsing
    stops early once ``maxResults`` entries were collected. The total result
    count and current offset are taken from the ``newznab:response`` element
    when the indexer supplies it, otherwise they fall back to the number of
    parsed entries and offset 0.

    Returns a tuple ``(entries, total, offset)``.
    """
    parsed = []
    try:
        root = ET.fromstring(xmlResponse.encode('utf-8'))
    except Exception:
        self.exception("Error parsing XML: %s..." % xmlResponse[:500])
        raise IndexerResultParsingException("Error parsing XML", self)
    for node in root.find("channel").findall("item"):
        parsed.append(self.parseItem(node))
        if maxResults is not None and len(parsed) == maxResults:
            break
    namespaces = {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"}
    paging = root.find("./channel[1]/newznab:response", namespaces)
    paging_missing = (paging is None
                      or paging.attrib["total"] == ""
                      or paging.attrib["offset"] == "")
    if paging_missing:
        self.warn(
            "Indexer returned a result page without total results and offset. Shame! *rings bell*"
        )
        offset = 0
        total = len(parsed)
    else:
        total = int(paging.attrib["total"])
        offset = int(paging.attrib["offset"])
    return parsed, total, offset
def process_query_result(self, xml, searchRequest, maxResults=None):
    """Parse this indexer's TV RSS feed into accepted search results.

    Each ``<item>`` must carry a title, enclosure and pubDate; items missing
    any of these are skipped. Size and description are extracted from the
    description text, and the RSS category string is mapped to the internal
    TV categories. Returns an IndexerProcessingResult covering the whole feed.
    """
    entries = []
    countRejected = self.getRejectedCountDict()
    try:
        tree = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        logger.debug(xml)
        raise IndexerResultParsingException("Error parsing XML", self)
    # Hoisted out of the loop; raw string avoids invalid-escape warnings
    size_pattern = re.compile(r"(.*)\(Size:(\d*)")
    for elem in tree.iter('item'):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        if title is None or url is None or pubdate is None:
            # Mandatory fields are missing; skip this item
            continue
        entry = self.create_nzb_search_result()
        entry.title = title.text
        entry.link = url.attrib["url"]
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        m = size_pattern.search(elem.find("description").text)
        if m:
            entry.description = m.group(1)
            entry.size = int(m.group(2)) * 1024 * 1024  # megabyte to byte
        category = elem.find("category").text.lower()
        if category in ("tv-dvdrip", "tv-sd"):
            entry.category = getCategoryByName("tvsd")
        # BUGFIX: the original compared against the unbound ``lower`` method
        # ("....text.lower == 'tv-hd'", missing call parentheses), so the
        # "tv-hd" category was never matched and fell through to unknown.
        elif category in ("tv-x264", "tv-hd"):
            entry.category = getCategoryByName("tvhd")
        else:
            entry.category = getUnknownCategory()
        # 39a/The.Almighty.Johnsons.S03E06.720p.BluRay.x264-YELLOWBiRD.nzb is
        # the GUID, only the 39a doesn't work
        entry.indexerguid = elem.find("guid").text[30:]
        pubdate = arrow.get(pubdate.text, 'M/D/YYYY h:mm:ss A')
        entry.epoch = pubdate.timestamp
        entry.pubdate_utc = str(pubdate)
        entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        # Compute once; both attributes carried the same value before
        age_days = (arrow.utcnow() - pubdate).days
        entry.age_days = age_days
        entry.age = age_days
        accepted, reason, ri = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected[ri] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    return IndexerProcessingResult(entries=entries,
                                   queries=[],
                                   total_known=True,
                                   has_more=False,
                                   total=len(entries),
                                   rejected=countRejected)
def parseXml(self, xmlResponse, maxResults=None):
    """Parse an RSS result page without paging information.

    Delegates each ``<item>`` to ``parseItem`` and stops once ``maxResults``
    entries were collected. Returns ``(entries, total, offset)`` where total
    equals the number of parsed entries and offset is always 0, since this
    feed provides no paging data.
    """
    results = []
    try:
        root = ET.fromstring(xmlResponse.encode('utf-8'))
    except Exception:
        self.exception("Error parsing XML: %s..." % xmlResponse[:500])
        raise IndexerResultParsingException("Error parsing XML", self)
    channel = root.find("channel")
    for node in channel.findall("item"):
        results.append(self.parseItem(node))
        if maxResults is not None and len(results) == maxResults:
            break
    return results, len(results), 0
def process_query_result(self, html, searchRequest, maxResults=None):
    """Parse an NZBIndex HTML result page into accepted search results.

    Rows of the main result table are parsed via ``parseRow`` and filtered
    through ``accept_result``. Paging information is scraped from the table
    footer to estimate the total result count and whether more pages exist.
    Raises IndexerResultParsingException when the main table cannot be found.
    """
    self.debug("Started processing results")
    entries = []
    countRejected = self.getRejectedCountDict()
    logger.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
    soup = BeautifulSoup(html, config.settings.searching.htmlParser)
    # NOTE(review): main_table is looked up before the "No results found"
    # check; if the "results" element is absent this line raises before the
    # no-results branch can run — confirm the page always contains it.
    main_table = soup.find(id="results").find('table')
    if "No results found" in soup.text:
        # Empty but valid result page
        return IndexerProcessingResult(entries=[],
                                       queries=[],
                                       total=0,
                                       total_known=True,
                                       has_more=False,
                                       rejected=self.getRejectedCountDict())
    if not main_table or not main_table.find("tbody"):
        self.error("Unable to find main table in NZBIndex page: %s..." % html[:500])
        self.debug(html[:500])
        raise IndexerResultParsingException(
            "Unable to find main table in NZBIndex page", self)
    items = main_table.find("tbody").find_all('tr')
    for row in items:
        try:
            entry = self.parseRow(row)
        except IndexerResultParsingRowException:
            # Skip rows that cannot be parsed (e.g. ads or malformed rows)
            continue
        accepted, reason, ri = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected[ri] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    # Scrape paging links from the table footer to estimate the total count
    try:
        page_links = main_table.find("tfoot").find_all("tr")[1].find_all('a')
        if len(page_links) == 0:
            # Single page: what we parsed is everything there is
            total = len(entries)
            has_more = False
        else:
            pagecount = int(page_links[-2].text)
            currentpage = int(
                main_table.find("tfoot").find_all("tr")[1].find("b").text)  #Don't count "next"
            has_more = pagecount > currentpage
            total = self.limit * pagecount  #Good enough
    except Exception:
        # Fall back to the parsed entries if the footer layout changed
        self.exception("Error while trying to find page count")
        total = len(entries)
        has_more = False
    self.debug("Finished processing results")
    return IndexerProcessingResult(entries=entries,
                                   queries=[],
                                   total=total,
                                   total_known=True,
                                   has_more=has_more,
                                   rejected=countRejected)
def process_query_result(self, xml, searchRequest, maxResults=None):
    """Parse this indexer's anime RSS feed into accepted search results.

    Items missing a title, enclosure or pubDate are skipped. Every entry is
    assigned the "anime" category, and the details link is derived from the
    download link. Returns an IndexerProcessingResult covering the feed.
    """
    accepted_results = []
    rejected_count = 0
    try:
        root = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        logger.debug(xml)
        raise IndexerResultParsingException("Error parsing XML", self)
    for node in root.iter('item'):
        title_node = node.find("title")
        enclosure_node = node.find("enclosure")
        date_node = node.find("pubDate")
        if title_node is None or enclosure_node is None or date_node is None:
            # Mandatory fields missing; ignore this item
            continue
        result = self.create_nzb_search_result()
        result.title = title_node.text
        result.link = enclosure_node.attrib["url"]
        result.size = int(enclosure_node.attrib["length"])
        result.has_nfo = NzbSearchResult.HAS_NFO_NO
        result.category = getCategoryByName("anime")
        result.indexerguid = node.find("guid").text
        # The details page uses the same URL as the NZB download,
        # with "info" in place of "dl"
        result.details_link = result.link.replace("dl", "info")
        parsed_date = arrow.get(date_node.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
        result.epoch = parsed_date.timestamp
        result.pubdate_utc = str(parsed_date)
        result.pubDate = parsed_date
        result.age_days = (arrow.utcnow() - parsed_date).days
        ok, reason = self.accept_result(result, searchRequest,
                                        self.supportedFilters)
        if not ok:
            rejected_count += 1
            self.debug("Rejected search result. Reason: %s" % reason)
        else:
            accepted_results.append(result)
    return IndexerProcessingResult(entries=accepted_results,
                                   queries=[],
                                   total_known=True,
                                   has_more=False,
                                   total=len(accepted_results),
                                   rejected=rejected_count)
def process_query_result(self, html, searchRequest, maxResults=None):
    """Parse a binsearch HTML result page into accepted search results.

    Rows of the "r2" table are parsed via ``parseRow``; results are kept in
    a set to avoid duplicates. Paging state and an estimated total are
    scraped from the second "xMenuT" table. Raises
    IndexerResultParsingException when the main table cannot be found.
    """
    self.debug("Started processing results")
    logger.info("Last results count %d" % self.last_results_count)
    # Set to deduplicate entries; binsearch can list the same item twice
    entries = Set([])
    countRejected = 0
    self.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
    soup = BeautifulSoup(html, config.settings.searching.htmlParser)
    if "No results in most popular groups" in soup.text:
        logger.info("No results found for query")
        # NOTE(review): total_known=0 looks like it should be False — it is
        # falsy either way, but inconsistent with the other return sites.
        return IndexerProcessingResult(entries=[],
                                       queries=[],
                                       total_known=0,
                                       has_more=False,
                                       total=0,
                                       rejected=0)
    main_table = soup.find('table', attrs={'id': 'r2'})
    if not main_table:
        self.debug(html[:500])
        raise IndexerResultParsingException(
            "Unable to find main table in binsearch page. This happens sometimes... :-)",
            self)
    items = main_table.find_all('tr')
    for row in items:
        try:
            entry = self.parseRow(row)
        except IndexerResultParsingRowException:
            # Skip rows that cannot be parsed
            continue
        accepted, reason = self.accept_result(entry, searchRequest,
                                              self.supportedFilters)
        if accepted:
            entries.add(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    self.debug("Finished processing %d results" % len(entries))
    # Paging: a trailing ">" link means there is at least one more page
    page_links = soup.find_all('table', attrs={'class': 'xMenuT'})[1].find_all("a")
    has_more = len(page_links) > 0 and page_links[-1].text == ">"
    total_known = False
    total = 100  # placeholder guess when the page does not state a count
    if len(page_links) == 0:
        # Single page: try to read the exact record count from the footer text
        m = re.compile(r".* (\d+)\+? records.*").search(
            soup.find_all('table', attrs={'class': 'xMenuT'})[1].text)
        if m:
            total = int(m.group(1))
            total_known = True
    return IndexerProcessingResult(entries=entries,
                                   queries=[],
                                   total_known=total_known,
                                   has_more=has_more,
                                   total=total,
                                   rejected=countRejected)
def process_query_result(self, html, maxResults=None):
    """Parse an NZBIndex HTML result page, extracting each result inline.

    Older variant without a searchRequest parameter: each table row is parsed
    field by field (title, NFO presence, poster, link, size, group, age) and
    filtered through ``accept_result``. Paging information is scraped from
    the table footer. Raises IndexerResultParsingException when the main
    table cannot be found.
    """
    self.debug("Started processing results")
    entries = []
    logger.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
    soup = BeautifulSoup(html, config.settings.searching.htmlParser)
    # NOTE(review): looked up before the "No results found" check; raises if
    # the "results" element is absent — confirm the page always contains it.
    main_table = soup.find(id="results").find('table')
    if "No results found" in soup.text:
        return IndexerProcessingResult(entries=[],
                                       queries=[],
                                       total=0,
                                       total_known=True,
                                       has_more=False)
    if not main_table or not main_table.find("tbody"):
        self.error("Unable to find main table in NZBIndex page: %s..." % html[:500])
        self.debug(html[:500])
        raise IndexerResultParsingException(
            "Unable to find main table in NZBIndex page", self)
    items = main_table.find("tbody").find_all('tr')
    # Patterns compiled once per page, reused for every row
    size_pattern = re.compile(
        r"(?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
    age_pattern = re.compile(r"(?P<days1>\d+)\.(?P<days2>\d)")
    title_pattern = re.compile(
        r'"(.*)\.(rar|nfo|mkv|par2|001|nzb|url|zip|r[0-9]{2})"')
    for row in items:
        tds = list(row.find_all("td"))
        if len(tds) != 5:
            # advertisement
            continue
        entry = self.create_nzb_search_result()
        entry.indexerguid = row.find("input")["value"]
        infotd = tds[1]
        if "password protected" in infotd.text.lower():
            entry.passworded = True
        # Title: strip newlines/spaces, then try to extract the quoted filename
        title = infotd.find("label").text
        title = title.replace("\n", "")
        title = re.sub(" +", "", title)
        m = title_pattern.search(title)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title
        info = infotd.find("div", class_="fileinfo")
        if info is not None and re.compile(r"\d NFO").search(
                info.text):  # 1 nfo file is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        poster = infotd.find("span", class_="poster").find("a")
        if poster is not None:
            poster = poster.text.replace("\n", "")
            poster = re.sub(" +", "", poster)
            entry.poster = poster.replace("(", " (").replace("<", " <").strip()
        link = infotd.findAll('a', text=re.compile('Download'))
        if link is not None and len(link) == 1:
            entry.link = link[0]["href"]
        else:
            self.debug("Did not find link in row")
        entry.category = "N/A"
        # Size: convert the displayed unit to bytes
        sizetd = tds[2]
        m = size_pattern.search(sizetd.text)
        if not m:
            self.debug("Unable to find size information in %s" % sizetd.text)
        else:
            size = float(m.group("size"))
            unit = m.group("unit")
            if unit == "KB":
                size *= 1024
            elif unit == "MB":
                size = size * 1024 * 1024
            elif unit == "GB":
                size = size * 1024 * 1024 * 1024
            entry.size = int(size)
        # Group: expand the abbreviated "a.b." prefix
        grouptd = tds[3]
        group = grouptd.text.replace("\n", "").replace("a.b.",
                                                       "alt.binaries.").strip()
        entry.group = group
        # Age: either "D.d" (days with a decimal, each tenth = 2.4 hours)
        # or "N hours" for very fresh posts
        agetd = tds[4]
        m = age_pattern.search(agetd.text)
        days = None
        hours = None
        if m:
            days = int(m.group("days1"))
            hours = int(m.group("days2")) * 2.4
        else:
            p = re.compile(r"(?P<hours>\d+) hours?")
            m = p.search(agetd.text)
            if m:
                days = 0
                hours = int(m.group("hours"))
        if hours is not None:
            pubdate = arrow.utcnow().replace(
                days=-days, hours=-1)  # hours because of timezone change below
            if hours > 0:
                pubdate = pubdate.replace(hours=-hours)
            pubdate = pubdate.to("+01:00")  # nzbindex server time, I guess?
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age_precise = True  # Precise to 2.4 hours, should be enough for duplicate detection
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        else:
            self.debug("Found no age info in %s" % str(agetd))
        collection_links = infotd.findAll("a", href=True,
                                          text="View collection")
        if collection_links is not None and len(collection_links) > 0:
            entry.details_link = collection_links[0].attrs["href"]
        accepted, reason = self.accept_result(entry)
        if accepted:
            entries.append(entry)
        else:
            self.debug("Rejected search result. Reason: %s" % reason)
    # Scrape paging links from the table footer to estimate the total count
    try:
        page_links = main_table.find("tfoot").find_all("tr")[1].find_all('a')
        if len(page_links) == 0:
            total = len(entries)
            has_more = False
        else:
            pagecount = int(page_links[-2].text)
            currentpage = int(
                main_table.find("tfoot").find_all("tr")[1].find(
                    "b").text)  #Don't count "next"
            has_more = pagecount > currentpage
            total = self.limit * pagecount  #Good enough
    except Exception:
        # Fall back to the parsed entries if the footer layout changed
        self.exception("Error while trying to find page count")
        total = len(entries)
        has_more = False
    self.debug("Finished processing results")
    return IndexerProcessingResult(entries=entries,
                                   queries=[],
                                   total=total,
                                   total_known=True,
                                   has_more=has_more)
def execute_queries(self, queries, searchRequest):
    """Execute a list of search URLs against this indexer and collect results.

    Pops queries one by one (parsing a page may append follow-up queries,
    e.g. for the next result page), guards against re-running the same URL,
    and records API access / indexer status bookkeeping for every attempt.
    Auth, access and parsing failures are handled separately and update the
    indexer status accordingly.

    Returns a QueriesExecutionResult summarizing entries, counts and status.
    """
    if len(queries) == 0:
        # Nothing to do: report that no search was performed
        return QueriesExecutionResult(didsearch=False,
                                      results=[],
                                      indexerSearchEntry=None,
                                      indexerApiAccessEntry=None,
                                      indexerStatus=None,
                                      total=0,
                                      loaded_results=0,
                                      total_known=True,
                                      has_more=False,
                                      rejected=self.getRejectedCountDict())
    results = []
    executed_queries = set()
    psearch = IndexerSearch(indexer=self.indexer)
    papiaccess = IndexerApiAccess()
    indexerStatus = None
    total_results = 0
    total_known = False
    has_more = False
    rejected = self.getRejectedCountDict()
    while len(queries) > 0:
        query = queries.pop()
        if query in executed_queries:
            # To make sure that in case an offset is reported wrong or we
            # have a bug we don't get stuck in an endless loop
            continue
        try:
            request, papiaccess, indexerStatus = self.get_url_with_papi_access(
                query, "search", saveToDb=False)
            papiaccess.indexer_search = psearch
            executed_queries.add(query)
            if request is not None:
                if request.text == "":
                    raise IndexerResultParsingException(
                        "Indexer returned an empty page", self)
                self.check_auth(request.text)
                self.debug("Successfully loaded URL %s" % request.url)
                try:
                    parsed_results = self.process_query_result(
                        request.content, searchRequest)
                    results.extend(parsed_results.entries)  # Retrieve the processed results
                    # Add queries that were added as a result of the parsing,
                    # e.g. when the next result page should also be loaded
                    queries.extend(parsed_results.queries)
                    total_results += parsed_results.total
                    total_known = parsed_results.total_known
                    has_more = parsed_results.has_more
                    rejected = parsed_results.rejected
                    papiaccess.response_successful = True
                    indexerStatus = self.handle_indexer_success(False)
                except Exception:
                    self.exception(
                        "Error while processing search results from indexer %s"
                        % self)
                    raise IndexerResultParsingException(
                        "Error while parsing the results from indexer", self)
        except IndexerAuthException as e:
            # Bad credentials: disable the indexer permanently
            papiaccess.error = "Authorization error :%s" % e.message
            self.error(papiaccess.error)
            indexerStatus = self.handle_indexer_failure(
                reason="Authentication failed", disable_permanently=True)
            papiaccess.response_successful = False
        except IndexerAccessException as e:
            papiaccess.error = "Access error: %s" % e.message
            self.error(papiaccess.error)
            indexerStatus = self.handle_indexer_failure(reason="Access failed")
            papiaccess.response_successful = False
        except IndexerResultParsingException as e:
            papiaccess.error = "Access error: %s" % e.message
            self.error(papiaccess.error)
            indexerStatus = self.handle_indexer_failure(
                reason="Parsing results failed")
            papiaccess.response_successful = False
        except Exception as e:
            self.exception("An error error occurred while searching: %s", e)
            if papiaccess is not None:
                papiaccess.error = "Unknown error :%s" % e
                papiaccess.response_successful = False
        finally:
            # Keep the search entry in sync with the last access outcome
            if papiaccess is not None:
                psearch.successful = papiaccess.response_successful
            else:
                self.error("Unable to save API response to database")
            psearch.resultsCount = total_results
    return QueriesExecutionResult(didsearch=True,
                                  results=results,
                                  indexerSearchEntry=psearch,
                                  indexerApiAccessEntry=papiaccess,
                                  indexerStatus=indexerStatus,
                                  total=total_results,
                                  loaded_results=len(results),
                                  total_known=total_known,
                                  has_more=has_more,
                                  rejected=rejected)
def process_query_result(self, xml, searchRequest, maxResults=None):
    """Parse the NZBClub RSS feed, extracting each result inline.

    Items missing a title, enclosure or pubDate are skipped. Group, poster
    and NFO presence are scraped from the URL-decoded description text.
    Returns an IndexerProcessingResult; raises IndexerResultParsingException
    when the XML cannot be parsed.
    """
    self.debug("Started processing results")
    entries = []
    countRejected = 0
    try:
        tree = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        self.debug(xml[:500])
        raise IndexerResultParsingException(
            "Error while parsing XML from NZBClub", self)
    # Patterns to extract group and poster from the description HTML
    group_pattern = re.compile(r"Newsgroup: ?([\w@\. \(\)]+) <br />")
    poster_pattern = re.compile(r"Poster: ?([\w@\. \(\)]+) <br />")
    for elem in tree.iter('item'):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        if title is None or url is None or pubdate is None:
            # Mandatory fields missing; skip this item
            continue
        entry = self.create_nzb_search_result()
        if "password protect" in title.text.lower(
        ) or "passworded" in title.text.lower():
            entry.passworded = True
        # Prefer the quoted filename inside the title when present
        p = re.compile(r'"(.*)"')
        m = p.search(title.text)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title.text
        entry.link = url.attrib["url"]
        entry.size = int(url.attrib["length"])
        entry.indexer = self.name
        entry.category = "N/A"
        entry.details_link = elem.find("link").text
        entry.indexerguid = elem.find("guid").text[
            -8:]  # GUID looks like "http://www.nzbclub.com/nzb_view58556415" of which we only want the last part
        description = elem.find("description").text
        description = urlparse.unquote(description).replace("+", " ")
        if re.compile(r"\d NFO Files").search(
                description):  # [x NFO Files] is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        m = group_pattern.search(description)
        if m:
            entry.group = m.group(1).strip()
        m = poster_pattern.search(description)
        if m:
            entry.poster = m.group(1).strip()
        try:
            pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        except Exception as e:
            # pubdate is still the XML element here, so .text is available
            self.error("Unable to parse pubdate %s" % pubdate.text)
            continue
        accepted, reason = self.accept_result(entry, searchRequest,
                                              self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    self.debug("Finished processing results")
    return IndexerProcessingResult(
        entries=entries,
        queries=[],
        total=len(entries),
        total_known=True,
        has_more=False,
        rejected=countRejected
    )  # No paging with RSS. Might need/want to change to HTML and BS
def process_query_result(self, xml_response, searchRequest, maxResults=None):
    """Parse an omgwtfnzbs response, which may be API XML or an RSS feed.

    The "xml" root tag marks an API response with explicit paging info; the
    "rss" root tag marks a feed where GUID and group are scraped from the
    item text. Returns an IndexerProcessingResult; raises
    IndexerResultParsingException when the XML cannot be parsed.
    """
    self.debug("Started processing results")
    if "0 results found" in xml_response:
        return IndexerProcessingResult(entries=[],
                                       queries=[],
                                       total=0,
                                       total_known=True,
                                       has_more=False,
                                       rejected=0)
    if "search to short" in xml_response:
        self.info("omgwtf says the query was too short")
        return IndexerProcessingResult(entries=[],
                                       queries=[],
                                       total=0,
                                       total_known=True,
                                       has_more=False,
                                       rejected=0)
    entries = []
    countRejected = 0
    try:
        tree = ET.fromstring(xml_response)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml_response[:500])
        raise IndexerResultParsingException("Error parsing XML", self)
    if tree.tag == "xml":
        # API response: paging info is provided explicitly
        total = int(tree.find("info").find("results").text)
        current_page = int(tree.find("info").find("current_page").text)
        total_pages = int(tree.find("info").find("pages").text)
        has_more = current_page < total_pages
        for item in tree.find("search_req").findall("post"):
            entry = self.create_nzb_search_result()
            entry.indexerguid = item.find("nzbid").text
            entry.title = item.find("release").text
            entry.group = item.find("group").text
            entry.link = item.find("getnzb").text
            entry.size = long(item.find("sizebytes").text)
            entry.epoch = long(item.find("usenetage").text)
            pubdate = arrow.get(entry.epoch)
            entry.pubdate_utc = str(pubdate)
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age_precise = True
            entry.details_link = item.find("details").text
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES if item.find(
                "getnfo") is not None else NzbSearchResult.HAS_NFO_NO
            categoryid = item.find("categoryid").text
            if categoryid in omgwtf_to_categories.keys():
                entry.category = omgwtf_to_categories[categoryid]
            else:
                entry.category = "N/A"
            # NOTE(review): unlike the rss branch below, this branch never
            # calls accept_result, so no filtering happens — confirm intended.
            entries.append(entry)
        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total=total,
                                       total_known=True,
                                       has_more=has_more,
                                       rejected=countRejected)
    elif tree.tag == "rss":
        # RSS feed: GUID and group must be scraped out of the item text
        regexGuid = re.compile(r".*\?id=(\w+)&.*")
        regexGroup = re.compile(r".*Group:<\/b> ([\w\.\-]+)<br \/>.*")
        for item in tree.find("channel").findall("item"):
            entry = self.create_nzb_search_result()
            indexerguid = item.find("guid").text
            m = regexGuid.match(indexerguid)
            if m:
                entry.indexerguid = m.group(1)
            else:
                self.warn("Unable to find GUID in " + indexerguid)
                continue
            entry.title = item.find("title").text
            description = item.find("description").text
            m = regexGroup.match(description)
            if m:
                entry.group = m.group(1)
            else:
                self.warn("Unable to find group in " + description)
                continue
            entry.size = long(item.find("enclosure").attrib["length"])
            entry.pubDate = item.find("pubDate").text
            pubdate = arrow.get(entry.pubDate, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.precise_date = True
            entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
            categoryid = item.find("categoryid").text
            if categoryid in omgwtf_to_categories.keys():
                entry.category = omgwtf_to_categories[categoryid]
            else:
                entry.category = "N/A"
            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)
        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total=len(entries),
                                       total_known=True,
                                       has_more=False,
                                       rejected=countRejected)
    else:
        self.warn("Unknown response type: %s" % xml_response[:100])
        return IndexerProcessingResult(entries=[],
                                       queries=[],
                                       total=0,
                                       total_known=True,
                                       has_more=False,
                                       rejected=countRejected)
def process_query_result(self, html, searchRequest, maxResults=None):
    """Parse a binsearch HTML result page, extracting each result inline.

    Each table row is scraped field by field: title (with passworded
    detection), GUID/link, group and poster from the collection URL, size,
    NFO presence, and age from the date cell. Results are kept in a set to
    avoid duplicates. Paging state is scraped from the second "xMenuT"
    table. Raises IndexerResultParsingException when the main table is
    missing.
    """
    self.debug("Started processing results")
    logger.info("Last results count %d" % self.last_results_count)
    # Set to deduplicate entries; binsearch can list the same item twice
    entries = Set([])
    countRejected = 0
    soup = BeautifulSoup(html, config.settings.searching.htmlParser)
    self.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
    main_table = soup.find('table', attrs={'id': 'r2'})
    if not main_table:
        self.warn(
            "Unable to find main table in binsearch page. This just sometimes happens..."
        )
        self.debug(html[:500])
        raise IndexerResultParsingException(
            "Unable to find main table in binsearch page. This happens sometimes... :-)",
            self)
    items = main_table.find_all('tr')
    # Patterns compiled once per page, reused for every row
    title_pattern = re.compile(
        r'"(.*)\.(rar|nfo|mkv|par2|001|nzb|url|zip|r[0-9]{2})"')
    size_pattern = re.compile(
        r"size: (?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
    poster_pattern = re.compile(r"&p=(.*)&")
    goup_pattern = re.compile(r"&g=([\w\.]*)&")
    nfo_pattern = re.compile(r"\d nfo file")
    for row in items:
        entry = self.create_nzb_search_result()
        title = row.find('span', attrs={'class': 's'})
        if title is None:
            self.debug("Ignored entry because it has no title")
            continue
        title = title.text
        if "password protect" in title.lower(
        ) or "passworded" in title.lower():
            entry.passworded = True
        # Prefer the quoted filename inside the title when present
        m = title_pattern.search(title)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title
        entry.indexerguid = row.find("input",
                                     attrs={"type": "checkbox"})["name"]
        entry.link = "https://www.binsearch.info/fcgi/nzb.fcgi?q=%s" % entry.indexerguid
        info = row.find("span", attrs={"class": "d"})
        if info is None:
            self.debug("Ignored entry because it has no info")
            continue
        collection_link = info.find(
            "a"
        )["href"]  # '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
        entry.details_link = "%s%s" % (self.host, collection_link)
        # Group and poster are query parameters of the collection link
        m = goup_pattern.search(collection_link)
        if m:
            entry.group = m.group(1).strip()
        m = poster_pattern.search(collection_link)
        if m:
            poster = m.group(1).strip()
            entry.poster = urlparse.unquote(poster).replace("+", " ")
        # Size
        m = size_pattern.search(info.text)
        if not m:
            self.debug("Unable to find size information in %s" % info.text)
        else:
            size = float(m.group("size"))
            unit = m.group("unit")
            if unit == "B":
                pass
            elif unit == "KB":
                size *= 1024
            elif unit == "MB":
                size = size * 1024 * 1024
            elif unit == "GB":
                size = size * 1024 * 1024 * 1024
            entry.size = int(size)
        entry.category = "N/A"
        if nfo_pattern.search(
                info.text):  # 1 nfo file is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        # Age
        try:
            pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(
                row.text).group(1)
            pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age_precise = False
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        except Exception as e:
            self.error("Unable to find age in %s" %
                       row.find_all("td")[-1:][0].text)
            continue
        accepted, reason = self.accept_result(entry, searchRequest,
                                              self.supportedFilters)
        if accepted:
            entries.add(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    self.debug("Finished processing %d results" % len(entries))
    # Paging: a trailing ">" link means there is at least one more page
    page_links = soup.find_all('table',
                               attrs={'class': 'xMenuT'})[1].find_all("a")
    has_more = len(page_links) > 0 and page_links[-1].text == ">"
    total_known = False
    total = 100  # placeholder guess when the page does not state a count
    if len(page_links) == 0:
        # Single page: try to read the exact record count from the footer text
        m = re.compile(r".* (\d+)\+? records.*").search(
            soup.find_all('table', attrs={'class': 'xMenuT'})[1].text)
        if m:
            total = int(m.group(1))
            total_known = True
    return IndexerProcessingResult(entries=entries,
                                   queries=[],
                                   total_known=total_known,
                                   has_more=has_more,
                                   total=total,
                                   rejected=countRejected)
def process_query_result(self, xml_response, searchRequest, maxResults=None):
    """Parse a newznab API XML response into accepted search results.

    Reads the mandatory item fields (title, link, pubDate, guid), then walks
    the extended ``newznab:attr`` elements for size, categories, poster,
    details link, password flag, group and usenetdate, storing all attributes
    for later pass-through to external APIs. Categories are mapped to the
    internal scheme, preferring the most specific numeric category. Paging
    totals come from the ``newznab:response`` element when present.

    Returns an IndexerProcessingResult; raises IndexerResultParsingException
    when the XML cannot be parsed.
    """
    self.debug("Started processing results")
    entries = []
    countRejected = 0
    grouppattern = re.compile(r"Group:</b> ?([\w\.]+)<br ?/>")
    guidpattern = re.compile(r"(.*/)?([a-zA-Z0-9@\.]+)")
    try:
        tree = ET.fromstring(xml_response)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml_response[:500])
        raise IndexerResultParsingException("Error parsing XML", self)
    for item in tree.find("channel").findall("item"):
        usenetdate = None
        entry = self.create_nzb_search_result()
        # These are the values that absolutely must be contained in the response
        entry.title = item.find("title").text
        entry.link = item.find("link").text
        entry.attributes = []
        entry.pubDate = item.find("pubDate").text
        entry.indexerguid = item.find("guid").text
        entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
        # GUIDs may be full URLs; keep only the trailing identifier part
        m = guidpattern.search(entry.indexerguid)
        if m:
            entry.indexerguid = m.group(2)
        description = item.find("description")
        if description is not None:
            description = description.text
            if description is not None and "Group:" in description:  # DogNZB has the group in its description
                m = grouppattern.search(description)
                if m and m.group(1) != "not available":
                    entry.group = m.group(1)
        categories = []
        for i in item.findall("./newznab:attr", {
                "newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"
        }):
            attribute_name = i.attrib["name"]
            attribute_value = i.attrib["value"]
            if attribute_name == "size":
                entry.size = int(attribute_value)
            elif attribute_name == "guid":
                entry.indexerguid = attribute_value
            elif attribute_name == "category" and attribute_value != "":
                try:
                    categories.append(int(attribute_value))
                except ValueError:
                    self.error("Unable to parse category %s" % attribute_value)
            elif attribute_name == "poster":
                entry.poster = attribute_value
            elif attribute_name == "info":
                entry.details_link = attribute_value
            elif attribute_name == "password" and attribute_value != "0":
                entry.passworded = True
            elif attribute_name == "group" and attribute_value != "not available":
                entry.group = attribute_value
            elif attribute_name == "usenetdate":
                usenetdate = arrow.get(attribute_value,
                                       'ddd, DD MMM YYYY HH:mm:ss Z')
            # Store all the extra attributes, we will return them later for external apis
            entry.attributes.append({
                "name": attribute_name,
                "value": attribute_value
            })
        if entry.details_link is None:
            entry.details_link = self.get_details_link(entry.indexerguid)
        if usenetdate is None:
            # Not provided by attributes, use pubDate instead
            usenetdate = arrow.get(entry.pubDate, 'ddd, DD MMM YYYY HH:mm:ss Z')
        entry.epoch = usenetdate.timestamp
        entry.pubdate_utc = str(usenetdate)
        entry.age_days = (arrow.utcnow() - usenetdate).days
        entry.precise_date = True
        # Map category. Try to find the most specific category (like 2040), then the more general one (like 2000)
        categories = sorted(
            categories, reverse=True
        )  # Sort to make the most specific category appear first
        if len(categories) > 0:
            for k, v in categories_to_newznab.items():
                for c in categories:
                    if c in v:
                        entry.category = k
                        break
        accepted, reason = self.accept_result(entry, searchRequest,
                                              self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
        if maxResults is not None and len(entries) == maxResults:
            break
    # Paging info from the newznab:response element, when the indexer sends it
    response_total_offset = tree.find(
        "./channel[1]/newznab:response",
        {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"})
    if response_total_offset is None or response_total_offset.attrib[
            "total"] == "" or response_total_offset.attrib["offset"] == "":
        self.warn(
            "Indexer returned a result page without total results and offset. Shame! *rings bell*"
        )
        offset = 0
        total = len(entries)
    else:
        total = int(response_total_offset.attrib["total"])
        offset = int(response_total_offset.attrib["offset"])
    if total == 0 or len(entries) == 0:
        self.info("Query returned no results")
        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total=0,
                                       total_known=True,
                                       has_more=False,
                                       rejected=0)
    return IndexerProcessingResult(entries=entries,
                                   queries=[],
                                   total=total,
                                   total_known=True,
                                   has_more=offset + len(entries) < total,
                                   rejected=countRejected)
def check_caps(host, apikey, userAgent=None, timeout=None):
    """Determine the search types and ID parameters a newznab indexer supports.

    The indexer's "caps" page is requested and parsed first. If the page does
    not advertise its supported parameters the check falls back to brute
    force test searches via checkCapsBruteForce(); the same fallback is used
    when the caps page cannot be retrieved or parsed at all.

    :param host: base URL of the indexer
    :param apikey: API key used for the caps request
    :param userAgent: User-Agent header value; falls back to the configured one
    :param timeout: request timeout; falls back to the configured one
    :return: tuple (sorted list of supported ID params, sorted list of supported search types)
    :raises IndexerResultParsingException: if the caps request fails with an HTTP error
    """
    # Each entry describes one brute-force probe: search type, ID parameter,
    # a known ID value and a string expected to appear in the results.
    toCheck = [
        {"t": "tvsearch", "id": "tvdbid", "key": "121361", "expected": "Thrones"},
        {"t": "movie", "id": "imdbid", "key": "0848228", "expected": "Avengers"},
        {"t": "tvsearch", "id": "rid", "key": "24493", "expected": "Thrones"},
        {"t": "tvsearch", "id": "tvmazeid", "key": "82", "expected": "Thrones"},
        {"t": "tvsearch", "id": "traktid", "key": "1390", "expected": "Thrones"},
        {"t": "tvsearch", "id": "tmdbid", "key": "1399", "expected": "Thrones"},
    ]
    supportedIds = []
    supportedTypes = []
    # Try to find out from caps first
    try:
        url = _build_base_url(host, apikey, "caps", None)
        headers = {
            'User-Agent': userAgent if userAgent is not None else config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = requests.get(url, verify=False,
                         timeout=timeout if timeout is not None else config.settings.searching.timeout,
                         headers=headers)
        r.raise_for_status()
        tree = ET.fromstring(r.content)
        searching = tree.find("searching")
        doBruteForce = False
        if searching is not None:
            tvsearch = searching.find("tv-search")
            if tvsearch is not None and tvsearch.attrib["available"] == "yes":
                supportedTypes.append("tvsearch")
                logger.debug("Found supported TV search")
                if "supportedParams" in tvsearch.attrib:
                    params = tvsearch.attrib["supportedParams"]
                    params = params.split(",")
                    # Generic params are always supported; keep only the ID params
                    for x in ["q", "season", "ep"]:
                        if x in params:
                            params.remove(x)
                    supportedIds.extend(params)
                    logger.debug("Found supported TV IDs: %s" % params)
                else:
                    doBruteForce = True
            movie_search = searching.find("movie-search")
            if movie_search is not None and movie_search.attrib["available"] == "yes":
                supportedTypes.append("movie")
                logger.debug("Found supported movie search")
                if "supportedParams" in movie_search.attrib:
                    params = movie_search.attrib["supportedParams"]
                    params = params.split(",")
                    for x in ["q", "genre"]:
                        if x in params:
                            params.remove(x)
                    supportedIds.extend(params)
                    logger.debug("Found supported movie IDs: %s" % params)
                else:
                    doBruteForce = True
            book_search = searching.find("book-search")
            if book_search is not None and book_search.attrib["available"] == "yes":
                # Bug fix: this previously appended "movie", wrongly reporting
                # movie search as supported whenever book search was available
                supportedTypes.append("book")
                logger.debug("Found supported book search")
        can_handle = [y["id"] for y in toCheck]
        supportedIds = [x for x in supportedIds if x in can_handle]  # Only use those we can handle
        supportedIds = set(
            supportedIds
        )  # Return a set because IMDB might be included for TV and movie search, for example
        if doBruteForce:
            logger.info(
                "Unable to read supported params from caps. Will continue with brute force"
            )
            return checkCapsBruteForce(supportedTypes, toCheck, host, apikey)
        return sorted(list(set(supportedIds))), sorted(list(set(supportedTypes)))
    except HTTPError as e:
        logger.error("Error while trying to determine caps: %s" % e)
        raise IndexerResultParsingException(
            "Unable to check caps: %s" % str(e), None)
    except Exception as e:
        logger.error(
            "Error getting or parsing caps XML. Will continue with brute force. Error message: %s"
            % e)
        return checkCapsBruteForce(supportedTypes, toCheck, host, apikey)
def check_caps(host, apikey, userAgent=None, timeout=None, skipIdsAndTypes=False):
    """Determine the capabilities of a newznab indexer.

    Parses the indexer's "caps" page for its category tree and its supported
    search types / ID parameters. When supported params are not advertised
    (and skipIdsAndTypes is False) the check falls back to brute force test
    searches via checkCapsBruteForce().

    :param host: base URL of the indexer
    :param apikey: API key used for the caps request
    :param userAgent: User-Agent header value; falls back to the configured one
    :param timeout: request timeout; falls back to the configured one
    :param skipIdsAndTypes: if True, search types / ID params are not determined
    :return: dict with per-category mappings, supported IDs, types and
        categories, or None when the caps page could not be fetched/parsed
    :raises IndexerResultParsingException: if the caps request fails with an HTTP error
    """
    # Each entry describes one brute-force probe: search type, ID parameter,
    # a known ID value and a string expected to appear in the results.
    toCheck = [
        {"t": "tvsearch", "id": "tvdbid", "key": "121361", "expected": "Thrones"},
        {"t": "movie", "id": "imdbid", "key": "0848228", "expected": "Avengers"},
        {"t": "tvsearch", "id": "rid", "key": "24493", "expected": "Thrones"},
        {"t": "tvsearch", "id": "tvmazeid", "key": "82", "expected": "Thrones"},
        {"t": "tvsearch", "id": "traktid", "key": "1390", "expected": "Thrones"},
        {"t": "tvsearch", "id": "tmdbid", "key": "1399", "expected": "Thrones"},
    ]
    supportedIds = []
    supportedTypes = []
    # Try to find out from caps first
    try:
        url = _build_base_url(host, apikey, "caps", None)
        headers = {
            'User-Agent': userAgent if userAgent is not None else config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = requests.get(url, verify=False,
                         timeout=timeout if timeout is not None else config.settings.searching.timeout,
                         headers=headers)
        r.raise_for_status()
        tree = ET.fromstring(r.content)
        # Collect main category names and the id->name map of subcategories
        categories = []
        subCategories = {}
        for xmlMainCategory in tree.find("categories").findall("category"):
            categories.append(xmlMainCategory.attrib["name"].lower())
            for subcat in xmlMainCategory.findall("subcat"):
                subCategories[subcat.attrib["id"]] = subcat.attrib["name"]
        # Resolve well-known newznab subcategory numbers / names to this
        # indexer's own category IDs (None when not offered)
        animeCategory = getCategoryNumberOrNone(
            subCategories, ["5070", "7040"], ["anime", "tv/anime", "tv->anime"])
        comicCategory = getCategoryNumberOrNone(
            subCategories, ["7030"], ["comic", "comics", "books/comics"])
        magazineCategory = getCategoryNumberOrNone(
            subCategories, ["7010"], ["magazine", "mags", "magazines"])
        audiobookCategory = getCategoryNumberOrNone(
            subCategories, ["3030"], ["audiobook", "audio", "audio/audiobook"])
        ebookCategory = getCategoryNumberOrNone(subCategories, ["7020", "4050"], ["ebook"])
        supportedCategories = []
        if "movies" in categories:
            supportedCategories.extend(["movies", "movieshd", "moviessd"])
        if "tv" in categories:
            supportedCategories.extend(["tv", "tvhd", "tvsd"])
        if "audio" in categories:
            supportedCategories.extend(["audio", "flac", "mp3"])
        if "xxx" in categories or "adult" in categories:
            supportedCategories.append("xxx")
        if "console" in categories or "gaming" in categories:
            supportedCategories.append("console")
        if "apps" in categories or "pc" in categories:
            supportedCategories.append("pc")
        if animeCategory:
            supportedCategories.append("anime")
        if comicCategory:
            supportedCategories.append("comic")
        # NOTE(review): magazineCategory is resolved above but never added to
        # supportedCategories — confirm whether that omission is intentional
        if audiobookCategory:
            supportedCategories.append("audiobook")
        if ebookCategory:
            supportedCategories.append("ebook")
        searching = tree.find("searching")
        doBruteForce = False
        if searching is not None and not skipIdsAndTypes:
            tvsearch = searching.find("tv-search")
            if tvsearch is not None and tvsearch.attrib["available"] == "yes":
                supportedTypes.append("tvsearch")
                logger.debug("Found supported TV search")
                if "supportedParams" in tvsearch.attrib:
                    params = tvsearch.attrib["supportedParams"]
                    params = params.split(",")
                    # Generic params are always supported; keep only ID params
                    for x in ["q", "season", "ep"]:
                        if x in params:
                            params.remove(x)
                    supportedIds.extend(params)
                    logger.debug("Found supported TV IDs: %s" % params)
                else:
                    doBruteForce = True
            movie_search = searching.find("movie-search")
            if movie_search is not None and movie_search.attrib["available"] == "yes":
                supportedTypes.append("movie")
                logger.debug("Found supported movie search")
                if "supportedParams" in movie_search.attrib:
                    params = movie_search.attrib["supportedParams"]
                    params = params.split(",")
                    for x in ["q", "genre"]:
                        if x in params:
                            params.remove(x)
                    supportedIds.extend(params)
                    logger.debug("Found supported movie IDs: %s" % params)
                else:
                    doBruteForce = True
            book_search = searching.find("book-search")
            if book_search is not None and book_search.attrib["available"] == "yes":
                # Bug fix: this previously appended "movie", wrongly reporting
                # movie search as supported whenever book search was available
                supportedTypes.append("book")
                logger.debug("Found supported book search")
        can_handle = [y["id"] for y in toCheck]
        supportedIds = [x for x in supportedIds if x in can_handle]  # Only use those we can handle
        if doBruteForce and not skipIdsAndTypes:
            logger.info(
                "Unable to read supported params from caps. Will continue with brute force"
            )
            supportedIds, supportedTypes = checkCapsBruteForce(
                supportedTypes, toCheck, host, apikey)
        return {
            "animeCategory": animeCategory,
            "comicCategory": comicCategory,
            "magazineCategory": magazineCategory,
            "audiobookCategory": audiobookCategory,
            "ebookCategory": ebookCategory,
            "supportedIds": sorted(list(set(supportedIds))),
            "supportedTypes": sorted(list(set(supportedTypes))),
            "supportedCategories": supportedCategories,
            "supportsAllCategories":
                len(supportedCategories) == getNumberOfSelectableCategories() - 1  # Without "all"
        }
    except HTTPError as e:
        logger.error("Error while trying to determine caps: %s" % e)
        raise IndexerResultParsingException(
            "Unable to check caps: %s" % str(e), None)
    except Exception as e:
        logger.error("Error getting or parsing caps XML. Error message: %s" % e)
        return None
def check_caps(host, apikey, username=None, password=None, userAgent=None, timeout=None, skipIdsAndTypes=False):
    """Determine the capabilities and backend type of a newznab indexer.

    Parses the indexer's "caps" page for its category tree and supported
    search types. Unless skipIdsAndTypes is set, supported ID parameters and
    search types are always verified by brute force test searches. Finally a
    tvsearch request is made to read the channel's "generator" tag and guess
    the backend software (nzedb, newznab, nntmux, ...).

    :param host: base URL of the indexer
    :param apikey: API key used for the requests
    :param username: optional HTTP basic auth user; auth is only sent when set
    :param password: optional HTTP basic auth password
    :param userAgent: User-Agent header value; falls back to the configured one
    :param timeout: request timeout; falls back to the configured one
    :param skipIdsAndTypes: if True, search types / ID params are not determined
    :return: dict with per-category mappings, supported IDs, types, categories
        and the detected backend name
    :raises IndexerResultParsingException: when any request or parsing step fails
    """
    # Each entry describes one brute-force probe: search type, ID parameter,
    # a known ID value and a string expected to appear in the results.
    toCheck = [
        {"t": "tvsearch", "id": "tvdbid", "key": "121361", "expected": "Thrones"},
        {"t": "movie", "id": "imdbid", "key": "0848228", "expected": "Avengers"},
        {"t": "tvsearch", "id": "rid", "key": "24493", "expected": "Thrones"},
        {"t": "tvsearch", "id": "tvmazeid", "key": "82", "expected": "Thrones"},
        {"t": "tvsearch", "id": "traktid", "key": "1390", "expected": "Thrones"},
        {"t": "tvsearch", "id": "tmdbid", "key": "1399", "expected": "Thrones"},
    ]
    supportedIds = []
    supportedTypes = []
    # Try to find out from caps first
    try:
        url = _build_base_url(host, apikey, "caps", None)
        headers = {
            'User-Agent': userAgent if userAgent is not None else config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = webaccess.get(url,
                          timeout=timeout if timeout is not None else config.settings.searching.timeout,
                          headers=headers,
                          auth=HTTPBasicAuth(username, password) if username is not None else None)
        r.raise_for_status()
        tree = ET.fromstring(r.content)
        # Collect main category names and the id->name map of subcategories
        categories = []
        subCategories = {}
        for xmlMainCategory in tree.find("categories").findall("category"):
            categories.append(xmlMainCategory.attrib["name"].lower())
            for subcat in xmlMainCategory.findall("subcat"):
                subCategories[subcat.attrib["id"]] = subcat.attrib["name"]
        # Resolve well-known newznab subcategory numbers / names to this
        # indexer's own category IDs (None when not offered)
        animeCategory = getCategoryNumberOrNone(
            subCategories, ["5070", "7040"], ["anime", "tv/anime", "tv->anime"])
        comicCategory = getCategoryNumberOrNone(
            subCategories, ["7030"], ["comic", "comics", "books/comics"])
        magazineCategory = getCategoryNumberOrNone(
            subCategories, ["7010"], ["magazine", "mags", "magazines"])
        audiobookCategory = getCategoryNumberOrNone(
            subCategories, ["3030"], ["audiobook", "audio", "audio/audiobook"])
        ebookCategory = getCategoryNumberOrNone(subCategories, ["7020", "4050"], ["ebook"])
        supportedCategories = []
        if "movies" in categories:
            supportedCategories.extend(["movies", "movieshd", "moviessd"])
        if "tv" in categories:
            supportedCategories.extend(["tv", "tvhd", "tvsd"])
        if "audio" in categories or "music" in categories:
            supportedCategories.extend(["audio", "flac", "mp3"])
        if "xxx" in categories or "adult" in categories:
            supportedCategories.append("xxx")
        if "console" in categories or "gaming" in categories or "games" in categories:
            supportedCategories.append("console")
        if "apps" in categories or "pc" in categories:
            supportedCategories.append("pc")
        if animeCategory:
            supportedCategories.append("anime")
        if comicCategory:
            supportedCategories.append("comic")
        # NOTE(review): magazineCategory is resolved above but never added to
        # supportedCategories — confirm whether that omission is intentional
        if audiobookCategory:
            supportedCategories.append("audiobook")
        if ebookCategory:
            supportedCategories.append("ebook")
        searching = tree.find("searching")
        if searching is not None and not skipIdsAndTypes:
            book_search = searching.find("book-search")
            if book_search is not None and book_search.attrib["available"] == "yes":
                # Bug fix: this previously appended "movie", wrongly reporting
                # movie search as supported whenever book search was available
                supportedTypes.append("book")
                logger.debug("Found supported book search")
        can_handle = [y["id"] for y in toCheck]
        supportedIds = [x for x in supportedIds if x in can_handle]  # Only use those we can handle
        if not skipIdsAndTypes:
            logger.info(
                "Checking capabilities of indexer by brute force to make sure supported search types are correctly recognized"
            )
            supportedIds, supportedTypes = checkCapsBruteForce(
                supportedTypes, toCheck, host, apikey,
                username=username, password=password)
        # Check indexer type (nzedb, newznab, nntmux)
        url = _build_base_url(host, apikey, "tvsearch", None)
        headers = {
            'User-Agent': userAgent if userAgent is not None else config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = webaccess.get(url,
                          timeout=timeout if timeout is not None else config.settings.searching.timeout,
                          headers=headers,
                          auth=HTTPBasicAuth(username, password) if username is not None else None)
        r.raise_for_status()
        generator = ET.fromstring(r.content).find("channel/generator")
        if generator is not None:
            backend = generator.text
            logger.info(
                "Found generator tag indicating that indexer %s is a %s based indexer"
                % (host, backend))
        else:
            logger.info("Assuming indexer %s is a newznab based indexer" % host)
            backend = "newznab"
        return {
            "animeCategory": animeCategory,
            "comicCategory": comicCategory,
            "magazineCategory": magazineCategory,
            "audiobookCategory": audiobookCategory,
            "ebookCategory": ebookCategory,
            "supportedIds": sorted(list(set(supportedIds))),
            "supportedTypes": sorted(list(set(supportedTypes))),
            "supportedCategories": supportedCategories,
            "supportsAllCategories":
                len(supportedCategories) == getNumberOfSelectableCategories() - 1,  # Without "all"
            "backend": backend
        }
    except Exception as e:
        logger.error("Error getting or parsing caps XML. Error message: %s" % e)
        raise IndexerResultParsingException(
            "Unable to check caps: %s" % str(e), None)
def check_caps(host, apikey):
    """Return the set of search ID parameters (tvdbid, imdbid, ...) the indexer supports.

    The indexer's "caps" page is consulted first. If it cannot be fetched or
    parsed, each candidate ID is verified by issuing a real test search in a
    thread pool (_testId) and checking for the expected result.

    :param host: base URL of the indexer
    :param apikey: API key used for the requests
    :return: set of supported ID parameter names
    :raises IndexerResultParsingException: when a brute-force probe itself errors out
    """
    # Candidate probes: search type, ID parameter, a known ID and a string
    # expected to appear in the results of a correct search.
    toCheck = [
        {"t": "tvsearch", "id": "tvdbid", "key": "121361", "expected": "Thrones"},
        {"t": "movie", "id": "imdbid", "key": "0848228", "expected": "Avengers"},
        {"t": "tvsearch", "id": "rid", "key": "24493", "expected": "Thrones"},
        {"t": "tvsearch", "id": "tvmazeid", "key": "82", "expected": "Thrones"},
        {"t": "tvsearch", "id": "traktid", "key": "1390", "expected": "Thrones"},
        {"t": "tvsearch", "id": "tmdbid", "key": "1399", "expected": "Thrones"},
    ]
    result = []
    # Try to find out from caps first
    try:
        url = _build_base_url(host, apikey, "caps", None)
        headers = {'User-Agent': config.settings.searching.userAgent}
        logger.debug("Requesting %s" % url)
        response = requests.get(url, verify=False,
                                timeout=config.settings.searching.timeout,
                                headers=headers)
        response.raise_for_status()
        searching = ET.fromstring(response.content).find("searching")
        if searching is not None:
            tvsearch = searching.find("tv-search")
            if tvsearch is not None and tvsearch.attrib["available"] == "yes":
                ids = tvsearch.attrib["supportedParams"].split(",")
                # Drop the generic params; only the ID params matter here
                for generic in ("q", "season", "ep"):
                    if generic in ids:
                        ids.remove(generic)
                result.extend(ids)
                logger.debug("Found supported TV IDs: %s" % ids)
            movie_search = searching.find("movie-search")
            if movie_search is not None and movie_search.attrib["available"] == "yes":
                ids = movie_search.attrib["supportedParams"].split(",")
                for generic in ("q", "genre"):
                    if generic in ids:
                        ids.remove(generic)
                result.extend(ids)
                logger.debug("Found supported movie IDs: %s" % ids)
        # Only keep IDs we actually know how to use
        can_handle = [probe["id"] for probe in toCheck]
        result = [x for x in result if x in can_handle]
        # Return a set because IMDB might be included for TV and movie search, for example
        return set(result)
    except Exception as e:
        logger.error("Error getting or parsing caps XML. Will continue with brute force. Error message: %s" % e)
        # Caps failed: probe every candidate ID concurrently with real searches
        with concurrent.futures.ThreadPoolExecutor(max_workers=len(toCheck)) as executor:
            futures_to_ids = {
                executor.submit(_testId, host, apikey, probe["t"], probe["id"],
                                probe["key"], probe["expected"]): probe["id"]
                for probe in toCheck
            }
            for future in concurrent.futures.as_completed(futures_to_ids):
                id = futures_to_ids[future]
                try:
                    if future.result():
                        result.append(id)
                except Exception as e:
                    logger.error("An error occurred while trying to test the caps of host %s: %s" % (host, e))
                    raise IndexerResultParsingException("Unable to check caps: %s" % str(e), None)
        return set(result)