Example #1
    def process_query_result(self, xml, searchRequest, maxResults=None):
        self.debug("Started processing results")
        entries = []
        countRejected = self.getRejectedCountDict()
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            self.debug(xml[:500])
            raise IndexerResultParsingException(
                "Error while parsing XML from NZBClub", self)

        for item in tree.iter('item'):
            try:
                entry = self.parseItem(item)
            except IndexerResultParsingRowException:
                continue

            accepted, reason, ri = self.accept_result(entry, searchRequest,
                                                      self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected[ri] += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        self.debug("Finished processing results")
        return IndexerProcessingResult(
            entries=entries,
            queries=[],
            total=len(entries),
            total_known=True,
            has_more=False,
            rejected=countRejected
        )  # No paging with RSS. Might need/want to change to HTML and BS
Example #2
def checkCapsBruteForce(supportedTypes,
                        toCheck,
                        host,
                        apikey,
                        username=None,
                        password=None):
    supportedIds = []
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=len(toCheck)) as executor:
        futures_to_ids = {
            executor.submit(_testId,
                            host,
                            apikey,
                            x["t"],
                            x["id"],
                            x["key"],
                            x["expected"],
                            username=username,
                            password=password): x["id"]
            for x in toCheck
        }
        for future in concurrent.futures.as_completed(futures_to_ids):
            checkedId = futures_to_ids[future]
            try:
                supported, t = future.result()
                if supported:
                    supportedIds.append(checkedId)
                    supportedTypes.append(t)
            except Exception as e:
                logger.error(
                    "An error occurred while trying to test the caps of host %s: %s"
                    % (host, e))
                raise IndexerResultParsingException(
                    "Unable to check caps: %s" % str(e), None)
    return sorted(list(set(supportedIds))), sorted(list(set(supportedTypes)))
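
The brute-force check above fans out one probe per ID type with a ThreadPoolExecutor and uses the futures-to-ids dict to map each completed future back to the ID it tested. A minimal, self-contained sketch of that pattern; fake_test and candidates below are invented stand-ins for _testId and toCheck, not part of the original code:

import concurrent.futures


def fake_test(id_name):
    # Stand-in for _testId: pretend only tvdbid and imdbid probes return the expected result
    supported = id_name in ("tvdbid", "imdbid")
    search_type = "movie" if id_name == "imdbid" else "tvsearch"
    return supported, search_type


candidates = ["tvdbid", "imdbid", "rid", "tvmazeid"]
supportedIds = []
supportedTypes = []
with concurrent.futures.ThreadPoolExecutor(max_workers=len(candidates)) as executor:
    futures_to_ids = {executor.submit(fake_test, x): x for x in candidates}
    for future in concurrent.futures.as_completed(futures_to_ids):
        checkedId = futures_to_ids[future]
        supported, t = future.result()
        if supported:
            supportedIds.append(checkedId)
            supportedTypes.append(t)

print(sorted(set(supportedIds)))    # ['imdbid', 'tvdbid']
print(sorted(set(supportedTypes)))  # ['movie', 'tvsearch']
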
Example #3
    def parseXml(self, xmlResponse, maxResults=None):
        entries = []

        try:
            tree = ET.fromstring(xmlResponse.encode('utf-8'))
        except Exception:
            self.exception("Error parsing XML: %s..." % xmlResponse[:500])
            raise IndexerResultParsingException("Error parsing XML", self)
        for item in tree.find("channel").findall("item"):
            entry = self.parseItem(item)
            entries.append(entry)
            if maxResults is not None and len(entries) == maxResults:
                break

        response_total_offset = tree.find(
            "./channel[1]/newznab:response",
            {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"})
        if response_total_offset is None or response_total_offset.attrib[
                "total"] == "" or response_total_offset.attrib["offset"] == "":
            self.warn(
                "Indexer returned a result page without total results and offset. Shame! *rings bell*"
            )
            offset = 0
            total = len(entries)
        else:
            total = int(response_total_offset.attrib["total"])
            offset = int(response_total_offset.attrib["offset"])
        return entries, total, offset
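
parseXml reads the paging information from the namespaced newznab:response element and falls back to offset 0 and the number of parsed entries when that element is missing. A standalone sketch of that lookup; the sample XML below is invented for illustration:

import xml.etree.ElementTree as ET

NEWZNAB_NS = {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"}

sample = """<rss><channel>
<newznab:response xmlns:newznab="http://www.newznab.com/DTD/2010/feeds/attributes/" offset="100" total="2342"/>
<item><title>Some.Release.720p.x264</title></item>
</channel></rss>"""

tree = ET.fromstring(sample)
response_total_offset = tree.find("./channel[1]/newznab:response", NEWZNAB_NS)
if response_total_offset is None or response_total_offset.attrib["total"] == "" or response_total_offset.attrib["offset"] == "":
    total, offset = 1, 0  # fallback as above: len(entries) of the one parsed item, offset 0
else:
    total = int(response_total_offset.attrib["total"])
    offset = int(response_total_offset.attrib["offset"])
print("total=%d offset=%d" % (total, offset))  # total=2342 offset=100
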
Example #4
    def process_query_result(self, xml, searchRequest, maxResults=None):
        entries = []
        countRejected = self.getRejectedCountDict()
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            logger.debug(xml)
            raise IndexerResultParsingException("Error parsing XML", self)
        for elem in tree.iter('item'):
            title = elem.find("title")
            url = elem.find("enclosure")
            pubdate = elem.find("pubDate")
            if title is None or url is None or pubdate is None:
                continue

            entry = self.create_nzb_search_result()
            entry.title = title.text
            entry.link = url.attrib["url"]
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO

            p = re.compile(r"(.*)\(Size:(\d*)")
            m = p.search(elem.find("description").text)
            if m:
                entry.description = m.group(1)
                entry.size = int(m.group(2)) * 1024 * 1024  # megabytes to bytes
            if elem.find("category").text.lower() == "tv-dvdrip" or elem.find(
                    "category").text.lower() == "tv-sd":
                entry.category = getCategoryByName("tvsd")
            elif elem.find("category").text.lower() == "tv-x264" or elem.find(
                    "category").text.lower == "tv-hd":
                entry.category = getCategoryByName("tvhd")
            else:
                entry.category = getUnknownCategory()

            entry.indexerguid = elem.find("guid").text[
                30:]  #39a/The.Almighty.Johnsons.S03E06.720p.BluRay.x264-YELLOWBiRD.nzb is the GUID, only the 39a doesn't work

            pubdate = arrow.get(pubdate.text, 'M/D/YYYY h:mm:ss A')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age = (arrow.utcnow() - pubdate).days

            accepted, reason, ri = self.accept_result(entry, searchRequest,
                                                      self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected[ri] += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total_known=True,
                                       has_more=False,
                                       total=len(entries),
                                       rejected=countRejected)
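
The description of these items is expected to look roughly like "Some.Release (Size:700 MB)": the regex captures the text and the size in megabytes, which is then converted to bytes. A quick standalone check with an invented sample string:

import re

description = "The.Almighty.Johnsons.S03E06.720p.BluRay.x264 (Size:700 MB)"
m = re.compile(r"(.*)\(Size:(\d*)").search(description)
if m:
    text = m.group(1).strip()                   # "The.Almighty.Johnsons.S03E06.720p.BluRay.x264"
    size_bytes = int(m.group(2)) * 1024 * 1024  # 700 MB -> 734003200 bytes
    print("%s -> %d bytes" % (text, size_bytes))
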
Example #5
    def parseXml(self, xmlResponse, maxResults=None):
        entries = []

        try:
            tree = ET.fromstring(xmlResponse.encode('utf-8'))
        except Exception:
            self.exception("Error parsing XML: %s..." % xmlResponse[:500])
            raise IndexerResultParsingException("Error parsing XML", self)
        for item in tree.find("channel").findall("item"):
            entry = self.parseItem(item)
            entries.append(entry)
            if maxResults is not None and len(entries) == maxResults:
                break
        return entries, len(entries), 0
Example #6
    def process_query_result(self, html, searchRequest, maxResults=None):
        self.debug("Started processing results")

        entries = []
        countRejected = self.getRejectedCountDict()
        logger.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
        soup = BeautifulSoup(html, config.settings.searching.htmlParser)

        if "No results found" in soup.text:
            return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=self.getRejectedCountDict())

        results_container = soup.find(id="results")
        main_table = results_container.find('table') if results_container else None
        if not main_table or not main_table.find("tbody"):
            self.error("Unable to find main table in NZBIndex page: %s..." % html[:500])
            self.debug(html[:500])
            raise IndexerResultParsingException("Unable to find main table in NZBIndex page", self)

        items = main_table.find("tbody").find_all('tr')
        
        for row in items:
            try:
                entry = self.parseRow(row)
            except IndexerResultParsingRowException:
                continue
            accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected[ri] += 1
                self.debug("Rejected search result. Reason: %s" % reason)
        try:
            page_links = main_table.find("tfoot").find_all("tr")[1].find_all('a')
            if len(page_links) == 0:
                total = len(entries)
                has_more = False
            else:
                pagecount = int(page_links[-2].text)
                currentpage = int(main_table.find("tfoot").find_all("tr")[1].find("b").text)  # Don't count "next"
                has_more = pagecount > currentpage
                total = self.limit * pagecount  # Good enough
        except Exception:
            self.exception("Error while trying to find page count")
            total = len(entries)
            has_more = False

        self.debug("Finished processing results")
        return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=has_more, rejected=countRejected)
Example #7
    def process_query_result(self, xml, searchRequest, maxResults=None):
        entries = []
        countRejected = 0
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            logger.debug(xml)
            raise IndexerResultParsingException("Error parsing XML", self)
        for elem in tree.iter('item'):
            title = elem.find("title")
            url = elem.find("enclosure")
            pubdate = elem.find("pubDate")
            if title is None or url is None or pubdate is None:
                continue

            entry = self.create_nzb_search_result()
            entry.title = title.text
            entry.link = url.attrib["url"]
            entry.size = int(url.attrib["length"])
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
            entry.category = getCategoryByName("anime")
            entry.indexerguid = elem.find("guid").text
            entry.details_link = entry.link.replace("dl", "info")
            pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.pubDate = pubdate
            entry.age_days = (arrow.utcnow() - pubdate).days

            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total_known=True,
                                       has_more=False,
                                       total=len(entries),
                                       rejected=countRejected)
Example #8
    def process_query_result(self, html, searchRequest, maxResults=None):
        self.debug("Started processing results")
        logger.info("Last results count %d" % self.last_results_count)

        entries = Set([])
        countRejected = 0
        self.debug("Using HTML parser %s" %
                   config.settings.searching.htmlParser)
        soup = BeautifulSoup(html, config.settings.searching.htmlParser)

        if "No results in most popular groups" in soup.text:
            logger.info("No results found for query")
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total_known=True,
                                           has_more=False,
                                           total=0,
                                           rejected=0)
        main_table = soup.find('table', attrs={'id': 'r2'})

        if not main_table:
            self.debug(html[:500])
            raise IndexerResultParsingException(
                "Unable to find main table in binsearch page. This happens sometimes... :-)",
                self)

        items = main_table.find_all('tr')

        for row in items:
            try:
                entry = self.parseRow(row)
            except IndexerResultParsingRowException:
                continue
            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.add(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        self.debug("Finished processing %d results" % len(entries))

        page_links = soup.find_all('table', attrs={'class':
                                                   'xMenuT'})[1].find_all("a")
        has_more = len(page_links) > 0 and page_links[-1].text == ">"
        total_known = False
        total = 100
        if len(page_links) == 0:
            m = re.compile(r".* (\d+)\+? records.*").search(
                soup.find_all('table', attrs={'class': 'xMenuT'})[1].text)
            if m:
                total = int(m.group(1))
                total_known = True

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total_known=total_known,
                                       has_more=has_more,
                                       total=total,
                                       rejected=countRejected)
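
When binsearch returns no paging links, the total is taken from the "... N+ records ..." text of the menu table. A standalone check of that fallback regex on an invented snippet of menu text:

import re

menu_text = "found 12345+ records in 0.4 seconds"
m = re.compile(r".* (\d+)\+? records.*").search(menu_text)
if m:
    total = int(m.group(1))
    print(total)  # 12345
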
Example #9
    def process_query_result(self, html, maxResults=None):
        self.debug("Started processing results")

        entries = []
        logger.debug("Using HTML parser %s" %
                     config.settings.searching.htmlParser)
        soup = BeautifulSoup(html, config.settings.searching.htmlParser)

        if "No results found" in soup.text:
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False)

        results_container = soup.find(id="results")
        main_table = results_container.find('table') if results_container else None
        if not main_table or not main_table.find("tbody"):
            self.error("Unable to find main table in NZBIndex page: %s..." %
                       html[:500])
            self.debug(html[:500])
            raise IndexerResultParsingException(
                "Unable to find main table in NZBIndex page", self)

        items = main_table.find("tbody").find_all('tr')
        size_pattern = re.compile(
            r"(?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
        age_pattern = re.compile(r"(?P<days1>\d+)\.(?P<days2>\d)")
        title_pattern = re.compile(
            r'"(.*)\.(rar|nfo|mkv|par2|001|nzb|url|zip|r[0-9]{2})"')
        for row in items:
            tds = list(row.find_all("td"))
            if len(tds) != 5:
                # advertisement
                continue
            entry = self.create_nzb_search_result()

            entry.indexerguid = row.find("input")["value"]

            infotd = tds[1]

            if "password protected" in infotd.text.lower():
                entry.passworded = True

            title = infotd.find("label").text
            title = title.replace("\n", "")
            title = re.sub(" +", "", title)

            m = title_pattern.search(title)
            if m:
                entry.title = m.group(1)
            else:
                entry.title = title

            info = infotd.find("div", class_="fileinfo")
            if info is not None and re.compile(r"\d NFO").search(
                    info.text):  # 1 nfo file is missing if there is no NFO
                entry.has_nfo = NzbSearchResult.HAS_NFO_YES
            else:
                entry.has_nfo = NzbSearchResult.HAS_NFO_NO
            poster = infotd.find("span", class_="poster").find("a")
            if poster is not None:
                poster = poster.text.replace("\n", "")
                poster = re.sub(" +", "", poster)
                entry.poster = poster.replace("(", " (").replace("<",
                                                                 " <").strip()

            link = infotd.findAll('a', text=re.compile('Download'))
            if link is not None and len(link) == 1:
                entry.link = link[0]["href"]
            else:
                self.debug("Did not find link in row")

            entry.category = "N/A"

            sizetd = tds[2]

            m = size_pattern.search(sizetd.text)
            if not m:
                self.debug("Unable to find size information in %s" %
                           sizetd.text)
            else:
                size = float(m.group("size"))
                unit = m.group("unit")
                if unit == "KB":
                    size *= 1024
                elif unit == "MB":
                    size = size * 1024 * 1024
                elif unit == "GB":
                    size = size * 1024 * 1024 * 1024
                entry.size = int(size)

            grouptd = tds[3]
            group = grouptd.text.replace("\n",
                                         "").replace("a.b.",
                                                     "alt.binaries.").strip()
            entry.group = group

            agetd = tds[4]

            m = age_pattern.search(agetd.text)
            days = None
            hours = None
            if m:
                days = int(m.group("days1"))
                hours = int(m.group("days2")) * 2.4
            else:
                p = re.compile(r"(?P<hours>\d+) hours?")
                m = p.search(agetd.text)
                if m:
                    days = 0
                    hours = int(m.group("hours"))
            if hours is not None:
                pubdate = arrow.utcnow().replace(
                    days=-days,
                    hours=-1)  # hours because of timezone change below
                if hours > 0:
                    pubdate = pubdate.replace(hours=-hours)
                pubdate = pubdate.to(
                    "+01:00")  # nzbindex server time, I guess?
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.age_precise = True  # Precise to 2.4 hours, should be enough for duplicate detection
                entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            else:
                self.debug("Found no age info in %s" % str(agetd))

            collection_links = infotd.findAll("a",
                                              href=True,
                                              text="View collection")
            if collection_links is not None and len(collection_links) > 0:
                entry.details_link = collection_links[0].attrs["href"]
            accepted, reason = self.accept_result(entry)
            if accepted:
                entries.append(entry)
            else:
                self.debug("Rejected search result. Reason: %s" % reason)
        try:
            page_links = main_table.find("tfoot").find_all("tr")[1].find_all(
                'a')
            if len(page_links) == 0:
                total = len(entries)
                has_more = False
            else:
                pagecount = int(page_links[-2].text)
                currentpage = int(
                    main_table.find("tfoot").find_all("tr")[1].find(
                        "b").text)  # Don't count "next"
                has_more = pagecount > currentpage
                total = self.limit * pagecount  # Good enough
        except Exception:
            self.exception("Error while trying to find page count")
            total = len(entries)
            has_more = False

        self.debug("Finished processing results")
        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total=total,
                                       total_known=True,
                                       has_more=has_more)
Example #10
    def execute_queries(self, queries, searchRequest):
        if len(queries) == 0:
            return QueriesExecutionResult(didsearch=False,
                                          results=[],
                                          indexerSearchEntry=None,
                                          indexerApiAccessEntry=None,
                                          indexerStatus=None,
                                          total=0,
                                          loaded_results=0,
                                          total_known=True,
                                          has_more=False,
                                          rejected=self.getRejectedCountDict())
        results = []
        executed_queries = set()
        psearch = IndexerSearch(indexer=self.indexer)
        papiaccess = IndexerApiAccess()
        indexerStatus = None
        total_results = 0
        total_known = False
        has_more = False
        rejected = self.getRejectedCountDict()
        while len(queries) > 0:
            query = queries.pop()
            if query in executed_queries:
                # To make sure that in case an offset is reported wrong or we have a bug we don't get stuck in an endless loop
                continue

            try:
                request, papiaccess, indexerStatus = self.get_url_with_papi_access(
                    query, "search", saveToDb=False)
                papiaccess.indexer_search = psearch

                executed_queries.add(query)

                if request is not None:
                    if request.text == "":
                        raise IndexerResultParsingException(
                            "Indexer returned an empty page", self)
                    self.check_auth(request.text)
                    self.debug("Successfully loaded URL %s" % request.url)
                    try:
                        parsed_results = self.process_query_result(
                            request.content, searchRequest)
                        # Retrieve the processed results
                        results.extend(parsed_results.entries)
                        # Add queries that were added as a result of the parsing, e.g. when
                        # the next result page should also be loaded
                        queries.extend(parsed_results.queries)
                        total_results += parsed_results.total
                        total_known = parsed_results.total_known
                        has_more = parsed_results.has_more
                        rejected = parsed_results.rejected

                        papiaccess.response_successful = True
                        indexerStatus = self.handle_indexer_success(False)
                    except Exception:
                        self.exception(
                            "Error while processing search results from indexer %s"
                            % self)
                        raise IndexerResultParsingException(
                            "Error while parsing the results from indexer",
                            self)
            except IndexerAuthException as e:
                papiaccess.error = "Authorization error :%s" % e.message
                self.error(papiaccess.error)
                indexerStatus = self.handle_indexer_failure(
                    reason="Authentication failed", disable_permanently=True)
                papiaccess.response_successful = False
            except IndexerAccessException as e:
                papiaccess.error = "Access error: %s" % e.message
                self.error(papiaccess.error)
                indexerStatus = self.handle_indexer_failure(
                    reason="Access failed")
                papiaccess.response_successful = False
            except IndexerResultParsingException as e:
                papiaccess.error = "Access error: %s" % e.message
                self.error(papiaccess.error)
                indexerStatus = self.handle_indexer_failure(
                    reason="Parsing results failed")
                papiaccess.response_successful = False
            except Exception as e:
                self.exception("An error error occurred while searching: %s",
                               e)
                if papiaccess is not None:
                    papiaccess.error = "Unknown error :%s" % e
                    papiaccess.response_successful = False
            finally:
                if papiaccess is not None:
                    psearch.successful = papiaccess.response_successful
                else:
                    self.error("Unable to save API response to database")
                psearch.resultsCount = total_results
        return QueriesExecutionResult(didsearch=True,
                                      results=results,
                                      indexerSearchEntry=psearch,
                                      indexerApiAccessEntry=papiaccess,
                                      indexerStatus=indexerStatus,
                                      total=total_results,
                                      loaded_results=len(results),
                                      total_known=total_known,
                                      has_more=has_more,
                                      rejected=rejected)
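
execute_queries tracks already executed queries in a set so that a wrongly reported offset (or a parser that re-adds the same page) cannot cause an endless loop. The guard in isolation, with made-up query strings:

queries = ["o=100", "o=0", "o=0"]  # "o=0" was queued twice, e.g. because of a bad offset
executed_queries = set()
while len(queries) > 0:
    query = queries.pop()
    if query in executed_queries:
        # Skip queries that already ran so a wrong offset cannot loop forever
        continue
    executed_queries.add(query)
    print("searching with " + query)  # runs once for "o=0" and once for "o=100"
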
Example #11
    def process_query_result(self, xml, searchRequest, maxResults=None):
        self.debug("Started processing results")
        entries = []
        countRejected = 0
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            self.debug(xml[:500])
            raise IndexerResultParsingException(
                "Error while parsing XML from NZBClub", self)

        group_pattern = re.compile(r"Newsgroup: ?([\w@\. \(\)]+) <br />")
        poster_pattern = re.compile(r"Poster: ?([\w@\. \(\)]+) <br />")
        for elem in tree.iter('item'):
            title = elem.find("title")
            url = elem.find("enclosure")
            pubdate = elem.find("pubDate")
            if title is None or url is None or pubdate is None:
                continue

            entry = self.create_nzb_search_result()
            if "password protect" in title.text.lower(
            ) or "passworded" in title.text.lower():
                entry.passworded = True

            p = re.compile(r'"(.*)"')
            m = p.search(title.text)
            if m:
                entry.title = m.group(1)
            else:
                entry.title = title.text

            entry.link = url.attrib["url"]
            entry.size = int(url.attrib["length"])
            entry.indexer = self.name
            entry.category = "N/A"
            entry.details_link = elem.find("link").text

            entry.indexerguid = elem.find("guid").text[
                -8:]  # GUID looks like "http://www.nzbclub.com/nzb_view58556415" of which we only want the last part

            description = elem.find("description").text
            description = urlparse.unquote(description).replace("+", " ")
            if re.compile(r"\d NFO Files").search(
                    description
            ):  # [x NFO Files] is missing if there is no NFO
                entry.has_nfo = NzbSearchResult.HAS_NFO_YES
            else:
                entry.has_nfo = NzbSearchResult.HAS_NFO_NO
            m = group_pattern.search(description)
            if m:
                entry.group = m.group(1).strip()
            m = poster_pattern.search(description)
            if m:
                entry.poster = m.group(1).strip()

            try:
                pubdate = arrow.get(pubdate.text,
                                    'ddd, DD MMM YYYY HH:mm:ss Z')
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            except Exception as e:
                self.error("Unable to parse pubdate %s" % pubdate.text)
                continue

            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        self.debug("Finished processing results")
        return IndexerProcessingResult(
            entries=entries,
            queries=[],
            total=len(entries),
            total_known=True,
            has_more=False,
            rejected=countRejected
        )  # No paging with RSS. Might need/want to change to HTML and BS
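
The group and poster are pulled out of the NZBClub RSS description with the two patterns compiled before the loop; they expect fragments such as "Newsgroup: alt.binaries.teevee <br />". A standalone check against an invented description string:

import re

group_pattern = re.compile(r"Newsgroup: ?([\w@\. \(\)]+) <br />")
poster_pattern = re.compile(r"Poster: ?([\w@\. \(\)]+) <br />")

description = "Newsgroup: alt.binaries.teevee <br /> Poster: someone@example.com (someone) <br />"
m = group_pattern.search(description)
if m:
    print(m.group(1).strip())  # alt.binaries.teevee
m = poster_pattern.search(description)
if m:
    print(m.group(1).strip())  # someone@example.com (someone)
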
Example #12
    def process_query_result(self,
                             xml_response,
                             searchRequest,
                             maxResults=None):
        self.debug("Started processing results")

        if "0 results found" in xml_response:
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=0)
        if "search to short" in xml_response:
            self.info("omgwtf says the query was too short")
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=0)

        entries = []
        countRejected = 0
        try:
            tree = ET.fromstring(xml_response)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml_response[:500])
            raise IndexerResultParsingException("Error parsing XML", self)

        if tree.tag == "xml":
            total = int(tree.find("info").find("results").text)
            current_page = int(tree.find("info").find("current_page").text)
            total_pages = int(tree.find("info").find("pages").text)
            has_more = current_page < total_pages
            for item in tree.find("search_req").findall("post"):
                entry = self.create_nzb_search_result()
                entry.indexerguid = item.find("nzbid").text
                entry.title = item.find("release").text
                entry.group = item.find("group").text
                entry.link = item.find("getnzb").text
                entry.size = long(item.find("sizebytes").text)
                entry.epoch = long(item.find("usenetage").text)
                pubdate = arrow.get(entry.epoch)
                entry.pubdate_utc = str(pubdate)
                entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.age_precise = True
                entry.details_link = item.find("details").text
                entry.has_nfo = NzbSearchResult.HAS_NFO_YES if item.find(
                    "getnfo") is not None else NzbSearchResult.HAS_NFO_NO
                categoryid = item.find("categoryid").text
                if categoryid in omgwtf_to_categories:
                    entry.category = omgwtf_to_categories[categoryid]
                else:
                    entry.category = "N/A"
                entries.append(entry)
            return IndexerProcessingResult(entries=entries,
                                           queries=[],
                                           total=total,
                                           total_known=True,
                                           has_more=has_more,
                                           rejected=countRejected)
        elif tree.tag == "rss":
            regexGuid = re.compile(r".*\?id=(\w+)&.*")
            regexGroup = re.compile(r".*Group:<\/b> ([\w\.\-]+)<br \/>.*")
            for item in tree.find("channel").findall("item"):
                entry = self.create_nzb_search_result()
                indexerguid = item.find("guid").text
                m = regexGuid.match(indexerguid)
                if m:
                    entry.indexerguid = m.group(1)
                else:
                    self.warn("Unable to find GUID in " + indexerguid)
                    continue
                entry.title = item.find("title").text
                description = item.find("description").text
                m = regexGroup.match(description)
                if m:
                    entry.group = m.group(1)
                else:
                    self.warn("Unable to find group in " + description)
                    continue
                entry.size = long(item.find("enclosure").attrib["length"])
                entry.pubDate = item.find("pubDate").text
                pubdate = arrow.get(entry.pubDate,
                                    'ddd, DD MMM YYYY HH:mm:ss Z')
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.precise_date = True
                entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
                categoryid = item.find("categoryid").text
                if categoryid in omgwtf_to_categories:
                    entry.category = omgwtf_to_categories[categoryid]
                else:
                    entry.category = "N/A"
                accepted, reason = self.accept_result(entry, searchRequest,
                                                      self.supportedFilters)
                if accepted:
                    entries.append(entry)
                else:
                    countRejected += 1
                    self.debug("Rejected search result. Reason: %s" % reason)
            return IndexerProcessingResult(entries=entries,
                                           queries=[],
                                           total=len(entries),
                                           total_known=True,
                                           has_more=False,
                                           rejected=countRejected)
        else:
            self.warn("Unknown response type: %s" % xml_response[:100])
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=countRejected)
Example #13
    def process_query_result(self, html, searchRequest, maxResults=None):
        self.debug("Started processing results")
        logger.info("Last results count %d" % self.last_results_count)
        entries = Set([])
        countRejected = 0
        soup = BeautifulSoup(html, config.settings.searching.htmlParser)
        self.debug("Using HTML parser %s" %
                   config.settings.searching.htmlParser)

        main_table = soup.find('table', attrs={'id': 'r2'})

        if not main_table:
            self.warn(
                "Unable to find main table in binsearch page. This just sometimes happens..."
            )
            self.debug(html[:500])
            raise IndexerResultParsingException(
                "Unable to find main table in binsearch page. This happens sometimes... :-)",
                self)

        items = main_table.find_all('tr')
        title_pattern = re.compile(
            r'"(.*)\.(rar|nfo|mkv|par2|001|nzb|url|zip|r[0-9]{2})"')
        size_pattern = re.compile(
            r"size: (?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
        poster_pattern = re.compile(r"&p=(.*)&")
        group_pattern = re.compile(r"&g=([\w\.]*)&")
        nfo_pattern = re.compile(r"\d nfo file")
        for row in items:
            entry = self.create_nzb_search_result()
            title = row.find('span', attrs={'class': 's'})

            if title is None:
                self.debug("Ignored entry because it has no title")
                continue
            title = title.text

            if "password protect" in title.lower(
            ) or "passworded" in title.lower():
                entry.passworded = True

            m = title_pattern.search(title)
            if m:
                entry.title = m.group(1)
            else:
                entry.title = title

            entry.indexerguid = row.find("input", attrs={"type":
                                                         "checkbox"})["name"]
            entry.link = "https://www.binsearch.info/fcgi/nzb.fcgi?q=%s" % entry.indexerguid
            info = row.find("span", attrs={"class": "d"})
            if info is None:
                self.debug("Ignored entry because it has no info")
                continue

            collection_link = info.find(
                "a"
            )["href"]  # '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
            entry.details_link = "%s%s" % (self.host, collection_link)
            m = group_pattern.search(collection_link)
            if m:
                entry.group = m.group(1).strip()

            m = poster_pattern.search(collection_link)
            if m:
                poster = m.group(1).strip()
                entry.poster = urlparse.unquote(poster).replace("+", " ")

            # Size
            m = size_pattern.search(info.text)
            if not m:
                self.debug("Unable to find size information in %s" % info.text)
            else:
                size = float(m.group("size"))
                unit = m.group("unit")
                if unit == "B":
                    pass
                elif unit == "KB":
                    size *= 1024
                elif unit == "MB":
                    size = size * 1024 * 1024
                elif unit == "GB":
                    size = size * 1024 * 1024 * 1024

                entry.size = int(size)

            entry.category = "N/A"

            # "1 nfo file" is missing if there is no NFO
            if nfo_pattern.search(info.text):
                entry.has_nfo = NzbSearchResult.HAS_NFO_YES
            else:
                entry.has_nfo = NzbSearchResult.HAS_NFO_NO

            # Age
            try:
                pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(
                    row.text).group(1)
                pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.age_precise = False
                entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            except Exception as e:
                self.error("Unable to find age in %s" %
                           row.find_all("td")[-1:][0].text)
                continue

            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.add(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        self.debug("Finished processing %d results" % len(entries))

        page_links = soup.find_all('table', attrs={'class':
                                                   'xMenuT'})[1].find_all("a")
        has_more = len(page_links) > 0 and page_links[-1].text == ">"
        total_known = False
        total = 100
        if len(page_links) == 0:
            m = re.compile(r".* (\d+)\+? records.*").search(
                soup.find_all('table', attrs={'class': 'xMenuT'})[1].text)
            if m:
                total = int(m.group(1))
                total_known = True

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total_known=total_known,
                                       has_more=has_more,
                                       total=total,
                                       rejected=countRejected)
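
The size column of a binsearch row is matched with size_pattern and then scaled from B/KB/MB/GB to bytes. The same conversion as a small standalone sketch on an invented info string, using a lookup table instead of the if/elif chain:

import re

size_pattern = re.compile(r"size: (?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")

info_text = "size: 1.40 GB, parts available: 2524 / 2524"
m = size_pattern.search(info_text)
if m:
    multipliers = {"B": 1, "KB": 1024, "MB": 1024 ** 2, "GB": 1024 ** 3}
    size_bytes = int(float(m.group("size")) * multipliers[m.group("unit")])
    print(size_bytes)  # 1503238553
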
Example #14
    def process_query_result(self,
                             xml_response,
                             searchRequest,
                             maxResults=None):
        self.debug("Started processing results")

        entries = []
        countRejected = 0
        grouppattern = re.compile(r"Group:</b> ?([\w\.]+)<br ?/>")
        guidpattern = re.compile(r"(.*/)?([a-zA-Z0-9@\.]+)")

        try:
            tree = ET.fromstring(xml_response)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml_response[:500])
            raise IndexerResultParsingException("Error parsing XML", self)
        for item in tree.find("channel").findall("item"):
            usenetdate = None
            entry = self.create_nzb_search_result()
            # These are the values that absolutely must be contained in the response
            entry.title = item.find("title").text
            entry.link = item.find("link").text
            entry.attributes = []
            entry.pubDate = item.find("pubDate").text
            entry.indexerguid = item.find("guid").text
            entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
            m = guidpattern.search(entry.indexerguid)
            if m:
                entry.indexerguid = m.group(2)

            description = item.find("description")
            if description is not None:
                description = description.text
                if description is not None and "Group:" in description:  # DogNZB has the group in its description
                    m = grouppattern.search(description)
                    if m and m.group(1) != "not available":
                        entry.group = m.group(1)

            categories = []
            for i in item.findall("./newznab:attr", {
                    "newznab":
                    "http://www.newznab.com/DTD/2010/feeds/attributes/"
            }):
                attribute_name = i.attrib["name"]
                attribute_value = i.attrib["value"]
                if attribute_name == "size":
                    entry.size = int(attribute_value)
                elif attribute_name == "guid":
                    entry.indexerguid = attribute_value
                elif attribute_name == "category" and attribute_value != "":
                    try:
                        categories.append(int(attribute_value))
                    except ValueError:
                        self.error("Unable to parse category %s" %
                                   attribute_value)
                elif attribute_name == "poster":
                    entry.poster = attribute_value
                elif attribute_name == "info":
                    entry.details_link = attribute_value
                elif attribute_name == "password" and attribute_value != "0":
                    entry.passworded = True
                elif attribute_name == "group" and attribute_value != "not available":
                    entry.group = attribute_value
                elif attribute_name == "usenetdate":
                    usenetdate = arrow.get(attribute_value,
                                           'ddd, DD MMM YYYY HH:mm:ss Z')
                # Store all the extra attributes, we will return them later for external apis
                entry.attributes.append({
                    "name": attribute_name,
                    "value": attribute_value
                })
            if entry.details_link is None:
                entry.details_link = self.get_details_link(entry.indexerguid)

            if usenetdate is None:
                # Not provided by attributes, use pubDate instead
                usenetdate = arrow.get(entry.pubDate,
                                       'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = usenetdate.timestamp
            entry.pubdate_utc = str(usenetdate)
            entry.age_days = (arrow.utcnow() - usenetdate).days
            entry.precise_date = True

            # Map category. Try to find the most specific category (like 2040), then the more general one (like 2000)
            categories = sorted(
                categories, reverse=True
            )  # Sort to make the most specific category appear first
            if len(categories) > 0:
                for k, v in categories_to_newznab.items():
                    for c in categories:
                        if c in v:
                            entry.category = k
                            break

            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)
            if maxResults is not None and len(entries) == maxResults:
                break

        response_total_offset = tree.find(
            "./channel[1]/newznab:response",
            {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"})
        if response_total_offset is None or response_total_offset.attrib[
                "total"] == "" or response_total_offset.attrib["offset"] == "":
            self.warn(
                "Indexer returned a result page without total results and offset. Shame! *rings bell*"
            )
            offset = 0
            total = len(entries)
        else:
            total = int(response_total_offset.attrib["total"])
            offset = int(response_total_offset.attrib["offset"])
        if total == 0 or len(entries) == 0:
            self.info("Query returned no results")
            return IndexerProcessingResult(entries=entries,
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=0)

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total=total,
                                       total_known=True,
                                       has_more=offset + len(entries) < total,
                                       rejected=countRejected)
Example #15
def check_caps(host, apikey, userAgent=None, timeout=None):
    toCheck = [{
        "t": "tvsearch",
        "id": "tvdbid",
        "key": "121361",
        "expected": "Thrones"
    }, {
        "t": "movie",
        "id": "imdbid",
        "key": "0848228",
        "expected": "Avengers"
    }, {
        "t": "tvsearch",
        "id": "rid",
        "key": "24493",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "tvmazeid",
        "key": "82",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "traktid",
        "key": "1390",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "tmdbid",
        "key": "1399",
        "expected": "Thrones"
    }]
    supportedIds = []
    supportedTypes = []
    #Try to find out from caps first
    try:
        url = _build_base_url(host, apikey, "caps", None)
        headers = {
            'User-Agent':
            userAgent
            if userAgent is not None else config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = requests.get(url,
                         verify=False,
                         timeout=timeout if timeout is not None else
                         config.settings.searching.timeout,
                         headers=headers)
        r.raise_for_status()

        tree = ET.fromstring(r.content)
        searching = tree.find("searching")
        doBruteForce = False
        if searching is not None:
            tvsearch = searching.find("tv-search")
            if tvsearch is not None and tvsearch.attrib["available"] == "yes":
                supportedTypes.append("tvsearch")
                logger.debug("Found supported TV search")
                if "supportedParams" in tvsearch.attrib:
                    params = tvsearch.attrib["supportedParams"]
                    params = params.split(",")
                    for x in ["q", "season", "ep"]:
                        if x in params:
                            params.remove(x)
                    supportedIds.extend(params)
                    logger.debug("Found supported TV IDs: %s" % params)
                else:
                    doBruteForce = True
            movie_search = searching.find("movie-search")
            if movie_search is not None and movie_search.attrib[
                    "available"] == "yes":
                supportedTypes.append("movie")
                logger.debug("Found supported movie search")
                if "supportedParams" in movie_search.attrib:
                    params = movie_search.attrib["supportedParams"]
                    params = params.split(",")
                    for x in ["q", "genre"]:
                        if x in params:
                            params.remove(x)
                    supportedIds.extend(params)
                    logger.debug("Found supported movie IDs: %s" % params)
                else:
                    doBruteForce = True
            book_search = searching.find("book-search")
            if book_search is not None and book_search.attrib[
                    "available"] == "yes":
                supportedTypes.append("movie")
                logger.debug("Found supported book search")

            can_handle = [y["id"] for y in toCheck]
            supportedIds = [x for x in supportedIds
                            if x in can_handle]  #Only use those we can handle
            supportedIds = set(
                supportedIds
            )  # Return a set because IMDB might be included for TV and movie search, for example

            if doBruteForce:
                logger.info(
                    "Unable to read supported params from caps. Will continue with brute force"
                )
                return checkCapsBruteForce(supportedTypes, toCheck, host,
                                           apikey)
            return sorted(list(set(supportedIds))), sorted(
                list(set(supportedTypes)))

    except HTTPError as e:
        logger.error("Error while trying to determine caps: %s" % e)
        raise IndexerResultParsingException(
            "Unable to check caps: %s" % str(e), None)
    except Exception as e:
        logger.error(
            "Error getting or parsing caps XML. Will continue with brute force. Error message: %s"
            % e)
        return checkCapsBruteForce(supportedTypes, toCheck, host, apikey)
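
check_caps first tries to read the advertised search types and ID parameters from the indexer's caps XML and only falls back to brute force when supportedParams is missing. A standalone sketch of that parsing step; the caps fragment below is invented but follows the layout the code expects:

import xml.etree.ElementTree as ET

caps_xml = """<caps><searching>
<tv-search available="yes" supportedParams="q,rid,tvdbid,season,ep"/>
<movie-search available="yes" supportedParams="q,imdbid,genre"/>
</searching></caps>"""

supportedIds = []
supportedTypes = []
searching = ET.fromstring(caps_xml).find("searching")
if searching is not None:
    tvsearch = searching.find("tv-search")
    if tvsearch is not None and tvsearch.attrib["available"] == "yes":
        supportedTypes.append("tvsearch")
        params = tvsearch.attrib["supportedParams"].split(",")
        supportedIds.extend(p for p in params if p not in ("q", "season", "ep"))
    movie_search = searching.find("movie-search")
    if movie_search is not None and movie_search.attrib["available"] == "yes":
        supportedTypes.append("movie")
        params = movie_search.attrib["supportedParams"].split(",")
        supportedIds.extend(p for p in params if p not in ("q", "genre"))

print(sorted(set(supportedIds)))    # ['imdbid', 'rid', 'tvdbid']
print(sorted(set(supportedTypes)))  # ['movie', 'tvsearch']
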
Example #16
def check_caps(host,
               apikey,
               userAgent=None,
               timeout=None,
               skipIdsAndTypes=False):
    toCheck = [{
        "t": "tvsearch",
        "id": "tvdbid",
        "key": "121361",
        "expected": "Thrones"
    }, {
        "t": "movie",
        "id": "imdbid",
        "key": "0848228",
        "expected": "Avengers"
    }, {
        "t": "tvsearch",
        "id": "rid",
        "key": "24493",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "tvmazeid",
        "key": "82",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "traktid",
        "key": "1390",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "tmdbid",
        "key": "1399",
        "expected": "Thrones"
    }]
    supportedIds = []
    supportedTypes = []
    # Try to find out from caps first
    try:
        url = _build_base_url(host, apikey, "caps", None)
        headers = {
            'User-Agent':
            userAgent
            if userAgent is not None else config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = requests.get(url,
                         verify=False,
                         timeout=timeout if timeout is not None else
                         config.settings.searching.timeout,
                         headers=headers)
        r.raise_for_status()

        tree = ET.fromstring(r.content)

        categories = []
        subCategories = {}
        for xmlMainCategory in tree.find("categories").findall("category"):
            categories.append(xmlMainCategory.attrib["name"].lower())
            for subcat in xmlMainCategory.findall("subcat"):
                subCategories[subcat.attrib["id"]] = subcat.attrib["name"]
        animeCategory = getCategoryNumberOrNone(
            subCategories, ["5070", "7040"],
            ["anime", "tv/anime", "tv->anime"])
        comicCategory = getCategoryNumberOrNone(
            subCategories, ["7030"], ["comic", "comics", "books/comics"])
        magazineCategory = getCategoryNumberOrNone(
            subCategories, ["7010"], ["magazine", "mags", "magazines"])
        audiobookCategory = getCategoryNumberOrNone(
            subCategories, ["3030"], ["audiobook", "audio", "audio/audiobook"])
        ebookCategory = getCategoryNumberOrNone(subCategories,
                                                ["7020", "4050"], ["ebook"])
        supportedCategories = []
        if "movies" in categories:
            supportedCategories.extend(["movies", "movieshd", "moviessd"])
        if "tv" in categories:
            supportedCategories.extend(["tv", "tvhd", "tvsd"])
        if "audio" in categories:
            supportedCategories.extend(["audio", "flac", "mp3"])
        if "xxx" in categories or "adult" in categories:
            supportedCategories.append("xxx")
        if "console" in categories or "gaming" in categories:
            supportedCategories.append("console")
        if "apps" in categories or "pc" in categories:
            supportedCategories.append("pc")
        if animeCategory:
            supportedCategories.append("anime")
        if comicCategory:
            supportedCategories.append("comic")
        if audiobookCategory:
            supportedCategories.append("audiobook")
        if ebookCategory:
            supportedCategories.append("ebook")

        searching = tree.find("searching")
        doBruteForce = False
        if searching is not None and not skipIdsAndTypes:
            tvsearch = searching.find("tv-search")
            if tvsearch is not None and tvsearch.attrib["available"] == "yes":
                supportedTypes.append("tvsearch")
                logger.debug("Found supported TV search")
                if "supportedParams" in tvsearch.attrib:
                    params = tvsearch.attrib["supportedParams"]
                    params = params.split(",")
                    for x in ["q", "season", "ep"]:
                        if x in params:
                            params.remove(x)
                    supportedIds.extend(params)
                    logger.debug("Found supported TV IDs: %s" % params)
                else:
                    doBruteForce = True
            movie_search = searching.find("movie-search")
            if movie_search is not None and movie_search.attrib["available"] == "yes":
                supportedTypes.append("movie")
                logger.debug("Found supported movie search")
                if "supportedParams" in movie_search.attrib:
                    params = movie_search.attrib["supportedParams"]
                    params = params.split(",")
                    for x in ["q", "genre"]:
                        if x in params:
                            params.remove(x)
                    supportedIds.extend(params)
                    logger.debug("Found supported movie IDs: %s" % params)
                else:
                    doBruteForce = True
            book_search = searching.find("book-search")
            if book_search is not None and book_search.attrib["available"] == "yes":
                supportedTypes.append("book")  # book search is registered as its own search type
                logger.debug("Found supported book search")

            can_handle = [y["id"] for y in toCheck]
            supportedIds = [x for x in supportedIds
                            if x in can_handle]  # Only use those we can handle

        if doBruteForce and not skipIdsAndTypes:
            logger.info(
                "Unable to read supported params from caps. Will continue with brute force"
            )
            supportedIds, supportedTypes = checkCapsBruteForce(
                supportedTypes, toCheck, host, apikey)
        return {
            "animeCategory": animeCategory,
            "comicCategory": comicCategory,
            "magazineCategory": magazineCategory,
            "audiobookCategory": audiobookCategory,
            "ebookCategory": ebookCategory,
            "supportedIds": sorted(list(set(supportedIds))),
            "supportedTypes": sorted(list(set(supportedTypes))),
            "supportedCategories": supportedCategories,
            "supportsAllCategories": len(supportedCategories) == getNumberOfSelectableCategories() - 1  # Without "all"
        }

    except HTTPError as e:
        logger.error("Error while trying to determine caps: %s" % e)
        raise IndexerResultParsingException(
            "Unable to check caps: %s" % str(e), None)
    except Exception as e:
        logger.error("Error getting or parsing caps XML. Error message: %s" %
                     e)
        return None
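A short, hypothetical usage sketch (not part of the example above): assuming the function shown here is exposed as check_caps, as in the later examples, its return value could be consumed like this. The host URL and API key are placeholders.

# Hypothetical usage sketch; host and API key are placeholders, and the
# check_caps name is assumed from the later examples on this page.
caps = check_caps("https://indexer.example.com", "my-api-key")
if caps is None:
    print("Caps could not be determined")
else:
    print("Supported IDs: %s" % caps["supportedIds"])              # e.g. ['imdbid', 'tvdbid']
    print("Supported search types: %s" % caps["supportedTypes"])   # e.g. ['movie', 'tvsearch']
    print("Supports all categories: %s" % caps["supportsAllCategories"])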
Example #17
0
def check_caps(host,
               apikey,
               username=None,
               password=None,
               userAgent=None,
               timeout=None,
               skipIdsAndTypes=False):
    toCheck = [{
        "t": "tvsearch",
        "id": "tvdbid",
        "key": "121361",
        "expected": "Thrones"
    }, {
        "t": "movie",
        "id": "imdbid",
        "key": "0848228",
        "expected": "Avengers"
    }, {
        "t": "tvsearch",
        "id": "rid",
        "key": "24493",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "tvmazeid",
        "key": "82",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "traktid",
        "key": "1390",
        "expected": "Thrones"
    }, {
        "t": "tvsearch",
        "id": "tmdbid",
        "key": "1399",
        "expected": "Thrones"
    }]
    supportedIds = []
    supportedTypes = []
    # Try to find out from caps first
    try:
        url = _build_base_url(host, apikey, "caps", None)
        headers = {
            'User-Agent': userAgent if userAgent is not None else config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = webaccess.get(url,
                          timeout=timeout if timeout is not None else config.settings.searching.timeout,
                          headers=headers,
                          auth=HTTPBasicAuth(username, password) if username is not None else None)
        r.raise_for_status()

        tree = ET.fromstring(r.content)

        categories = []
        subCategories = {}
        for xmlMainCategory in tree.find("categories").findall("category"):
            categories.append(xmlMainCategory.attrib["name"].lower())
            for subcat in xmlMainCategory.findall("subcat"):
                subCategories[subcat.attrib["id"]] = subcat.attrib["name"]
        animeCategory = getCategoryNumberOrNone(
            subCategories, ["5070", "7040"],
            ["anime", "tv/anime", "tv->anime"])
        comicCategory = getCategoryNumberOrNone(
            subCategories, ["7030"], ["comic", "comics", "books/comics"])
        magazineCategory = getCategoryNumberOrNone(
            subCategories, ["7010"], ["magazine", "mags", "magazines"])
        audiobookCategory = getCategoryNumberOrNone(
            subCategories, ["3030"], ["audiobook", "audio", "audio/audiobook"])
        ebookCategory = getCategoryNumberOrNone(subCategories,
                                                ["7020", "4050"], ["ebook"])
        supportedCategories = []
        if "movies" in categories:
            supportedCategories.extend(["movies", "movieshd", "moviessd"])
        if "tv" in categories:
            supportedCategories.extend(["tv", "tvhd", "tvsd"])
        if "audio" in categories or "music" in categories:
            supportedCategories.extend(["audio", "flac", "mp3"])
        if "xxx" in categories or "adult" in categories:
            supportedCategories.append("xxx")
        if "console" in categories or "gaming" in categories or "games" in categories:
            supportedCategories.append("console")
        if "apps" in categories or "pc" in categories:
            supportedCategories.append("pc")
        if animeCategory:
            supportedCategories.append("anime")
        if comicCategory:
            supportedCategories.append("comic")
        if audiobookCategory:
            supportedCategories.append("audiobook")
        if ebookCategory:
            supportedCategories.append("ebook")

        searching = tree.find("searching")
        if searching is not None and not skipIdsAndTypes:
            book_search = searching.find("book-search")
            if book_search is not None and book_search.attrib["available"] == "yes":
                supportedTypes.append("book")  # book search is registered as its own search type
                logger.debug("Found supported book search")

            can_handle = [y["id"] for y in toCheck]
            supportedIds = [x for x in supportedIds
                            if x in can_handle]  # Only use those we can handle

        if not skipIdsAndTypes:
            logger.info(
                "Checking capabilities of indexer by brute force to make sure supported search types are correctly recognized"
            )
            supportedIds, supportedTypes = checkCapsBruteForce(
                supportedTypes,
                toCheck,
                host,
                apikey,
                username=username,
                password=password)

        # Check indexer type (nzedb, newznab, nntmux)
        url = _build_base_url(host, apikey, "tvsearch", None)
        headers = {
            'User-Agent': userAgent if userAgent is not None else config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = webaccess.get(url,
                          timeout=timeout if timeout is not None else config.settings.searching.timeout,
                          headers=headers,
                          auth=HTTPBasicAuth(username, password) if username is not None else None)
        r.raise_for_status()
        generator = ET.fromstring(r.content).find("channel/generator")
        if generator is not None:
            backend = generator.text
            logger.info(
                "Found generator tag indicating that indexer %s is a %s based indexer"
                % (host, backend))
        else:
            logger.info("Assuming indexer %s is a newznab based indexer" %
                        host)
            backend = "newznab"

        return {
            "animeCategory": animeCategory,
            "comicCategory": comicCategory,
            "magazineCategory": magazineCategory,
            "audiobookCategory": audiobookCategory,
            "ebookCategory": ebookCategory,
            "supportedIds": sorted(list(set(supportedIds))),
            "supportedTypes": sorted(list(set(supportedTypes))),
            "supportedCategories": supportedCategories,
            "supportsAllCategories": len(supportedCategories) == getNumberOfSelectableCategories() - 1,  # Without "all"
            "backend": backend
        }
    except Exception as e:
        logger.error("Error getting or parsing caps XML. Error message: %s" %
                     e)
        raise IndexerResultParsingException(
            "Unable to check caps: %s" % str(e), None)
Example #18
0
def check_caps(host, apikey):
    toCheck = [
        {"t": "tvsearch",
         "id": "tvdbid",
         "key": "121361",
         "expected": "Thrones"
         },
        {"t": "movie",
         "id": "imdbid",
         "key": "0848228",
         "expected": "Avengers"
         },
        {"t": "tvsearch",
         "id": "rid",
         "key": "24493",
         "expected": "Thrones"
         },
        {"t": "tvsearch",
         "id": "tvmazeid",
         "key": "82",
         "expected": "Thrones"
         },
        {"t": "tvsearch",
         "id": "traktid",
         "key": "1390",
         "expected": "Thrones"
         },
        {"t": "tvsearch",
         "id": "tmdbid",
         "key": "1399",
         "expected": "Thrones"
         }
    ]
    result = []
    # Try to find out from caps first
    try:
        url = _build_base_url(host, apikey, "caps", None)
        headers = {
            'User-Agent': config.settings.searching.userAgent
        }
        logger.debug("Requesting %s" % url)
        r = requests.get(url, verify=False, timeout=config.settings.searching.timeout, headers=headers)
        r.raise_for_status()
        
        tree = ET.fromstring(r.content)
        searching = tree.find("searching")
        
        if searching is not None:
            tvsearch = searching.find("tv-search")
            if tvsearch is not None and tvsearch.attrib["available"] == "yes":
                params = tvsearch.attrib["supportedParams"]
                params = params.split(",")
                for x in ["q", "season", "ep"]:
                    if x in params:
                        params.remove(x)
                result.extend(params)
                logger.debug("Found supported TV IDs: %s" % params)
            movie_search = searching.find("movie-search")
            if movie_search is not None and movie_search.attrib["available"] == "yes":
                params = movie_search.attrib["supportedParams"]
                params = params.split(",")
                for x in ["q", "genre"]:
                    if x in params:
                        params.remove(x)         
                result.extend(params)
                logger.debug("Found supported movie IDs: %s" % params)
            can_handle = [y["id"] for y in toCheck]
            result = [x for x in result if x in can_handle]  # Only use those we can handle
            # Return a set because IMDB might be included for TV and movie search, for example
            return set(result)
        
    except Exception as e:
        logger.error("Error getting or parsing caps XML. Will continue with brute force. Error message: %s" % e)
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(toCheck)) as executor:
        futures_to_ids = {executor.submit(_testId, host, apikey, x["t"], x["id"], x["key"], x["expected"]): x["id"] for x in toCheck}
        for future in concurrent.futures.as_completed(futures_to_ids):
            id = futures_to_ids[future]
            try:
                supported = future.result()
                if supported:
                    result.append(id)
            except Exception as e:
                logger.error("An error occurred while trying to test the caps of host %s: %s" % (host, e))
                raise IndexerResultParsingException("Unable to check caps: %s" % str(e), None)
    return set(result)
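A hypothetical sketch of what a _testId-style probe could look like, based only on how it is called in the examples above; the project's real helper may differ, and the function name, timeout, and URL layout here are assumptions. The idea is to run a single ID-based search and treat the ID as supported if the expected string appears in any returned title.

# Hypothetical sketch of a _testId-style probe (the real helper may differ).
import requests
import xml.etree.ElementTree as ET

def test_id(host, apikey, t, id_name, id_value, expected):
    # Query e.g. .../api?apikey=...&t=tvsearch&tvdbid=121361 and scan the result titles.
    url = "%s/api?apikey=%s&t=%s&%s=%s" % (host.rstrip("/"), apikey, t, id_name, id_value)
    r = requests.get(url, verify=False, timeout=30)
    r.raise_for_status()
    titles = [x.text or "" for x in ET.fromstring(r.content).findall("./channel/item/title")]
    return any(expected.lower() in title.lower() for title in titles)

# test_id("https://indexer.example.com", "my-api-key", "tvsearch", "tvdbid", "121361", "Thrones")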