Пример #1
0
def find_xdcc_eu_packs(search_phrase: str) -> List[XDCCPack]:
    """
    Method that conducts the xdcc pack search for xdcc.eu

    The search phrase is handed to requests as a query parameter, so spaces
    and reserved characters such as ``&`` or ``#`` are URL-encoded instead
    of corrupting the query string.

    :param search_phrase: The search phrase to search for
    :return: the search results as a list of XDCCPack objects,
             empty if the page contains no table rows
    """
    response = requests.get(
        "https://www.xdcc.eu/search.php",
        params={"searchkey": search_phrase}
    )
    soup = BeautifulSoup(response.text, "html.parser")

    # The first row is skipped (presumably the table header). Slicing
    # instead of pop(0) keeps a page without any rows from raising
    # an IndexError.
    entries = soup.select("tr")[1:]

    packs = []
    for entry in entries:
        parts = entry.select("td")

        # The second link of the second cell carries the server in its
        # "data-s" attribute and the "<bot> xdcc send #<n>" message
        # in its "data-p" attribute
        info = parts[1].select("a")[1]
        server = IrcServer(info["data-s"])
        bot, pack_number = info["data-p"].split(" xdcc send #")

        pack = XDCCPack(server, bot, int(pack_number))
        pack.set_size(byte_string_to_byte_count(parts[5].text))
        pack.set_filename(parts[6].text)

        packs.append(pack)

    return packs
Пример #2
0
    def search(self, search_term) -> List[TorrentInfo]:
        """
        Performs the actual search on nyaa.si, sorted by seeders in
        descending order

        The query is passed via ``params`` so that requests URL-encodes it
        (spaces still become ``+``, reserved characters are escaped).

        :param search_term: The term to search for
        :return: The search results, empty if the page has no table rows
        """
        page = requests.get(
            "https://nyaa.si/",
            params={"q": search_term, "s": "seeders", "o": "desc"}
        )
        soup = BeautifulSoup(page.text, "html.parser")

        # The first row is skipped (presumably the table header);
        # slicing is also safe when no rows were found at all
        rows = soup.select("tr")[1:]

        results = []
        for row in rows:
            columns = row.select("td")
            name = columns[1].select("a")[-1].text.strip()
            links = columns[2].select("a")

            # Build the URL with plain string concatenation:
            # os.path.join would insert a backslash on Windows
            torrent_file = "https://nyaa.si/" + links[0]["href"][1:]
            magnet_link = links[1]["href"]

            # Turns e.g. "1.5 GiB" into "1.5GB" before parsing
            size_string = columns[3].text.replace("i", "").replace(" ", "")
            size = byte_string_to_byte_count(size_string)
            results.append(TorrentInfo(name, torrent_file, magnet_link, size))

        return results
Пример #3
0
    def test_byte_conversion(self):
        """
        Checks the round trip between byte counts and their
        human-readable string form, in both directions
        :return: None
        """
        round_trips = (
            ("1MB", 1000000),
            ("1.024KB", 1024),
            ("0.123KB", 123),
            ("1.234GB", 1234000000),
        )
        for text, amount in round_trips:
            self.assertEqual(text, human_readable_bytes(amount))
            self.assertEqual(byte_string_to_byte_count(text), amount)

        # A count with more digits than are displayed must be rounded
        rounded = human_readable_bytes(1234123123)
        self.assertEqual("1.234GB", rounded)
Пример #4
0
def find_nibl_packs(search_phrase: str) -> List[XDCCPack]:
    """
    Searches for XDCC Packs matching the specified search string on nibl.co.uk

    :param search_phrase: The search phrase to search for
    :return:              The list of found XDCC Packs
    """

    # Get the data from the website. nibl.co.uk uses + symbols as spaces,
    # which is exactly how requests encodes spaces in query parameters;
    # any other reserved character is escaped as well.
    html = requests.get(
        "https://nibl.co.uk/bots.php",
        params={"search": search_phrase}
    ).text

    content = BeautifulSoup(html, "html.parser")
    file_names = content.select(".filename")
    pack_numbers = content.select(".packnumber")
    bot_names = content.select(".name")
    file_sizes = content.select(".filesize")

    results = []

    # The four selections are walked in lockstep. zip stops at the
    # shortest one, so a mismatch in length can no longer raise
    # an IndexError.
    for name_cell, number_cell, bot_cell, size_cell in zip(
            file_names, pack_numbers, bot_names, file_sizes
    ):

        # The filename has two links after it, which need to be cut out
        filename = name_cell.text.rsplit(" \n", 1)[0]

        # The bot name has a link after it, which needs to be cut out
        bot = bot_cell.text.rsplit(" ", 1)[0]

        # Every result is assigned to the rizon IRC server
        result = XDCCPack(
            IrcServer("irc.rizon.net"), bot, int(number_cell.text)
        )

        result.set_size(byte_string_to_byte_count(size_cell.text.lower()))
        result.set_filename(filename)
        results.append(result)

    return results
Пример #5
0
def get_page_results(page_content: BeautifulSoup) -> List[XDCCPack]:
    """
    This parses a single ixIRC page to find all search results from that page

    The page is processed as a flat sequence of ``td`` cells. A small state
    machine tracks the position inside the current pack (column_count),
    detects the end of a pack by counting cells containing 'ago'
    (ago_count) and drops packs that contain an empty cell (aborted).
    A pack is only added to the results once its end was detected, so a
    pack that is cut off at the end of the page is discarded.

    :param page_content: The Beautifulsoup-parsed content of the page
    :return:             A list of XDCC Packs on that page
    """
    results = []
    packs = page_content.select("td")

    # Initialize the pack variables
    file_name = ""
    bot = ""
    server = ""
    pack_number = 0
    size = ""

    # Keeps track of which column the parser is currently working on
    column_count = 0

    # Counts how often the word "ago" was used,
    # which is used to keep track of on which
    # pack we currently are. Each pack has two instances of 'ago' occurring.
    ago_count = 0

    # The process is aborted whenever an invalid pack is encountered
    aborted = False

    # Flag that lets other parts of the loop know that
    # we are moving on to the next pack
    next_element = False

    # line_part is a x,y section of the rows and columns of the website.
    # We go through it in the order Left->Right, Top->Bottom
    for line_part in packs:

        # Only one branch of this chain runs per cell, so the 'ago' check
        # is skipped for the first cell of a new pack
        if next_element and line_part.text == "":
            # Jumps to the next not-empty element
            # if we are currently jumping to the next pack
            continue

        elif next_element and not line_part.text == "":
            # We reached a new pack, start parsing the new pack
            next_element = False

        elif not next_element and line_part.text == "":
            # Invalid pack element if a string == "" in the middle of the pack,
            # abort the pack and jump to next element
            aborted = True

        elif "ago" in line_part.text and column_count > 6:
            # Counts the number of times 'ago' is seen by the parser.
            # The last two elements of a pack both end
            # with 'ago', which makes it ideal to use as a marker
            # for when a single pack element ends.
            # This only starts counting once we got all relevant information
            # from the pack itself (column_count > 6)
            # to avoid conflicts when the substring 'ago'
            # is contained inside the file name
            ago_count += 1

        # This gets the information from the pack and stores
        # them into variables.
        # This gets skipped if it has been established that the pack is invalid
        if not aborted:
            if column_count == 0:
                # File Name
                file_name = line_part.text
            elif column_count == 1:
                # Server Address, built from the lowercased cell text
                server = "irc." + line_part.text.lower() + ".net"
            elif column_count == 2:
                # Channel Information, not needed due to /whois IRC queries
                pass
            elif column_count == 3:
                # Bot Name
                bot = line_part.text
            elif column_count == 4:
                # Pack Number, int() raises ValueError for non-numeric text
                pack_number = int(line_part.text)
            elif column_count == 5:
                pass  # This is the 'gets' section, we don't need that
            elif column_count == 6:
                # File size, with non-breaking and regular spaces removed
                size = line_part.text.replace("\xa0", " ").replace(" ", "")

        # Resets state after a pack was successfully parsed,
        # and adds xdcc pack to results
        if not aborted and ago_count == 2:
            ago_count = 0
            column_count = 0
            next_element = True

            # Generate XDCCPack and append it to the list
            result = XDCCPack(IrcServer(server), bot, pack_number)
            result.set_filename(file_name)
            result.set_size(byte_string_to_byte_count(size))
            results.append(result)

        # Resets state after invalid pack, nothing is added to the results
        elif aborted and ago_count == 2:
            aborted = False
            ago_count = 0
            column_count = 0
            next_element = True

        if not next_element:
            # Only increment column_count in the middle of a pack,
            # not when we jump to the next pack element.
            # This also runs for aborted packs, which keeps the
            # 'ago' detection (column_count > 6) working for them
            column_count += 1

    return results
Пример #6
0
    def __init__(self,
                 pack: XDCCPack,
                 retry: bool = False,
                 timeout: int = 120,
                 fallback_channel: Optional[str] = None,
                 throttle: Union[int, str] = -1):
        """
        Sets up the XDCC IRC client for a single pack download
        :param pack: The pack to download
        :param retry: Set to true for retried downloads. The download
                      limit is only logged when this is false.
        :param timeout: Sets the timeout time for starting downloads
        :param fallback_channel: A fallback channel for when whois
                                 fails to find a valid channel
        :param throttle: Throttles the download to n bytes per second.
                         Strings are converted to a byte count first.
                         If this value is <= 0, the download speed will be
                         unlimited
        """
        self.logger = ColorLogger(
            logging.getLogger(self.__class__.__name__),
            warning_bg=Back.RED,
            warning_fg=Fore.BLACK
        )

        # Replace undecodable bytes instead of failing, and keep the
        # irc library's own logger quiet below ERROR level
        irc.client.ServerConnection.buffer_class.errors = "replace"
        irc.client.log.setLevel(logging.ERROR)

        # Normalize the throttle to a byte count, -1 meaning "no limit"
        byte_limit = (
            byte_string_to_byte_count(throttle)
            if isinstance(throttle, str)
            else throttle
        )
        self.download_limit = -1 if byte_limit <= 0 else byte_limit

        # Constructor arguments
        self.pack = pack
        self.server = pack.server
        self.retry = retry
        self.timeout = timeout
        self.fallback_channel = fallback_channel

        # IRC connection state
        self.user = User()
        self.channels = None  # type: Optional[List[str]]
        self.connected = True
        self.disconnected = False
        self.message_sent = False
        self.downloading = False
        self.timed_out = False
        self.connect_start_time = 0.0
        self.xdcc_timestamp = 0.0

        # XDCC transfer state
        self.peer_address = ""
        self.peer_port = -1
        self.filesize = -1
        self.progress = 0
        self.xdcc_file = None  # type: Optional[IO[Any]]
        self.xdcc_connection = None  # type: Optional[DCCConnection]
        self.struct_format = b"!I"
        self.ack_lock = Lock()

        if not self.retry:
            unlimited = self.download_limit == -1
            shown_limit = \
                "\"unlimited\"" if unlimited else str(self.download_limit)
            self.logger.info("Download Limit set to: " + shown_limit)

        # The threads are only created here, not started
        self.timeout_watcher_thread = Thread(target=self.timeout_watcher)
        self.progress_printer_thread = Thread(target=self.progress_printer)

        super().__init__()
Пример #7
0
    def test_byte_string_to_byte_count(self):
        """
        Checks the conversion of human readable byte strings into byte
        counts, including the rejection of malformed strings.
        :return: None
        """
        valid_cases = (
            (1, "1"),
            (10**3, "1k"),
            (10**6, "1m"),
            (10**9, "1g"),
            (1500, "1.5k"),
            (1500 * 10**3, "1.5m"),
            (1500 * 10**6, "1.5g"),
        )
        for expected, text in valid_cases:
            self.assertEqual(expected, byte_string_to_byte_count(text))

        # Each of these inputs is expected to be rejected with a ValueError
        for malformed in ("1.1.1.1", "1.5", "1.5h"):
            with self.assertRaises(ValueError):
                byte_string_to_byte_count(malformed)