Пример #1
0
    def from_content(cls, content, announcement_id=0):
        """Parses the content of an announcement's page from Tibia.com

        Parameters
        ----------
        content: :class:`str`
            The HTML content of an announcement in Tibia.com
        announcement_id: :class:`int`
            The id of the announcement. Since there is no way to obtain the id from the page,
            the id may be passed to assing.

        Returns
        -------
        :class:`ForumAnnouncement`
            The announcement contained in the page or :obj:`None` if not found.

        Raises
        ------
        InvalidContent
            If content is not the HTML content of an announcement page in Tibia.com
        """
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table", attrs={"width": "100%"})
        root_tables = [t for t in tables if "BoxContent" in t.parent.attrs.get("class", [])]
        if not root_tables:
            error_table = parsed_content.find("table", attrs={"class": "Table1"})
            if error_table and "not be found" in error_table.text:
                return None
            raise errors.InvalidContent("content is not a Tibia.com forum announcement.")
        forum_info_table, posts_table, footer_table = root_tables

        section_link, board_link, *_ = forum_info_table.find_all("a")
        section = section_link.text
        section_id = int(section_id_regex.search(section_link["href"]).group(1))
        board = board_link.text
        board_id = int(board_id_regex.search(board_link["href"]).group(1))

        announcement = cls(section=section, section_id=section_id, board=board, board_id=board_id,
                           announcement_id=announcement_id)

        timezone = timezone_regex.search(footer_table.text).group(1)
        offset = 1 if timezone == "CES" else 2

        announcement_container = posts_table.find("td", attrs={"class": "CipPost"})
        character_info_container = announcement_container.find("div", attrs={"class": "PostCharacterText"})
        announcement.author = ForumAuthor._parse_author_table(character_info_container)

        post_container = posts_table.find("div", attrs={"class": "PostText"})
        title_tag = post_container.find("b")
        announcement.title = title_tag.text
        dates_container = post_container.find("font")
        dates = post_dates_regex.findall(dates_container.text)
        announcement_content = post_container.encode_contents().decode()
        _, announcement_content = announcement_content.split("<hr/>", 1)
        announcement.content = announcement_content

        announcement.start_date, announcement.end_date = (parse_tibia_forum_datetime(date, offset) for date in dates)

        return announcement
Пример #2
0
    def from_content(cls, content):
        """Parse the content of the leaderboards page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the leaderboards page.

        Returns
        -------
        :class:`Leaderboard`
            The ledaerboard if found.
        """
        try:
            parsed_content = parse_tibiacom_content(content)
            tables = parsed_content.find_all("table",
                                             {"class": "TableContent"})
            form = parsed_content.find("form")
            data = parse_form_data(form, include_options=True)
            current_world = data["world"]
            current_rotation = None
            rotations = []
            for label, value in data["__options__"]["rotation"].items():
                current = False
                if "Current" in label:
                    label = "".join(rotation_end_pattern.findall(label))
                    current = True
                rotation_end = parse_tibia_datetime(label)
                rotation = LeaderboardRotation(int(value), rotation_end,
                                               current)
                if value == data["rotation"]:
                    current_rotation = rotation
                rotations.append(rotation)
            leaderboard = cls(current_world, current_rotation)
            leaderboard.available_worlds = [
                w for w in data["__options__"]["world"].values() if w
            ]
            leaderboard.available_rotations = rotations
            if leaderboard.rotation and leaderboard.rotation.current:
                last_update_table = tables[2]
                numbers = re.findall(r'(\d+)', last_update_table.text)
                if numbers:
                    leaderboard.last_update = datetime.timedelta(
                        minutes=int(numbers[0]))
            leaderboard._parse_entries(tables[-1])
            pagination_block = parsed_content.find("small")
            pages, total, count = parse_pagination(
                pagination_block) if pagination_block else (0, 0, 0)
            leaderboard.page = pages
            leaderboard.total_pages = total
            leaderboard.results_count = count
            return leaderboard
        except (AttributeError, ValueError) as e:
            raise errors.InvalidContent(
                "content does not belong to the leaderboards", e)
Пример #3
0
    def from_content(cls, content):
        """Parse the content of a spells page.

        Parameters
        -----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        ----------
        :class:`Spell`
            The spell data. If the spell doesn't exist, this will be :obj:`None`.

        Raises
        ------
        InvalidContent
            If content is not the HTML of the spells section.
        """
        parsed_content = parse_tibiacom_content(content)
        try:
            tables = parse_tibiacom_tables(parsed_content)
            title_table = parsed_content.find("table", attrs={"class": False})
            spell_table = tables["Spell Information"]
            img = title_table.find("img")
            url = urllib.parse.urlparse(img["src"])
            filename = os.path.basename(url.path)
            identifier = str(filename.split(".")[0])
            next_sibling = title_table.next_sibling
            description = ""
            while next_sibling:
                if isinstance(next_sibling, bs4.Tag):
                    if next_sibling.name == "br":
                        description += "\n"
                    elif next_sibling.name == "table":
                        break
                    else:
                        description += next_sibling.text
                elif isinstance(next_sibling, bs4.NavigableString):
                    description += str(next_sibling)
                next_sibling = next_sibling.next_sibling
            spell = cls._parse_spells_table(identifier, spell_table)
            spell.description = description.strip()
            if "Rune Information" in tables:
                spell.rune = cls._parse_rune_table(tables["Rune Information"])
            return spell
        except (TypeError, AttributeError, IndexError, KeyError) as e:
            form = parsed_content.find("form")
            if form:
                data = parse_form_data(form)
                if "subtopic=spells" in data.get("__action__"):
                    return None
            raise errors.InvalidContent("content is not a spell page", e)
Пример #4
0
    def list_from_content(cls, content):
        """Parses the content of a board list Tibia.com into a list of boards.

        Parameters
        ----------
        content: :class:`str`
            The raw HTML response from the board list.

        Returns
        -------
        :class:`list` of :class:`ListedBoard`

        Raises
        ------
        InvalidContent`
            Content is not a board list in Tibia.com
        """
        try:
            parsed_content = parse_tibiacom_content(content)
            tables = parsed_content.find_all("table", attrs={"width": "100%"})
            _, board_list_table, timezone_table = tables
            _, *board_rows = board_list_table.find_all("tr")
            timezone_text = timezone_table.text
            timezone = timezone_regex.search(timezone_text).group(1)
            offset = 1 if timezone == "CES" else 2
            boards = []
            for board_row in board_rows[:-3]:
                try:
                    board = cls._parse_board_row(board_row, offset)
                except IndexError:
                    continue
                else:
                    boards.append(board)
            return boards
        except ValueError as e:
            raise errors.InvalidContent("content does not belong to a forum section.", e)
Пример #5
0
    def from_content(cls, content):
        """Creates an instance of the class from the html content of the thread's page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`ForumThread`
            The thread contained in the page, or None if the thread doesn't exist

        Raises
        ------
        InvalidContent
            If content is not the HTML of a thread's page.
        """
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table")
        root_tables = [t for t in tables if "BoxContent" in t.parent.attrs.get("class", [])]
        if not root_tables:
            error_table = parsed_content.find("table", attrs={"class": "Table1"})
            if error_table and "not found" in error_table.text:
                return None
            raise errors.InvalidContent("content is not a Tibia.com forum thread.")
        try:
            if len(root_tables) == 4:
                forum_info_table, title_table, posts_table, footer_table = root_tables
            else:
                forum_info_table, title_table, footer_table = root_tables
                posts_table = None
        except ValueError as e:
            raise errors.InvalidContent("content is not a Tibia.com forum thread.", e)

        header_text = forum_info_table.text
        section, board, *_ = split_list(header_text, "|", "|")

        thread = cls(section=section, board=board)

        thread.title = title_table.text.strip()
        golden_frame = title_table.find("div", attrs={"class": "CipPost"})
        thread.golden_frame = golden_frame is not None

        timezone = timezone_regex.search(footer_table.text).group(1)
        time_page_column, navigation_column = footer_table.find_all("td", attrs={"class", "ff_white"})
        page_links = time_page_column.find_all("a")
        if page_links:
            last_link = page_links[-1]["href"]
            thread.page = int(footer_table.find("span").text)
            thread.total_pages = max(int(page_number_regex.search(last_link).group(1)), thread.page)

        navigation_links = navigation_column.find_all("a")
        if len(navigation_links) == 2:
            prev_link, next_link = navigation_links
            prev_link_url = prev_link["href"]
            thread.previous_topic_number = int(thread_id_regex.search(prev_link_url).group(1))
            next_link_url = next_link["href"]
            thread.next_topic_number = int(thread_id_regex.search(next_link_url).group(1))
        elif "Previous" in navigation_links[0].text:
            prev_link_url = navigation_links[0]["href"]
            thread.previous_topic_number = int(thread_id_regex.search(prev_link_url).group(1))
        else:
            next_link_url = navigation_links[0]["href"]
            thread.next_topic_number = int(thread_id_regex.search(next_link_url).group(1))
        offset = 1 if timezone == "CES" else 2

        if posts_table:
            thread_info_table, *post_tables = posts_table.find_all("div", attrs={"class": "ForumPost"})
            inner_info_table = thread_info_table.find("table")
            thread_num_col, thread_pages_col, thread_navigation_col = inner_info_table.find_all("td")
            thread.thread_id = int(thread_num_col.text.replace("Thread #", ""))
            for post_table in post_tables:
                post = cls._parse_post_table(post_table, offset)
                thread.posts.append(post)
        return thread
Пример #6
0
    def from_content(cls, content):
        """Parses the board's HTML content from Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the board.

        Returns
        -------
        :class:`ForumBoard`
            The forum board contained.

        Raises
        ------
        InvalidContent`
            Content is not a board in Tibia.com
        """
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table")
        try:
            header_table, time_selector_table, threads_table, timezone_table, boardjump_table, *_ = tables
        except ValueError as e:
            raise errors.InvalidContent("content is not a forum board", e)
        header_text = header_table.text.strip()
        section, name = split_list(header_text, "|", "|")

        board = cls(name=name, section=section)
        thread_rows = threads_table.find_all("tr")

        age_selector = time_selector_table.find("select")
        if not age_selector:
            return cls(section=section, name=name)
        selected_age = age_selector.find("option", {"selected": True})
        if selected_age:
            board.age = int(selected_age["value"])

        board_selector = boardjump_table.find("select")
        selected_board = board_selector.find("option", {"selected": True})
        board.board_id = int(selected_board["value"])

        page_info = threads_table.find("td", attrs={"class": "ff_info"})
        if page_info:
            current_page_text = page_info.find("span")
            page_links = page_info.find_all("a")
            if current_page_text:
                board.page = int(current_page_text.text)
                board.total_pages = max(board.page, int(page_number_regex.search(page_links[-1]["href"]).group(1)))

        for thread_row in thread_rows[1:]:
            columns = thread_row.find_all("td")
            if len(columns) != 7:
                continue

            entry = cls._parse_thread_row(columns)
            if isinstance(entry, ListedThread):
                board.threads.append(entry)
                cip_border = thread_row.find("div", attrs={"class": "CipBorder"})
                if cip_border:
                    entry.golden_frame = True
            elif isinstance(entry, ListedAnnouncement):
                board.announcements.append(entry)

        return board
Пример #7
0
    def from_content(cls, content):
        """Parses the content of the CM Post Archive page from Tibia.com

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the CM Post Archive in Tibia.com

        Returns
        -------
        :class:`CMPostArchive`
            The CM Post archive found in the page.

        Raises
        ------
        InvalidContent
            If content is not the HTML content of the CM Post Archive in Tibia.com
        """
        parsed_content = parse_tibiacom_content(content)

        form = parsed_content.find("form")
        try:
            start_month_selector, start_day_selector, start_year_selector, \
             end_month_selector, end_day_selector, end_year_selector = form.find_all("select")
            start_date = cls._get_selected_date(start_month_selector, start_day_selector, start_year_selector)
            end_date = cls._get_selected_date(end_month_selector, end_day_selector, end_year_selector)
        except (AttributeError, ValueError) as e:
            raise errors.InvalidContent("content does not belong to the CM Post Archive in Tibia.com", e)
        cm_archive = cls(start_date=start_date, end_date=end_date)
        table = parsed_content.find("table", attrs={"class", "Table3"})
        if not table:
            return cm_archive
        inner_table_container = table.find("div", attrs={"class", "InnerTableContainer"})
        inner_table = inner_table_container.find("table")
        inner_table_rows = inner_table.find_all("tr")
        inner_table_rows = [e for e in inner_table_rows if e.parent == inner_table]
        table_content = inner_table_container.find("table", attrs={"class", "TableContent"})

        header_row, *rows = table_content.find_all("tr")

        for row in rows:
            columns = row.find_all("td")
            date_column = columns[0]
            date = parse_tibia_datetime(date_column.text.replace("\xa0", " "))
            board_thread_column = columns[1]
            convert_line_breaks(board_thread_column)
            board, thread = board_thread_column.text.splitlines()
            link_column = columns[2]
            post_link = link_column.find("a")
            post_link_url = post_link["href"]
            post_id = int(post_id_regex.search(post_link_url).group(1))
            cm_archive.posts.append(CMPost(date=date, board=board, thread_title=thread, post_id=post_id))
        if not cm_archive.posts:
            return cm_archive
        pages_column, results_column = inner_table_rows[-1].find_all("div")
        page_links = pages_column.find_all("a")
        listed_pages = [int(p.text) for p in page_links]
        if listed_pages:
            cm_archive.page = next((x for x in range(1, listed_pages[-1] + 1) if x not in listed_pages), 0)
            cm_archive.total_pages = max(int(page_links[-1].text), cm_archive.page)
            if not cm_archive.page:
                cm_archive.total_pages += 1
                cm_archive.page = cm_archive.total_pages

        cm_archive.results_count = int(results_column.text.split(":")[-1])
        return cm_archive
Пример #8
0
    def from_content(cls, content):
        """Parse the content of the spells section.

        Parameters
        -----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        ----------
        :class:`SpellsSection`
            The spells contained and the filtering information.

        Raises
        ------
        InvalidContent
            If content is not the HTML of the spells section.
        """
        try:
            parsed_content = parse_tibiacom_content(content)
            table_content_container = parsed_content.find(
                "div", attrs={"class": "InnerTableContainer"})
            spells_table = table_content_container.find(
                "table", class_=lambda t: t != "TableContent")
            spell_rows = spells_table.find_all(
                "tr", {'bgcolor': ["#D4C0A1", "#F1E0C6"]})
            spells_section = cls()
            for row in spell_rows:
                columns = row.find_all("td")
                if len(columns) != 7:
                    continue
                spell_link = columns[0].find("a")
                url = urllib.parse.urlparse(spell_link["href"])
                query = urllib.parse.parse_qs(url.query)
                cols_text = [c.text for c in columns]
                identifier = query["spell"][0]
                match = spell_name.findall(cols_text[0])
                name, words = match[0]
                group = try_enum(SpellGroup, cols_text[1])
                spell_type = try_enum(SpellType, cols_text[2])
                level = int(cols_text[3])
                mana = parse_integer(cols_text[4], None)
                price = parse_integer(cols_text[5], 0)
                premium = "yes" in cols_text[6]
                spell = SpellEntry(name=name.strip(),
                                   words=words.strip(),
                                   spell_type=spell_type,
                                   level=level,
                                   group=group,
                                   mana=mana,
                                   premium=premium,
                                   price=price,
                                   identifier=identifier)
                spells_section.entries.append(spell)
            form = parsed_content.find("form")
            data = parse_form_data(form)
            spells_section.vocation = try_enum(VocationSpellFilter,
                                               data["vocation"])
            spells_section.group = try_enum(SpellGroup, data["group"])
            spells_section.premium = try_enum(SpellGroup, data["group"])
            spells_section.spell_type = try_enum(SpellType, data["type"])
            spells_section.sort_by = try_enum(SpellSorting, data["sort"])
            spells_section.premium = "yes" in data["premium"] if data[
                "premium"] else None
            return spells_section
        except (AttributeError, TypeError) as e:
            raise errors.InvalidContent(
                "content does not belong to the Spells section", e)