Пример #1
0
    def from_content(cls, content):
        """Create an instance of the class from the HTML content of the tournament's leaderboards page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`TournamentLeaderboard`
            The leaderboard contained in the page, or ``None`` when the selectors container could not be
            parsed (i.e. the tournament leaderboard doesn't exist).

        Raises
        ------
        InvalidContent
            If content is not the HTML of a tournament's leaderboard page.
        """
        invalid_message = "content does not belong to the Tibia.com's tournament leaderboards section"
        try:
            parsed_content = parse_tibiacom_content(content)
            tables = parsed_content.find_all('div', attrs={'class': 'TableContainer'})
            if not tables:
                raise InvalidContent(invalid_message)
            leaderboard = cls()
            # The first container holds the world/page selectors; a falsy result means no leaderboard.
            if not leaderboard._parse_leaderboard_selectors(tables[0]):
                return None
            # The ranking lives in the second container. Without it the page is not a valid
            # leaderboard, so report that explicitly instead of leaking an IndexError.
            if len(tables) < 2:
                raise InvalidContent(invalid_message)
            leaderboard._parse_leaderboard_entries(tables[1])
            return leaderboard
        except AttributeError as e:
            raise InvalidContent(invalid_message, e) from e
Пример #2
0
    def from_content(cls, content):
        """Build a world overview from the HTML of the World Overview section of Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the World Overview page in Tibia.com

        Returns
        -------
        :class:`WorldOverview`
            An instance populated with the online record and the listed worlds.

        Raises
        ------
        InvalidContent
            If the provided content is not the HTML content of the worlds section in Tibia.com
        """
        document = parse_tibiacom_content(content)
        overview = WorldOverview()
        try:
            # The first "TableContent" table carries the online record; the rest list the worlds.
            record_table, *world_tables = document.find_all("table", {"class": "TableContent"})
            record_match = record_regexp.search(record_table.text)
            overview.record_count = parse_integer(record_match.group("count"))
            overview.record_date = parse_tibia_datetime(record_match.group("date"))
            overview._parse_worlds_tables(world_tables)
        except (AttributeError, KeyError, ValueError) as e:
            raise InvalidContent("content does not belong to the World Overview section in Tibia.com", e)
        return overview
Пример #3
0
 def test_parse_pagination_not_collapsed_first_page(self):
     """Parsing first page with page numbers not collapsed.

     The fixture lists pages 1-8 with page 1 marked as current and reports 567 results.
     """
     content = """<small><div style="float: left;"><b>» Pages: <span 
     class="PageLink "><span class="CurrentPageLink">1</span></span> <span class="PageLink "><a 
     class="CipAjaxLink" ajaxcip="true" ajaxcip_datatype="Container" 
     href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid=29122&amp;type=0&amp
     ;currentpage=2">2</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" 
     ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid
     =29122&amp;type=0&amp;currentpage=3">3</a></span> <span class="PageLink "><a class="CipAjaxLink" 
     ajaxcip="true" ajaxcip_datatype="Container" 
     href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid=29122&amp;type=0&amp
     ;currentpage=4">4</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" 
     ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid
     =29122&amp;type=0&amp;currentpage=5">5</a></span> <span class="PageLink "><a class="CipAjaxLink" 
     ajaxcip="true" ajaxcip_datatype="Container" 
     href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid=29122&amp;type=0&amp
     ;currentpage=6">6</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" 
     ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid
     =29122&amp;type=0&amp;currentpage=7">7</a></span> <span class="PageLink "><a class="CipAjaxLink" 
     ajaxcip="true" ajaxcip_datatype="Container" 
     href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid=29122&amp;type=0&amp
     ;currentpage=8">8</a></span></b></div><div style="float: right;"><b>» Results: 567</b></div></small>"""
     parsed_content = utils.parse_tibiacom_content(content, builder="html5lib")
     page, total_pages, results_count = utils.parse_pagination(parsed_content)
     self.assertEqual(1, page)
     self.assertEqual(8, total_pages)
     self.assertEqual(567, results_count)

 # The method was previously named without the ``test`` prefix, so unittest never collected it.
 # The old name is kept as an alias for anything that referenced it directly; it is not collected twice.
 parse_parse_pagination_not_collapsed_first_page = test_parse_pagination_not_collapsed_first_page
Пример #4
0
    def from_content(cls, content, announcement_id=0):
        """Build a forum announcement from the HTML of an announcement page in Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of an announcement in Tibia.com
        announcement_id: :class:`int`
            The id of the announcement. Since there is no way to obtain the id from the page,
            the id may be passed here to assign it to the result.

        Returns
        -------
        :class:`ForumAnnouncement`
            The announcement contained in the page or :obj:`None` if not found.

        Raises
        ------
        InvalidContent
            If content is not the HTML content of an announcement page in Tibia.com
        """
        document = parse_tibiacom_content(content)
        # Only full-width tables placed directly inside a "BoxContent" element are relevant.
        root_tables = [
            table
            for table in document.find_all("table", attrs={"width": "100%"})
            if "BoxContent" in table.parent.attrs.get("class", [])
        ]
        if not root_tables:
            error_table = document.find("table", attrs={"class": "Table1"})
            if not (error_table and "not be found" in error_table.text):
                raise errors.InvalidContent("content is not a Tibia.com forum announcement.")
            return None
        forum_info_table, posts_table, footer_table = root_tables

        # The first two links of the info table point to the section and the board.
        section_link, board_link, *_ = forum_info_table.find_all("a")
        announcement = cls(
            section=section_link.text,
            section_id=int(section_id_regex.search(section_link["href"]).group(1)),
            board=board_link.text,
            board_id=int(board_id_regex.search(board_link["href"]).group(1)),
            announcement_id=announcement_id,
        )

        # The footer states the timezone used by the dates shown in the page.
        timezone_label = timezone_regex.search(footer_table.text).group(1)
        if timezone_label == "CES":
            utc_offset = 1
        else:
            utc_offset = 2

        post_cell = posts_table.find("td", attrs={"class": "CipPost"})
        author_container = post_cell.find("div", attrs={"class": "PostCharacterText"})
        announcement.author = ForumAuthor._parse_author_table(author_container)

        post_text = posts_table.find("div", attrs={"class": "PostText"})
        announcement.title = post_text.find("b").text
        date_strings = post_dates_regex.findall(post_text.find("font").text)
        # Everything after the first horizontal rule is the announcement's body.
        raw_html = post_text.encode_contents().decode()
        _, body_html = raw_html.split("<hr/>", 1)
        announcement.content = body_html

        announcement.start_date, announcement.end_date = (
            parse_tibia_forum_datetime(date_string, utc_offset) for date_string in date_strings
        )

        return announcement
Пример #5
0
    def from_content(cls, content):
        """Create an instance of the class from the HTML content of a highscores page.

        Notes
        -----
        Tibia.com only shows up to 50 entries per page, so in order to obtain the full highscores, all pages must be
        obtained individually and merged into one.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`Highscores`
            The highscores results contained in the page, or ``None`` if the page has no "Highscores" table.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a highscore's page.
        """
        parsed_content = parse_tibiacom_content(content)
        tables = cls._parse_tables(parsed_content)
        filters = tables.get("Highscores Filter")
        if filters is None:
            raise InvalidContent("content does is not from the highscores section of Tibia.com")
        world_filter, vocation_filter, category_filter = filters
        world = world_filter.find("option", {"selected": True})["value"]
        if world == "ALL":
            world = None
        category = int(category_filter.find("option", {"selected": True})["value"])
        vocation_selected = vocation_filter.find("option", {"selected": True})
        vocation = int(vocation_selected["value"]) if vocation_selected else 0
        highscores = cls(world, category, vocation=vocation)
        entries = tables.get("Highscores")
        last_update_container = parsed_content.find("span", attrs={"class": "RightArea"})
        if last_update_container:
            m = numeric_pattern.search(last_update_container.text)
            highscores.last_updated = datetime.timedelta(minutes=int(m.group(1))) if m else datetime.timedelta()
        if entries is None:
            return None
        # The first two rows are headers and the last one holds pagination and the results count.
        _, _, *rows = entries
        info_row = rows.pop()
        pages_div, results_div = info_row.find_all("div")
        listed_pages = [int(link.text) for link in pages_div.find_all("a")]
        if listed_pages:
            last_listed = listed_pages[-1]
            known_pages = set(listed_pages)
            # The current page is inferred as the first number that has no link. If every number
            # up to the last link is present, the current page must be the one right after it
            # (i.e. the last page); previously this case yielded page 0 and an undercounted total.
            highscores.page = next(
                (number for number in range(1, last_listed + 1) if number not in known_pages),
                last_listed + 1,
            )
            highscores.total_pages = max(last_listed, highscores.page)
        highscores.results_count = int(results_pattern.search(results_div.text).group(1))
        for row in rows:
            cols_raw = row.find_all('td')
            if "There is currently no data" in cols_raw[0].text:
                break
            highscores._parse_entry(cols_raw)
        return highscores
Пример #6
0
    def from_content(cls, content):
        """Build a leaderboard from the HTML content of the leaderboards page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the leaderboards page.

        Returns
        -------
        :class:`Leaderboard`
            The leaderboard if found.

        Raises
        ------
        InvalidContent
            If an :class:`AttributeError` or :class:`ValueError` occurs while parsing the content.
        """
        try:
            document = parse_tibiacom_content(content)
            content_tables = document.find_all("table", {"class": "TableContent"})
            form_data = parse_form_data(document.find("form"), include_options=True)
            world = form_data["world"]

            selected_rotation = None
            rotations = []
            for label, value in form_data["__options__"]["rotation"].items():
                is_current = "Current" in label
                if is_current:
                    # Keep only the date portion of the label of the current rotation.
                    label = "".join(rotation_end_pattern.findall(label))
                end_date = parse_tibia_datetime(label)
                rotation = LeaderboardRotation(int(value), end_date, is_current)
                if value == form_data["rotation"]:
                    selected_rotation = rotation
                rotations.append(rotation)

            leaderboard = cls(world, selected_rotation)
            leaderboard.available_worlds = list(filter(None, form_data["__options__"]["world"].values()))
            leaderboard.available_rotations = rotations

            if leaderboard.rotation and leaderboard.rotation.current:
                # The third table holds the "last update" text; its first number is used as minutes.
                minutes_match = re.search(r'(\d+)', content_tables[2].text)
                if minutes_match:
                    leaderboard.last_update = datetime.timedelta(minutes=int(minutes_match.group(1)))

            leaderboard._parse_entries(content_tables[-1])

            pagination_block = document.find("small")
            if pagination_block:
                page, total_pages, results_count = parse_pagination(pagination_block)
            else:
                page, total_pages, results_count = (0, 0, 0)
            leaderboard.page = page
            leaderboard.total_pages = total_pages
            leaderboard.results_count = results_count
            return leaderboard
        except (AttributeError, ValueError) as e:
            raise errors.InvalidContent("content does not belong to the leaderboards", e)
Пример #7
0
    def from_content(cls, content):
        """Build the kill statistics from the HTML content of the kill statistics' page.

        Parameters
        -----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        ----------
        :class:`KillStatistics`
            The kill statistics contained in the page or None if the entries table is missing.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a kill statistics' page.
        """
        try:
            document = parse_tibiacom_content(content)
            selector = document.find('div', attrs={'class': 'TableContainer'})
            world = selector.find("option", {"selected": True})["value"]

            entries_table = document.find('table', attrs={'border': '0', 'cellpadding': '3'})
            if entries_table is None:
                # No entries table: the page belongs to a nonexistent or unselected world.
                return None

            _header, _subheader, *rows = entries_table.find_all('tr')
            entries = {}
            total = None
            remaining = len(rows)
            for row in rows:
                remaining -= 1
                cells = [cell.text.replace('\xa0', ' ').strip() for cell in row.find_all('td')]
                entry = RaceEntry(
                    last_day_players_killed=int(cells[1]),
                    last_day_killed=int(cells[2]),
                    last_week_players_killed=int(cells[3]),
                    last_week_killed=int(cells[4]),
                )
                if remaining:
                    entries[cells[0]] = entry
                else:
                    # The final row is the totals row, not a race.
                    total = entry
            return cls(world, entries, total)
        except AttributeError:
            raise InvalidContent("content does not belong to a Tibia.com kill statistics page.")
Пример #8
0
    def from_content(cls, content):
        """Parse a Tibia.com response into a House object.

        Parameters
        ----------
        content: :class:`str`
            HTML content of the page.

        Returns
        -------
        :class:`House`
            The house contained in the page, or None if the first cell of the page reports an error
            (the house doesn't exist).

        Raises
        ------
        InvalidContent
            If the content is not the house section on Tibia.com
        """
        invalid_message = "content does is not from the house section of Tibia.com"
        parsed_content = parse_tibiacom_content(content)
        try:
            # The first cell holds the house image, the second one its description.
            image_column, desc_column, *_ = parsed_content.find_all('td')
        except ValueError as e:
            # Fewer than two cells: this cannot be a house page. Previously this leaked a ValueError.
            raise InvalidContent(invalid_message) from e
        if "Error" in image_column.text:
            return None
        image = image_column.find('img')
        # Turn line breaks into real newlines so the description can be split into lines.
        for br in desc_column.find_all("br"):
            br.replace_with("\n")
        description = desc_column.text.replace("\u00a0", " ").replace("\n\n", "\n")
        lines = description.splitlines()
        try:
            name, beds, info, state, *_ = lines
        except ValueError as e:
            raise InvalidContent(invalid_message) from e

        house = cls(name.strip())
        house.image_url = image["src"]
        house.id = int(id_regex.search(house.image_url).group(1))
        m = bed_regex.search(beds)
        if m:
            if m.group("type").lower() in ["guildhall", "clanhall"]:
                house.type = HouseType.GUILDHALL
            else:
                house.type = HouseType.HOUSE
            house.beds = int(m.group("beds"))

        m = info_regex.search(info)
        if m:
            house.world = m.group("world")
            house.rent = parse_tibia_money(m.group("rent"))
            house.size = int(m.group("size"))

        house._parse_status(state)
        return house
Пример #9
0
    def from_content(cls, content):
        """Parse the content of a spells page.

        Parameters
        -----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        ----------
        :class:`Spell`
            The spell data. If the spell doesn't exist, this will be :obj:`None`.

        Raises
        ------
        InvalidContent
            If content is not the HTML of the spells section.
        """
        parsed_content = parse_tibiacom_content(content)
        try:
            tables = parse_tibiacom_tables(parsed_content)
            title_table = parsed_content.find("table", attrs={"class": False})
            spell_table = tables["Spell Information"]
            img = title_table.find("img")
            # The identifier is the image's file name without its extension.
            url = urllib.parse.urlparse(img["src"])
            filename = os.path.basename(url.path)
            identifier = filename.split(".")[0]
            # The description is made of the nodes between the title table and the next table.
            description_parts = []
            sibling = title_table.next_sibling
            while sibling:
                if isinstance(sibling, bs4.Tag):
                    if sibling.name == "table":
                        break
                    description_parts.append("\n" if sibling.name == "br" else sibling.text)
                elif isinstance(sibling, bs4.NavigableString):
                    description_parts.append(str(sibling))
                sibling = sibling.next_sibling
            spell = cls._parse_spells_table(identifier, spell_table)
            spell.description = "".join(description_parts).strip()
            if "Rune Information" in tables:
                spell.rune = cls._parse_rune_table(tables["Rune Information"])
            return spell
        except (TypeError, AttributeError, IndexError, KeyError) as e:
            # A spells form without spell data means the requested spell doesn't exist.
            form = parsed_content.find("form")
            if form:
                data = parse_form_data(form)
                # Guard against a missing action so a TypeError does not mask the real problem.
                if "subtopic=spells" in (data.get("__action__") or ""):
                    return None
            raise errors.InvalidContent("content is not a spell page", e) from e
Пример #10
0
    def from_content(cls, content):
        """Build a guild's war information from the HTML of its wars section in Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of a guild's war section in Tibia.com

        Returns
        -------
        :class:`GuildWars`
            The guild's war information.

        Raises
        ------
        InvalidContent
            If a :class:`ValueError` occurs while parsing the content.
        """
        def text_with_newlines(table):
            # Replace line breaks with real newlines before reading the table's text.
            for line_break in table.find_all("br"):
                line_break.replace_with("\n")
            return table.text

        try:
            document = parse_tibiacom_content(content)
            current_container, history_container = document.find_all("div", attrs={"class": "TableContainer"})
            current_table = current_container.find("table", attrs={"class": "TableContent"})

            guild_name = None
            current_war = None
            if current_table is None:
                # No current war: the guild's name is read from the container's message.
                guild_name = war_current_empty.search(current_container.text).group(1)
            else:
                current_war = cls._parse_current_war_information(text_with_newlines(current_table))

            history_entries = [
                cls._parse_war_history_entry(text_with_newlines(history_table))
                for history_table in history_container.find_all("table", attrs={"class": "TableContent"})
            ]

            # Names taken from parsed wars take precedence over the one read from the message.
            if current_war:
                guild_name = current_war.guild_name
            elif history_entries:
                guild_name = history_entries[0].guild_name

            return cls(guild_name, current=current_war, history=history_entries)
        except ValueError as e:
            raise InvalidContent("content does not belong to the guild wars section", e)
Пример #11
0
    def list_from_content(cls, content):
        """Build the list of guilds from the HTML content of the world guilds' page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`list` of :class:`ListedGuild`
            List of guilds in the current world. ``None`` if it's the list of a world that doesn't exist.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a guild's page.
        """
        document = parse_tibiacom_content(content)
        selected_world = document.find('option', selected=True)
        try:
            world = selected_world.text
        except AttributeError:
            raise InvalidContent("Content does not belong to world guild list.")
        if "choose world" in world:
            # The placeholder option is selected: the world doesn't exist.
            return None

        # The first TableContainer holds the world selector; the remaining ones list guilds.
        _, *containers = document.find_all('div', class_="TableContainer")
        guilds = []
        for container in containers:
            is_active = "Active" in container.find('div', class_="Text").text
            _header_row, *guild_rows = container.find_all("tr", {'bgcolor': ["#D4C0A1", "#F1E0C6"]})
            for guild_row in guild_rows:
                cells = guild_row.find_all('td')
                logo_url = cells[0].find('img')["src"]
                # The first line of the second cell is the name; anything after it is the description.
                name, *extra = cells[1].get_text("\n").split("\n", 1)
                if extra:
                    description = extra[0].replace("\r", "").replace("\n", " ")
                else:
                    description = None
                guilds.append(cls(name, world, logo_url, description, is_active))
        return guilds
Пример #12
0
    def from_content(cls, content):
        """Build a house section from the HTML of a house list in Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            The raw HTML response from the house list.

        Returns
        -------
        :class:`HouseSection`
            The houses found in the page.

        Raises
        ------
        InvalidContent
            Content is not the house list from Tibia.com
        """
        try:
            document = parse_tibiacom_content(content)
            tables = parse_tibiacom_tables(document)
            section = cls()
            section._parse_filters(tables["House Search"])
            if len(tables) < 2:
                # Only the search form is present, so there are no results to read.
                return section
            first_title = next(iter(tables.keys()))
            _, *rows = tables[first_title].find_all("tr")
            for row in rows[1:]:
                cells = row.find_all("td")
                if len(cells) != 5:
                    continue
                name_cell, size_cell, rent_cell, status_cell, action_cell = cells
                house = HouseEntry(
                    name_cell.text.replace('\u00a0', ' '),
                    section.world,
                    0,
                    town=section.town,
                    type=section.house_type,
                )
                house.size = int(size_cell.text.replace('sqm', ''))
                house.rent = parse_tibia_money(rent_cell.text.replace('gold', ''))
                house._parse_status(status_cell.text.replace('\xa0', ' '))
                # The house's id is taken from the "houseid" input of the last cell.
                house.id = int(action_cell.find("input", {'name': 'houseid'})["value"])
                section.entries.append(house)
            return section
        except (ValueError, AttributeError, KeyError) as e:
            raise InvalidContent("content does not belong to a Tibia.com house list", e)
Пример #13
0
    def from_content(cls, content):
        """Build a tournament from the HTML content of the tournament's page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`Tournament`
            The tournament contained in the page, or None if the page reports an internal error or
            that no tournament is running.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a tournament's page.
        """
        try:
            not_found_markers = (
                "An internal error has occurred",
                "Currently there is no Tournament running.",
            )
            for marker in not_found_markers:
                if marker in content:
                    return None
            document = parse_tibiacom_content(content, builder='html5lib')
            box = document.find("div", attrs={"class": "BoxContent"})
            detail_tables = box.find_all('table', attrs={"class": "Table5"})
            archive_table = box.find('table', attrs={"class": "Table4"})
            # The last "Table5" holds the details, split into info, rules, scores and rewards.
            info_tables = detail_tables[-1].find_all('table', attrs={'class': 'TableContent'})
            sections = [info_tables[index] for index in range(4)]
            tournament = cls()
            section_parsers = (
                tournament._parse_tournament_info,
                tournament._parse_tournament_rules,
                tournament._parse_tournament_scores,
                tournament._parse_tournament_rewards,
            )
            for parse_section, section in zip(section_parsers, sections):
                parse_section(section)
            if archive_table:
                tournament._parse_archive_list(archive_table)
            return tournament
        except IndexError as e:
            raise InvalidContent("content does not belong to the Tibia.com's tournament section", e)
Пример #14
0
    def from_content(cls, content):
        """Build a highscores object from the HTML content of a highscores page.

        Notes
        -----
        Tibia.com only shows up to 25 entries per page, so in order to obtain the full highscores, all 12 pages must
        be parsed and merged into one.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`Highscores`
            The highscores results contained in the page. ``None`` if the selected world is empty or
            the page has no "Highscores" table.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a highscore's page.
        """
        document = parse_tibiacom_content(content)
        tables = cls._parse_tables(document)
        filters = tables.get("Highscores Filter")
        if filters is None:
            raise InvalidContent("content does is not from the highscores section of Tibia.com")
        world_filter, vocation_filter, category_filter = filters

        world = world_filter.find("option", {"selected": True})["value"]
        if world == "":
            return None
        category = category_filter.find("option", {"selected": True})["value"]
        vocation_option = vocation_filter.find("option", {"selected": True})
        if vocation_option:
            vocation = int(vocation_option["value"])
        else:
            vocation = 0

        highscores = cls(world, category, vocation=vocation)
        entries = tables.get("Highscores")
        if entries is None:
            return None
        # The first two rows are headers and the last one holds the results count.
        _, _column_titles, *entry_rows = entries
        summary_row = entry_rows.pop()
        highscores.results_count = int(results_pattern.search(summary_row.text).group(1))
        for entry_row in entry_rows:
            cells = entry_row.find_all('td')
            if "There is currently no data" in cells[0].text:
                break
            highscores._parse_entry(cells)
        return highscores
Пример #15
0
    def list_from_content(cls, content):
        """Build a list of houses from the HTML of a house list in Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            The raw HTML response from the house list.

        Returns
        -------
        :class:`list` of :class:`ListedHouse`
            The houses found in the page. ``None`` if the list's header doesn't have the expected format.

        Raises
        ------
        InvalidContent
            Content is not the house list from Tibia.com
        """
        try:
            document = parse_tibiacom_content(content)
            header, *rows = document.find("table").find_all("tr")
        except (ValueError, AttributeError):
            raise InvalidContent("content does not belong to a Tibia.com house list")

        header_match = list_header_regex.match(header.text.strip())
        if not header_match:
            return None
        town, world = header_match.group("town", "world")
        if header_match.group("type") == "Guildhalls":
            house_type = HouseType.GUILDHALL
        else:
            house_type = HouseType.HOUSE

        houses = []
        for row in rows[1:]:
            cells = row.find_all("td")
            if len(cells) != 6:
                continue
            name_cell, size_cell, rent_cell, status_cell, _, action_cell = cells
            house = ListedHouse(name_cell.text.replace('\u00a0', ' '), world, 0, town=town, type=house_type)
            house.size = int(size_cell.text.replace('sqm', ''))
            house.rent = parse_tibia_money(rent_cell.text.replace('gold', ''))
            house._parse_status(status_cell.text.replace('\xa0', ' '))
            # The house's id is taken from the "houseid" input of the last cell.
            house.id = int(action_cell.find("input", {'name': 'houseid'})["value"])
            houses.append(house)
        return houses
Пример #16
0
    def from_content(cls, content):
        """Build a guild from the HTML content of the guild's page.

        Parameters
        -----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        ----------
        :class:`Guild`
            The guild contained in the page or None if the page reports an internal error.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a guild's page.
        """
        if "An internal error has occurred" in content:
            return None

        document = parse_tibiacom_content(content)
        invalid_message = "content does not belong to a Tibia.com guild page."
        try:
            # The guild's name is the text of the page's first <h1>.
            guild = Guild(document.find('h1').text.strip())
        except AttributeError:
            raise InvalidContent(invalid_message)

        if not guild._parse_logo(document):
            raise InvalidContent(invalid_message)

        info_container = document.find("div", id="GuildInformationContainer")
        info_parsers = (
            guild._parse_guild_info,
            guild._parse_application_info,
            guild._parse_guild_homepage,
            guild._parse_guild_guildhall,
            guild._parse_guild_disband_info,
        )
        for parse_info in info_parsers:
            parse_info(info_container)
        guild._parse_guild_members(document)

        # The first listed member is recorded as the guildhall's owner.
        if guild.guildhall and guild.members:
            guild.guildhall.owner = guild.members[0].name

        return guild
Пример #17
0
 def test_parse_pagination_collapse_last_page(self):
     """Pagination with collapsed links where the current page is the last one (928 of 928)."""
     html = """<td class="PageNavigation"><small><div style="float: left;"><b>» <span class="PageLink 
     FirstOrLastElement"><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp
     ;currentpage=1">First Page</a></span> ... <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=925">925</a></span> 
     <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp
     ;currentpage=926">926</a></span> <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=927">927</a></span> 
     <span class="PageLink FirstOrLastElement"><span class="CurrentPageLink">Last Page</span></span></b></div><div 
     style="float: right;"><b>» Results: 23197</b></div></small></td> """
     soup = utils.parse_tibiacom_content(html, builder="html5lib")
     current_page, page_count, result_count = utils.parse_pagination(soup)
     # The current page has no link of its own, so it must be derived from its position.
     self.assertEqual(928, current_page)
     self.assertEqual(928, page_count)
     self.assertEqual(23197, result_count)
Пример #18
0
    def list_from_content(cls, content):
        """Extract the news entries listed in a news search results page.

        Parameters
        ----------
        content: :class:`str`
            Raw HTML of the news search page.

        Returns
        -------
        :class:`list` of :class:`ListedNews`
            One entry per result row found in the page.

        Raises
        ------
        InvalidContent
            If the page is not a news search results page.
        """
        invalid_message = "content is not from the news archive section in Tibia.com"
        try:
            soup = parse_tibiacom_content(content)
            # Only the first full-width table is inspected.
            results_table = soup.find_all("table", attrs={"width": "100%"})[0]
            heading = results_table.find("td", attrs={"class": "white", "colspan": "3"})
            if heading.text != "Search Results":
                raise InvalidContent(invalid_message)
            result_rows = results_table.find_all("tr", attrs={"class": ["Odd", "Even"]})
            cells_per_row = (row.find_all('td') for row in result_rows)
            # Rows that don't have exactly three cells are ignored.
            return [cls._parse_entry(cells) for cells in cells_per_row if len(cells) == 3]
        except (AttributeError, IndexError):
            raise InvalidContent(invalid_message)
Пример #19
0
    def from_content(cls, content):
        """Get a list of guilds from the HTML content of the world guilds' page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`GuildsSection`
            The guilds listed in the page. The section is built with ``None`` as its world
            when the form reports no selected world.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a world's guild list page.
        """
        try:
            parsed_content = parse_tibiacom_content(content)
            form = parsed_content.find("form")
            data = parse_form_data(form, include_options=True)
            selected_world = data["world"] or None
            available_worlds = [w for w in data["__options__"]["world"].values() if w]
            guilds = cls(selected_world, available_worlds=available_worlds)
        except (AttributeError, KeyError) as e:
            # KeyError: the page has a form, but it is not the world selector form.
            raise InvalidContent("Content does not belong to world guild list.", e)
        containers = parsed_content.find_all('div', class_="TableContainer")
        if not containers:
            # The world selector itself lives in a TableContainer, so a page with none is not a guild list.
            raise InvalidContent("Content does not belong to world guild list.")
        # First TableContainer contains world selector.
        for container in containers[1:]:
            title = container.find('div', class_="Text")
            # The container's title tells whether its guilds are active.
            active = "Active" in title.text
            # The first matching row is the table header; a container without rows yields no entries.
            rows = container.find_all("tr", {'bgcolor': ["#D4C0A1", "#F1E0C6"]})[1:]
            for row in rows:
                columns = row.find_all('td')
                logo_img = columns[0].find('img')["src"]
                # First text line is the guild's name, anything after it is the description.
                description_lines = columns[1].get_text("\n").split("\n", 1)
                name = description_lines[0]
                description = None
                if len(description_lines) > 1:
                    description = description_lines[1].replace("\r", "").replace("\n", " ")
                guilds.entries.append(GuildEntry(name, guilds.world, logo_img, description, active))
        return guilds
Пример #20
0
    def from_content(cls, content):
        """Build a :class:`World` from the HTML of its information page on Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            Raw HTML of the world's information page.

        Returns
        -------
        :class:`World`
            The world described in the page, or ``None`` if the page reports that
            no world with that name exists.

        Raises
        ------
        InvalidContent
            If the content is not the HTML of the world section in Tibia.com
        """
        soup = parse_tibiacom_content(content)
        tables = cls._parse_tables(soup)
        try:
            error_rows = tables.get("Error")
            if error_rows and error_rows[0].text == "World with this name doesn't exist!":
                return None
            # The world's name is taken from the selected option in the page.
            world = cls(soup.find('option', selected=True).text)
            world._parse_world_info(tables.get("World Information", []))

            world.online_players = []
            # The first row of the table is skipped (presumably the column headers).
            for player_row in tables.get("Players Online", [])[1:]:
                name, level, vocation = [
                    cell.text.replace('\xa0', ' ').strip()
                    for cell in player_row.find_all('td')
                ]
                player = OnlineCharacter(name, world.name, int(level), vocation)
                world.online_players.append(player)
        except AttributeError:
            raise InvalidContent("content is not from the world section in Tibia.com")
        return world
Пример #21
0
    def from_content(cls, content):
        """Create an instance of the class from the html content of the creature library's page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        An instance of this class
            Built from the boosted creature and the list of creatures found in the page.
            ``None`` if the first table of the page is not the boosted creature's box.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a creature library's page.
        """
        def entry_from_link(name, link):
            """Build a :class:`CreatureEntry` from the ``race`` query parameter of a link."""
            url = urllib.parse.urlparse(link["href"])
            query = urllib.parse.parse_qs(url.query)
            return CreatureEntry(name, query["race"][0])

        try:
            parsed_content = parse_tibiacom_content(content)
            boosted_creature_table = parsed_content.find("div", {"class": "TableContainer"})
            boosted_creature_text = boosted_creature_table.find("div", {"class": "Text"})
            if not boosted_creature_text or "Boosted" not in boosted_creature_text.text:
                return None
            boosted_creature_link = boosted_creature_table.find("a")
            boosted_creature = entry_from_link(boosted_creature_link.text, boosted_creature_link)

            list_table = parsed_content.find("div", style=lambda v: v and 'display: table' in v)
            entries_container = list_table.find_all("div", style=lambda v: v and 'float: left' in v)
            entries = [
                entry_from_link(entry_container.text.strip(), entry_container.find("a"))
                for entry_container in entries_container
            ]
            return cls(boosted_creature, entries)
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            # TypeError: an expected link is missing (None is not subscriptable).
            # KeyError: a link has no ``href`` attribute or no ``race`` parameter.
            raise InvalidContent("content is not the creature's library", e)
Пример #22
0
    def from_content(cls, content):
        """Build the news archive from the HTML of the news search page.

        Parameters
        ----------
        content: :class:`str`
            Raw HTML of the news search page.

        Returns
        -------
        :class:`NewsArchive`
            The archive with the search filters and any results found in the page.

        Raises
        ------
        InvalidContent
            If the page is not the news search page.
        """
        error_message = "content is not from the news archive section in Tibia.com"
        try:
            soup = parse_tibiacom_content(content)
            tables = parse_tibiacom_tables(soup)
            if "News Archive Search" not in tables:
                raise InvalidContent(error_message)
            news_archive = cls._parse_filtering(soup.find("form"))
            # Without a results table only the filters are returned.
            if "Search Results" not in tables:
                return news_archive
            result_rows = tables["Search Results"].find_all("tr", attrs={"class": ["Odd", "Even"]})
            for result_row in result_rows:
                cells = result_row.find_all('td')
                # Only rows with exactly three cells are treated as entries.
                if len(cells) == 3:
                    entry = cls._parse_entry(cells)
                    news_archive.entries.append(entry)
            return news_archive
        except (AttributeError, IndexError, ValueError, KeyError) as e:
            raise InvalidContent(error_message, e)
Пример #23
0
    def from_content(cls, content):
        """Create an instance of the class from the html content of a highscores page.

        Notes
        -----
        Tibia.com only shows up to 50 entries per page, so in order to obtain the full highscores, all pages must be
        obtained individually and merged into one.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`Highscores`
            The highscores results contained in the page, or ``None`` if the page reports
            that the world doesn't exist.

        Raises
        ------
        InvalidContent
            If content is not the HTML of a highscore's page.
        """
        parsed_content = parse_tibiacom_content(content)
        form = parsed_content.find("form")
        tables = cls._parse_tables(parsed_content)
        if form is None:
            # Without the filters form this is either the "unknown world" error page or unrelated content.
            if "Error" in tables and "The world doesn't exist!" in tables["Error"].text:
                return None
            raise InvalidContent("content is not from the highscores section of Tibia.com")
        highscores = cls(None)
        highscores._parse_filters_table(form)
        last_update_container = parsed_content.find("span", attrs={"class": "RightArea"})
        if last_update_container:
            # The number found in the text is read as minutes; no number means a zero timedelta.
            m = numeric_pattern.search(last_update_container.text)
            highscores.last_updated = datetime.timedelta(minutes=int(m.group(1))) if m else datetime.timedelta()
        entries_table = tables.get("Highscores")
        highscores._parse_entries_table(entries_table)
        return highscores
Пример #24
0
 def test_parse_pagination_collapsed_first_page(self):
     """Pagination with collapsed links where the current page is the first one (1 of 915)."""
     html = """<td class="PageNavigation"><small><div style="float: left;"><b>» <span class="PageLink 
     FirstOrLastElement"><span class="CurrentPageLink">First Page</span></span> <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=2">2</a></span> 
     <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp
     ;currentpage=3">3</a></span> <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=4">4</a></span> 
     <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp
     ;currentpage=5">5</a></span> <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=6">6</a></span> 
     <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp
     ;currentpage=7">7</a></span> ... <span class="PageLink FirstOrLastElement"><a 
     href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=915">Last 
     Page</a></span></b></div><div style="float: right;"><b>» Results: 22874</b></div></small></td>"""
     soup = utils.parse_tibiacom_content(html, builder="html5lib")
     current_page, page_count, result_count = utils.parse_pagination(soup)
     # Only the "Last Page" link carries the total number of pages here.
     self.assertEqual(1, current_page)
     self.assertEqual(915, page_count)
     self.assertEqual(22874, result_count)
Пример #25
0
    def from_content(cls, content):
        """Build a creature from the HTML of its page in the creature library.

        Parameters
        ----------
        content: :class:`str`
            Raw HTML of the creature's page.

        Returns
        -------
        :class:`Creature`
            The creature described in the page, or ``None`` if a :exc:`ValueError`
            is raised while reading it (e.g. the page doesn't have the expected containers).
        """
        try:
            soup = parse_tibiacom_content(content)
            relative_divs = soup.find_all("div", style=lambda v: v and 'position: relative' in v)
            # Exactly two such containers are expected; only the second one is read.
            _, content_container = relative_divs
            title_container, description_container = content_container.find_all("div")
            name = title_container.find("h2").text.strip()

            image_url = title_container.find("img")["src"]
            # The race identifier is the image's file name without its extension.
            race = image_url.split("/")[-1].replace(".gif", "")
            creature = cls(name, race)

            texts = [p.text for p in description_container.find_all("p")]
            # The last two paragraphs go to the hit points and experience parsers,
            # everything before them is the description.
            creature.description = "\n".join(texts[:-2])
            creature._parse_hp_text(texts[-2])
            creature._parse_exp_text(texts[-1])
            return creature
        except ValueError:
            return None
Пример #26
0
    def list_from_content(cls, content):
        """Read the boards listed in a forum section page of Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            Raw HTML of the board list.

        Returns
        -------
        :class:`list` of :class:`ListedBoard`
            The boards that could be parsed from the page.

        Raises
        ------
        InvalidContent
            Content is not a board list in Tibia.com
        """
        try:
            soup = parse_tibiacom_content(content)
            # Exactly three full-width tables are expected; the first one is not used.
            _, board_list_table, timezone_table = soup.find_all("table", attrs={"width": "100%"})
            _, *board_rows = board_list_table.find_all("tr")
            timezone = timezone_regex.search(timezone_table.text).group(1)
            if timezone == "CES":
                offset = 1
            else:
                offset = 2
            boards = []
            # The last three rows of the table are not boards.
            for board_row in board_rows[:-3]:
                try:
                    boards.append(cls._parse_board_row(board_row, offset))
                except IndexError:
                    # Rows missing the expected cells are skipped.
                    continue
            return boards
        except ValueError as e:
            raise errors.InvalidContent("content does not belong to a forum section.", e)
Пример #27
0
    def from_content(cls, content):
        """Build a :class:`Character` from the HTML of its Tibia.com page.

        Parameters
        ----------
        content: :class:`str`
            Raw HTML of the character's page.

        Returns
        -------
        :class:`Character`
            The character described in the page, or ``None`` if the page says
            the character could not be found.

        Raises
        ------
        InvalidContent
            If the page has no character information table.
        """
        soup = parse_tibiacom_content(content)
        tables = cls._parse_tables(soup)
        char = Character()
        if "Could not find character" in tables:
            return None
        if "Character Information" not in tables:
            raise InvalidContent("content does not contain a tibia.com character information page.")
        char._parse_character_information(tables["Character Information"])

        # Every remaining table is optional; missing ones are parsed as empty lists.
        char._parse_achievements(tables.get("Account Achievements", []))
        # Badges are only parsed when their table is present.
        if "Account Badges" in tables:
            char._parse_badges(tables["Account Badges"])
        char._parse_deaths(tables.get("Character Deaths", []))
        char._parse_account_information(tables.get("Account Information", []))
        char._parse_other_characters(tables.get("Characters", []))
        return char
Пример #28
0
 def test_parse_pagination_collapsed_middle(self):
     """Pagination with links collapsed on both sides of the current page (300 of 503)."""
     html = """<td class="PageNavigation"><small><div style="float: left;"><b>» <span class="PageLink 
     FirstOrLastElement"><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp
     ;currentpage=1">First Page</a></span> ... <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&currentpage=297">297</a></span> 
     <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades
     &currentpage=298">298</a></span> <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&currentpage=299">299</a></span> 
     <span class="PageLink "><span class="CurrentPageLink">300</span></span> <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&currentpage=301">301</a></span> 
     <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades
     &currentpage=302">302</a></span> <span class="PageLink "><a 
     href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&currentpage=303">303</a></span> 
     ... <span class="PageLink FirstOrLastElement"><a 
     href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&currentpage=503">Last 
     Page</a></span></b></div><div style="float: right;"><b>» Results: 12568</b></div></small></td> """
     soup = utils.parse_tibiacom_content(html, builder="html5lib")
     current_page, page_count, result_count = utils.parse_pagination(soup)
     # The current page is plain text between the links for pages 299 and 301.
     self.assertEqual(300, current_page)
     self.assertEqual(503, page_count)
     self.assertEqual(12568, result_count)
Пример #29
0
    def from_content(cls, content):
        """Creates an instance of the class from the html content of the thread's page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`ForumThread`
            The thread contained in the page, or None if the thread doesn't exist

        Raises
        ------
        InvalidContent
            If content is not the HTML of a thread's page.
        """
        def topic_number(link):
            """Read the thread number out of a previous/next topic link."""
            return int(thread_id_regex.search(link["href"]).group(1))

        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table")
        # Only tables placed directly inside the content box are relevant.
        root_tables = [t for t in tables if "BoxContent" in t.parent.attrs.get("class", [])]
        if not root_tables:
            error_table = parsed_content.find("table", attrs={"class": "Table1"})
            if error_table and "not found" in error_table.text:
                return None
            raise errors.InvalidContent("content is not a Tibia.com forum thread.")
        try:
            if len(root_tables) == 4:
                forum_info_table, title_table, posts_table, footer_table = root_tables
            else:
                # Without a posts table exactly three tables are expected; anything else is invalid.
                forum_info_table, title_table, footer_table = root_tables
                posts_table = None
        except ValueError as e:
            raise errors.InvalidContent("content is not a Tibia.com forum thread.", e)

        header_text = forum_info_table.text
        section, board, *_ = split_list(header_text, "|", "|")

        thread = cls(section=section, board=board)

        thread.title = title_table.text.strip()
        golden_frame = title_table.find("div", attrs={"class": "CipPost"})
        thread.golden_frame = golden_frame is not None

        timezone = timezone_regex.search(footer_table.text).group(1)
        # ``attrs`` has to be a dict; the previous set literal only filtered by class by accident.
        time_page_column, navigation_column = footer_table.find_all("td", attrs={"class": "ff_white"})
        page_links = time_page_column.find_all("a")
        if page_links:
            last_link = page_links[-1]["href"]
            thread.page = int(footer_table.find("span").text)
            # On the last page, the last link points to an earlier page, so the current page wins.
            thread.total_pages = max(int(page_number_regex.search(last_link).group(1)), thread.page)

        navigation_links = navigation_column.find_all("a")
        if len(navigation_links) == 2:
            prev_link, next_link = navigation_links
            thread.previous_topic_number = topic_number(prev_link)
            thread.next_topic_number = topic_number(next_link)
        elif navigation_links:
            only_link = navigation_links[0]
            if "Previous" in only_link.text:
                thread.previous_topic_number = topic_number(only_link)
            else:
                thread.next_topic_number = topic_number(only_link)
        # With no navigation links at all, both topic numbers keep their initial values.
        offset = 1 if timezone == "CES" else 2

        if posts_table:
            # The first "ForumPost" container holds the thread's information, the rest are posts.
            thread_info_table, *post_tables = posts_table.find_all("div", attrs={"class": "ForumPost"})
            inner_info_table = thread_info_table.find("table")
            thread_num_col, _, _ = inner_info_table.find_all("td")
            thread.thread_id = int(thread_num_col.text.replace("Thread #", ""))
            for post_table in post_tables:
                post = cls._parse_post_table(post_table, offset)
                thread.posts.append(post)
        return thread
Пример #30
0
    def from_content(cls, content):
        """Parses the board's HTML content from Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the board.

        Returns
        -------
        :class:`ForumBoard`
            The forum board contained. When the page has no age selector, only the board's
            section and name are set.

        Raises
        ------
        InvalidContent
            Content is not a board in Tibia.com
        """
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table")
        try:
            # At least five tables are required; any extra ones are ignored.
            header_table, time_selector_table, threads_table, timezone_table, boardjump_table, *_ = tables
        except ValueError as e:
            raise errors.InvalidContent("content is not a forum board", e)
        header_text = header_table.text.strip()
        section, name = split_list(header_text, "|", "|")

        board = cls(name=name, section=section)

        age_selector = time_selector_table.find("select")
        if not age_selector:
            # Nothing else is read from pages without an age selector.
            return board
        selected_age = age_selector.find("option", {"selected": True})
        if selected_age:
            board.age = int(selected_age["value"])

        board_selector = boardjump_table.find("select")
        selected_board = board_selector.find("option", {"selected": True})
        board.board_id = int(selected_board["value"])

        page_info = threads_table.find("td", attrs={"class": "ff_info"})
        if page_info:
            current_page_text = page_info.find("span")
            page_links = page_info.find_all("a")
            if current_page_text:
                board.page = int(current_page_text.text)
                if page_links:
                    last_linked_page = int(page_number_regex.search(page_links[-1]["href"]).group(1))
                    # On the last page, the last link points to an earlier page, so the current page wins.
                    board.total_pages = max(board.page, last_linked_page)
                else:
                    # No links to other pages: the current page is the only one.
                    board.total_pages = board.page

        # The first row of the threads table is skipped.
        for thread_row in threads_table.find_all("tr")[1:]:
            columns = thread_row.find_all("td")
            if len(columns) != 7:
                continue

            entry = cls._parse_thread_row(columns)
            if isinstance(entry, ListedThread):
                board.threads.append(entry)
                cip_border = thread_row.find("div", attrs={"class": "CipBorder"})
                if cip_border:
                    entry.golden_frame = True
            elif isinstance(entry, ListedAnnouncement):
                board.announcements.append(entry)

        return board