def from_content(cls, content):
    """Create an instance of the class from the HTML of the tournament's leaderboards page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`TournamentLeaderboard`
        The tournament contained in the page, or None if the tournament leaderboard doesn't exist.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a tournament's leaderboard page.
    """
    try:
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all('div', attrs={'class': 'TableContainer'})
        if not tables:
            raise InvalidContent("content does not belong to the Tibia.com's tournament leaderboards section")
        leaderboard = cls()
        # The first container holds the selectors; a falsy result means there is nothing to parse.
        if not leaderboard._parse_leaderboard_selectors(tables[0]):
            return None
        # The ranking is expected in the second container. If it is missing, the
        # resulting IndexError is reported as InvalidContent instead of leaking out.
        leaderboard._parse_leaderboard_entries(tables[1])
        return leaderboard
    except (AttributeError, IndexError) as e:
        raise InvalidContent("content does not belong to the Tibia.com's tournament leaderboards section", e)
def from_content(cls, content):
    """Build a world overview from the HTML of Tibia.com's World Overview section.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the World Overview page in Tibia.com

    Returns
    -------
    :class:`WorldOverview`
        An instance of this class containing all the information.

    Raises
    ------
    InvalidContent
        If the provided content is not the HTML content of the worlds section in Tibia.com
    """
    soup = parse_tibiacom_content(content)
    overview = WorldOverview()
    try:
        # The first matching table carries the online record; the rest list the worlds.
        first_table, *world_tables = soup.find_all("table", {"class": "TableContent"})
        record_match = record_regexp.search(first_table.text)
        overview.record_count = parse_integer(record_match.group("count"))
        overview.record_date = parse_tibia_datetime(record_match.group("date"))
        overview._parse_worlds_tables(world_tables)
    except (AttributeError, KeyError, ValueError) as e:
        raise InvalidContent("content does not belong to the World Overview section in Tibia.com", e)
    else:
        return overview
def parse_parse_pagination_not_collapsed_first_page(self):
    """Parse a pagination block showing page 1 of 8 with every page number listed."""
    # NOTE(review): the name starts with `parse_` while sibling tests start with `test_`;
    # unittest's default loader only collects `test*` methods - confirm this is intended.
    html = """<small><div style="float: left;"><b>» Pages: <span class="PageLink "><span class="CurrentPageLink">1</span></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid=29122&type=0& ;currentpage=2">2</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid =29122&type=0&currentpage=3">3</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid=29122&type=0& ;currentpage=4">4</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid =29122&type=0&currentpage=5">5</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid=29122&type=0& ;currentpage=6">6</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid =29122&type=0&currentpage=7">7</a></span> <span class="PageLink "><a class="CipAjaxLink" ajaxcip="true" ajaxcip_datatype="Container" href="https://www.tibia.com/charactertrade/ajax_getcharacterdata.php?auctionid=29122&type=0& ;currentpage=8">8</a></span></b></div><div style="float: right;"><b>» Results: 567</b></div></small>"""
    soup = utils.parse_tibiacom_content(html, builder="html5lib")
    pagination = utils.parse_pagination(soup)
    current_page, page_count, result_total = pagination
    self.assertEqual(1, current_page)
    self.assertEqual(8, page_count)
    self.assertEqual(567, result_total)
def from_content(cls, content, announcement_id=0):
    """Build a forum announcement from the HTML of its page in Tibia.com.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of an announcement in Tibia.com
    announcement_id: :class:`int`
        The id of the announcement. The page itself is not used to obtain the id,
        so it may be passed here to be assigned to the result.

    Returns
    -------
    :class:`ForumAnnouncement`
        The announcement contained in the page or :obj:`None` if not found.

    Raises
    ------
    InvalidContent
        If content is not the HTML content of an announcement page in Tibia.com
    """
    soup = parse_tibiacom_content(content)
    # Only full-width tables whose parent carries the "BoxContent" class are relevant.
    root_tables = []
    for candidate in soup.find_all("table", attrs={"width": "100%"}):
        if "BoxContent" in candidate.parent.attrs.get("class", []):
            root_tables.append(candidate)
    if not root_tables:
        error_table = soup.find("table", attrs={"class": "Table1"})
        if error_table and "not be found" in error_table.text:
            return None
        raise errors.InvalidContent("content is not a Tibia.com forum announcement.")
    forum_info_table, posts_table, footer_table = root_tables

    # The first two links of the info table point to the section and the board.
    section_link, board_link, *_ = forum_info_table.find_all("a")
    section_name = section_link.text
    section_number = int(section_id_regex.search(section_link["href"]).group(1))
    board_name = board_link.text
    board_number = int(board_id_regex.search(board_link["href"]).group(1))
    announcement = cls(
        section=section_name,
        section_id=section_number,
        board=board_name,
        board_id=board_number,
        announcement_id=announcement_id,
    )

    # NOTE(review): the offset is 1 only when the captured group equals "CES";
    # confirm that timezone_regex actually yields that exact value.
    timezone_name = timezone_regex.search(footer_table.text).group(1)
    if timezone_name == "CES":
        offset = 1
    else:
        offset = 2

    post_cell = posts_table.find("td", attrs={"class": "CipPost"})
    author_container = post_cell.find("div", attrs={"class": "PostCharacterText"})
    announcement.author = ForumAuthor._parse_author_table(author_container)

    post_text = posts_table.find("div", attrs={"class": "PostText"})
    announcement.title = post_text.find("b").text
    raw_dates = post_dates_regex.findall(post_text.find("font").text)

    # Everything after the first "<hr/>" of the post's inner HTML is the body.
    inner_html = post_text.encode_contents().decode()
    _, body_html = inner_html.split("<hr/>", 1)
    announcement.content = body_html

    parsed_dates = (parse_tibia_forum_datetime(raw_date, offset) for raw_date in raw_dates)
    announcement.start_date, announcement.end_date = parsed_dates
    return announcement
def from_content(cls, content):
    """Build a highscores object from the HTML content of a highscores page.

    Notes
    -----
    Tibia.com only shows up to 50 entries per page, so in order to obtain the full highscores, all pages must be
    obtained individually and merged into one.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`Highscores`
        The highscores results contained in the page, or None when the page has no "Highscores" table.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a highscore's page.
    """
    soup = parse_tibiacom_content(content)
    tables = cls._parse_tables(soup)
    filter_rows = tables.get("Highscores Filter")
    if filter_rows is None:
        raise InvalidContent("content does is not from the highscores section of Tibia.com")
    world_filter, vocation_filter, category_filter = filter_rows

    # The "ALL" selector value is represented as no world at all.
    world_value = world_filter.find("option", {"selected": True})["value"]
    world = None if world_value == "ALL" else world_value
    category = int(category_filter.find("option", {"selected": True})["value"])
    vocation_option = vocation_filter.find("option", {"selected": True})
    vocation = 0
    if vocation_option:
        vocation = int(vocation_option["value"])
    highscores = cls(world, category, vocation=vocation)

    update_span = soup.find("span", attrs={"class": "RightArea"})
    if update_span:
        minutes_match = numeric_pattern.search(update_span.text)
        if minutes_match:
            highscores.last_updated = datetime.timedelta(minutes=int(minutes_match.group(1)))
        else:
            highscores.last_updated = datetime.timedelta()

    entry_rows = tables.get("Highscores")
    if entry_rows is None:
        return None
    # Skip the two leading rows; the last row holds pagination and result count.
    _, _, *rows = entry_rows
    footer_row = rows.pop()
    pages_div, results_div = footer_row.find_all("div")
    links = pages_div.find_all("a")
    linked_pages = [int(link.text) for link in links]
    if linked_pages:
        # The page is the first number up to the last link that has no link of
        # its own (presumably the current page is rendered as plain text); 0 if none.
        highscores.page = next(
            (number for number in range(1, linked_pages[-1] + 1) if number not in linked_pages),
            0,
        )
        highscores.total_pages = max(int(links[-1].text), highscores.page)
    highscores.results_count = int(results_pattern.search(results_div.text).group(1))

    for row in rows:
        cells = row.find_all('td')
        if "There is currently no data" in cells[0].text:
            break
        highscores._parse_entry(cells)
    return highscores
def from_content(cls, content):
    """Build a leaderboard from the HTML content of the leaderboards page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the leaderboards page.

    Returns
    -------
    :class:`Leaderboard`
        The leaderboard if found.

    Raises
    ------
    InvalidContent
        If an :class:`AttributeError` or :class:`ValueError` occurs while parsing the content.
    """
    try:
        soup = parse_tibiacom_content(content)
        content_tables = soup.find_all("table", {"class": "TableContent"})
        form_data = parse_form_data(soup.find("form"), include_options=True)
        world = form_data["world"]

        selected_rotation = None
        rotations = []
        for label, value in form_data["__options__"]["rotation"].items():
            is_current = "Current" in label
            # For the current rotation, only the parts matched by the pattern are kept.
            end_text = "".join(rotation_end_pattern.findall(label)) if is_current else label
            rotation_end = parse_tibia_datetime(end_text)
            rotation = LeaderboardRotation(int(value), rotation_end, is_current)
            if value == form_data["rotation"]:
                selected_rotation = rotation
            rotations.append(rotation)

        leaderboard = cls(world, selected_rotation)
        leaderboard.available_worlds = [
            name for name in form_data["__options__"]["world"].values() if name
        ]
        leaderboard.available_rotations = rotations

        # The update age is only read when the selected rotation is the current one.
        if leaderboard.rotation and leaderboard.rotation.current:
            found_numbers = re.findall(r'(\d+)', content_tables[2].text)
            if found_numbers:
                leaderboard.last_update = datetime.timedelta(minutes=int(found_numbers[0]))
        leaderboard._parse_entries(content_tables[-1])

        pager = soup.find("small")
        if pager:
            page, total_pages, results = parse_pagination(pager)
        else:
            page, total_pages, results = (0, 0, 0)
        leaderboard.page = page
        leaderboard.total_pages = total_pages
        leaderboard.results_count = results
        return leaderboard
    except (AttributeError, ValueError) as e:
        raise errors.InvalidContent("content does not belong to the leaderboards", e)
def from_content(cls, content):
    """Build the kill statistics from the HTML content of the kill statistics' page.

    Parameters
    -----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    ----------
    :class:`KillStatistics`
        The kill statistics contained in the page or None if it doesn't exist.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a kill statistics' page.
    """
    try:
        soup = parse_tibiacom_content(content)
        selector = soup.find('div', attrs={'class': 'TableContainer'})
        world = selector.find("option", {"selected": True})["value"]
        stats_table = soup.find('table', attrs={'border': '0', 'cellpadding': '3'})
        # Without an entries table the page belongs to a nonexistent or unselected world.
        if stats_table is None:
            return None
        _header, _subheader, *rows = stats_table.find_all('tr')
        races = {}
        totals = None
        last_index = len(rows) - 1
        for index, row in enumerate(rows):
            cells = [cell.text.replace('\xa0', ' ').strip() for cell in row.find_all('td')]
            entry = RaceEntry(
                last_day_players_killed=int(cells[1]),
                last_day_killed=int(cells[2]),
                last_week_players_killed=int(cells[3]),
                last_week_killed=int(cells[4]),
            )
            # The final row is kept apart as the total instead of being stored by name.
            if index != last_index:
                races[cells[0]] = entry
            else:
                totals = entry
        return cls(world, races, totals)
    except AttributeError:
        raise InvalidContent("content does not belong to a Tibia.com kill statistics page.")
def from_content(cls, content):
    """Parse a Tibia.com response into a House object.

    Parameters
    ----------
    content: :class:`str`
        HTML content of the page.

    Returns
    -------
    :class:`House`
        The house contained in the page, or None if the house doesn't exist.

    Raises
    ------
    InvalidContent
        If the content is not the house section on Tibia.com
    """
    parsed_content = parse_tibiacom_content(content)
    try:
        # At least two cells are required: the image column and the description column.
        image_column, desc_column, *_ = parsed_content.find_all('td')
    except ValueError:
        # Fewer than two cells means this cannot be a house page; report it with the
        # documented exception instead of leaking the unpacking ValueError.
        raise InvalidContent("content does is not from the house section of Tibia.com")
    if "Error" in image_column.text:
        return None
    image = image_column.find('img')
    # Turn line breaks into newlines so the description can be read line by line.
    for br in desc_column.find_all("br"):
        br.replace_with("\n")
    description = desc_column.text.replace("\u00a0", " ").replace("\n\n", "\n")
    lines = description.splitlines()
    try:
        name, beds, info, state, *_ = lines
    except ValueError:
        raise InvalidContent("content does is not from the house section of Tibia.com")

    house = cls(name.strip())
    house.image_url = image["src"]
    house.id = int(id_regex.search(house.image_url).group(1))

    m = bed_regex.search(beds)
    if m:
        if m.group("type").lower() in ["guildhall", "clanhall"]:
            house.type = HouseType.GUILDHALL
        else:
            house.type = HouseType.HOUSE
        house.beds = int(m.group("beds"))

    m = info_regex.search(info)
    if m:
        house.world = m.group("world")
        house.rent = parse_tibia_money(m.group("rent"))
        house.size = int(m.group("size"))

    house._parse_status(state)
    return house
def from_content(cls, content):
    """Parse the content of a spells page.

    Parameters
    -----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    ----------
    :class:`Spell`
        The spell data. If the spell doesn't exist, this will be :obj:`None`.

    Raises
    ------
    InvalidContent
        If content is not the HTML of the spells section.
    """
    parsed_content = parse_tibiacom_content(content)
    try:
        tables = parse_tibiacom_tables(parsed_content)
        title_table = parsed_content.find("table", attrs={"class": False})
        spell_table = tables["Spell Information"]
        img = title_table.find("img")
        # The identifier is the image's file name without its extension.
        url = urllib.parse.urlparse(img["src"])
        filename = os.path.basename(url.path)
        identifier = str(filename.split(".")[0])

        # The description is made of the nodes between the title table and the next table.
        description_parts = []
        next_sibling = title_table.next_sibling
        while next_sibling:
            if isinstance(next_sibling, bs4.Tag):
                if next_sibling.name == "br":
                    description_parts.append("\n")
                elif next_sibling.name == "table":
                    break
                else:
                    description_parts.append(next_sibling.text)
            elif isinstance(next_sibling, bs4.NavigableString):
                description_parts.append(str(next_sibling))
            next_sibling = next_sibling.next_sibling

        spell = cls._parse_spells_table(identifier, spell_table)
        spell.description = "".join(description_parts).strip()
        if "Rune Information" in tables:
            spell.rune = cls._parse_rune_table(tables["Rune Information"])
        return spell
    except (TypeError, AttributeError, IndexError, KeyError) as e:
        # A spells form without spell data is treated as "spell not found".
        form = parsed_content.find("form")
        if form:
            data = parse_form_data(form)
            # `get` yields None when "__action__" is missing (or stored as None); fall back
            # to an empty string so the membership test cannot raise a TypeError that
            # would hide the InvalidContent raised below.
            if "subtopic=spells" in (data.get("__action__") or ""):
                return None
        raise errors.InvalidContent("content is not a spell page", e)
def from_content(cls, content):
    """Build a guild's war information from the HTML of its wars section in Tibia.com.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of a guild's war section in Tibia.com

    Returns
    -------
    :class:`GuildWars`
        The guild's war information.

    Raises
    ------
    InvalidContent
        If a :class:`ValueError` occurs while parsing the content.
    """
    def text_with_newlines(node):
        # Replace every <br> with a newline character before reading the text.
        for line_break in node.find_all("br"):
            line_break.replace_with("\n")
        return node.text

    try:
        soup = parse_tibiacom_content(content)
        # Exactly two containers are expected: current war first, history second.
        current_container, history_container = soup.find_all("div", attrs={"class": "TableContainer"})
        current_content = current_container.find("table", attrs={"class": "TableContent"})
        active_war = None
        name = None
        if current_content is None:
            # Without a content table, the name is read from the container's text.
            name = war_current_empty.search(current_container.text).group(1)
        else:
            active_war = cls._parse_current_war_information(text_with_newlines(current_content))

        history_tables = history_container.find_all("table", attrs={"class": "TableContent"})
        past_wars = [
            cls._parse_war_history_entry(text_with_newlines(history_table))
            for history_table in history_tables
        ]

        # A parsed war takes precedence over the name found in the text.
        if active_war:
            name = active_war.guild_name
        elif past_wars:
            name = past_wars[0].guild_name
        return cls(name, current=active_war, history=past_wars)
    except ValueError as e:
        raise InvalidContent("content does not belong to the guild wars section", e)
def list_from_content(cls, content):
    """Build a list of guilds from the HTML content of the world guilds' page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`list` of :class:`ListedGuild`
        List of guilds in the current world. ``None`` if it's the list of a world that doesn't exist.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a guild's page.
    """
    soup = parse_tibiacom_content(content)
    world_option = soup.find('option', selected=True)
    try:
        # The placeholder option means the page belongs to a world that doesn't exist.
        if "choose world" in world_option.text:
            return None
        world = world_option.text
    except AttributeError:
        raise InvalidContent("Content does not belong to world guild list.")

    # The first TableContainer contains the world selector, so it is skipped.
    _, *guild_containers = soup.find_all('div', class_="TableContainer")
    found = []
    for guild_container in guild_containers:
        caption = guild_container.find('div', class_="Text")
        is_active = "Active" in caption.text
        _heading, *guild_rows = guild_container.find_all("tr", {'bgcolor': ["#D4C0A1", "#F1E0C6"]})
        for guild_row in guild_rows:
            cells = guild_row.find_all('td')
            logo_url = cells[0].find('img')["src"]
            # The first text line is the name; anything after it is the description.
            guild_name, *remainder = cells[1].get_text("\n").split("\n", 1)
            if remainder:
                guild_description = remainder[0].replace("\r", "").replace("\n", " ")
            else:
                guild_description = None
            found.append(cls(guild_name, world, logo_url, guild_description, is_active))
    return found
def from_content(cls, content):
    """Build a house section from the HTML of a house list in Tibia.com.

    Parameters
    ----------
    content: :class:`str`
        The raw HTML response from the house list.

    Returns
    -------
    :class:`HouseSection`
        The houses found in the page.

    Raises
    ------
    InvalidContent
        Content is not the house list from Tibia.com
    """
    try:
        soup = parse_tibiacom_content(content)
        tables = parse_tibiacom_tables(soup)
        section = cls()
        section._parse_filters(tables["House Search"])
        # With fewer than two tables there are no results to read.
        if len(tables) < 2:
            return section
        first_key = next(iter(tables.keys()))
        _, *rows = tables[first_key].find_all("tr")
        for row in rows[1:]:
            cells = row.find_all("td")
            if len(cells) != 5:
                continue
            name_cell, size_cell, rent_cell, status_cell, action_cell = cells
            house = HouseEntry(
                name_cell.text.replace('\u00a0', ' '),
                section.world,
                0,
                town=section.town,
                type=section.house_type,
            )
            house.size = int(size_cell.text.replace('sqm', ''))
            house.rent = parse_tibia_money(rent_cell.text.replace('gold', ''))
            house._parse_status(status_cell.text.replace('\xa0', ' '))
            # The id comes from the "houseid" input of the last cell.
            house.id = int(action_cell.find("input", {'name': 'houseid'})["value"])
            section.entries.append(house)
        return section
    except (ValueError, AttributeError, KeyError) as e:
        raise InvalidContent("content does not belong to a Tibia.com house list", e)
def from_content(cls, content):
    """Create an instance of the class from the HTML content of the tournament's page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`Tournament`
        The tournament contained in the page, or None if the tournament doesn't exist.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a tournament's page.
    """
    try:
        # Both messages are answered with "no tournament" rather than an error.
        if "An internal error has occurred" in content:
            return None
        if "Currently there is no Tournament running." in content:
            return None
        parsed_content = parse_tibiacom_content(content, builder='html5lib')
        # `find` returns None when the div is missing; the AttributeError that follows
        # is reported as InvalidContent by the handler below.
        box_content = parsed_content.find("div", attrs={"class": "BoxContent"})
        tables = box_content.find_all('table', attrs={"class": "Table5"})
        archive_table = box_content.find('table', attrs={"class": "Table4"})
        # The details live in the last "Table5"; its inner tables are read by position.
        tournament_details_table = tables[-1]
        info_tables = tournament_details_table.find_all('table', attrs={'class': 'TableContent'})
        main_info = info_tables[0]
        rule_set = info_tables[1]
        score_set = info_tables[2]
        reward_set = info_tables[3]
        tournament = cls()
        tournament._parse_tournament_info(main_info)
        tournament._parse_tournament_rules(rule_set)
        tournament._parse_tournament_scores(score_set)
        tournament._parse_tournament_rewards(reward_set)
        if archive_table:
            tournament._parse_archive_list(archive_table)
        return tournament
    except (AttributeError, IndexError) as e:
        raise InvalidContent("content does not belong to the Tibia.com's tournament section", e)
def from_content(cls, content):
    """Build a highscores object from the HTML content of a highscores page.

    Notes
    -----
    Tibia.com only shows up to 25 entries per page, so in order to obtain the full highscores, all 12 pages must be
    parsed and merged into one.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`Highscores`
        The highscores results contained in the page. None when no world is selected or
        the page has no "Highscores" table.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a highscore's page.
    """
    soup = parse_tibiacom_content(content)
    tables = cls._parse_tables(soup)
    filter_rows = tables.get("Highscores Filter")
    if filter_rows is None:
        raise InvalidContent("content does is not from the highscores section of Tibia.com")
    world_filter, vocation_filter, category_filter = filter_rows

    world = world_filter.find("option", {"selected": True})["value"]
    # An empty selector value means that no world was chosen.
    if world == "":
        return None
    category = category_filter.find("option", {"selected": True})["value"]
    vocation_option = vocation_filter.find("option", {"selected": True})
    if vocation_option:
        vocation = int(vocation_option["value"])
    else:
        vocation = 0
    highscores = cls(world, category, vocation=vocation)

    entry_rows = tables.get("Highscores")
    if entry_rows is None:
        return None
    # Skip the two leading rows; the last row carries the results count.
    _, _, *rows = entry_rows
    footer_row = rows.pop()
    highscores.results_count = int(results_pattern.search(footer_row.text).group(1))
    for row in rows:
        cells = row.find_all('td')
        if "There is currently no data" in cells[0].text:
            break
        highscores._parse_entry(cells)
    return highscores
def list_from_content(cls, content):
    """Build a list of houses from the HTML of a house list in Tibia.com.

    Parameters
    ----------
    content: :class:`str`
        The raw HTML response from the house list.

    Returns
    -------
    :class:`list` of :class:`ListedHouse`
        The houses listed in the page, or None when the header doesn't match the expected format.

    Raises
    ------
    InvalidContent
        Content is not the house list from Tibia.com
    """
    try:
        soup = parse_tibiacom_content(content)
        header, *rows = soup.find("table").find_all("tr")
    except (ValueError, AttributeError):
        raise InvalidContent("content does not belong to a Tibia.com house list")

    header_match = list_header_regex.match(header.text.strip())
    if not header_match:
        return None
    town = header_match.group("town")
    world = header_match.group("world")
    if header_match.group("type") == "Guildhalls":
        house_type = HouseType.GUILDHALL
    else:
        house_type = HouseType.HOUSE

    found = []
    for row in rows[1:]:
        cells = row.find_all("td")
        if len(cells) != 6:
            continue
        # The fifth cell is not read; the id is taken from the sixth.
        name_cell, size_cell, rent_cell, status_cell, _, action_cell = cells
        house = ListedHouse(name_cell.text.replace('\u00a0', ' '), world, 0, town=town, type=house_type)
        house.size = int(size_cell.text.replace('sqm', ''))
        house.rent = parse_tibia_money(rent_cell.text.replace('gold', ''))
        house._parse_status(status_cell.text.replace('\xa0', ' '))
        house.id = int(action_cell.find("input", {'name': 'houseid'})["value"])
        found.append(house)
    return found
def from_content(cls, content):
    """Build a guild from the HTML content of the guild's page.

    Parameters
    -----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    ----------
    :class:`Guild`
        The guild contained in the page or None if it doesn't exist.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a guild's page.
    """
    if "An internal error has occurred" in content:
        return None

    soup = parse_tibiacom_content(content)
    try:
        # The guild's name is the text of the first <h1> heading.
        heading = soup.find('h1')
        guild = Guild(heading.text.strip())
    except AttributeError:
        raise InvalidContent("content does not belong to a Tibia.com guild page.")

    logo_found = guild._parse_logo(soup)
    if not logo_found:
        raise InvalidContent("content does not belong to a Tibia.com guild page.")

    # Every information parser reads from the same container, in this order.
    info_container = soup.find("div", id="GuildInformationContainer")
    info_parsers = (
        guild._parse_guild_info,
        guild._parse_application_info,
        guild._parse_guild_homepage,
        guild._parse_guild_guildhall,
        guild._parse_guild_disband_info,
    )
    for parse_info in info_parsers:
        parse_info(info_container)
    guild._parse_guild_members(soup)

    # The first listed member is recorded as the guildhall's owner.
    if guild.guildhall and guild.members:
        guild.guildhall.owner = guild.members[0].name
    return guild
def test_parse_pagination_collapse_last_page(self):
    """Parse a collapsed pagination block where the current page is the last of 928."""
    html = """<td class="PageNavigation"><small><div style="float: left;"><b>» <span class="PageLink FirstOrLastElement"><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades& ;currentpage=1">First Page</a></span> ... <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&currentpage=925">925</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades& ;currentpage=926">926</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&currentpage=927">927</a></span> <span class="PageLink FirstOrLastElement"><span class="CurrentPageLink">Last Page</span></span></b></div><div style="float: right;"><b>» Results: 23197</b></div></small></td> """
    soup = utils.parse_tibiacom_content(html, builder="html5lib")
    pagination = utils.parse_pagination(soup)
    current_page, page_count, result_total = pagination
    self.assertEqual(928, current_page)
    self.assertEqual(928, page_count)
    self.assertEqual(23197, result_total)
def list_from_content(cls, content):
    """Build a list of news from the HTML content of the news search page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`list` of :class:`ListedNews`
        List of news in the search results.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a news search's page.
    """
    try:
        soup = parse_tibiacom_content(content)
        full_width_tables = soup.find_all("table", attrs={"width": "100%"})
        results_table = full_width_tables[0]
        title_cell = results_table.find("td", attrs={"class": "white", "colspan": "3"})
        if title_cell.text != "Search Results":
            raise InvalidContent("content is not from the news archive section in Tibia.com")
        found = []
        for result_row in results_table.find_all("tr", attrs={"class": ["Odd", "Even"]}):
            cells = result_row.find_all('td')
            # Only rows with exactly three cells are news entries.
            if len(cells) == 3:
                found.append(cls._parse_entry(cells))
        return found
    except (AttributeError, IndexError):
        raise InvalidContent("content is not from the news archive section in Tibia.com")
def from_content(cls, content):
    """Build a guilds section from the HTML content of the world guilds' page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`GuildsSection`
        The guilds section with the guilds found in the page. Its world is :obj:`None`
        when the form has no world selected.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a guild's page.
    """
    try:
        soup = parse_tibiacom_content(content)
        form_data = parse_form_data(soup.find("form"), include_options=True)
        # A falsy selector value is stored as no world at all.
        world = form_data["world"] or None
        world_names = [name for name in form_data["__options__"]["world"].values() if name]
        section = cls(world, available_worlds=world_names)
    except AttributeError as e:
        raise InvalidContent("Content does not belong to world guild list.", e)

    # The first TableContainer contains the world selector, so it is skipped.
    _, *guild_containers = soup.find_all('div', class_="TableContainer")
    for guild_container in guild_containers:
        caption = guild_container.find('div', class_="Text")
        is_active = "Active" in caption.text
        _heading, *guild_rows = guild_container.find_all("tr", {'bgcolor': ["#D4C0A1", "#F1E0C6"]})
        for guild_row in guild_rows:
            cells = guild_row.find_all('td')
            logo_url = cells[0].find('img')["src"]
            # The first text line is the name; anything after it is the description.
            guild_name, *remainder = cells[1].get_text("\n").split("\n", 1)
            if remainder:
                guild_description = remainder[0].replace("\r", "").replace("\n", " ")
            else:
                guild_description = None
            section.entries.append(
                GuildEntry(guild_name, section.world, logo_url, guild_description, is_active)
            )
    return section
def from_content(cls, content):
    """Build a :class:`World` from a Tibia.com response.

    Parameters
    ----------
    content: :class:`str`
        The raw HTML from the server's information page.

    Returns
    -------
    :class:`World`
        The World described in the page, or ``None``.

    Raises
    ------
    InvalidContent
        If the provided content is not the html content of the world section in Tibia.com
    """
    soup = parse_tibiacom_content(content)
    tables = cls._parse_tables(soup)
    try:
        error_rows = tables.get("Error")
        if error_rows and error_rows[0].text == "World with this name doesn't exist!":
            return None
        # The world's name is the text of the selected option.
        selected_option = soup.find('option', selected=True)
        world = cls(selected_option.text)
        world._parse_world_info(tables.get("World Information", []))
        player_rows = tables.get("Players Online", [])
        world.online_players = []
        # The first row of the online table is skipped.
        for player_row in player_rows[1:]:
            cells = player_row.find_all('td')
            name, level, vocation = (cell.text.replace('\xa0', ' ').strip() for cell in cells)
            character = OnlineCharacter(name, world.name, int(level), vocation)
            world.online_players.append(character)
        return world
    except AttributeError:
        raise InvalidContent("content is not from the world section in Tibia.com")
def from_content(cls, content):
    """Build an instance of this class from the HTML content of the creature library's page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    An instance of this class holding the boosted creature and the listed creatures,
    or None when the first container has no "Boosted" text.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a creature library's page.
    """
    def style_contains(fragment):
        # Filter matching tags whose style attribute is set and contains the fragment.
        return lambda value: value and fragment in value

    def race_of(anchor):
        # The race identifier is the "race" query parameter of the link's URL.
        link_parts = urllib.parse.urlparse(anchor["href"])
        return urllib.parse.parse_qs(link_parts.query)["race"][0]

    try:
        soup = parse_tibiacom_content(content)
        boosted_container = soup.find("div", {"class": "TableContainer"})
        boosted_label = boosted_container.find("div", {"class": "Text"})
        if not boosted_label or "Boosted" not in boosted_label.text:
            return None
        boosted_link = boosted_container.find("a")
        boosted_race = race_of(boosted_link)
        boosted = CreatureEntry(boosted_link.text, boosted_race)

        listing = soup.find("div", style=style_contains('display: table'))
        creature_cells = listing.find_all("div", style=style_contains('float: left'))
        creatures = []
        for creature_cell in creature_cells:
            creature_name = creature_cell.text.strip()
            creature_race = race_of(creature_cell.find("a"))
            creatures.append(CreatureEntry(creature_name, creature_race))
        return cls(boosted, creatures)
    except (AttributeError, ValueError) as e:
        raise InvalidContent("content is not the creature's library", e)
def from_content(cls, content):
    """Build a news archive from the HTML content of the news search page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`NewsArchive`
        The news archive with the news found.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a news search's page.
    """
    try:
        soup = parse_tibiacom_content(content)
        tables = parse_tibiacom_tables(soup)
        if "News Archive Search" not in tables:
            raise InvalidContent("content is not from the news archive section in Tibia.com")
        archive = cls._parse_filtering(soup.find("form"))
        # Entries are only read when the page has a results table.
        if "Search Results" not in tables:
            return archive
        result_rows = tables["Search Results"].find_all("tr", attrs={"class": ["Odd", "Even"]})
        for result_row in result_rows:
            cells = result_row.find_all('td')
            # Only rows with exactly three cells are news entries.
            if len(cells) == 3:
                archive.entries.append(cls._parse_entry(cells))
        return archive
    except (AttributeError, IndexError, ValueError, KeyError) as e:
        raise InvalidContent("content is not from the news archive section in Tibia.com", e)
def from_content(cls, content):
    """Build a highscores object from the HTML content of a highscores page.

    Notes
    -----
    Tibia.com only shows up to 50 entries per page, so in order to obtain the full highscores, all pages must be
    obtained individually and merged into one.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`Highscores`
        The highscores results contained in the page, or None when the page reports
        that the world doesn't exist.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a highscore's page.
    """
    soup = parse_tibiacom_content(content)
    filters_form = soup.find("form")
    tables = cls._parse_tables(soup)
    if filters_form is None:
        # Without a form, the only accepted page is the "world doesn't exist" error.
        world_missing = "Error" in tables and "The world doesn't exist!" in tables["Error"].text
        if world_missing:
            return None
        raise InvalidContent("content does is not from the highscores section of Tibia.com")

    highscores = cls(None)
    highscores._parse_filters_table(filters_form)

    update_span = soup.find("span", attrs={"class": "RightArea"})
    if update_span:
        minutes_match = numeric_pattern.search(update_span.text)
        if minutes_match:
            highscores.last_updated = datetime.timedelta(minutes=int(minutes_match.group(1)))
        else:
            highscores.last_updated = datetime.timedelta()

    highscores._parse_entries_table(tables.get("Highscores"))
    return highscores
def test_parse_pagination_collapsed_first_page(self):
    """Parsing a collapsed pagination block with current page 1 out of 915."""
    # Query-string separators are written as ``&amp;`` so every href is a valid URL once parsed
    # and the fixture does not depend on how a parser treats a bare ``&curren...`` sequence.
    content = """<td class="PageNavigation"><small><div style="float: left;"><b>» <span class="PageLink FirstOrLastElement"><span class="CurrentPageLink">First Page</span></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=2">2</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=3">3</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=4">4</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=5">5</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=6">6</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=7">7</a></span> ... <span class="PageLink FirstOrLastElement"><a href="https://www.tibia.com/charactertrade/?subtopic=pastcharactertrades&amp;currentpage=915">Last Page</a></span></b></div><div style="float: right;"><b>» Results: 22874</b></div></small></td>"""
    parsed_content = utils.parse_tibiacom_content(content, builder="html5lib")
    page, total_pages, results_count = utils.parse_pagination(parsed_content)
    self.assertEqual(1, page)
    self.assertEqual(915, total_pages)
    self.assertEqual(22874, results_count)
def from_content(cls, content):
    """Build a creature from the html content of its page in the creature library.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`Creature`
        The creature contained in the page, or ``None`` if a :exc:`ValueError` is raised
        while parsing (e.g. the page doesn't have the expected number of containers).
    """
    try:
        document = parse_tibiacom_content(content)
        relative_divs = document.find_all("div", style=lambda v: v and 'position: relative' in v)
        # Exactly two matching containers are expected; only the second one is used.
        _, body = relative_divs
        header, description_block = body.find_all("div")
        name = header.find("h2").text.strip()
        image_url = header.find("img")["src"]
        # The race identifier is the image's file name without its ".gif" extension.
        file_name = image_url.split("/")[-1]
        creature = cls(name, file_name.replace(".gif", ""))
        texts = [tag.text for tag in description_block.find_all("p")]
        # The last two paragraphs go to the hitpoints and experience parsers, the rest is description.
        creature.description = "\n".join(texts[:-2])
        creature._parse_hp_text(texts[-2])
        creature._parse_exp_text(texts[-1])
        return creature
    except ValueError:
        return None
def list_from_content(cls, content):
    """Parse the content of a board list in Tibia.com into a list of boards.

    Parameters
    ----------
    content: :class:`str`
        The raw HTML response from the board list.

    Returns
    -------
    :class:`list` of :class:`ListedBoard`
        The boards found in the page. Rows that raise :exc:`IndexError` while being parsed are skipped.

    Raises
    ------
    InvalidContent
        Content is not a board list in Tibia.com
    """
    try:
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table", attrs={"width": "100%"})
        # Exactly three full-width tables are expected; the first one is not used.
        _, board_list_table, timezone_table = tables
        # The first row of the board list is discarded.
        _, *board_rows = board_list_table.find_all("tr")
        timezone_match = timezone_regex.search(timezone_table.text)
        if timezone_match is None:
            # Without a recognizable timezone the page cannot be a board list; report it as the
            # documented InvalidContent instead of failing with an AttributeError on ``None``.
            raise errors.InvalidContent("content does not belong to a forum section.")
        offset = 1 if timezone_match.group(1) == "CES" else 2
        boards = []
        # The last three rows are not parsed as boards.
        for board_row in board_rows[:-3]:
            try:
                board = cls._parse_board_row(board_row, offset)
            except IndexError:
                continue
            boards.append(board)
        return boards
    except ValueError as e:
        raise errors.InvalidContent("content does not belong to a forum section.", e)
def from_content(cls, content):
    """Build a character from the html content of the character's page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`Character`
        The character contained in the page, or None if the character doesn't exist

    Raises
    ------
    InvalidContent
        If content is not the HTML of a character's page.
    """
    document = parse_tibiacom_content(content)
    sections = cls._parse_tables(document)
    char = Character()
    # A "Could not find character" table means the character doesn't exist.
    if "Could not find character" in sections:
        return None
    if "Character Information" not in sections:
        raise InvalidContent("content does not contain a tibia.com character information page.")
    char._parse_character_information(sections["Character Information"])
    # The remaining sections are optional; missing ones are passed as an empty list.
    char._parse_achievements(sections.get("Account Achievements", []))
    if "Account Badges" in sections:
        char._parse_badges(sections["Account Badges"])
    char._parse_deaths(sections.get("Character Deaths", []))
    char._parse_account_information(sections.get("Account Information", []))
    char._parse_other_characters(sections.get("Characters", []))
    return char
def test_parse_pagination_collapsed_middle(self):
    """Parsing a collapsed pagination block with current page 300 out of 503."""
    # Query-string separators are written as ``&amp;`` so the hrefs keep their ``currentpage``
    # parameter intact instead of containing an entity-decoded ``¤`` character.
    content = """<td class="PageNavigation"><small><div style="float: left;"><b>» <span class="PageLink FirstOrLastElement"><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp;currentpage=1">First Page</a></span> ... <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp;currentpage=297">297</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp;currentpage=298">298</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp;currentpage=299">299</a></span> <span class="PageLink "><span class="CurrentPageLink">300</span></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp;currentpage=301">301</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp;currentpage=302">302</a></span> <span class="PageLink "><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp;currentpage=303">303</a></span> ... <span class="PageLink FirstOrLastElement"><a href="https://www.tibia.com/charactertrade/?subtopic=currentcharactertrades&amp;currentpage=503">Last Page</a></span></b></div><div style="float: right;"><b>» Results: 12568</b></div></small></td> """
    parsed_content = utils.parse_tibiacom_content(content, builder="html5lib")
    page, total_pages, results_count = utils.parse_pagination(parsed_content)
    self.assertEqual(300, page)
    self.assertEqual(503, total_pages)
    self.assertEqual(12568, results_count)
def from_content(cls, content):
    """Create an instance of the class from the html content of the thread's page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`ForumThread`
        The thread contained in the page, or None if the thread doesn't exist

    Raises
    ------
    InvalidContent
        If content is not the HTML of a thread's page.
    """
    parsed_content = parse_tibiacom_content(content)
    tables = parsed_content.find_all("table")
    # Only tables whose direct parent carries the "BoxContent" class are considered.
    root_tables = [t for t in tables if "BoxContent" in t.parent.attrs.get("class", [])]
    if not root_tables:
        error_table = parsed_content.find("table", attrs={"class": "Table1"})
        if error_table and "not found" in error_table.text:
            return None
        raise errors.InvalidContent("content is not a Tibia.com forum thread.")
    try:
        if len(root_tables) == 4:
            forum_info_table, title_table, posts_table, footer_table = root_tables
        else:
            # Any count other than three fails to unpack and is reported as invalid content.
            forum_info_table, title_table, footer_table = root_tables
            posts_table = None
    except ValueError as e:
        raise errors.InvalidContent("content is not a Tibia.com forum thread.", e)

    header_text = forum_info_table.text
    section, board, *_ = split_list(header_text, "|", "|")
    thread = cls(section=section, board=board)
    thread.title = title_table.text.strip()
    golden_frame = title_table.find("div", attrs={"class": "CipPost"})
    thread.golden_frame = golden_frame is not None

    timezone = timezone_regex.search(footer_table.text).group(1)
    # ``attrs`` must be a mapping; the previous set literal only matched because BeautifulSoup
    # treats a non-dict ``attrs`` value as a list of candidate CSS classes.
    time_page_column, navigation_column = footer_table.find_all("td", attrs={"class": "ff_white"})
    page_links = time_page_column.find_all("a")
    if page_links:
        last_link = page_links[-1]["href"]
        thread.page = int(footer_table.find("span").text)
        # The last link may point to a page before the current one, so the current page is a lower bound.
        thread.total_pages = max(int(page_number_regex.search(last_link).group(1)), thread.page)

    navigation_links = navigation_column.find_all("a")
    if len(navigation_links) == 2:
        prev_link, next_link = navigation_links
        prev_link_url = prev_link["href"]
        thread.previous_topic_number = int(thread_id_regex.search(prev_link_url).group(1))
        next_link_url = next_link["href"]
        thread.next_topic_number = int(thread_id_regex.search(next_link_url).group(1))
    elif navigation_links:
        # With a single link, its text tells whether it points to the previous or the next thread.
        if "Previous" in navigation_links[0].text:
            prev_link_url = navigation_links[0]["href"]
            thread.previous_topic_number = int(thread_id_regex.search(prev_link_url).group(1))
        else:
            next_link_url = navigation_links[0]["href"]
            thread.next_topic_number = int(thread_id_regex.search(next_link_url).group(1))
    # When there are no navigation links at all, both topic numbers are left untouched
    # instead of failing with an IndexError.

    offset = 1 if timezone == "CES" else 2

    if posts_table:
        thread_info_table, *post_tables = posts_table.find_all("div", attrs={"class": "ForumPost"})
        inner_info_table = thread_info_table.find("table")
        # Exactly three columns are expected; only the first one (thread number) is read.
        thread_num_col, _pages_col, _navigation_col = inner_info_table.find_all("td")
        thread.thread_id = int(thread_num_col.text.replace("Thread #", ""))
        for post_table in post_tables:
            post = cls._parse_post_table(post_table, offset)
            thread.posts.append(post)
    return thread
def from_content(cls, content):
    """Parse the board's HTML content from Tibia.com.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the board.

    Returns
    -------
    :class:`ForumBoard`
        The forum board contained.

    Raises
    ------
    InvalidContent
        Content is not a board in Tibia.com
    """
    parsed_content = parse_tibiacom_content(content)
    tables = parsed_content.find_all("table")
    try:
        # At least five tables are required; any extra ones are ignored.
        header_table, time_selector_table, threads_table, timezone_table, boardjump_table, *_ = tables
    except ValueError as e:
        raise errors.InvalidContent("content is not a forum board", e)
    header_text = header_table.text.strip()
    section, name = split_list(header_text, "|", "|")
    board = cls(name=name, section=section)

    age_selector = time_selector_table.find("select")
    if not age_selector:
        # Without the age selector nothing else is parsed; only section and name are set.
        return board
    selected_age = age_selector.find("option", {"selected": True})
    if selected_age:
        board.age = int(selected_age["value"])

    board_selector = boardjump_table.find("select")
    selected_board = board_selector.find("option", {"selected": True})
    board.board_id = int(selected_board["value"])

    page_info = threads_table.find("td", attrs={"class": "ff_info"})
    if page_info:
        current_page_text = page_info.find("span")
        page_links = page_info.find_all("a")
        if current_page_text:
            board.page = int(current_page_text.text)
            # With no page links, the current page is the only known page; this avoids an
            # IndexError on ``page_links[-1]``.
            board.total_pages = board.page
            if page_links:
                last_linked_page = int(page_number_regex.search(page_links[-1]["href"]).group(1))
                board.total_pages = max(board.page, last_linked_page)

    thread_rows = threads_table.find_all("tr")
    # The first row is skipped, as are rows that don't have exactly seven columns.
    for thread_row in thread_rows[1:]:
        columns = thread_row.find_all("td")
        if len(columns) != 7:
            continue
        entry = cls._parse_thread_row(columns)
        if isinstance(entry, ListedThread):
            board.threads.append(entry)
            cip_border = thread_row.find("div", attrs={"class": "CipBorder"})
            if cip_border:
                entry.golden_frame = True
        elif isinstance(entry, ListedAnnouncement):
            board.announcements.append(entry)
    return board