def _parse_filtering(cls, form):
    """Build an instance from the values of a filter form.

    Parameters
    ----------
    form: :class:`bs4.Tag`
        The form holding the date range, news type and category filters.

    Returns
    -------
    An instance of ``cls`` created with the selected start date, end date,
    categories and types.
    """
    fields = parse_form_data(form)

    def _take_int(key):
        # Every date component is consumed from the form data as it is read.
        return int(fields.pop(key))

    begin_year = _take_int("filter_begin_year")
    begin_month = _take_int("filter_begin_month")
    begin_day = _take_int("filter_begin_day")
    begin = datetime.date(begin_year, begin_month, begin_day)

    end_year = _take_int("filter_end_year")
    end_month = _take_int("filter_end_month")
    end_day = _take_int("filter_end_day")
    end = datetime.date(end_year, end_month, end_day)

    # A type or category is selected when its filter field holds a truthy value.
    selected_types = [
        kind for kind in NewsType if fields.pop(kind.filter_name, None)
    ]
    selected_categories = [
        cat for cat in NewsCategory if fields.pop(cat.filter_name, None)
    ]
    return cls(
        start_date=begin,
        end_date=end,
        categories=selected_categories,
        types=selected_types,
    )
def from_content(cls, content):
    """Parse the content of the leaderboards page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the leaderboards page.

    Returns
    -------
    :class:`Leaderboard`
        The leaderboard if found.

    Raises
    ------
    InvalidContent
        If content is not the HTML of the leaderboards page.
    """
    try:
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table", {"class": "TableContent"})
        form = parsed_content.find("form")
        data = parse_form_data(form, include_options=True)
        current_world = data["world"]
        current_rotation = None
        rotations = []
        for label, value in data["__options__"]["rotation"].items():
            current = "Current" in label
            if current:
                # The label of the current rotation carries extra text; keep
                # only the parts matched by the rotation end pattern.
                label = "".join(rotation_end_pattern.findall(label))
            rotation_end = parse_tibia_datetime(label)
            rotation = LeaderboardRotation(int(value), rotation_end, current)
            if value == data["rotation"]:
                current_rotation = rotation
            rotations.append(rotation)
        leaderboard = cls(current_world, current_rotation)
        leaderboard.available_worlds = [
            w for w in data["__options__"]["world"].values() if w
        ]
        leaderboard.available_rotations = rotations
        if leaderboard.rotation and leaderboard.rotation.current:
            # The first number found in the third table is read as the
            # minutes elapsed since the last update.
            last_update_table = tables[2]
            numbers = re.findall(r'(\d+)', last_update_table.text)
            if numbers:
                leaderboard.last_update = datetime.timedelta(
                    minutes=int(numbers[0]))
        leaderboard._parse_entries(tables[-1])
        pagination_block = parsed_content.find("small")
        pages, total, count = parse_pagination(
            pagination_block) if pagination_block else (0, 0, 0)
        leaderboard.page = pages
        leaderboard.total_pages = total
        leaderboard.results_count = count
        return leaderboard
    except (AttributeError, ValueError, KeyError, IndexError) as e:
        # KeyError/IndexError cover pages whose form lacks the world/rotation
        # fields or that have fewer tables than a leaderboards page; they are
        # reported as invalid content instead of leaking out as raw errors.
        raise errors.InvalidContent(
            "content does not belong to the leaderboards", e)
def from_content(cls, content):
    """Parse the content of a spells page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`Spell`
        The spell data. If the spell doesn't exist, this will be :obj:`None`.

    Raises
    ------
    InvalidContent
        If content is not the HTML of the spells section.
    """
    parsed_content = parse_tibiacom_content(content)
    try:
        tables = parse_tibiacom_tables(parsed_content)
        title_table = parsed_content.find("table", attrs={"class": False})
        spell_table = tables["Spell Information"]
        img = title_table.find("img")
        url = urllib.parse.urlparse(img["src"])
        filename = os.path.basename(url.path)
        # The identifier is the image's file name up to the first dot.
        identifier = str(filename.split(".")[0])
        # The description is made of the nodes that follow the title table,
        # up to the next table.
        description_parts = []
        next_sibling = title_table.next_sibling
        while next_sibling:
            if isinstance(next_sibling, bs4.Tag):
                if next_sibling.name == "table":
                    break
                if next_sibling.name == "br":
                    description_parts.append("\n")
                else:
                    description_parts.append(next_sibling.text)
            elif isinstance(next_sibling, bs4.NavigableString):
                description_parts.append(str(next_sibling))
            next_sibling = next_sibling.next_sibling
        spell = cls._parse_spells_table(identifier, spell_table)
        spell.description = "".join(description_parts).strip()
        if "Rune Information" in tables:
            spell.rune = cls._parse_rune_table(tables["Rune Information"])
        return spell
    except (TypeError, AttributeError, IndexError, KeyError) as e:
        # A page with a form pointing at the spells subtopic but without
        # spell tables is treated as a spell that doesn't exist.
        form = parsed_content.find("form")
        if form:
            data = parse_form_data(form)
            # Fall back to an empty string so a missing "__action__" entry
            # doesn't raise a TypeError here and hide the InvalidContent below.
            if "subtopic=spells" in (data.get("__action__") or ""):
                return None
        raise errors.InvalidContent("content is not a spell page", e)
def _parse_filters_table(self, form):
    """Read the selected filters of a highscores page from its form.

    The parsed values are stored on the instance; nothing is returned.

    Parameters
    ----------
    form: :class:`bs4.Tag`
        The form containing the filters.
    """
    form_data = parse_form_data(form, include_options=True)

    # An empty or missing world selection is stored as None.
    self.world = form_data.get("world") or None

    battleye_value = parse_integer(form_data.get("beprotection"), None)
    self.battleye_filter = try_enum(BattlEyeHighscoresFilter, battleye_value)

    category_value = parse_integer(form_data.get("category"), None)
    self.category = try_enum(Category, category_value)

    profession_value = parse_integer(form_data.get("profession"), None)
    self.vocation = try_enum(VocationFilter, profession_value, VocationFilter.ALL)

    # Only the checkboxes marked as checked contribute to the PvP type filter.
    checked_boxes = form.find_all("input", {"type": "checkbox", "checked": "checked"})
    pvp_values = [int(box["value"]) for box in checked_boxes]
    self.pvp_types_filter = [try_enum(PvpTypeFilter, value) for value in pvp_values]

    # NOTE(review): "available_words" looks like a typo for "available_worlds";
    # kept as-is because the attribute name is visible to callers - confirm.
    self.available_words = list(filter(None, form_data["__options__"]["world"].values()))
def from_content(cls, content):
    """Get a list of guilds from the HTML content of the world guilds' page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`GuildsSection`
        The guilds listed in the page. It is created with :obj:`None` as its
        world when the form has no world selected.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a guild's page.
    """
    try:
        parsed_content = parse_tibiacom_content(content)
        form = parsed_content.find("form")
        data = parse_form_data(form, include_options=True)
        selected_world = data["world"] if data["world"] else None
        available_worlds = [w for w in data["__options__"]["world"].values() if w]
        guilds = cls(selected_world, available_worlds=available_worlds)
    except (AttributeError, KeyError) as e:
        # KeyError covers forms that lack the world field or its options,
        # which means the page is not a world guild list.
        raise InvalidContent("Content does not belong to world guild list.", e)
    # First TableContainer contains world selector. Slicing (rather than
    # unpacking) keeps a page without any containers from raising ValueError.
    containers = parsed_content.find_all('div', class_="TableContainer")[1:]
    for container in containers:
        title = container.find('div', class_="Text")
        active = "Active" in title.text
        # The first matching row is the column header; a container without
        # rows simply contributes no entries.
        rows = container.find_all("tr", {'bgcolor': ["#D4C0A1", "#F1E0C6"]})[1:]
        for row in rows:
            columns = row.find_all('td')
            logo_img = columns[0].find('img')["src"]
            # First text line is the guild's name, the rest is its description.
            description_lines = columns[1].get_text("\n").split("\n", 1)
            name = description_lines[0]
            description = None
            if len(description_lines) > 1:
                description = description_lines[1].replace("\r", "").replace("\n", " ")
            guild = GuildEntry(name, guilds.world, logo_img, description, active)
            guilds.entries.append(guild)
    return guilds
def from_content(cls, content):
    """Create an instance of the class from the HTML content of the kill statistics' page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`KillStatistics`
        The kill statistics contained in the page or None if it doesn't exist.

    Raises
    ------
    InvalidContent
        If content is not the HTML of a kill statistics' page.
    """
    try:
        parsed_content = parse_tibiacom_content(content)
        entries_table = parsed_content.find('table', attrs={
            'border': '0',
            'cellpadding': '3'
        })
        form = parsed_content.find("form")
        data = parse_form_data(form, include_options=True)
        world = data["world"]
        available_worlds = list(data["__options__"]["world"].values())
        if not entries_table:
            # Fall back to the alternative table markup.
            entries_table = parsed_content.find("table", {"class": "Table3"})
        # If the entries table doesn't exist, it means that this belongs to an nonexistent or unselected world.
        if entries_table is None:
            return None
        # The first two rows are the header and subheader. Slicing (rather
        # than unpacking) keeps a shorter table from raising ValueError.
        rows = entries_table.find_all('tr')[2:]
        last_index = len(rows) - 1
        entries = {}
        total = None
        for i, row in enumerate(rows):
            columns = [
                c.text.replace('\xa0', ' ').strip()
                for c in row.find_all('td')
            ]
            # Skip rows that lack the five expected cells or hold no numbers.
            if len(columns) < 5 or not columns[2].isnumeric():
                continue
            entry = RaceEntry(last_day_players_killed=int(columns[1]),
                              last_day_killed=int(columns[2]),
                              last_week_players_killed=int(columns[3]),
                              last_week_killed=int(columns[4]))
            # The last row holds the totals instead of a single race.
            if i == last_index:
                total = entry
            else:
                entries[columns[0]] = entry
        return cls(world, entries, total, available_worlds=available_worlds)
    except (AttributeError, KeyError) as e:
        # KeyError covers forms that lack the world field or its options.
        raise InvalidContent(
            "content does not belong to a Tibia.com kill statistics page.", e)
def from_content(cls, content):
    """Parse the content of the spells section.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`SpellsSection`
        The spells contained and the filtering information.

    Raises
    ------
    InvalidContent
        If content is not the HTML of the spells section.
    """
    try:
        parsed_content = parse_tibiacom_content(content)
        table_content_container = parsed_content.find(
            "div", attrs={"class": "InnerTableContainer"})
        spells_table = table_content_container.find(
            "table", class_=lambda t: t != "TableContent")
        spell_rows = spells_table.find_all(
            "tr", {'bgcolor': ["#D4C0A1", "#F1E0C6"]})
        spells_section = cls()
        for row in spell_rows:
            columns = row.find_all("td")
            # Only rows with exactly seven cells describe a spell.
            if len(columns) != 7:
                continue
            spell_link = columns[0].find("a")
            url = urllib.parse.urlparse(spell_link["href"])
            query = urllib.parse.parse_qs(url.query)
            cols_text = [c.text for c in columns]
            identifier = query["spell"][0]
            # The first cell holds both the spell's name and its words.
            match = spell_name.findall(cols_text[0])
            name, words = match[0]
            spell = SpellEntry(name=name.strip(),
                               words=words.strip(),
                               spell_type=try_enum(SpellType, cols_text[2]),
                               level=int(cols_text[3]),
                               group=try_enum(SpellGroup, cols_text[1]),
                               mana=parse_integer(cols_text[4], None),
                               premium="yes" in cols_text[6],
                               price=parse_integer(cols_text[5], 0),
                               identifier=identifier)
            spells_section.entries.append(spell)
        form = parsed_content.find("form")
        data = parse_form_data(form)
        spells_section.vocation = try_enum(VocationSpellFilter, data["vocation"])
        spells_section.group = try_enum(SpellGroup, data["group"])
        spells_section.spell_type = try_enum(SpellType, data["type"])
        spells_section.sort_by = try_enum(SpellSorting, data["sort"])
        # Premium filter: True/False when a value is selected, None otherwise.
        spells_section.premium = "yes" in data["premium"] if data[
            "premium"] else None
        return spells_section
    except (AttributeError, TypeError, KeyError, IndexError) as e:
        # KeyError/IndexError cover missing form fields, links without a
        # "spell" query parameter and cells that don't match the name
        # pattern; they are reported as invalid content as well.
        raise errors.InvalidContent(
            "content does not belong to the Spells section", e)