def parse_reply(item: BeautifulSoup):
    """Build a ``Reply`` from the HTML node of a single reply.

    The node is searched for:

    * ``<a class="tshuz_at" target="_blank">`` links, each turned into a
      ``User`` (name from the link text, uid from its ``href``);
    * a ``<span class="tshuz_cnt_main">`` whose text becomes the content;
    * a ``<div class="tshuz_time">`` whose first ``<span>`` supplies the
      ``title`` attribute used as ``tick`` (presumably the post time).

    Args:
        item: Parsed HTML node wrapping one reply.

    Returns:
        A ``Reply`` built from the first linked user, the content text and
        the tick. ``user_to`` is the second linked user, or ``None`` when
        fewer than two user links are present.

    Raises:
        Exception: If no user link, no content span, or no time span is
            found inside ``item``.
    """
    users = [
        User(process_html_string(link.get_text()),
             User.extract_uid(link.get("href")))
        for link in item.find_all(
            "a", attrs={"class": "tshuz_at", "target": "_blank"})
    ]
    if not users:
        raise Exception("Can't find user information")

    # ``find`` returns None when nothing matches; check explicitly so a
    # malformed block fails with a clear message instead of an
    # AttributeError on None.
    content_node = item.find("span", attrs={"class": "tshuz_cnt_main"})
    if content_node is None:
        raise Exception("Can't find reply content")
    content = process_html_string(content_node.get_text())

    time_box = item.find("div", attrs={"class": "tshuz_time"})
    time_node = time_box.find("span") if time_box is not None else None
    if time_node is None:
        raise Exception("Can't find reply time")
    tick = time_node.get("title")

    user_to = users[1] if len(users) >= 2 else None
    return Reply(users[0], content=content, tick=tick, user_to=user_to)
def parse_content_text(item):
    """Return the cleaned text of the content container inside ``item``.

    The container is the first ``<td class="t_f">`` in ``item``, falling
    back to the first ``<div class="t_f">``. Before the text is read, tip
    ``<div>`` elements are detached from the container so their text does
    not end up in the result. Note that this modifies the parsed tree
    passed in.

    Args:
        item: Parsed HTML node of one item block.

    Returns:
        The container's text after ``process_html_string`` has been applied.

    Raises:
        Exception: If neither a ``td`` nor a ``div`` with class ``t_f``
            exists in ``item``.
    """
    content = item.find("td", attrs={"class": "t_f"})
    if content is None:
        content = item.find("div", attrs={"class": "t_f"})
    if content is None:
        raise Exception("Can't find content container in item block.")

    # Remove no permission tip and image text tip.
    # ``find_all`` always returns a (possibly empty) list, so no None check
    # is needed before iterating.
    tip_classes = (
        "attach_nopermission attach_tips",
        "tip tip_4 aimg_tip",
    )
    for tip_class in tip_classes:
        for tip in content.find_all("div", attrs={"class": tip_class}):
            tip.extract()

    return process_html_string(content.get_text())
def parse_user(a_tag):
    """Create a ``User`` from an anchor tag.

    The user's name is the tag's text passed through
    ``process_html_string``; the uid is extracted from the tag's ``href``
    attribute via ``User.extract_uid``.
    """
    display_name = process_html_string(a_tag.get_text())
    user_id = User.extract_uid(a_tag.get("href"))
    return User(name=display_name, uid=user_id)
def comments(self):
    """Return the cleaned text of every element in ``self.content_list``.

    Each element's text is read with ``get_text()`` and passed through
    ``process_html_string``; the results are returned as a list in the
    same order as ``self.content_list``.
    """
    texts = []
    for node in self.content_list:
        texts.append(process_html_string(node.get_text()))
    return texts