Пример #1
0
    def running(self):
        """Request the Weibo hot-search page and extract the rank of each row.

        The page at ``https://s.weibo.com/top/summary`` is fetched with
        browser-like headers and parsed with BeautifulSoup (``lxml`` parser).
        Every ``<tr>`` of the ``#pl_top_realtimehot`` table is then inspected
        for a numeric rank in its ``td.td-01`` cell.  Rows whose rank cell is
        missing or empty are ignored; a rank cell that contains no digits is
        reported through ``tool.console`` and the row is skipped.

        Raises:
            requests.exceptions.RequestException: if the HTTP request fails,
                including a timeout when the server stalls for 10 seconds.
        """
        # Browser-like headers sent with the page request.
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Host": "s.weibo.com",
            "Pragma": "no-cache",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "cross-site",
            # NOTE(review): this value looks masked/redacted; confirm the
            # intended header value.
            "Sec-Fetch-User": "******",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36",
        }

        # Request the Weibo hot-search list.  The explicit timeout keeps a
        # stalled connection from blocking this method indefinitely.
        response = requests.get(
            "https://s.weibo.com/top/summary",
            headers=headers,
            timeout=10,
        )
        bs = BeautifulSoup(response.content.decode(errors="ignore"), "lxml")

        # Parse the page.
        hot_list = []
        empty_rank = 0  # number of empty (advertisement) hot-search entries
        rank_pattern = re.compile(r"[0-9]+")  # compiled once, reused per row
        for label_item in bs.select(
                "#pl_top_realtimehot > table > tbody > tr"):  # each table row
            # Extract the hot-search rank.
            if label_rank := label_item.select_one("tr > td.td-01"):
                rank_text = label_rank.text
                if not rank_text:
                    # Empty rank cell: nothing to extract from this row.
                    continue
                if match_rank := rank_pattern.search(rank_text):
                    # Shift the displayed rank by the empty entries counted so far.
                    ranking = int(match_rank.group()) - empty_rank
                else:
                    tool.console("报错", "提取的热搜排名不包含数字!")
                    continue
Пример #2
0
    def running(self):
        """Download the Weibo hot-search page and read the rank of each row.

        The page is requested through ``tool.do_request`` using the headers in
        ``self._HEADERS`` and parsed with BeautifulSoup (``lxml`` parser).
        For each ``<tr>`` of the ``#pl_top_realtimehot`` table, the first run
        of digits in the ``td.td-01`` cell is converted to an integer rank.
        Rows with a missing or empty rank cell are ignored; a rank cell
        without digits is reported through ``tool.console``.
        """
        # Perform the page request (Weibo hot-search list).
        page = tool.do_request("https://s.weibo.com/top/summary", headers=self._HEADERS)
        markup = page.content.decode(errors="ignore")
        bs = BeautifulSoup(markup, "lxml")

        # Parse the page.
        hot_list = []
        empty_rank = 0  # number of empty (advertisement) hot-search entries

        rows = bs.select("#pl_top_realtimehot > table > tbody > tr")
        for row in rows:
            # Locate the cell that holds the hot-search rank.
            rank_cell = row.select_one("tr > td.td-01")
            if not rank_cell:
                continue

            rank_text = rank_cell.text
            if rank_text == "":
                continue

            digits = re.search("[0-9]+", rank_text)
            if not digits:
                tool.console("报错", "提取的热搜排名不包含数字!")
                continue

            # Shift the displayed rank by the empty entries counted so far.
            ranking = int(digits.group()) - empty_rank
Пример #3
0
        # 解析网页
        hot_list = []
        empty_rank = 0  # 统计空热搜(广告热搜)数量
        for label_item in bs.select(
                "#pl_top_realtimehot > table > tbody > tr"):  # 遍历热搜的标签
            # 提取热搜排名
            if label_rank := label_item.select_one("tr > td.td-01"):
                if len(label_rank.text) == 0:
                    continue
                if match_rank := re.search("[0-9]+", label_rank.text):
                    ranking = int(match_rank.group()) - empty_rank
                else:
                    tool.console("报错", "提取的热搜排名不包含数字!")
                    continue
            else:
                tool.console("报错", "未提取到热搜排名!")
                continue

            # 提取热搜关键词
            if label_keyword := label_item.select_one("tr > td.td-02 > a"):
                keyword = label_keyword.text
            else:
                tool.console("报错", "未提取到热搜关键词!")
                continue

            # 提取热搜热度
            if label_heat := label_item.select_one("tr > td.td-02 > span"):
                if match_heat := re.search("[0-9]+", label_heat.text):
                    heat = int(match_heat.group())
                else:
                    tool.console("报错", "提取的热搜热度不包含数字!")