示例#1
0
文件: stats.py 项目: YBadiss/WhatStat
def _compute_emojis(messages):
    """Summarise emoji usage over a collection of messages.

    The ``text`` of every message is joined with newlines and passed to
    ``str_to_dict`` together with ``_emoji_set``. The resulting mapping is
    presumably emoji -> occurrence count (its values are summed and its
    items are ranked) -- confirm against ``str_to_dict``.

    :param messages: iterable of objects exposing a ``text`` attribute.
    :return: dict with ``count`` (sum of the mapping's values) and
        ``most_used`` (``top`` applied to ``(count, emoji)`` pairs, or
        ``None`` when the mapping is empty).
    """
    joined_text = '\n'.join(message.text for message in messages)
    usage = str_to_dict(joined_text, _emoji_set)
    total = sum(usage.values())
    if usage:
        favourite = top((count, emoji) for emoji, count in usage.items())
    else:
        favourite = None
    return {'count': total, 'most_used': favourite}
    def crawl(self, offset=0):
        """
        更多历史消息文章
        :return:
        """
        url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=MzA3NTY3NjUzMg==&f=json&offset={offset}&count=10&is_ok=1&scene=124&uin=MzAzMDI4NDA4OQ%3D%3D&key=9701b8bf0b69875af2605bece0b84ac5791a9cc45d7defc08d4f127acdab2c3ae739842f9f0fd569da86b2db1f91f27c9dff5bab9e5116263572442d555a91bb1cb7b502a908e8443a130fcd032293e6&pass_ticket=ZaU5UjhVDMVvsOzkK4i3B%2FjZubj7dI36zAoLaLuE52eYyRmdeP68%2BFfs6uiXclfr&wxtoken=&appmsg_token=1072_6j04Jj7bozTq%252FaFAuTDRNbD8bM4iFMuP3Lbb1g~~&x5=0&f=json".format(
            offset=offset)
        headers = """Host: mp.weixin.qq.com
Connection: keep-alive
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.5 WindowsWechat
X-Requested-With: XMLHttpRequest
Accept: */*
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzA3NTY3NjUzMg==&scene=124&uin=MzAzMDI4NDA4OQ%3D%3D&key=9701b8bf0b69875af2605bece0b84ac5791a9cc45d7defc08d4f127acdab2c3ae739842f9f0fd569da86b2db1f91f27c9dff5bab9e5116263572442d555a91bb1cb7b502a908e8443a130fcd032293e6&devicetype=Windows+10+x64&version=62090529&lang=zh_CN&a8scene=7&pass_ticket=ZaU5UjhVDMVvsOzkK4i3B%2FjZubj7dI36zAoLaLuE52eYyRmdeP68%2BFfs6uiXclfr&winzoom=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4
Cookie: rewardsn=; wxtokenkey=777; wxuin=3030284089; devicetype=android-27; version=27000f8d; lang=zh_CN; pass_ticket=ZaU5UjhVDMVvsOzkK4i3B/jZubj7dI36zAoLaLuE52eYyRmdeP68Ffs6uiXclfr; wap_sid2=CLnu+aQLElx6WkYxZHVSVFZOWm01YWhJcjdKSWVLVXozSlNIdnlDY1I5bkpGQ1hMcFNSamZNMDVIZDU2TGQ5ek1TQkpjRVVkZ0tncXZXZDVvUXZza0VTcmhwRlUwakFFQUFBfjCp5KX5BTgNQJVO"""
        headers = utils.str_to_dict(headers)
        response = requests.get(url, headers=headers, verify=False)
        result = response.json()
        if result.get("ret") == 0:
            msg_list = result.get("general_msg_list")
            logger.info("抓取数据: offset=%s, data=%s" % (offset, msg_list))
            self.save(msg_list)  # 保存到数据库
            has_next = result.get("can_msg_continue")
            if has_next == 1:
                next_offset = result.get("next_offset")
                time.sleep(3)
                self.crawl(next_offset)
        else:
            # 错误消息
            logger.error("错误消息,请检查请求头")
            exit()
示例#3
0
    def crawl_more(self, offset=0):
        """
        爬取更多文章
        :return:
        """
        url = "https://mp.weixin.qq.com/mp/profile_ext?" \
              "action=getmsg&" \
              "__biz=MjM5MzgyODQxMQ==&" \
              "f=json&" \
              "offset={offset}&" \
              "count=10&" \
              "is_ok=1&" \
              "scene=&" \
              "uin=777&" \
              "key=777&" \
              "pass_ticket=lDUqy%2FK9AFwZZVe8RDrxTER0kM1SAjDpAhfBgj2bW4QyP8V2lsi9gMbN%2FTJ0Nq7w&" \
              "wxtoken=&" \
              "appmsg_token=935_GxAo%2BiWnvCj80gcRR5iWbvAetsVErB5CLhSNJg~~&" \
              "x5=1&" \
              "f=json".format(offset=offset)  # appmsg_token 也是临时的

        headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 5.1.1; 2014813 Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN
Accept: */*
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzIwMTc4ODE0Mw==&devicetype=android-22&version=26051731&lang=zh_CN&nettype=WIFI&a8scene=7&session_us=gh_5138cebd4585&pass_ticket=wxW7ApnFNe01tZe42nkIH5EExbK%2BYA45O1NzaLk7uLZBPks8RzUA4gzD6hxU9V5n&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: rewardsn=1872e6d4042552713dff; wxtokenkey=990d7740963fdbea6cf44ba78c4166b7f9025262edc22a0977c781f8073b54aa; wxuin=525477518; devicetype=android-22; version=26051731; lang=zh_CN; pass_ticket=wxW7ApnFNe01tZe42nkIH5EExbK+YA45O1NzaLk7uLZBPks8RzUA4gzD6hxU9V5n; wap_sid2=CI7NyPoBElxNWnRoazhJNk5CQkdYQ3NBRXZBNmVYbDVncS1yVkdsVjFQYUpCU3J6UGZ1WmxubC1wT3p4WkE4ZFY0dnRpaDdlZnJ1ank4dkdZbXpnNUd2V2hpbjNQS2NEQUFBfjCimMPRBTgMQJRO
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=043632&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= 2014813 &RL=720*1280&OS=5.1.1&API=22
Q-GUID: 9d4417681f44eeb4410b613d13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b




        """
        headers = utils.str_to_dict(headers)
        response = requests.get(url, headers=headers, verify=False)
        result = response.json()
        if result.get("ret") == 0:
            msg_list = result.get("general_msg_list")
            logger.info("抓取数据:offset=%s, data=%s" % (offset, msg_list))
            self.save(msg_list)
            # 递归调用
            has_next = result.get("can_msg_continue")
            if has_next == 1:
                next_offset = result.get("next_offset")
                # TODO 等待时间做成可配置
                time.sleep(2)
                self.crawl_more(next_offset)
        else:
            # 错误消息
            # {"ret":-3,"errmsg":"no session","cookie_count":1}
            logger.error("无法正确获取内容,请重新获取请求参数和请求头")
            exit()
示例#4
0
    def crawl(self, offset=0):
        """
        爬取更多文章
        :return:
        """
        url = "https://mp.weixin.qq.com/mp/profile_ext?" \
              "action=getmsg&" \
              "__biz=MjM5MzgyODQxMQ==&" \
              "f=json&" \
              "offset={offset}&" \
              "count=10&" \
              "is_ok=1&" \
              "scene=&" \
              "uin=777&" \
              "key=777&" \
              "pass_ticket=mXHYjLnkYux1rXx8BxNrZpgW4W+yLZxcuvpDWlxbBrjvJo3ECB+ckDAsy/TJJK6P&" \
              "wxtoken=&" \
              "appmsg_token=938_dFy7Mic8412%2BQG9szSTRTLb2u5DrwFqmTk4ZAg~~&" \
              "x5=1&" \
              "f=json".format(offset=offset)  # appmsg_token 是临时的,也需要更新

        # 从 Fiddler 获取最新的请求头参数
        headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 7.0; MI 5 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043804 Mobile Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN
Accept: */*
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MzgyODQxMQ==&devicetype=android-24&version=26051732&lang=zh_CN&nettype=WIFI&a8scene=7&session_us=gh_c744c4d09c36&pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: pgv_pvi=1680185344; pgv_si=s6583349248; wxtokenkey=a40c0cde8d7c0a549e900a166819c80622ce7f12899bd6e25f5d5275ff18f7c6; rewardsn=9a0c2a83b30e5994c162; wxuin=528927841; devicetype=android-24; version=26051732; lang=zh_CN; pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0; wap_sid2=COGYm/wBElxsRzJDSS1ZTjlmLVFTRlBYZ3FiV2NBUGZHLUlnMzU5V3lEV1RsSHhJSVp2aWlZc1lxRW9NTnJfb1pzbUw5Zm9vMzhuZ0plU2N2X2lLRExsWGNSVjdDcW9EQUFBfjC4grfSBTgMQJRO
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=043804&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= MI5 &RL=1080*1920&OS=7.0&API=24
Q-GUID: ed3467186e1125bb3d28234d13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b



"""
        headers = utils.str_to_dict(headers)
        response = requests.get(url, headers=headers, verify=False)
        result = response.json()
        if result.get("ret") == 0:
            msg_list = result.get("general_msg_list")
            logger.info("抓取数据:offset=%s, data=%s" % (offset, msg_list))
            self.save(msg_list)
            # 递归调用
            has_next = result.get("can_msg_continue")
            if has_next == 1:
                next_offset = result.get("next_offset")
                time.sleep(2)
                self.crawl(next_offset)
        else:
            # 错误消息
            # {"ret":-3,"errmsg":"no session","cookie_count":1}
            logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
            exit()
示例#5
0
文件: bot.py 项目: Nassty/excheck
    def onMessage(self, msg):
        """Dispatch an incoming chat message to the matching command handler.

        Messages whose ``type`` is not ``'chat'`` or that have no ``body``
        attribute are ignored. The body is split on single spaces: the first
        token names the command and the remaining tokens become positional
        arguments. When the command token contains ``=``, the remaining
        tokens are instead re-joined and parsed by ``utils.str_to_dict`` into
        keyword arguments.

        :param msg: message object supporting ``msg["type"]`` and ``msg.body``.
        """
        if msg["type"] != 'chat' or not hasattr(msg, "body"):
            return

        tokens = str(msg.body).split(" ")
        cmd = tokens[0]
        rest = tokens[1:]

        if "=" in cmd:
            positional = []
            named = utils.str_to_dict(" ".join(rest))
        else:
            positional = rest
            named = {}

        handler = commands.get_command(cmd)
        handler(self, msg, *positional, **named)
示例#6
0
 def get_params(self, article_url):
     """
     Extract the query parameters of an article URL.

     The URL is HTML-unescaped first, then its query string is split on
     ``&`` and ``=`` by ``utils.str_to_dict``.

     :param article_url: article URL, possibly containing HTML entities.
     :return: whatever ``utils.str_to_dict`` returns for the query string
         (presumably a dict of parameter name -> value).
     """
     query = urlsplit(html.unescape(article_url)).query
     return utils.str_to_dict(query, "&", "=")
示例#7
0
    def crawl_latest_10(self):
        """
        爬取最近10条数据
        :return:
        """

        url = "https://mp.weixin.qq.com/mp/profile_ext?" \
              "action=home&" \
              "__biz=MjM5MzgyODQxMQ==&" \
              "devicetype=android-24&" \
              "version=26051633&" \
              "lang=zh_CN&" \
              "nettype=WIFI&" \
              "a8scene=7&" \
              "pass_ticket=oZQqv0KR7zhxAix1SHUFLwI7p%2FiKH2NPWIdEmZidhitAOdpf873t%2BLEZU9Hnxx%2FT&" \
              "wx_header=1"

        headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Linux; Android 5.1.1; 2014813 Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN
x-wechat-uin: NTI1NDc3NTE4
x-wechat-key: c37a3f1c3525d70e6537599058680a1c1d38d754815c6f101c7bf3fbc5bbcd19f8c54895c965c0d624b36d11da34033eb5109c5e40524df3109f43943505a19d0b3f68bacb0b77cbae35a251a1722f98
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/wxpic,image/sharpp,*/*;q=0.8
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: rewardsn=1872e6d4042552713dff; wxtokenkey=990d7740963fdbea6cf44ba78c4166b7f9025262edc22a0977c781f8073b54aa; wxuin=525477518; devicetype=android-22; version=26051731; lang=zh_CN; pass_ticket=wxW7ApnFNe01tZe42nkIH5EExbK+YA45O1NzaLk7uLZBPks8RzUA4gzD6hxU9V5n; wap_sid2=CI7NyPoBElxldXVmX1B4VVVJbXdNTnh4SDZ2YXlMcURDWENucDhvWmZoMktqRmQzYWJVRXV6b29TYmJWX2VscG4zekh2ZzVxWVhmcGpraDBLUEg3LTZZNmNXYXl2S1lEQUFBfjCQmMPRBTgMQJRO
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=043632&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= 2014813 &RL=720*1280&OS=5.1.1&API=22
Q-GUID: 9d4417681f44eeb4410b613d13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b


    
        """
        headers = utils.str_to_dict(headers)
        response = requests.get(url, headers=headers, verify=False)

        if '<title>验证</title>' in response.text:
            logger.error("无法正确获取内容,请重新获取请求参数和请求头")
            exit()

        rex = "msgList = '({.*?})'"
        pattern = re.compile(pattern=rex, flags=re.S)
        match = pattern.search(response.text)
        if match:
            msg_list = match.group(1)
            logger.info("抓取数据: %s" % msg_list)
            msg_list = html.unescape(html.unescape(msg_list))
            self.save(msg_list)
        else:
            logger.warning("没有找到匹配的数据")
示例#8
0
    def test_str_to_dict(self):
        """Check ``utils.str_to_dict`` on degenerate, flat and nested inputs.

        Uses ``assertEqual``/``assertIsNone`` instead of the ``assertEquals``
        alias, which was deprecated and is removed in Python 3.12.
        """
        # Basic behavior for poorly formed inputs
        self.assertIsNone(utils.str_to_dict(None))
        self.assertIsNone(utils.str_to_dict(""))
        self.assertEqual({}, utils.str_to_dict("no_vals"))

        # Unnested arguments lists are parsed
        expected_dict_simple = {
            "url": "http://foobar.com/justin",
            "args": "foo",
            "otherKey": "42"
        }
        self.assertEqual(expected_dict_simple,
                         utils.str_to_dict(self.simple_url_input_string))

        # Nested List Behavior
        expected_dict_nested = {
            "url": "http://foobar.com/justin",
            "args": ["foo", "justin", "bar"],
            "otherKey": "AnotherKey",
            "finalKey": "42"
        }
        self.assertEqual(expected_dict_nested,
                         utils.str_to_dict(self.nested_url_input_string))
示例#9
0
  def crawl(self, offset=0):
    """
    Crawl further articles page by page.

    The request URL now carries the requested ``offset``. It used to
    hard-code ``offset=10``, so every call fetched the same page and the
    paging never advanced. Paging follows the server-supplied
    ``next_offset`` while the response reports ``can_msg_continue == 1``,
    using a loop instead of one recursive call per page.

    :param offset: offset of the first page to request.
    :return: None
    :raises SystemExit: when the response's ``ret`` field is not 0.
    """
    url_template = ("https://mp.weixin.qq.com/mp/profile_ext"
                    "?action=getmsg"
                    "&__biz=MjM5MTY3OTYyMQ=="
                    "&f=json"
                    "&offset={offset}"
                    "&count=10"
                    "&is_ok=1"
                    "&scene=124"
                    "&uin=777"
                    "&key=777"
                    "&pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs"
                    "&wxtoken="
                    "&appmsg_token=938_DJXrTDktnbNkCkp3oOCg-lVbWhhbemMp0EkBLA~~"
                    "&x5=0"
                    "&f=json")

    headers = """
    Host: mp.weixin.qq.com
    Accept-Encoding: br, gzip, deflate
    Cookie: devicetype=iOS11.2.1; lang=zh_CN; pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs; version=16060124; wap_sid2=CLzPvfkEEnBnRTdYc3Uxa1B4NzcxdUR3T3pyNEUwYVBRb09SQlk0MWk1bTVkdVdkUlpSdTh4QUExdWNVYkVxLTN0NnVVWVZhYk5VRXJCT1hpSmI1N0FCaDNmVURBclVSSlhOcDZXZDNpTXJPZkQ0NVd1S3FBd0FBMJ3S1tIFOAxAlE4=; wxuin=1328506812; wxtokenkey=f9b3cef513031f0f50b8c9d88862ea0a8e44ae03f2dabb3e1e7d6b6f68c2233c; bk_token=fc1a7e18-e120-4f0a-a419-e1650f99b989; is_login=wx; is_wechat=1; platform=qq; pgv_pvid=6280301858; _scan_has_moon=1; eas_sid=Q1g5I1q201w0y4d0o2W5z3N4j4; _ga=GA1.2.557769603.1510924852; pac_uid=0_64f45f141b0a7; sd_cookie_crttime=1508507376988; sd_userid=58871508507376987
    Connection: keep-alive
    Accept: */*
    User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C153 MicroMessenger/6.6.1 NetType/WIFI Language/zh_CN
    Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MTY3OTYyMQ==&scene=124&devicetype=iOS11.2.1&version=16060124&lang=zh_CN&nettype=WIFI&a8scene=3&fontScale=100&pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs&wx_header=1
    Accept-Language: zh-cn
    X-Requested-With: XMLHttpRequest
    """

    headers = utils.str_to_dict(headers)
    while True:
        response = requests.get(url_template.format(offset=offset),
                                headers=headers, verify=False)
        result = response.json()

        if result.get("ret") != 0:
            # Error response, e.g.
            # {"ret":-3,"errmsg":"no session","cookie_count":1}
            logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
            # `exit()` is a `site` helper meant for interactive use; raise
            # SystemExit directly, with a failure status.
            raise SystemExit(1)

        msg_list = result.get("general_msg_list")
        # NOTE(review): the page is only logged here, never stored --
        # confirm that this is intended.
        logger.info("抓取数据:offset=%s, data=%s", offset, msg_list)
        if result.get("can_msg_continue") != 1:
            return
        offset = result.get("next_offset")
        time.sleep(2)  # pause between pages
示例#10
0
    def run(self):
        """
        Fetch article-list pages and record read/like numbers for each article.

        For every page: request the list, look up the read/like counters of
        each article (and of the items in its ``multi_app_msg_item_list``)
        through ``Articles.read_like_nums``, write the collected lines with
        ``self.write_file`` and call ``self.is_exit_or_continue()``. The
        method then processes the next page in a loop (it used to call itself
        recursively, which grows the call stack by one frame per page).

        The paging URL previously ended in ``&x5=0f=json``; the missing ``&``
        between the ``x5`` and ``f`` parameters is restored.

        NOTE(review): nothing in this method advances ``begin_page_index`` or
        ends the loop; presumably ``is_exit_or_continue`` does both -- confirm.
        """
        while True:
            # Paging URL
            page_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={}&f=json&offset={}&count=10&is_ok=1&scene=&uin=777&key=777&pass_ticket={}&wxtoken=&appmsg_token=" + self.appmsg_token + "&x5=0&f=json"
            # Turn the cookie string into a dict
            wx_dict = utils.str_to_dict(self.cookie,
                                        join_symbol='; ',
                                        split_symbol='=')
            # Request the page
            response = requests.get(page_url.format(self.biz,
                                                    self.begin_page_index * 10,
                                                    wx_dict['pass_ticket']),
                                    headers=self.headers,
                                    verify=False)
            # Turn the article list into a dict
            articles = self.article_list(response.text)
            info = Articles(self.appmsg_token, self.cookie)

            result = []
            for a in articles['list']:
                if 'app_msg_ext_info' in a:
                    ext_info = a['app_msg_ext_info']

                    # Main article of this entry (skipped when it has no URL).
                    if ext_info.get('content_url', '') != '':
                        read_num, old_like_num, like_num = info.read_like_nums(
                            ext_info.get('content_url'))
                        result.append(
                            str(self.num) + '条,' +
                            ext_info.get('title') + ',' +
                            str(read_num) + ',' + str(old_like_num) + ',' +
                            str(like_num))
                        time.sleep(2)

                    # Secondary articles bundled with the same entry.
                    for m in ext_info.get('multi_app_msg_item_list', []):
                        read_num, old_like_num, like_num = info.read_like_nums(
                            m.get('content_url'))
                        result.append(
                            str(self.num) + '条的副条,' + m.get('title') + ',' +
                            str(read_num) + ',' + str(old_like_num) + ',' +
                            str(like_num))

                        time.sleep(3)

                self.num = self.num + 1

            self.write_file(result)

            self.is_exit_or_continue()
示例#11
0
    def crawl(self, offset=0):
        """
        Crawl further articles page by page.

        The URL now contains an ``{offset}`` placeholder. It used to
        hard-code ``offset=10``, which made ``.format(offset=offset)`` a
        no-op, so the same page was fetched on every call. Paging follows the
        server-supplied ``next_offset`` while the response reports
        ``can_msg_continue == 1``, using a loop instead of one recursive call
        per page.

        :param offset: offset of the first page to request.
        :return: None
        :raises SystemExit: when the response's ``ret`` field is not 0.
        """
        # appmsg_token is temporary (original note).
        url_template = ("https://mp.weixin.qq.com/mp/profile_ext"
                        "?action=getmsg"
                        "&__biz=MjM5MTY3OTYyMQ=="
                        "&f=json"
                        "&offset={offset}"
                        "&count=10"
                        "&is_ok=1"
                        "&scene=124"
                        "&uin=777"
                        "&key=777"
                        "&pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs"
                        "&wxtoken="
                        "&appmsg_token=938_DJXrTDktnbNkCkp3oOCg-lVbWhhbemMp0EkBLA~~"
                        "&x5=0"
                        "&f=json")

        headers = """
        Host: mp.weixin.qq.com
        Accept-Encoding: gzip, deflate
        Cookie: ts_uid=3175878595; devicetype=iOS10.3.3; lang=zh_CN; pass_ticket=YZe3bIk+CZTU9OKVtY18FMsPmra+SBBXM1/JKTMgppKJ/0V3B99XDPcwlChD+3GL; version=16060120; wap_sid2=CIDUopEDElxKbHZCekJBVEJJT0FZb1VQSTZBZDlqNllaLXJpX0ptdWplVDVHTlF4UmlzdmxwNE85VWNJanJuYlhkOTJpZkxBU0ROSU1US0taNzVwZVNXNkpTY1dHYWtEQUFBfjDA6pTSBTgMQJRO; wxuin=841525760; wxtokenkey=0764bcd88cd2a131bbb205daa5f78bbd51a67bafef98327188df0288e2450363; ua_id=0QjBcaeG8vk11IhBAAAAAGHV4Cqd2-aR9dxpfQrd_L8=; pgv_pvid=7585005942; sd_cookie_crttime=1514388301744; sd_userid=79621514388301744
        Connection: keep-alive
        Accept: */*
        User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 MicroMessenger/6.6.1 NetType/WIFI Language/zh_CN
        Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MzgyODQxMQ==&devicetype=iOS10.3.3&version=16060120&lang=zh_CN&nettype=WIFI&a8scene=7&session_us=gh_c744c4d09c36&fontScale=100&pass_ticket=YZe3bIk%2BCZTU9OKVtY18FMsPmra%2BSBBXM1%2FJKTMgppKJ%2F0V3B99XDPcwlChD%2B3GL&wx_header=1
        Accept-Language: zh-cn
        X-Requested-With: XMLHttpRequest
        """
        headers = utils.str_to_dict(headers)
        while True:
            response = requests.get(url_template.format(offset=offset),
                                    headers=headers, verify=False)
            result = response.json()
            if result.get("ret") != 0:
                # Error response, e.g.
                # {"ret":-3,"errmsg":"no session","cookie_count":1}
                logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
                # `exit()` is a `site` helper meant for interactive use;
                # raise SystemExit directly, with a failure status.
                raise SystemExit(1)
            msg_list = result.get("general_msg_list")
            logger.info("抓取数据:offset=%s, data=%s", offset, msg_list)
            self.save(msg_list)
            if result.get("can_msg_continue") != 1:
                return
            offset = result.get("next_offset")
            time.sleep(2)  # pause between pages
示例#12
0
    def __init__(self):
        """Set up request headers, endpoint URLs, payload templates and a session.

        ``url_comments`` previously spelled its query parameter
        ``crsf_token``; it now uses ``csrf_token`` like the other endpoints.
        """
        # Headers parsed from the configured header text.
        self.headers = str_to_dict(config.HEADERS)

        # Endpoint URLs; `{}` placeholders are filled in by the caller.
        self.url_mp3 = "https://music.163.com/weapi/song/enhance/player/url?csrf_token="
        self.url_comments = "https://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token="
        self.url_lyric = "https://music.163.com/weapi/song/lyric?csrf_token="
        self.url_detail = 'http://music.163.com/api/song/detail?ids=[{}]'

        # JSON payload templates; `%s` placeholders are filled in by the caller.
        self.params_mp3 = '{"ids":"[%s]","br":128000,"csrf_token":""}'
        self.params_comments = '{"rid":"R_SO_4_%s","offset":"%s","total":"true","limit":"%s","csrf_token":""}'
        self.params_lyric = '{"id":"%s","lv":-1,"tv":-1,"csrf_token":""}'

        # Song page URL template.
        self.song = "https://music.163.com/#/song?id={}"

        self.request = requests.Session()
示例#13
0
    def crawl(self, offset=0):
        """
        爬取更多文章
        :return:
        """
        url = "https://mp.weixin.qq.com/mp/profile_ext?" \
              "action=getmsg&" \
              "__biz=MjM5MjAxNDM4MA==&" \
              "f=json&" \
              "offset={offset}&" \
              "count=10&" \
              "is_ok=1&" \
              "scene=126&" \
              "uin=777&" \
              "key=777&" \
              "pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl&" \
              "wxtoken=&" \
              "appmsg_token=1001_2i83IvTNI8q300WZIViVq4aBpcsB_dQcmtYBVw~~&" \
              "x5=0&" \
              "f=json".format(offset=offset)  # 请将appmsg_token和pass_ticket替换成你自己的
        headers = """
Host: mp.weixin.qq.com
Accept-Encoding: br, gzip, deflate
Cookie: devicetype=iOS12.1.4; lang=zh_CN; pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl; version=17000329; wap_sid2=CITbl7QCElxDcVVSblZ5Y3NnVkwxblE4NUZsNGNOVm9Dd05YVnprZWQtdFVSYVQxYk9Edmx0bEVBZWNuWXhyVmRXVjctczZfX1BCdVphZzNBZV9YRmJPNTYwTUF2ZWtEQUFBfjCDqNLkBTgNQJVO; wxuin=646311300; wxtokenkey=777; rewardsn=; pgv_pvid=8969838003; pgv_pvi=60331008; _scan_has_moon=1; ts_uid=3719156268; tvfe_boss_uuid=5682fb061e2059e3; sd_cookie_crttime=1545398450253; sd_userid=35071545398450253
Connection: keep-alive
Accept: */*
User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/16D57 MicroMessenger/7.0.3(0x17000321) NetType/WIFI Language/zh_CN
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MjAxNDM4MA==&scene=126&bizpsid=1553240795&sessionid=1553240795&subscene=0&devicetype=iOS12.1.4&version=17000329&lang=zh_CN&nettype=WIFI&a8scene=0&fontScale=100&pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl&wx_header=1
Accept-Language: zh-cn
X-Requested-With: XMLHttpRequest
"""
        headers = utils.str_to_dict(headers)
        response = requests.get(url, headers=headers, verify=False)
        result = response.json()
        if result.get("ret") == 0:
            msg_list = result.get("general_msg_list")
            logger.info("抓取数据:offset=%s, data=%s" % (offset, msg_list))
            self.save(msg_list)
            # 递归调用
            has_next = result.get("can_msg_continue")
            if has_next == 1:
                next_offset = result.get("next_offset")
                time.sleep(2)
                self.crawl(next_offset)
        else:
            # 错误消息
            # {"ret":-3,"errmsg":"no session","cookie_count":1}
            logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
            exit()
示例#14
0
    def crawl(self, offset=0):
        """
        Crawl further articles page by page.

        Each page is requested, logged and passed to ``self.save``. Paging
        follows the server-supplied ``next_offset`` while the response reports
        ``can_msg_continue == 1``. Implemented as a loop so that a long
        history cannot exhaust the recursion limit.

        :param offset: offset of the first page to request.
        :return: None
        :raises SystemExit: when the response's ``ret`` field is not 0.
        """
        url_template = ("https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&"
                        "__biz=MzI2OTA3MTA5Mg==&"
                        "f=json&"
                        "offset={offset}&"
                        "count=10&"
                        "is_ok=1&"
                        "scene=126&"
                        "uin=777&"
                        "key=777&"
                        "pass_ticket=Hbqp97LrbNKf6hgWuXAc%2FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF&"
                        "wxtoken=&"
                        "appmsg_token=1003_%252FsjVJ0I%252F2UkPyb6pioMmJ3EukhtGKS9urruwRw~~&"
                        "x5=0&"
                        "f=json")

        headers = """
                    Host:mp.weixin.qq.com
                    Accept-Encoding:br, gzip, deflate
                    Cookie:devicetype=iOS12.2; lang=zh_CN; pass_ticket=Hbqp97LrbNKf6hgWuXAc/oWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF; version=1700032a; wap_sid2=CKuk/6UHElxyVVlQaE9JZDczaExNRXhBai1CQVlrRGNjT0l2RTVtdVdEcldQaThkZmdldjhEVmVsNlFhZ0FNdXl5SWFCYnlsLW9FMl9CRDFSdENQcUZDOUlXSWp2ZXNEQUFBfjCe75HlBTgNQJVO; wxuin=1958728235; wxtokenkey=777; rewardsn=; pgv_pvid=9658881596
                    Connection:keep-alive
                    Accept:*/*
                    User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.3(0x17000321) NetType/WIFI Language/zh_CN
                    Referer:https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzI2OTA3MTA5Mg==&scene=126&bizpsid=0&subscene=0&devicetype=iOS12.2&version=1700032a&lang=zh_CN&nettype=WIFI&a8scene=0&fontScale=94&pass_ticket=Hbqp97LrbNKf6hgWuXAc%2FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF&wx_header=1
                    Accept-Language:zh-cn
                    X-Requested-With:XMLHttpRequest
                    """
        headers = utils.str_to_dict(headers)
        while True:
            response = requests.get(url_template.format(offset=offset),
                                    headers=headers, verify=False)
            result = response.json()
            if result.get("ret") != 0:
                # Error response, e.g.
                # {"ret":-3,"errmsg":"no session","cookie_count":1}
                logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
                # `exit()` is a `site` helper meant for interactive use;
                # raise SystemExit directly, with a failure status.
                raise SystemExit(1)
            msg_list = result.get("general_msg_list")
            logger.info("抓取数据:offset=%s, data=%s", offset, msg_list)
            self.save(msg_list)
            if result.get("can_msg_continue") != 1:
                return
            offset = result.get("next_offset")
            time.sleep(2)  # pause between pages
示例#15
0
    def get_hyperparam_summary(self):
        """
        Build a dataframe of the parameter settings used in each hpo iteration.

        One row is created per entry of ``self.hpo.hyperopt_summary`` and one
        column per key of ``self.hpo.best_params``. Each row is filled with the
        values of the parsed ``params`` entry, after which every other column
        of the summary is copied across.

        NOTE(review): rows are filled positionally, so this presumably relies
        on ``str_to_dict`` yielding values in the same order as
        ``best_params`` -- confirm.

        :return: the assembled ``pandas.DataFrame``.
        """
        # Empty frame: one row per iteration, one column per hyper-parameter.
        n_rows = len(self.hpo.hyperopt_summary)
        bayes_params_df = pd.DataFrame(
            columns=self.hpo.best_params.keys(),
            index=list(range(n_rows)))

        # Spread each iteration's parameters over the parameter columns.
        for row, raw_params in enumerate(self.hpo.hyperopt_summary['params']):
            parsed = str_to_dict(raw_params)
            bayes_params_df.loc[row, :] = list(parsed.values())

        # Carry over every remaining summary column unchanged.
        other_columns = [
            name for name in self.hpo.hyperopt_summary.columns.tolist()
            if name != "params"
        ]
        for name in other_columns:
            bayes_params_df[name] = self.hpo.hyperopt_summary[name]

        return bayes_params_df
示例#16
0
    def crawl(self, offset=0):
        """
        Crawl further articles page by page.

        Each page is requested, logged and passed to ``self.save``. Paging
        follows the server-supplied ``next_offset`` while the response reports
        ``can_msg_continue == 1``. Implemented as a loop so that a long
        history cannot exhaust the recursion limit.

        :param offset: offset of the first page to request.
        :return: None
        :raises SystemExit: when the response's ``ret`` field is not 0.
        """
        # appmsg_token is temporary as well (original note).
        url_template = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=MjM5MTA5NjAyMA==&f=json&offset={offset}&count=10&is_ok=1&scene=126&uin=777&key=777&pass_ticket=mG5PIqAHg2jAY%2B6NQAJtsyEedBCeybGVZzm4RzZefynPWKfgdSyywrnG6soSsQ0w&wxtoken=&appmsg_token=996_7dDcPuBAmv2HxZhYfS74socUK_MniYtv4x-VIg~~&x5=0&f=json"

        headers = """
        Host: mp.weixin.qq.com
        Connection: keep-alive
        User-Agent: Mozilla/5.0 (Linux; Android 4.4.2; OPPO R11 Build/NMF26X) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36 MMWEBID/2090 MicroMessenger/7.0.3.1400(0x27000334) Process/toolsmp NetType/WIFI Language/zh_CN
        X-Requested-With: XMLHttpRequest
        Accept: */*
        Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MTA5NjAyMA==&scene=126&bizpsid=0&devicetype=android-19&version=27000334&lang=zh_CN&nettype=WIFI&a8scene=3&pass_ticket=mG5PIqAHg2jAY%2B6NQAJtsyEedBCeybGVZzm4RzZefynPWKfgdSyywrnG6soSsQ0w&wx_header=1
        Accept-Encoding: gzip,deflate
        Accept-Language: zh-AU,en-US;q=0.8
        Cookie: rewardsn=; wxtokenkey=777; wxuin=2711770575; devicetype=android-19; version=27000334; lang=zh_CN; pass_ticket=mG5PIqAHg2jAY+6NQAJtsyEedBCeybGVZzm4RzZefynPWKfgdSyywrnG6soSsQ0w; wap_sid2=CM+riY0KElxjX1BQWTRrcjdFb2E3ZlU0V0kxbUZvdDRhNWl4ZHZkX3JRdzBPb2RaNkhOX05QcF9jQUFKd003X09hMUZJZDJVX2pPbDZQMW12QVdEUmhmUksxLU1CLVFEQUFBfjDo5JrjBTgNQJVO
        """

        headers = utils.str_to_dict(headers)
        while True:
            response = requests.get(url_template.format(offset=offset),
                                    headers=headers, verify=False)
            result = response.json()
            if result.get("ret") != 0:
                # Error response, e.g.
                # {"ret":-3,"errmsg":"no session","cookie_count":1}
                logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
                # `exit()` is a `site` helper meant for interactive use;
                # raise SystemExit directly, with a failure status.
                raise SystemExit(1)
            msg_list = result.get("general_msg_list")
            logger.info("抓取数据:offset=%s, data=%s", offset, msg_list)
            self.save(msg_list)
            if result.get("can_msg_continue") != 1:
                return
            offset = result.get("next_offset")
            time.sleep(2)  # pause between pages
示例#17
0
 def parse_detail(self, id, max_retries=3):
     """
     Fetch the name and first artist of the song with the given id.

     The original retried by calling itself from a bare ``except:``. That
     never stopped on a persistent failure, swallowed
     ``KeyboardInterrupt``/``SystemExit``, and threw away the result of a
     successful retry. Retries are now bounded and a successful retry's
     result is returned.

     :param id: song id substituted into ``self.url_detail``.
     :param max_retries: maximum number of attempts (at least one is made).
     :return: ``{"name": ..., "singer": ...}`` on success; ``None`` when the
         response lists no songs or every attempt failed.
     """
     attempts = max(1, max_retries)
     for attempt in range(1, attempts + 1):
         try:
             logging.info("解析歌曲id为{}的信息".format(id))
             r = requests.get(self.url_detail.format(id),
                              headers=str_to_dict(config.HEADERS))
             content = json.loads(r.text)
             if not content["songs"]:
                 logging.info("没有歌曲id为{}信息".format(id))
                 return None
             songs = content["songs"][0]
             data = {
                 "name": songs["name"],
                 # Only the first listed artist is reported.
                 "singer": songs["artists"][0]['name'],
             }
             logging.info("歌曲名:{}-歌手名:{}".format(data["name"],
                                                 data["singer"]))
             return data
         except Exception:
             # Network, decoding and schema errors are all retried; the
             # traceback is logged so failures are no longer silent.
             logging.exception(
                 "parse_detail failed for song id %s (attempt %d/%d)",
                 id, attempt, attempts)
     return None
    def test_str_to_dict(self):
        """Check ``utils.str_to_dict`` on degenerate, flat and nested inputs.

        Uses ``assertEqual``/``assertIsNone`` instead of the ``assertEquals``
        alias, which was deprecated and is removed in Python 3.12.
        """
        # Basic behavior for poorly formed inputs
        self.assertIsNone(utils.str_to_dict(None))
        self.assertIsNone(utils.str_to_dict(""))
        self.assertEqual({}, utils.str_to_dict("no_vals"))

        # Unnested arguments lists are parsed
        expected_dict_simple = {
            "url": "http://foobar.com/justin",
            "args": "foo",
            "otherKey": "42"
        }
        self.assertEqual(expected_dict_simple, utils.str_to_dict(self.simple_url_input_string))

        # Nested List Behavior
        expected_dict_nested = {
            "url": "http://foobar.com/justin",
            "args": ["foo", "justin", "bar"],
            "otherKey": "AnotherKey",
            "finalKey": "42"
        }
        self.assertEqual(expected_dict_nested, utils.str_to_dict(self.nested_url_input_string))
示例#19
0
    def update_post(post):
        """Refresh the read/like/reward counters of a single article.

        ``post`` is one record taken from MongoDB.  Its counters are
        overwritten with the values returned by the ``getappmsgext`` endpoint
        and the record is saved again.  When the response carries no
        ``appmsgstat`` entry a warning with the raw response text is logged
        and ``exit()`` is called.
        """
        query_params = {
            '__biz': 'MjM5MzgyODQxMQ==',
            'appmsg_type': '9',
            'mid': '2650367540',
            'sn': 'ef9c6353a9255dbc00e2beac7f449dad',
            'idx': '1',
            'scene': '27',
            'title':
            'Python%E5%A5%87%E6%8A%80%E6%B7%AB%E5%B7%A7%EF%BC%8C%E7%9C%8B%E7%9C%8B%E4%BD%A0%E7%9F%A5%E9%81%93%E5%87%A0%E4%B8%AA',
            'ct': '1511410410',
            'abtest_cookie':
            'AwABAAoADAANAAcAJIgeAGSIHgD8iB4A7IkeAAaKHgAPih4AU4oeAAAA',
            'devicetype': 'android-24',
            'version': '/mmbizwap/zh_CN/htmledition/js/appmsg/index3a9713.js',
            'f': 'json',
            'r': '0.04959653583814139',
            'is_need_ad': '0',
            'comment_id': '1411699821',
            'is_need_reward': '1',
            'both_ad': '0',
            'reward_uin_count': '24',
            'msg_daily_idx': '1',
            'is_original': '0',
            'uin': '777',
            'key': '777',
            'pass_ticket':
            'zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0',
            'wxtoken': '1922467438',
            'clientversion': '26051732',
            'appmsg_token':
            '938_0n0in1TAhMHhtZ7zXIOyxTxYXZEFW7ez7tXTmochNzKXa19P3wxK6-C-yM1omM_h7gSMZJmyv7glw98g',
            'x5': '1'
        }  # remember to use the latest appmsg_token

        # Undo the HTML escaping of the stored article URL, keep only its
        # query string, and let those parameters override the defaults above.
        article_query = urlsplit(html.unescape(post.content_url)).query
        query_params.update(utils.str_to_dict(article_query, "&", "="))

        form_body = "is_only_read=1&req_id=0414NBNjylwrVHDydtl3ufse&pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0&is_temp_url=0"
        form_data = utils.str_to_dict(form_body, "&", "=")

        # Latest header values, obtained through Fiddler.
        raw_headers = """
        Host: mp.weixin.qq.com
        Connection: keep-alive
        Content-Length: 137
        Origin: https://mp.weixin.qq.com
        X-Requested-With: XMLHttpRequest
        User-Agent: Mozilla/5.0 (Linux; Android 7.0; MI 5 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043804 Mobile Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN
        Content-Type: application/x-www-form-urlencoded; charset=UTF-8
        Accept: */*
        Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367558&idx=1&sn=b8be74571b14f78d80c062ded89b2d4a&chksm=be9cdd1289eb5404f1b423a135ce5adf5d2cf802a09014b6f407c96b40fdc88ce6e4e0ed8665&scene=27&ascene=0&devicetype=android-24&version=26051732&nettype=WIFI&abtest_cookie=AwABAAoADAANAAcAJIgeAGSIHgD8iB4A7IkeAAaKHgAPih4AU4oeAAAA&lang=zh_CN&pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0&wx_header=1
        Accept-Encoding: gzip, deflate
        Accept-Language: zh-CN,en-US;q=0.8
        Cookie: pgv_pvi=1680185344; pgv_si=s6583349248; rewardsn=9a0c2a83b30e5994c162; wxtokenkey=71bdfbb7fad39d08d2eb2dece479971297781391293c3c913e74f0f1c4c16971; wxuin=528927841; devicetype=android-24; version=26051732; lang=zh_CN; pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0; wap_sid2=COGYm/wBElxvYUdHZDdpUlExT2h3MnVTMS1nendPdUlZZ1BsU2h3ZUhibGNNQTRMS0t1dXhPQS1YUHNZNGdhQXk2Z0F0WkF0U3dGVUlYNnBHdlVTVEk0aHBOMktXS29EQUFBfjCyjLfSBTgNQAE=
        Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=043804&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= MI5 &RL=1080*1920&OS=7.0&API=24
        Q-GUID: ed3467186e1125bb3d28234d13b788cb
        Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
                """
        request_headers = utils.str_to_dict(raw_headers)

        response = requests.post("https://mp.weixin.qq.com/mp/getappmsgext",
                                 params=query_params,
                                 data=form_data,
                                 headers=request_headers,
                                 verify=False)

        payload = response.json()
        stats = payload.get("appmsgstat")
        if not stats:
            logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s" % response.text)
            exit()
        else:
            post['read_num'] = stats.get("read_num")
            post['like_num'] = stats.get("like_num")
            post['reward_num'] = payload.get("reward_total_count")
            post['u_date'] = datetime.now()
            summary = (post.title, post['read_num'], post['like_num'],
                       post['reward_num'])
            logger.info("「%s」read_num: %s like_num: %s reward_num: %s" % summary)
            post.save()
示例#20
0
    def update_post(self, post):
        """Update the read, like and reward counters of ``post`` and save it.

        Posts a captured request to the ``getappmsgext`` endpoint.  The query
        parameters found in ``post.content_url`` override the hard-coded
        defaults.  When the response has an ``appmsgstat`` entry the counters
        and ``u_date`` are written to ``post`` and ``post.save()`` is called;
        otherwise a warning containing the raw response text is logged.

        :param post: record to update; must expose ``content_url``, ``title``,
            item assignment and ``save()``.
        """
        data_url_params = {
            '__biz': 'MzI2OTA3MTA5Mg==',
            'appmsg_type': '9',
            'mid': '2651793256',
            'sn': '91666add0a2f4dd320fa8c6065455cc4',
            'idx': '1',
            'scene': '21',
            'title':
            '%25E5%2588%2586%25E5%25BC%2580%25E5%25A4%259A%25E5%25B9%25B4%25E7%259A%2584%25E6%2583%2585%25E4%25BE%25A3%25E5%2586%258D%25E8%25A7%2581%25E9%259D%25A2%25EF%25BC%259A%25E8%25B6%258A%25E6%2598%25AF%25E4%25B8%258D%25E7%2594%2598%25E5%25BF%2583%25E7%259A%2584%25E7%2588%25B1%25E6%2583%2585%25EF%25BC%258C%25E8%25B6%258A%25E8%25A6%2581%25E4%25BA%25B2%25E6%2589%258B%25E4%25BA%2586%25E6%2596%25AD',
            'ct': '1553356619',
            'abtest_cookie':
            'BAABAAoACwASABMABQAjlx4AVpkeAM2ZHgDZmR4A3JkeAAAA',
            'devicetype': 'iOS12.2',
            'version': '1700032a',
            'f': 'json',
            'r': '0.6048423341041006',
            'is_need_ad': '1',
            'both_ad': '0',
            'reward_uin_count': '24',
            'msg_daily_idx': '1',
            'is_original': '0',
            'uin': '777',
            'key': '777',
            'pass_ticket':
            'Hbqp97LrbNKf6hgWuXAc%25252FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF',
            'wxtoken': '204390160',
            'clientversion': '26060030',
            'appmsg_token':
            '1003_%252FsjVJ0I%252F2UkPyb6pioMmJ3EukhtGKS9urruwRw~~',
            'x5': '1'
        }

        # Undo HTML escaping of the article URL
        content_url = html.unescape(post.content_url)
        # Keep only the query-string part of content_url
        content_url_params = urlsplit(content_url).query
        # Convert the query string into a dict
        content_url_params = utils.str_to_dict(content_url_params, "&", "=")
        # Article-specific values override the captured defaults
        data_url_params.update(content_url_params)
        body = "is_only_read=1&req_id=2900i1sqRlQwikp0KEVJieW4&pass_ticket=Hbqp97LrbNKf6hgWuXAc%25252FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF&is_temp_url=0"
        data = utils.str_to_dict(body, "&", "=")

        headers = """
                    Host:mp.weixin.qq.com
                    Accept-Encoding:br, gzip, deflate
                    Cookie:devicetype=iOS12.2; lang=zh_CN; pass_ticket=Hbqp97LrbNKf6hgWuXAc/oWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF; version=1700032a; wap_sid2=CKuk/6UHElxyVVlQaE9JZDczaExNRXhBai1CQVlrRGNjT0l2RTVtdVdEcldQaThkZmdldjhEVmVsNlFhZ0FNdXl5SWFCYnlsLW9FMl9CRDFSdENQcUZDOUlXSWp2ZXNEQUFBfjCe75HlBTgNQJVO; wxuin=1958728235; wxtokenkey=777; rewardsn=; pgv_pvid=9658881596
                    Connection:keep-alive
                    Accept:*/*
                    User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.3(0x17000321) NetType/WIFI Language/zh_CN
                    Referer:https://mp.weixin.qq.com/s?__biz=MzI2OTA3MTA5Mg==&mid=2651793256&idx=1&sn=91666add0a2f4dd320fa8c6065455cc4&chksm=f11e60f3c669e9e5a03ee6124e103d84f2fe802d464c8ca35c9259d728fc1ab199c7713fd7e3&scene=4&subscene=126&ascene=0&devicetype=iOS12.2&version=1700032a&nettype=WIFI&abtest_cookie=BAABAAoACwASABMABQAjlx4AVpkeAM2ZHgDZmR4A3JkeAAAA&lang=zh_CN&fontScale=94&pass_ticket=Hbqp97LrbNKf6hgWuXAc%2FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF&wx_header=1
                    Accept-Language:zh-cn
                    X-Requested-With:XMLHttpRequest
        """

        headers = utils.str_to_dict(headers)

        url = "https://mp.weixin.qq.com/mp/getappmsgext"
        r = requests.post(url,
                          data=data,
                          verify=False,
                          params=data_url_params,
                          headers=headers)

        result = r.json()
        # Look the statistics entry up once instead of once per field.
        stats = result.get("appmsgstat")
        if stats:
            post['read_num'] = stats.get("read_num")
            post['like_num'] = stats.get("like_num")
            post['reward_num'] = result.get("reward_total_count")
            post['u_date'] = datetime.now()
            logger.info("「%s」read_num: %s like_num: %s reward_num: %s" %
                        (post.title, post['read_num'], post['like_num'],
                         post['reward_num']))
            post.save()
        else:
            logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,data=%s" % r.text)
示例#21
0
    def update_post(post):
        # 文章url参数
        # title参数不用管,没有影响
        data_url_params_url = "__biz=MjM5MzgyODQxMQ==&" \
                              "appmsg_type=9&" \
                              "mid=2650367799&" \
                              "sn=afd47703d8bfd41f547c3916a25a4922&" \
                              "idx=1&" \
                              "scene=38&" \
                              "title=%E6%8E%A8%E8%8D%90%E5%87%A0%E4%B8%AA%E5%85%AC%E4%BC%97%E5%8F%B7%EF%BC%88%E6%96%87%E6%9C%AB%E5%BD%A9%E8%9B%8B%EF%BC%89&" \
                              "ct=1515730434&" \
                              "abtest_cookie=BAABAAgACgAMAA0ACgCehh4AlYoeAKCKHgCxih4AuooeAL+KHgDGih4AyooeANiKHgDdih4AAAA=&devicetype=android-24&" \
                              "version=/mmbizwap/zh_CN/htmledition/js/appmsg/index3b1748.js&" \
                              "f=json&" \
                              "r=0.32110940912764097&" \
                              "is_need_ad=1&" \
                              "comment_id=4119591256&" \
                              "is_need_reward=0&" \
                              "both_ad=0&" \
                              "reward_uin_count=0&" \
                              "msg_daily_idx=1&" \
                              "is_original=0&" \
                              "uin=777&" \
                              "key=777&" \
                              "pass_ticket=IlqxjPEmgZu1FF%25252BoLPoEacmQD%25252Bpo%25252BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&" \
                              "wxtoken=1044847233&devicetype=android-24&" \
                              "clientversion=26060135&" \
                              "appmsg_token=939_faosuyskmbgJ7K6WTQdsnO_RZ5I4iaY5qFSIpAp9fCvhtU1OKrI8rZ2vJHTu_c40JGJqPPbMvqn-UG1p&" \
                              "x5=1&f=json"

        data_url_params = utils.str_to_dict(data_url_params_url, "&", "=")
        # url转义处理,文档:https://www.cnblogs.com/xuxn/archive/2011/08/12/parse-html-escape-characters-in-python.html
        content_url = html.unescape(post.content_url)
        # 分拆url,提取query的信息,资料地址:http://blog.51cto.com/yucanghai/1695439
        content_url_params = urlsplit(content_url).query
        # 将参数转化为字典
        content_url_params = utils.str_to_dict(content_url_params, "&", "=")
        # 使用字典update方法,更新文章url的参数
        data_url_params.update(content_url_params)
        body = "is_only_read=1&" \
               "req_id=03230SZyTR8kQlPVkKwxbt1A&" \
               "pass_ticket=mXHYjLnkYux1rXx8BxNrZpgW4W%25252ByLZxcuvpDWlxbBrjvJo3ECB%25252BckDAsy%25252FTJJK6P&" \
               "is_temp_url=0"

        # 将body的值转化为字典
        data = utils.str_to_dict(body, "&", "=")

        headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 155
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 7.0; Mi Note 2 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043807 Mobile Safari/537.36 MicroMessenger/6.6.1.1220(0x26060135) NetType/WIFI Language/zh_CN
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367799&idx=1&sn=afd47703d8bfd41f547c3916a25a4922&chksm=be9cdc6389eb5575f75e424c2351969e3d6b066cb1d60466d4865381f8de5d41867ca2a526b1&scene=38&ascene=0&devicetype=android-24&version=26060135&nettype=WIFI&abtest_cookie=BAABAAgACgAMAA0ACgCehh4AlYoeAKCKHgCxih4AuooeAL%2BKHgDGih4AyooeANiKHgDdih4AAAA%3D&lang=zh_CN&pass_ticket=IlqxjPEmgZu1FF%2BoLPoEacmQD%2Bpo%2BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: sd_userid=18861489362350793; sd_cookie_crttime=1489362350793; tvfe_boss_uuid=2e9655075626eb61; RK=CaObR3LaR5; gid=82fdcd05-0c3d-415b-bcdd-87cbe92d7cc4; pgv_pvi=2601810944; pac_uid=0_932f1aa5f4e35; ua_id=kE9kVdDwMCCFio0aAAAAAOmF24i23FqG2pg7TGWEqiQ=; ptcz=20c41fc2cfc570cec0dcf5cf052e240af82cfe5b2839f4ae1280b7e3d71a81cb; pt2gguin=o0364580936; o_cookie=364580936; pgv_pvid=8564607811; rewardsn=; wxtokenkey=90c34bc971c6ce5a8df3de87ab70143e0d6711e65604c5feedb0b6b425311912; wxuin=2076741001; devicetype=android-24; version=26060135; lang=zh_CN; pass_ticket=IlqxjPEmgZu1FF+oLPoEacmQD+po+SAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy; wap_sid2=CImbot4HElw4RE5GUGcyWFFQV1drdFJ5cXpZMG5yMm9qWWtwZFRoQml0TFZUVnpXVGU1RThIS3hJejhsSzFZUWw3cTdGbXVjTTdWZkpVVGVNTmhCR3l3U2FBMURGYXNEQUFBfjCzw/HSBTgNQAE=
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.6.1&TBSVC=43602&CO=BK&COVC=043807&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= MiNote2 &RL=1080*1920&OS=7.0&API=24
Q-GUID: 2af3a0dfa8f770bff552a88a13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b      
"""
        headers = utils.str_to_dict(headers)
        # 文章url
        data_url = "https://mp.weixin.qq.com/mp/getappmsgext"
        r = requests.post(data_url,
                          data=data,
                          verify=False,
                          params=data_url_params,
                          headers=headers)

        # 保存数据
        result = r.json()
        if result.get("appmsgstat"):
            # 阅读数
            post['read_num'] = result.get("appmsgstat").get("read_num")
            # 点赞数
            post['like_num'] = result.get("appmsgstat").get("like_num")
            # 赞赏数
            post['reward_num'] = result.get("appmsgstat").get(
                "reward_total_count")
            #
            post['u_date'] = datetime.now()
            logger.info("%s read_num: %s like_num: %s reward_num: %s" %
                        (post.title, post['read_num'], post['like_num'],
                         post['reward_num']))
            post.save()
        else:
            logger.warning(u"出错")
            exit()
示例#22
0
    def crawl(self, offset=0):

        # 如果运行失败,请修改url某些关键值和headers,下面有对比
        url = "https://mp.weixin.qq.com/mp/profile_ext?" \
              "action=getmsg&" \
              "__biz=MjM5MzgyODQxMQ==&" \
              "f=json&" \
              "offset={offset}&" \
              "count=10&" \
              "is_ok=1&" \
              "scene=124&" \
              "uin=777&" \
              "key=777&" \
              "pass_ticket=IlqxjPEmgZu1FF%2BoLPoEacmQD%2Bpo%2BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&wxtoken=&" \
              "appmsg_token=939_hihttu0x83ppBkaiAcL5WMcghKBKviEePk53yg~~&x5=1&f=json".format(offset=offset)

        url2 = "https://mp.weixin.qq.com/mp/profile_ext?" \
               "action=getmsg&" \
               "__biz=MjM5MzgyODQxMQ==&" \
               "f=json&" \
               "offset={offset}&" \
               "count=10&" \
               "is_ok=1&" \
               "scene=124&" \
               "uin=777&key=777&" \
               "pass_ticket=IlqxjPEmgZu1FF%" \
               "2BoLPoEacmQD%" \
               "2Bpo%2BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&wxtoken=&" \
               "appmsg_token=939_oSn%" \
               "252BfxCfRbQkDYBJvf52NedMZCifsAekP2Am7g~~&" \
               "x5=1&f=json".format(offset=offset)

        url1 = "https://mp.weixin.qq.com/mp/profile_ext?" \
               "action=getmsg&" \
               "__biz=MjM5MzgyODQxMQ==&" \
               "f=json&" \
               "offset={offset}&" \
               "count=10&is_ok=1&" \
               "scene=124&" \
               "uin=777&key=777&" \
               "pass_ticket=9%" \
               "2F6LknoxSlydxsdNc5ecV9AmhvAln77E5CSdIKk%2BZoSh7xDCXPx6RXPty5pMV7Vz&wxtoken=&" \
               "appmsg_token=939_5uFg4avbgDmzVEO0qQT_gALmbvliqJeaQDCp4g~~&" \
               "x5=1&" \
               "f=json".format(offset=offset)

        headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 7.0; Mi Note 2 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043807 Mobile Safari/537.36 MicroMessenger/6.6.1.1220(0x26060135) NetType/WIFI Language/zh_CN
Accept: */*
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MzgyODQxMQ==&scene=124&devicetype=android-24&version=26060135&lang=zh_CN&nettype=WIFI&a8scene=3&pass_ticket=IlqxjPEmgZu1FF%2BoLPoEacmQD%2Bpo%2BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: sd_userid=18861489362350793; sd_cookie_crttime=1489362350793; tvfe_boss_uuid=2e9655075626eb61; RK=CaObR3LaR5; gid=82fdcd05-0c3d-415b-bcdd-87cbe92d7cc4; pgv_pvi=2601810944; pac_uid=0_932f1aa5f4e35; ua_id=kE9kVdDwMCCFio0aAAAAAOmF24i23FqG2pg7TGWEqiQ=; ptcz=20c41fc2cfc570cec0dcf5cf052e240af82cfe5b2839f4ae1280b7e3d71a81cb; pt2gguin=o0364580936; o_cookie=364580936; pgv_pvid=8564607811; rewardsn=; wxtokenkey=40c4e47bfcfbdb3be81209fce95dd5ac82cb3adacf5b6b224f97bba30c96dabb; wxuin=2076741001; devicetype=android-24; version=26060135; lang=zh_CN; pass_ticket=IlqxjPEmgZu1FF+oLPoEacmQD+po+SAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy; wap_sid2=CImbot4HElxoZThTLWNjNnBYUHcxMHU1c2ROU21SalFmSHl3ajlrczNJRU5UVVJfUDNjeEtaZVVqbDBYTTlqU2xTVVpqcXB5cGdUOGlSS3c1RUpMX2lMQ21QNk1nS3NEQUFBfjCu0vHSBTgMQJRO
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.6.1&TBSVC=43602&CO=BK&COVC=043807&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= MiNote2 &RL=1080*1920&OS=7.0&API=24
Q-GUID: 2af3a0dfa8f770bff552a88a13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
"""
        headers = utils.str_to_dict(headers)
        response = requests.get(url, headers=headers, verify=False)
        result = response.json()
        if result.get("ret") == 0:
            msg_list = result.get("general_msg_list")
            logger.info("抓取数据:offset=%s, data=%s" % (offset, msg_list))
            # 保存文章数据
            self.save(msg_list)
            has_next = result.get("can_msg_continue")
            if has_next == 1:
                # 递归
                next_offset = result.get("next_offset")
                time.sleep(2)
                self.crawl(next_offset)
        else:
            # 错误消息
            logging.error("无法正确获得内容")
            exit()
示例#23
0
    def update_post(post):
        """
        post 参数是从mongodb读取出来的一条数据
        稍后就是对这个对象进行更新保存
        :param post:
        :return:
        """

        data_url = "https://mp.weixin.qq.com/mp/getappmsgext?" \
                   "f=json&uin=777&" \
                   "key=777&" \
                   "pass_ticket=mxvGMDk3GtQtB%25252Fz7%25252FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%25252BxoQE91&" \
                   "wxtoken=777&" \
                   "devicetype=android-19&" \
                   "cientversion=26051732&" \
                   "appmsg_token=954_MOJLemLCd3Gz0Aal3PPp0Qmza2kN9ibwtA0IyosHLQlOgxiRlgtrWkHFwC4lgSeOA0FmtshLamC-6ysv&x5=1&" \
                   "f=json"   #后续需要加载文章的url,刷新appmsg_token和pass_ticket
        # url转义处理
        content_url = html.unescape(post.content_url)
        # 截取content_url的查询参数部分
        content_url_params = urlsplit(content_url).query
        # 将参数转化为字典类型
        content_url_params = utils.str_to_dict(content_url_params, "&", "=")
        body = 'r=0.9146884263687687&__biz=MzUzNTcwNDkxNA%3D%3D&appmsg_type=9&mid=2247484203&sn=a0dbce888297a156a4e9c0542094e286&idx=1&scene=38&title=%25E5%25A4%259A%25E5%25B0%2591%25E4%25BA%25BA%25E5%259B%25A0%25E4%25B8%25BA%25E7%2594%25B5%25E5%25BD%25B1%25E5%258E%25BB%25E4%25BA%2586%25E8%25A5%25BF%25E8%2597%258F&ct=1524824855&abtest_cookie=BAABAAoACwAMAA0ABwA8ix4Ad4seAPKMHgBkjR4Af40eACaOHgAxjh4AAAA%3D&devicetype=android-19&version=%2Fmmbizwap%2Fzh_CN%2Fhtmledition%2Fjs%2Fappmsg%2Findex3d6703.js&is_need_ticket=0&is_need_ad=1&comment_id=0&is_need_reward=0&both_ad=0&reward_uin_count=0&send_time=&msg_daily_idx=1&is_original=0&is_only_read=1&req_id=0100A6NrRHtU5GqTevtLXpPn&pass_ticket=mxvGMDk3GtQtB%25252Fz7%25252FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%25252BxoQE91&is_temp_url=0&item_show_type=undefined'
        data = utils.str_to_dict(body, "&", "=")
        data.update(content_url_params)  #将content_url中的参数更新到body参数中

        # 通过Fiddler 获取 最新的值
        headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 796      
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 4.4.2; GT-I9508 Build/KOT49H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044028 Mobile Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MzUzNTcwNDkxNA==&mid=2247484203&idx=1&sn=a0dbce888297a156a4e9c0542094e286&chksm=fa802716cdf7ae001b3e2b539fc790b997bee12233d1f5a6cb5add4b6a9df979fd586d6b7191&scene=38&ascene=0&devicetype=android-19&version=26051732&nettype=WIFI&abtest_cookie=BAABAAoACwAMAA0ABwA8ix4Ad4seAPKMHgBkjR4Af40eACaOHgAxjh4AAAA%3D&lang=zh_CN&pass_ticket=mxvGMDk3GtQtB%2Fz7%2FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%2BxoQE91&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: rewardsn=; wxtokenkey=777; wxuin=2077110005; devicetype=android-19; version=26051732; lang=zh_CN; pass_ticket=mxvGMDk3GtQtB/z7/amxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf+xoQE91; wap_sid2=CPXduN4HElxKRHdkalRfS3hya3FLamVMa0FzMFNCeFR5eV95d0ZQZzdZQUw4TjFYMHY4RVFiQWtaRVN5TlAwTEEtTkNWd3NURnJ1V2VCUTRXemdSMjdXZzN5Z3E2Ym9EQUFBfjDhgZ3XBTgNQAE=
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=044028&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= GT-I9508 &RL=1080*1920&OS=4.4.2&API=19
Q-GUID: dfbda8a63deb21e36f9d6f1113b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b

        """

        headers = utils.headers_to_dict(headers)

        r = requests.post(data_url,
                          data=data,
                          verify=False,
                          params=data_url_params,
                          headers=headers)

        result = r.json()
        if result.get("appmsgstat"):
            post['read_num'] = result.get("appmsgstat").get("read_num")
            post['like_num'] = result.get("appmsgstat").get("like_num")
            post['reward_num'] = result.get(
                "reward_total_count")  #只有文章有赞赏的时候才会有此字段
            post['u_date'] = datetime.now()
            logger.info("「%s」read_num: %s like_num: %s reward_num: %s" %
                        (post.title, post['read_num'], post['like_num'],
                         post['reward_num']))
            post.save()
        else:
            logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s" % r.text)
            exit()
示例#24
0
if __name__ == '__main__':
    # Manual driver: builds a crawler plus the URL template and headers of a
    # captured history request.  The crawl call itself is left disabled.
    crawler = WeiXinCrawler()
    offset = 10
    # Replace appmsg_token and pass_ticket with your own values.
    url = ("https://mp.weixin.qq.com/mp/profile_ext?"
           "action=getmsg&"
           "__biz=MjM5MjAxNDM4MA==&"
           "f=json&"
           "offset={offset}&"
           "count=10&"
           "is_ok=1&"
           "scene=126&"
           "uin=777&"
           "key=777&"
           "pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl&"
           "wxtoken=&"
           "appmsg_token=1001_2i83IvTNI8q300WZIViVq4aBpcsB_dQcmtYBVw~~&"
           "x5=0&"
           "f=json")
    headers = utils.str_to_dict("""
Host: mp.weixin.qq.com
Accept-Encoding: br, gzip, deflate
Cookie: devicetype=iOS12.1.4; lang=zh_CN; pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl; version=17000329; wap_sid2=CITbl7QCElxDcVVSblZ5Y3NnVkwxblE4NUZsNGNOVm9Dd05YVnprZWQtdFVSYVQxYk9Edmx0bEVBZWNuWXhyVmRXVjctczZfX1BCdVphZzNBZV9YRmJPNTYwTUF2ZWtEQUFBfjCDqNLkBTgNQJVO; wxuin=646311300; wxtokenkey=777; rewardsn=; pgv_pvid=8969838003; pgv_pvi=60331008; _scan_has_moon=1; ts_uid=3719156268; tvfe_boss_uuid=5682fb061e2059e3; sd_cookie_crttime=1545398450253; sd_userid=35071545398450253
Connection: keep-alive
Accept: */*
User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/16D57 MicroMessenger/7.0.3(0x17000321) NetType/WIFI Language/zh_CN
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MjAxNDM4MA==&scene=126&bizpsid=1553240795&sessionid=1553240795&subscene=0&devicetype=iOS12.1.4&version=17000329&lang=zh_CN&nettype=WIFI&a8scene=0&fontScale=100&pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl&wx_header=1
Accept-Language: zh-cn
X-Requested-With: XMLHttpRequest""")
    # crawler.crawl(url, headers, 10)
示例#25
0
    def receive_and_process_new_question(self):
        """Receive data from the client and make it the current question.

        The received data is parsed with ``utils.str_to_dict`` and stored as
        ``self.current_question``; its ``"question"`` entry is handed to
        ``self.set_question`` and an empty string is inserted into the answer
        text widget at ``INSERT``.
        """
        self.current_question = utils.str_to_dict(self.client.recv())

        question_text = self.current_question["question"]
        self.set_question(question_text)
        self.ui.txt_answer.insert(INSERT, "")
    def update_post(post):
        """
        post 参数是从mongodb读取出来的一条数据
        稍后就是对这个对象进行更新保存
        :param post:
        :return:
        """

        # 这个参数是我从Fiddler中拷贝出 URL,然后提取出查询参数部分再转换成字典对象
        # 稍后会作为参数传给request.post方法
        data_url_params = {
            '__biz': 'MzA3NTY3NjUzMg==',
            'appmsg_type': '9',
            'mid': '2652567742',
            'sn': 'c1e80723b9982b85e825e1070dff2cf3',
            'idx': '1',
            'scene': '38',
            'title':
            '%E9%80%89%E6%88%91%E6%B2%A1%E9%94%99%EF%BC%81%E5%B7%9D%E5%A4%A7%E9%94%A6%E6%B1%9F%E6%8B%8D%E4%BA%86%E6%8B%8D%E4%BD%A0',
            'ct': '1595660850',
            'abtest_cookie': '',
            'devicetype': 'Windows 10 x64',
            'version': '62090529',
            'f': 'json',
            'r': '0.815280901005569',
            'is_need_ad': '0',
            'comment_id': '1443991147038441474',
            'is_need_reward': '1',
            'both_ad': '0',
            'reward_uin_count': '24',
            'msg_daily_idx': '1',
            'is_original': '0',
            'uin': 'MzAzMDI4NDA4OQ==',
            'key':
            '1682b2315ca9ee496ced1069afbde4e355182c3cc85404049b57d7403fc7434524ed3579f779a4426cfbcd977a1047432db2e171f317e5c2f9ddea95c1f462e8f24e5590746562ab482cd133b73a7682',
            'pass_ticket':
            'ZaU5UjhVDMVvsOzkK4i3B/jZubj7dI36zAoLaLuE52eYyRmdeP68+Ffs6uiXclfr',
            'wxtoken': '777',
            'clientversion': '62090529',
            'appmsg_token':
            '1072_WbRi1PaTNQtHiysKbI2hg1LUqGTnT1OboYeqO0MRHgalIVUArGz-Q8bsEnuoNL3swxRUvL-HSZUgMxFT',
            'x5': '0'
        }  # appmsg_token 记得用最新的

        # url转义处理
        content_url = html.unescape(post.content_url)
        # 截取content_url的查询参数部分
        content_url_params = urlsplit(content_url).query
        # 将参数转化为字典类型
        content_url_params = utils.str_to_dict(content_url_params, "&", "=")
        # 更新到data_url
        data_url_params.update(content_url_params)
        body = "r=0.815280901005569&__biz=MzA3NTY3NjUzMg%3D%3D&appmsg_type=9&mid=2652567742&sn=c1e80723b9982b85e825e1070dff2cf3&idx=1&scene=38&title=%25E9%2580%2589%25E6%2588%2591%25E6%25B2%25A1%25E9%2594%2599%25EF%25BC%2581%25E5%25B7%259D%25E5%25A4%25A7%25E9%2594%25A6%25E6%25B1%259F%25E6%258B%258D%25E4%25BA%2586%25E6%258B%258D%25E4%25BD%25A0&ct=1595660850&abtest_cookie=&devicetype=Windows%2010%20x64&version=62090529&is_need_ticket=0&is_need_ad=0&comment_id=1443991147038441474&is_need_reward=0&both_ad=0&reward_uin_count=0&send_time=&msg_daily_idx=1&is_original=0&is_only_read=1&req_id=0422J3pAP3U6ZfMyvrA9qY2z&pass_ticket=ZaU5UjhVDMVvsOzkK4i3B%2FjZubj7dI36zAoLaLuE52eYyRmdeP68%2BFfs6uiXclfr&is_temp_url=0&item_show_type=0&tmp_version=1&more_read_type=0&appmsg_like_type=2&related_video_sn=&related_video_num=4&vid=&is_pay_subscribe=0&pay_subscribe_uin_count=0&has_red_packet_cover=0&album_id=1296223588617486300&album_video_num=5"
        data = utils.str_to_dict(body, "&", "=")

        # 通过Fiddler 获取 最新的值
        headers = """
 Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 919
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.5 WindowsWechat
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MzA3NTY3NjUzMg==&mid=2652567742&idx=1&sn=c1e80723b9982b85e825e1070dff2cf3&chksm=8482b6a5b3f53fb3970b3b3701415037bf633b06da98d2dc4c63d476e9a5ce7bd8762d5b27a0&scene=38&key=1682b2315ca9ee496ced1069afbde4e355182c3cc85404049b57d7403fc7434524ed3579f779a4426cfbcd977a1047432db2e171f317e5c2f9ddea95c1f462e8f24e5590746562ab482cd133b73a7682&ascene=7&uin=MzAzMDI4NDA4OQ%3D%3D&devicetype=Windows+10+x64&version=62090529&lang=zh_CN&exportkey=A%2Fuw0CgT2zB13kyRA3OogXM%3D&pass_ticket=ZaU5UjhVDMVvsOzkK4i3B%2FjZubj7dI36zAoLaLuE52eYyRmdeP68%2BFfs6uiXclfr&winzoom=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4
Cookie: rewardsn=; wxtokenkey=777; wxuin=3030284089; devicetype=Windows10x64; version=62090529; lang=zh_CN; pass_ticket=ZaU5UjhVDMVvsOzkK4i3B/jZubj7dI36zAoLaLuE52eYyRmdeP68+Ffs6uiXclfr; wap_sid2=CLnu+aQLElxUNi05blptTWUxVDN3dnI0blRoUXJVcWhOSmpIMWJWWW55SThUcGpHREtQMjJNRWtZMDRtOEhxMUJyNGZfWDZXbUd5MWtReGxXWkd0NFZDNTZMOFFEekFFQUFBfjD16aX5BTgNQAE=
 """

        headers = utils.str_to_dict(headers)

        data_url = "https://mp.weixin.qq.com/mp/getappmsgext"

        r = requests.post(data_url,
                          data=data,
                          verify=False,
                          params=data_url_params,
                          headers=headers)

        result = r.json()
        if result.get("appmsgstat"):
            post['read_num'] = result.get("appmsgstat").get("read_num")
            post['like_num'] = result.get("appmsgstat").get("like_num")
            post['reward_num'] = result.get("reward_total_count")
            post['u_date'] = datetime.now()
            logger.info("「%s」read_num: %s like_num: %s reward_num: %s" %
                        (post.title, post['read_num'], post['like_num'],
                         post['reward_num']))
            post.save()
        else:
            # data={"base_resp":{"ret":301,"errmsg":"default"}}这是微信的反扒机制
            logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s" % r.text)
            exit()
示例#27
0
    def update(self, post):
        """Refresh the read/like/reward counters of ``post`` and save it.

        Sends a POST to the WeChat ``getappmsgext`` endpoint using a captured
        set of query parameters, overlaid with the query parameters found in
        ``post.content_url``. When the JSON response contains a truthy
        ``appmsgstat`` entry, the counters are copied onto ``post`` and
        ``post.save()`` is called; otherwise the raw response text is logged
        as a warning.

        :param post: record exposing ``content_url``, ``title``, item
            assignment and ``save()``.
        :return: None
        """
        from urllib.parse import urlsplit
        import html

        # Captured query parameters of a real request. The session-bound
        # values (pass_ticket, appmsg_token, ...) are hard-coded here.
        url_params = {
            '__biz': 'MjM5MzgyODQxMQ==',
            'appmsg_type': '9',
            'mid': '2650367680',
            'sn': '2e8ef8bcf4dc176c46376508cb5a8fa7',
            'idx': '1',
            'scene': '21',
            'title':
            '%E5%85%B3%E4%BA%8E%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F%E7%9A%845%E4%B8%AA%E5%B0%8F%E8%B4%B4%E5%A3%AB',
            'ct': '1513900976',
            'abtest_cookie':
            'AwABAAoADAANAAcAJIgeALuIHgDhiB4A/IgeAPqJHgAZih4ATYoeAAAA',
            'devicetype': 'android-24',
            'version': '/mmbizwap/zh_CN/htmledition/js/appmsg/index3a9713.js',
            'f': 'json',
            'r': '0.7675446466698528',
            'is_need_ad': '1',
            'comment_id': '3799137919',
            'is_need_reward': '1',
            'both_ad': '0',
            'reward_uin_count': '24',
            'msg_daily_idx': '1',
            'is_original': '0',
            'uin': '777',
            'key': '777',
            'pass_ticket':
            'J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO',
            'wxtoken': '204390160',
            'clientversion': '26060030',
            'appmsg_token':
            '937_D8gMA6eZWUYVZo6QUXO6keTPdtbgwSEexQWAhnI8XvC1V1BMh3m05cmSURoPtkr5ppr0iDTw7bWgBkMr',
            'x5': '1'
        }

        # Article-specific parameters from the post's own URL override the
        # captured defaults above.
        article_query = urlsplit(html.unescape(post.content_url)).query
        url_params.update(utils.str_to_dict2(article_query, "&", "="))

        body = "is_only_read=1&req_id=2900i1sqRlQwikp0KEVJieW4&pass_ticket=J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO&is_temp_url=0"
        data = utils.str_to_dict2(body, "&", "=")

        # Captured request headers, one "Name: value" pair per line.
        raw_headers = """
        Host: mp.weixin.qq.com
        Accept-Encoding: br, gzip, deflate
        Cookie: devicetype=iOS11.2.1; lang=zh_CN; pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs; version=16060124; wap_sid2=CLzPvfkEEnBnRTdYc3Uxa1B4NzcxdUR3T3pyNEUwYVBRb09SQlk0MWk1bTVkdVdkUlpSdTh4QUExdWNVYkVxLTN0NnVVWVZhYk5VRXJCT1hpSmI1N0FCaDNmVURBclVSSlhOcDZXZDNpTXJPZkQ0NVd1S3FBd0FBMJ3S1tIFOAxAlE4=; wxuin=1328506812; wxtokenkey=f9b3cef513031f0f50b8c9d88862ea0a8e44ae03f2dabb3e1e7d6b6f68c2233c; bk_token=fc1a7e18-e120-4f0a-a419-e1650f99b989; is_login=wx; is_wechat=1; platform=qq; pgv_pvid=6280301858; _scan_has_moon=1; eas_sid=Q1g5I1q201w0y4d0o2W5z3N4j4; _ga=GA1.2.557769603.1510924852; pac_uid=0_64f45f141b0a7; sd_cookie_crttime=1508507376988; sd_userid=58871508507376987
        Connection: keep-alive
        Accept: */*
        User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C153 MicroMessenger/6.6.1 NetType/WIFI Language/zh_CN
        Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MTY3OTYyMQ==&scene=124&devicetype=iOS11.2.1&version=16060124&lang=zh_CN&nettype=WIFI&a8scene=3&fontScale=100&pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs&wx_header=1
        Accept-Language: zh-cn
        X-Requested-With: XMLHttpRequest
        """
        headers = utils.str_to_dict(raw_headers)

        url = "https://mp.weixin.qq.com/mp/getappmsgext"
        # NOTE(review): verify=False disables TLS certificate verification.
        r = requests.post(url,
                          data=data,
                          verify=False,
                          params=url_params,
                          headers=headers)

        result = r.json()
        stat = result.get("appmsgstat")
        if not stat:
            logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,data=%s" % r.text)
            return

        post['read_num'] = stat.get("read_num")
        post['like_num'] = stat.get("like_num")
        post['reward_num'] = result.get("reward_total_count")
        post['u_date'] = datetime.now()
        logger.info("「%s」read_num: %s like_num: %s reward_num: %s" %
                    (post.title, post['read_num'], post['like_num'],
                     post['reward_num']))
        post.save()
示例#28
0
文件: crawler.py 项目: isbox/py_wx
    def update_post(post):
        """Fetch the statistics of one article and store them on ``post``.

        Builds a request for the WeChat ``getappmsgext`` endpoint from a
        captured parameter set, overlaid with the query parameters found in
        ``post.content_url``. When the JSON response contains a truthy
        ``appmsgstat`` entry, the counters are written to ``post`` and
        ``post.save()`` is called; otherwise the response text is logged as
        an error and the process exits via ``exit()``.

        ``appmsg_token`` and ``cookie`` are read from an enclosing scope.

        :param post: a record read from MongoDB (per the original docstring);
            must expose ``title``, ``content_url``, item assignment and
            ``save()``.
        :return: None
        """
        # Captured request parameters (the duplicated 'devicetype' and 'f'
        # entries of the original literal carried identical values and were
        # dropped).
        data_url_params = {
            '__biz': 'MjM5MzgyODQxMQ==',
            'appmsg_type': '9',
            'mid': '2650367961',
            'sn': 'f519e5549538ac753ff8887707421df5',
            'idx': '1',
            'scene': '38',
            'title': post.title,
            'ct': '1519553936',
            'abtest_cookie':
            'AgABAAoADAAKAJmKHgCmih4AzooeAOqKHgAoix4APoseAEmLHgCNix4AoIseAKeLHgAAAA==',
            'devicetype': 'android-22',
            'version': '/mmbizwap/zh_CN/htmledition/js/appmsg/index3baf4b.js',
            'f': 'json',
            'r': '0.5196830451803642',
            'is_need_ad': '1',
            'comment_id': '1290332442',
            'is_need_reward': '0',
            'both_ad': '0',
            'reward_uin_count': '0',
            'msg_daily_idx': '1',
            'is_original': '0',
            'uin': '777',
            'key': '777',
            'pass_ticket':
            'zTKHNEdnkkZnPKGrHZa9HSJG%252BFfcj38wC8ciLpAWzMsVRX2crPclnU2gSLX6h1EX',
            'wxtoken': '1574516728',
            'clientversion': '26060339',
            'appmsg_token': appmsg_token,
            'x5': '1',
        }

        # Unescape the stored URL, then overlay its query parameters on the
        # captured defaults. Fragments without '=' (including an empty query
        # string) are skipped instead of raising ValueError.
        query = urlsplit(html.unescape(post.content_url)).query
        content_params = dict(
            pair.split('=', 1) for pair in query.split('&') if '=' in pair)
        data_url_params.update(content_params)

        data_url = 'https://mp.weixin.qq.com/mp/getappmsgext'
        raw_headers = '''
            Host: mp.weixin.qq.com
            Connection: keep-alive
            Content-Length: 143
            Origin: https://mp.weixin.qq.com
            X-Requested-With: XMLHttpRequest
            User-Agent: Mozilla/5.0 (Linux; Android 5.1.1; SM801 Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043909 Mobile Safari/537.36 MicroMessenger/6.6.3.1260(0x26060339) NetType/WIFI Language/zh_CN
            Content-Type: application/x-www-form-urlencoded; charset=UTF-8
            Accept: */*
            Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367961&idx=1&sn=f519e5549538ac753ff8887707421df5&chksm=be9cdc8d89eb559b396a5cb61a25844a1da42c8e65ef225683c91d0f94cc2de507099ca25ec4&scene=38&ascene=0&devicetype=android-22&version=26060339&nettype=WIFI&abtest_cookie=AgABAAoADAAKAJmKHgCmih4AzooeAOqKHgAoix4APoseAEmLHgCNix4AoIseAKeLHgAAAA%3D%3D&lang=zh_CN&pass_ticket=zTKHNEdnkkZnPKGrHZa9HSJG%2BFfcj38wC8ciLpAWzMsVRX2crPclnU2gSLX6h1EX&wx_header=1
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,en-US;q=0.8
            Cookie: {cookie}
            Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.6.3&TBSVC=43603&CO=BK&COVC=043909&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= SM801 &RL=1080*1920&OS=5.1.1&API=22
            Q-GUID: 1cd3b3a6ff71cc42acdc78d213b788cb
            Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
        '''.format(cookie=cookie)
        headers = utils.str_to_dict(raw_headers)

        # Original note: these values are hidden in the returned text.
        # NOTE(review): 'req_id': '0414' followed by a key
        # 'NBNjylwrVHDydtl3ufse' mapped to None looks like a single req_id
        # value that was split by mistake - confirm against a fresh capture.
        body = {
            'is_only_read': '1',
            'req_id': '0414',
            'NBNjylwrVHDydtl3ufse': None,
            'pass_ticket':
            'zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0',
            'is_temp_url': 0
        }

        # NOTE(review): data_url_params is sent as the form body and `body`
        # as the query string; confirm this routing is intentional.
        # NOTE(review): verify=False disables TLS certificate verification.
        r = requests.post(data_url,
                          data=data_url_params,
                          params=body,
                          headers=headers,
                          verify=False)
        result = r.json()

        stat = result.get('appmsgstat')
        if stat:
            print(stat)
            post['read_num'] = stat.get("read_num")
            post['like_num'] = stat.get("like_num")
            post['reward_num'] = result.get("reward_total_count")
            post['u_date'] = datetime.now()

            logger.info("「%s」read_num: %s like_num: %s reward_num: %s" %
                        (post.title, post['read_num'], post['like_num'],
                         post['reward_num']))
            post.save()

        else:
            logger.error(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s" % r.text)
            exit()
示例#29
0
 def __init__(self):
     """Populate ``self.host_info`` from the output of ``cat /etc/*-release``."""
     # Run the shell command, decode its output, then split it on CRLF line
     # breaks and '=' separators via utils.str_to_dict.
     release_text = pexpect.run("/bin/sh -c 'cat /etc/*-release'").decode()
     self.host_info = utils.str_to_dict(release_text, '\r\n', '=')
示例#30
0
    def update(self, post):

        post_url_params = {
            '__biz': 'MjM5MzgyODQxMQ==',
            'mid': '2650367149',
            'idx': '1',
            'sn': '5b9bc4a8029e7eb9b8a4b71d06524da9',
            'chksm':
            'be9cdff989eb56ef143d5b03fab7e825f08ea6a96d041aa1da50e78e765a75e60d49b42d9bf6',
            'scene': '27'
        }

        url_params = {
            '__biz': 'MjM5MzgyODQxMQ==',
            'appmsg_type': '9',
            'mid': '2650367680',
            'sn': '2e8ef8bcf4dc176c46376508cb5a8fa7',
            'idx': '1',
            'scene': '21',
            'title':
            '%E5%85%B3%E4%BA%8E%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F%E7%9A%845%E4%B8%AA%E5%B0%8F%E8%B4%B4%E5%A3%AB',
            'ct': '1513900976',
            'abtest_cookie':
            'AwABAAoADAANAAcAJIgeALuIHgDhiB4A/IgeAPqJHgAZih4ATYoeAAAA',
            'devicetype': 'android-24',
            'version': '/mmbizwap/zh_CN/htmledition/js/appmsg/index3a9713.js',
            'f': 'json',
            'r': '0.7675446466698528',
            'is_need_ad': '1',
            'comment_id': '3799137919',
            'is_need_reward': '1',
            'both_ad': '0',
            'reward_uin_count': '24',
            'msg_daily_idx': '1',
            'is_original': '0',
            'uin': '777',
            'key': '777',
            'pass_ticket':
            'J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO',
            'wxtoken': '204390160',
            'clientversion': '26060030',
            'appmsg_token':
            '937_D8gMA6eZWUYVZo6QUXO6keTPdtbgwSEexQWAhnI8XvC1V1BMh3m05cmSURoPtkr5ppr0iDTw7bWgBkMr',
            'x5': '1'
        }

        from urllib.parse import urlsplit
        import html
        url_params.update(
            utils.str_to_dict(
                urlsplit(html.unescape(post.content_url)).query, "&", "="))
        body = "is_only_read=1&req_id=2900i1sqRlQwikp0KEVJieW4&pass_ticket=J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO&is_temp_url=0"
        data = utils.str_to_dict(body, "&", "=")

        headers = """

Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 137
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 7.0; M1 E Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.6.1200(0x26060030) NetType/WIFI Language/zh_CN
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367680&idx=1&sn=2e8ef8bcf4dc176c46376508cb5a8fa7&chksm=be9cdd9489eb54822dc5993ff71050ca9011aff07fdf642b3eccdee7e20dc2efad9f21fb1a63&scene=21&ascene=7&devicetype=android-24&version=26060030&nettype=WIFI&abtest_cookie=AwABAAoADAANAAcAJIgeALuIHgDhiB4A%2FIgeAPqJHgAZih4ATYoeAAAA&lang=zh_CN&pass_ticket=J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: pgv_info=ssid=s2190841734; pgv_pvid=9172712625; rewardsn=a520f9c4f8c2c14d9ab0; wxtokenkey=902a8ac15e846d9a567021f9652cec8ddd60662aee0c86db3cb47e638f2a4bf5; wxuin=525477518; devicetype=android-24; version=26060030; lang=zh_CN; pass_ticket=J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO; wap_sid2=CI7NyPoBElw4bFowVktRcDNGZFBkSzRxeDNRS1BZclFQYTZXa1hWNWg4THVFN21tVnVJQ1YtZjk5Qml2RjkxTThqcFZJZUFCenZ2cnpiUXhiN2dXcVI4X1pWYUJCS2tEQUFBfjCHspTSBTgNQAE=
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.6.0&TBSVC=43602&CO=BK&COVC=043632&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= M1E &RL=1080*1920&OS=7.0&API=24
Q-GUID: 0fd685fa8c515a30dd9f7caf13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
        """

        headers = utils.str_to_dict(headers)

        url = "https://mp.weixin.qq.com/mp/getappmsgext"
        r = requests.post(url,
                          data=data,
                          verify=False,
                          params=url_params,
                          headers=headers)

        result = r.json()
        if result.get("appmsgstat"):
            post['read_num'] = result.get("appmsgstat").get("read_num")
            post['like_num'] = result.get("appmsgstat").get("like_num")
            post['reward_num'] = result.get("reward_total_count")
            post['u_date'] = datetime.now()
            logger.info("「%s」read_num: %s like_num: %s reward_num: %s" %
                        (post.title, post['read_num'], post['like_num'],
                         post['reward_num']))
            post.save()
        else:
            logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,data=%s" % r.text)
示例#31
0
    def update_post(post):
        """Fetch the statistics of one article and store them on ``post``.

        Per the original docstring, ``post`` is a record read from MongoDB
        that is updated and saved here. The function POSTs a captured form
        body, overlaid with the query parameters of ``post.content_url``, to
        the WeChat ``getappmsgext`` endpoint. When the JSON response contains
        a truthy ``appmsgstat`` entry, the counters are written to ``post``
        and ``post.save()`` is called; otherwise the response text is logged
        as a warning and the process exits via ``exit()``.

        :param post: record exposing ``content_url``, ``title``, item
            assignment and ``save()``.
        :return: None
        """
        # TODO (from the original author): load the article URL first so
        # appmsg_token and pass_ticket can be refreshed.
        # NOTE(review): "cientversion" looks like a typo of "clientversion";
        # left untouched pending confirmation against a real capture.
        data_url = ("https://mp.weixin.qq.com/mp/getappmsgext?"
                    "f=json&uin=777&"
                    "key=777&"
                    "pass_ticket=mxvGMDk3GtQtB%25252Fz7%25252FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%25252BxoQE91&"
                    "wxtoken=777&"
                    "devicetype=android-19&"
                    "cientversion=26051732&"
                    "appmsg_token=954_MOJLemLCd3Gz0Aal3PPp0Qmza2kN9ibwtA0IyosHLQlOgxiRlgtrWkHFwC4lgSeOA0FmtshLamC-6ysv&x5=1&"
                    "f=json")

        # Unescape the stored URL and turn its query string into a dict.
        content_url = html.unescape(post.content_url)
        content_url_params = utils.str_to_dict(
            urlsplit(content_url).query, "&", "=")

        body = 'r=0.9146884263687687&__biz=MzUzNTcwNDkxNA%3D%3D&appmsg_type=9&mid=2247484203&sn=a0dbce888297a156a4e9c0542094e286&idx=1&scene=38&title=%25E5%25A4%259A%25E5%25B0%2591%25E4%25BA%25BA%25E5%259B%25A0%25E4%25B8%25BA%25E7%2594%25B5%25E5%25BD%25B1%25E5%258E%25BB%25E4%25BA%2586%25E8%25A5%25BF%25E8%2597%258F&ct=1524824855&abtest_cookie=BAABAAoACwAMAA0ABwA8ix4Ad4seAPKMHgBkjR4Af40eACaOHgAxjh4AAAA%3D&devicetype=android-19&version=%2Fmmbizwap%2Fzh_CN%2Fhtmledition%2Fjs%2Fappmsg%2Findex3d6703.js&is_need_ticket=0&is_need_ad=1&comment_id=0&is_need_reward=0&both_ad=0&reward_uin_count=0&send_time=&msg_daily_idx=1&is_original=0&is_only_read=1&req_id=0100A6NrRHtU5GqTevtLXpPn&pass_ticket=mxvGMDk3GtQtB%25252Fz7%25252FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%25252BxoQE91&is_temp_url=0&item_show_type=undefined'
        data = utils.str_to_dict(body, "&", "=")
        # Article-specific parameters override the captured body values.
        data.update(content_url_params)

        # Values captured with Fiddler; replace them with fresh ones.
        headers = {
            'Host': 'mp.weixin.qq.com',
            'Accept-Encoding': 'br, gzip, deflate',
            'Cookie':
            'devicetype=iOS11.4.1; lang=en; pass_ticket=aGWzzp8+zyir2DKPLDnrceAi21LIqICuCOJi4d46Qnc3H4YWQtybMQQwha0k6Vv5; version=16070227; wap_sid2=CI/I96QLElxaSEFaaVcwTVc5N3I0d1Uwa2k1d19ibWlCX3pvV1pRWFVQa3I3WXl4SWxsbzJxeDBLTGR0VlRRS01sRkdjbklKUHh2VzRjemgwb3poallYdy1fU3pfY3dEQUFBfjDE5KjcBTgNQJVO; wxuin=3030246415; wxtokenkey=777; rewardsn=; pgv_pvid=7650693240',
            'Connection': 'keep-alive',
            'Accept': '*/*',
            'User-Agent':
            'Mozilla/5.0 (iPhone; CPU iPhone OS 11_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15G77 MicroMessenger/6.7.2 NetType/WIFI Language/en',
            # Fixed: the original split "Referer: https://..." on its first
            # colon, producing a bogus 'Referer https' header name and a
            # scheme-less value, so no Referer header was actually sent.
            'Referer':
            'https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MjA5MTQyMA==&scene=126&devicetype=iOS11.4.1&version=16070227&lang=en&nettype=WIFI&a8scene=0&fontScale=94&pass_ticket=aGWzzp8%2Bzyir2DKPLDnrceAi21LIqICuCOJi4d46Qnc3H4YWQtybMQQwha0k6Vv5&wx_header=1',
            'Accept-Language': 'en-us',
            'X-Requested-With': 'XMLHttpRequest',
        }

        # NOTE(review): data_url_params is not defined anywhere in this
        # function, so it must come from an enclosing/module scope or this
        # call raises NameError - confirm, since data_url already carries
        # the query string.
        # NOTE(review): verify=False disables TLS certificate verification.
        r = requests.post(data_url,
                          data=data,
                          verify=False,
                          params=data_url_params,
                          headers=headers)

        result = r.json()
        stat = result.get("appmsgstat")
        if stat:
            post['read_num'] = stat.get("read_num")
            post['like_num'] = stat.get("like_num")
            # Original note: this field is only present when the article
            # has received rewards.
            post['reward_num'] = result.get("reward_total_count")
            post['u_date'] = datetime.now()
            logger.info("「%s」read_num: %s like_num: %s reward_num: %s" %
                        (post.title, post['read_num'], post['like_num'],
                         post['reward_num']))
            post.save()
        else:
            logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s" % r.text)
            exit()
示例#32
0
    def detail(self, article_url):
        """POST to the WeChat ``getappmsgext`` endpoint and print the JSON reply.

        NOTE(review): the ``article_url`` argument is overwritten by a
        hard-coded URL on the first statement, so the caller's value is
        never used in the code visible here.

        :param article_url: article URL (currently ignored, see note above).
        """
        # Article URL
        article_url = "http://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367644&idx=1&sn=9951edf4e9bfebcdaa7dd66a639befea&chksm=be9cddc889eb54de36a00865dcd15f9cf906d1868430dd62f1fe550b55e713b333344811e717&scene=27#wechat_redirect"

        # Captured headers for fetching the article page itself; within the
        # visible code this string is referenced only by the commented-out
        # request below.
        header = """
Host: mp.weixin.qq.com
Cookie: devicetype=iOS10.3.3; lang=zh_CN; pass_ticket=bh2aZVyo+yUXTHI+3G7VDrZTFJH7e41TRdHFcHjOjyqrCJe2rpXirBD4QSKU2maB; version=16060021; wap_sid2=CIDUopEDElxKMk5jQVZpM3lWd3RVQXVZYmV1QVA5Z3l3d3hWdFJGQzNhV3BuU2VxOEN2NEtyaVQ5QVlIc2FMVFZlV1VRRnJlZzZyTjlXNHU0MWFpcFpkUHI5R21GYWNEQUFBfjDYhMnRBTgMQJRO; wxuin=841525760; wxtokenkey=a401a4f12436d7404b2488792b57e05efdb4a67082b39db7405eaf1b43d8bd79; ua_id=seRYVLVNcjYoZPzpAAAAACA8ySAXhkrd89FL3uvLbt8=; _scan_has_moon=1; pt2gguin=o0253421576; ptcz=3d9558280f480d9453cc13b78b32059793c778a1e8aa723ce7b2f5e9744f606b; pgv_pvid=7330882815; pgv_pvi=8857346048; sd_cookie_crttime=1510571099034; sd_userid=34241510571099034; pvid=6161617834; RK=7JMfU7Y+Gq
X-WECHAT-KEY: 63f29c76b0873f93d1e09f3041a4fee49f792cb4ccca7588574e6f054f357a611cacf9e5787641eae77bc1fd78d80f5c713d53a79e144091a768ec05fcf75e3ba88681ccaa05fdba037fc9b60cc2f86a
X-WECHAT-UIN: ODQxNTI1NzYw
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 MicroMessenger/6.6.0 NetType/WIFI Language/zh_CN
Accept-Language: zh-cn
Accept-Encoding: gzip, deflate
Connection: keep-alive





        """

        # print(article_url)
        # response = requests.get(article_url, headers=utils.str_to_dict(header))
        # print(response.text)
        # appmsg_token, = utils.extract_text(response.text, r"appmsg_token.*?\"(\S+)\"")
        # print(appmsg_token)
        # response.text



        data_url = "https://mp.weixin.qq.com/mp/getappmsgext"
        # appmsg_token = "935_NOFCGGSMbypif53YGURwVY2zSD6xcJ6N_0kYBbA3uOc7G6f172hKMrEYEuF2aoAgCZfZqy2vfiqpaOKH"

        # Captured query parameters, including hard-coded pass_ticket and
        # appmsg_token values.
        data_param = {
            'comment_id': '4200886237',
            'is_need_reward': '1',
            'reward_uin_count': '27',
            'msg_daily_idx': '1',
            'is_original': '0',
            'uin': '777',
            'key': '777',
            'pass_ticket': 'bh2aZVyo%25252ByUXTHI%25252B3G7VDrZTFJH7e41TRdHFcHjOjyqrCJe2rpXirBD4QSKU2maB',
            'wxtoken': '1082715157',
            'appmsg_token': '935_ale1QIchYa5yhL23YfFID3P9orUrZgludl8x0DM-Kzji1H3GmguOr5xpUVCpsKh1G5EmVd2msa4m2dRq',
            }

        # Overlay the article URL's own query parameters on the captured ones.
        article_param = utils.str_to_dict(urlsplit(article_url).query, "&", "=")
        data_param.update(article_param)

        # Form body; pass_ticket is taken from the merged parameters above.
        body = "is_only_read=1&req_id=1412MureIWcNGlE3ILXVOGp2&" \
               "pass_ticket={pass_ticket}".format(pass_ticket=data_param.get("pass_ticket"))

        # Captured headers for the statistics request.
        headers = """

Host: mp.weixin.qq.com
Accept: */*
X-Requested-With: XMLHttpRequest
Accept-Language: zh-cn
Accept-Encoding: gzip, deflate
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Origin: https://mp.weixin.qq.com
User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 MicroMessenger/6.6.0 NetType/WIFI Language/zh_CN
Connection: keep-alive
Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367634&idx=1&sn=ec23c954f7adad842d706c2ec687a35e&chksm=be9cddc689eb54d07cd27f260f8ce7bf48d5702dc59a7f9b32725a7fd02d8b4a34f58382e4f5&scene=27&ascene=7&devicetype=iOS10.3.3&version=16060021&nettype=WIFI&abtest_cookie=AwABAAoADAANAAoAJIgeAEyIHgBiiB4A2ogeAPyIHgAOiR4Ab4keAPCJHgD4iR4AB4oeAAAA&lang=zh_CN&fontScale=100&pass_ticket=bh2aZVyo%2ByUXTHI%2B3G7VDrZTFJH7e41TRdHFcHjOjyqrCJe2rpXirBD4QSKU2maB&wx_header=1
Content-Length: 149
Cookie: devicetype=iOS10.3.3; lang=zh_CN; pass_ticket=bh2aZVyo+yUXTHI+3G7VDrZTFJH7e41TRdHFcHjOjyqrCJe2rpXirBD4QSKU2maB; version=16060021; wap_sid2=CIDUopEDElxKMk5jQVZpM3lWd3RVQXVZYmV1QVA3TUdNVktTLVVrQTV4Q3dRWkI4Vkh5aXJKbDBsZlM1ZEFlWS1IWnpqWl8wQzhoeUtZR0xRQXh3ZHB1d3IwRDVMS2NEQUFBfjDihMnRBTgNQAE=; wxtokenkey=a733e2774b3089c814379cad23098030c1568e435e09146d5595c8a044f1770b; wxuin=841525760; ua_id=seRYVLVNcjYoZPzpAAAAACA8ySAXhkrd89FL3uvLbt8=; _scan_has_moon=1; pt2gguin=o0253421576; ptcz=3d9558280f480d9453cc13b78b32059793c778a1e8aa723ce7b2f5e9744f606b; pgv_pvid=7330882815; pgv_pvi=8857346048; sd_cookie_crttime=1510571099034; sd_userid=34241510571099034; pvid=6161617834; RK=7JMfU7Y+Gq







        """
        headers = utils.str_to_dict(headers)
        # NOTE(review): verify=False disables TLS certificate verification.
        response = requests.post(data_url, headers=headers, data=body, params=data_param, verify=False)
        result = response.json()
        print(result)