def base_init():
    LogGo.init(Configs())
    RequestHelper.init(Configs())
    SMTPServer.init(Configs())
    Download(Configs())
    RequestHelperClassVer.init(Configs())
    ProxyHelper.init(Configs())
    MysqlHelper.init(Configs())
    BaseStrategy.init()
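# Usage sketch, assuming Configs() is constructible with no arguments (as the
# calls above suggest): run base_init() once at process start, before any
# ruler issues a request, since the helpers above hold shared configuration.
if __name__ == '__main__':
    base_init()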
def scan_list(self, target, exists): """请求参数""" par = (['flag', 'true'], ['uuid', target.extra0]) """抓取关键字""" keys = [ 'title', 'author', 'publicTime', 'url', 'clicksCount', 'likeCount', 'publicTime', 'summary' ] list = [] result_list = [] try: raw = RequestHelper.post(NewrankRuler.url, par, file_cookie=Configs.newrank_cookie_file) except Exception as e: import traceback msg = traceback.format_exc() # print(msg) LogGo.warning(msg) return (-1, (target, None, None, None)) try: list = ExtraJSON.extra_newrank_wechat_list(raw, keys) except: return (-1, (target, None, None, None)) if len(list) > 0: list.reverse() for item in list: if exists.count(item['title']) < 1: result_list.append(item) LogGo.debug('newrank list length:' + str(len(result_list))) if len(result_list) > 0: return (1, (target, list, None, None)) return (-1, (target, None, None, None))
def send_request(self, result_dic):
    json_dic = dict()
    json_dic['date'] = DateGo.get_current_date()
    json_dic['targetId'] = 'No Target ID!'
    json_dic['rowList'] = [result_dic]
    try:
        LogGo.info("Ready to Post!")
        raw = RequestHelper.post(Configs.fish_data_post_url, json=json_dic)

        # Log a preview copy with the bulky text fields stubbed out.
        preview_dic = result_dic.copy()
        preview_dic['text_not_format_clob'] = 'DUMMY CONTENT'
        preview_dic['text_blob'] = 'DUMMY CONTENT'
        json_dic['rowList'] = [preview_dic]
        json_str = json.dumps(json_dic)

        LogGo.info("POST CONTENT: " + json_str)
        LogGo.info("POST RESPONSE: " + str(raw))
    except Exception:
        E.out_err()
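# A hedged sketch of the JSON shape send_request() posts. The row fields shown
# are the two stubbed in the preview above plus a hypothetical 'title'; real
# rows come from the rulers' scan_detail() dictionaries.
def example_fish_payload():
    return {
        'date': DateGo.get_current_date(),
        'targetId': 'No Target ID!',
        'rowList': [{
            'title': 'example',                      # hypothetical field
            'text_not_format_clob': 'DUMMY CONTENT',
            'text_blob': 'DUMMY CONTENT',
        }],
    }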
class Plantform(BaseRuler):
    req = RequestHelper()

    mgtv_base = 'http://vc.mgtv.com/v2/dynamicinfo?cid={0}'
    sohu_base = 'http://count.vrs.sohu.com/count/query_Album.action?albumId={0}'
    qq_base = 'https://m.v.qq.com/play.html?cid={0}'
    letv_base = 'http://v.stat.letv.com/vplay/queryMmsTotalPCount?pid={0}&vid={1}'
    iqiyi_base = 'http://mixer.video.iqiyi.com/jp/albums/{0}'
    cntv_base = 'http://www.soku.com/detail/show/{0}'
    youku_base = 'http://list.youku.com/show/{0}.html'
    youku_prefix = 'id_{0}'

    """
    Youku: use the PC show-description page URL,
        e.g. http://list.youku.com/show/id_z0f2233c722ec11e6bdbb.html,
        or the id in the URL (id_z0f2233c722ec11e6bdbb), or the trimmed id (z0f2233c722ec11e6bdbb).
    Mango TV (mgtv): use the cover-page id from the PC URL,
        e.g. 295541 from http://www.mgtv.com/h/295541.html?fpa=se.
    Tencent (qq): use the alphanumeric id from the PC show-description page link
        (the scrape itself may go through the mobile site),
        e.g. dhzimk1qzznf301 from https://v.qq.com/x/cover/dhzimk1qzznf301/l0024si3r7q.html,
        or 45yhivg8n755kh1 from http://v.qq.com/detail/4/45yhivg8n755kh1.html.
    iQiyi: use an id-like value found via the mobile site's developer tools
        (for films it may be tvId, aId, referenceId or albumId, sometimes inside a
        request whose name starts with content_config),
        e.g. 204446001 from 204446001?callback=Zepto1499852260800
        for http://m.iqiyi.com/v_19rrax9nq4.html#vfrm=13-0-0-1.
    Sohu: use pid, or albumId (possibly films only), the number in the mobile URL,
        e.g. 9344732 from http://m.film.sohu.com/album/9344732.html,
        or pid from the PC detail page's developer tools,
        e.g. pid 9347799 from v?id=3879082&pid=9347799&pageNum=1&pageSize=50&isgbk=true&var=video_similar_search_result
        for http://tv.sohu.com/s2017/dnwshylxt/.
    Letv: use pid and vid from the mobile play page's developer tools,
        e.g. pid and vid from queryMmsTotalPCount?pid=10036184&vid=29037420&rnd=1499915428741&callback=jsonp4
        for http://m.le.com/vplay_29037420.html.
    CNTV: play counts come from Youku's soku platform; use the id from the
        description-page URL,
        e.g. XMTI1NDY1Ng from http://www.soku.com/detail/show/XMTI1NDY1Ng.
    """

    # @Annoations.exe_time
    def scan(self, target, order):
        result = []
        type = self.td(target)
        url = target.extra0  # e.g. 'http://ent.people.com.cn/GB/81374/index1.html'
        cap = None
        ruler = None

        if type == 'i':
            cap = ['var tvInfoJs=', '']
            url = self.iqiyi_base.format(url)
            ruler = 'keywords:contentKeyword;latestOrder:latestOrder;name:name;playCount:playCount;score:score;videoCount:videoCount'
        elif type == 'l':
            ruler = 'score:plist_score;comments:pcommon_count;bullets:pdm_count;like:up;hate:down;playCount:plist_play_count'
            url = self.letv_base.format(url, target.extra1)
        elif type == 't':
            cap = ["tlux.dispatch('$cover',", ");"]
            ruler = 'score:score->score;playCount:view_all_count;videoCount:episode_all;latestOrder:episode_updatedd'
            url = self.qq_base.format(url)
        elif type == 'm':
            url = self.mgtv_base.format(url)
            cap = ['"data":', ',"msg"']
            ruler = 'playCount:all;like:like;hate:unlike'
        elif type == 'y':
            ruler = 'playCount:li [总播放数];comments:li [评论];like:li [顶];score:span class=star-num'
            if not s.is_url(url):
                if not url.startswith('id'):
                    url = self.youku_prefix.format(url)
                url = self.youku_base.format(url)
        elif type == 's':
            url = self.sohu_base.format(url)
        elif type == 'c':
            url = self.cntv_base.format(url)
            ruler = 'playCount:^label [播放次数]'

        try:
            encode = ExtraHtml.get_page_encode(url)
            if type == 'y' or type == 'c':
                result = self.looper_html(url, ruler, encode, target)
            else:
                raw = RequestHelper.get(url, encode=encode)
                if type == 's':
                    result = self.finder_sohu(raw)
                else:
                    result = self.looper_js(raw, ruler, cap)
        except AttributeError:
            pass
        except Exception as e:
            import traceback
            msg = traceback.format_exc()
            print(msg)
            LogGo.warning(repr(e))

        if len(result) > 0:
            result = self.build_base_dic(target, result, order)
            if len(result) > 0:
                return result[0]
        return None

    def union(self):
        """Merge per-platform counts into per-program groups."""
        result = []
        dao = SoapDao()
        rows = dao.get_new_count()
        pro_list = []
        while len(rows) > 0:
            row = rows.pop(0)
            tmp_list = [row]
            for i in range(len(rows) - 1, -1, -1):
                if rows[i][TBSoap.program.key] == row[TBSoap.program.key]:
                    tmp_list.append(rows.pop(i))
            pro_list.append(tmp_list)
        if len(pro_list) > 0:
            result = self.build_count_dic(pro_list)
        return result

    def looper_js(self, raw, ruler, cap):
        # e.g. iqiyi cap: ['var tvInfoJs=', '']
        return ExtraJSON.extra_any_json_dic(raw, ruler, cap=cap)

    def td(self, type):
        """Normalize a platform name (or a Target's soap_type) to a one-letter code."""
        if isinstance(type, Target):
            type = type.soap_type
        if type == 'iqiyi' or type == 'i':
            return 'i'
        elif type == 'letv' or type == 'l':
            return 'l'
        elif type == 'qq' or type == 'q' or type == 't':
            return 't'
        elif type == 'mgtv' or type == 'm':
            return 'm'
        elif type == 'youku' or type == 'y':
            return 'y'
        elif type == 'sohu' or type == 's':
            return 's'
        elif type == 'cntv' or type == 'c':
            return 'c'
        else:
            return None

    def finder_sohu(self, raw):
        try:
            count = s.cut_tail(raw.split('=')[1], ';')
            return {'playCount': count}
        except Exception:
            # Return an empty dict so the caller's len() check still works.
            return {}

    def looper_html(self, url, ruler, encode, target):
        content = ExtraHtml.web_extra_content(url, ruler, encode)
        if self.td(target) == 'y':
            # Youku values look like '总播放数:1,234,567'; strip the label and commas.
            try:
                content['comments'] = int(''.join(content['comments'].split(':')[1].split(',')))
                content['like'] = int(''.join(content['like'].split(':')[1].split(',')))
                content['playCount'] = int(''.join(content['playCount'].split(':')[1].split(',')))
            except Exception:
                pass
        elif self.td(target) == 'c':
            # CNTV play counts only need the thousands separators removed.
            try:
                content['playCount'] = int(''.join(content['playCount'].split(',')))
            except Exception:
                pass
        # try:
        #     content['score'] = int(content['score'])
        # except Exception:
        #     pass
        return content

    def build_base_dic(self, target, result, order):
        soap = TBSoap()
        program_dao = ProgramDao()
        soap_result = []
        try:
            if Configs.show_utf:
                try:
                    name = target.data_key
                except Exception:
                    name = '<<error>>'
                LogGo.info(">>> name: " + str(name) + "(" + str(result['playCount']) + ")")

            # Dict keys map to database column names, values to the data to store.
            dic = dict()
            try:
                dic[soap.play_count.key] = result['playCount']  # current play count
            except KeyError as e:
                raise BaseDateLackException(str(e))
            try:
                dic[soap.keywords.key] = result['keywords']  # keywords
            except Exception:
                pass
            try:
                dic[soap.bullet_count.key] = result['bullets']  # bullet-comment (danmu) count
            except Exception:
                pass
            try:
                dic[soap.hate_count.key] = result['hate']  # downvote count
            except Exception:
                pass
            try:
                dic[soap.like_count.key] = result['like']  # upvote count
            except Exception:
                pass
            try:
                dic[soap.latest_order.key] = result['latestOrder']  # latest episode
            except Exception:
                pass
            try:
                dic[soap.name.key] = result['name']  # show name
            except Exception:
                pass
            try:
                dic[soap.name.key] = program_dao.get_title_by_id(target.program_id)
            except Exception:
                pass
            try:
                dic[soap.score.key] = result['score']  # score
            except Exception:
                pass
            try:
                dic[soap.video_count.key] = result['videoCount']  # number of videos
            except Exception:
                pass
            try:
                dic[soap.program.key] = target.program_id  # program
                dic[soap.target.key] = target.id  # target
            except Exception:
                pass
            dic[soap.plantform.key] = target.soap_type
            order += 1
            dic[soap.order_code.key] = order  # sort code
            dic[soap.create_date.key] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # record creation time
            dic[soap.valid.key] = 1
            soap_result.append(dic)
        except BaseDateLackException as e:
            LogGo.warning("Lacks important data(" + str(e) + ')')
        except DataFormatException:
            pass
        except Exception:
            import traceback
            LogGo.warning(traceback.format_exc())
        return soap_result

    def build_count_dic(self, pro_list):
        result = []
        try:
            LogGo.info(">>> count: " + str(len(pro_list)))
            for programs in pro_list:
                # Dict keys map to database column names, values to the data to store.
                dic = dict()
                total = 0
                for program in programs:
                    try:
                        dic[TBProgramPlayCount.program.key] = program[TBSoap.program.key]
                        plantform = program[TBSoap.plantform.key]
                        count = program[TBSoap.play_count.key]
                        total += count
                        if self.td(plantform) == 'i':
                            dic[TBProgramPlayCount.count1.key] = count
                        elif self.td(plantform) == 'l':
                            dic[TBProgramPlayCount.count2.key] = count
                        elif self.td(plantform) == 't':
                            dic[TBProgramPlayCount.count3.key] = count
                        elif self.td(plantform) == 'm':
                            dic[TBProgramPlayCount.count4.key] = count
                        elif self.td(plantform) == 'y':
                            dic[TBProgramPlayCount.count5.key] = count
                        elif self.td(plantform) == 's':
                            dic[TBProgramPlayCount.count6.key] = count
                    except Exception:
                        import traceback
                        LogGo.info(traceback.format_exc())
                dic[TBProgramPlayCount.total_count.key] = total
                dic[TBProgramPlayCount.create_time.key] = datetime.datetime.now().strftime('%Y-%m-%d')  # record creation time
                result.append(dic)
        except BaseDateLackException as e:
            LogGo.warning("Lacks important data(" + str(e) + ')')
        except DataFormatException:
            pass
        except Exception:
            import traceback
            LogGo.warning(traceback.format_exc())
        return result
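# The `ruler` strings above are a small 'dst:src;dst:src' mapping DSL consumed
# by ExtraJSON.extra_any_json_dic / ExtraHtml.web_extra_content. A minimal
# sketch of how such a string could be parsed, for illustration only: the real
# parsing lives in those helpers, and treating '->' as JSON nesting is an
# assumption based on 'score:score->score'.
def parse_ruler(ruler):
    mapping = {}
    for pair in ruler.split(';'):
        dst, src = pair.split(':', 1)
        # '->' appears to point into nested objects, e.g. 'score:score->score'.
        mapping[dst] = src.split('->')
    return mapping

# Example: parse_ruler('playCount:all;like:like;hate:unlike')
# -> {'playCount': ['all'], 'like': ['like'], 'hate': ['unlike']}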
class WeiboRuler(BaseRuler):
    req = RequestHelper()

    request_login = '******'
    url_login = "******"
    # request_getindex = 'http://m.weibo.cn/container/getIndex'
    request_getindex = 'https://m.weibo.cn/api/container/getIndex'
    url_status = 'http://m.weibo.cn/status/'

    limited_attitude_count = 0
    limited_forward_count = 0
    exist_program = []

    """Request parameters"""
    par = (['username', Configs.weibo_username],
           ['password', Configs.weibo_password],
           ['savestate', 1],
           ['ec', 0],
           ['entry', 'mweibo'])

    """Scrape keys"""
    keys = ['title', 'author', 'publicTime', 'url', 'clicksCount', 'likeCount', 'publicTime']

    """Request headers"""
    header = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Mobile Safari/537.36",
        "Referer": "https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F%3Fjumpfrom%3Dwapv4%26tip%3D1",
        "Origin": "https://passport.weibo.cn",
        "Host": "passport.weibo.cn",
        "DNT": "1",
        "Content-Type": "application/x-www-form-urlencoded",
        "Connection": "keep-alive",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4",
        "Accept-Encoding": "gzip, deflate, br",
    }

    def scan_list(self, target, exists):
        self.limited_forward_count = target.limited_forward_count
        self.limited_attitude_count = target.limited_attitude_count

        list = []
        result_list = []

        """Simulated login (currently stubbed out)"""
        status = 'you got it'

        """If login succeeded"""
        if status != '':
            self.loops(target, exists, list)
            if len(list) < 1:
                return (0, (target, None, None, None))
        else:
            LogGo.warning("Weibo: Loop scan failed!")
            return (-1, (target, None, None, None))

        if len(list) > 0:
            list = self.purify(list)
            list.reverse()
            for item in list:
                if exists.count(item['id']) < 1:
                    result_list.append(item)
            LogGo.debug('weibo list length: ' + str(len(result_list)))
            if len(result_list) > 0:
                return (1, (target, list, None, None))
        return (-1, (target, None, None, None))

    def scan_detail(self, target, detail_page_bundle, order, content_ruler, encode):
        self.limited_forward_count = target.limited_forward_count
        self.limited_attitude_count = target.limited_attitude_count
        if detail_page_bundle is not None:
            return self.build_single_page_dic(target, detail_page_bundle, order, content_ruler, encode)
        else:
            return None

    # @Annoations.exe_time
    def loops(self, target, exists, result):
        try:
            base_url = target.extra0
            for i in range(0, Configs().length_weibo):
                print("page: " + str(i))
                list = self.build_and_request(WeiboRuler.keys, base_url, WeiboRuler.request_getindex, i)
                if len(list) == 0:
                    break
                for item in list:
                    """Duplicate check for routine scans"""
                    if exists.count(item['id']) < 1:
                        result.append(item)
                    else:
                        return
        except Exception as e:
            import traceback
            msg = traceback.format_exc()
            print(msg)
            LogGo.warning(repr(e))
            LogGo.warning("Scan Failed!")
        return

    """
    Drop duplicate posts by id (a re-check during scraping: Weibo updates so
    quickly that items can shift position while paging). Walk from index 0 and
    append non-duplicates to result; scanning result backwards is more
    efficient here.
    20180306: added filters: repost count (minimum), like count (minimum),
    and a known-program-name check (drop posts mentioning more than 3).
    """
    # @Annoations.exe_time
    def purify(self, list):
        if len(list) < 1:
            return []
        result = []
        for i in list:
            try:
                flag = True
                id = i['id']
                text = i['text']
                limited_attitude_count = i['attitudes_count']
                limited_forward_count = i['reposts_count']
                program_count = 0

                # Pass 1: drop duplicate ids.
                for seq in result[::-1]:
                    if id == seq['id']:
                        flag = False
                        break

                # Pass 2: program-name heuristic (may also drop non-program posts).
                if flag and text.count('《') < 1:
                    flag = False

                # Pass 3: threshold filters.
                if flag and self.limited_attitude_count is not None and limited_attitude_count is not None:
                    if limited_attitude_count < self.limited_attitude_count:
                        flag = False
                if flag and self.limited_forward_count is not None and limited_forward_count is not None:
                    if limited_forward_count < self.limited_forward_count:
                        flag = False

                # Pass 4: drop posts mentioning too many known program names.
                if flag:
                    for program in self.exist_program:
                        if text.count(program) >= 1:
                            program_count = program_count + 1
                        if program_count > 3:
                            flag = False
                            break

                if flag:
                    result.append(i)
            except Exception:
                import traceback
                LogGo.warning(traceback.format_exc())
        return result

    def build_single_page_dic(self, target, detail_page_bundle, order, content_ruler, encode):
        news = TBNews()
        article = TBArticle()
        result_dic = dict()
        try:
            LogGo.info(WeiboRuler.url_status + detail_page_bundle['id'])

            # Dict keys map to database column names, values to the data to store.
            news_dic = dict()
            article_dic = dict()

            """Sort code"""
            order += 2
            news_dic[news.order_code.key] = order

            sub_tim = detail_page_bundle['created_at']
            if sub_tim is not None:
                news_dic[news.subscribe_time.key] = sub_tim  # publication date
            else:
                LogGo.warning("no subscribe time!")

            news_dic[news.create_date.key] = DateGo.get_current_date()  # record creation time
            news_dic[news.status.key] = 1  # status
            news_dic[news.valid.key] = 1
            news_dic[news.title.key] = detail_page_bundle['text']
            news_dic[news.text_not_format.key] = detail_page_bundle['text']
            news_dic[news.text_blob.key] = detail_page_bundle['text']

            """Owning account"""
            try:
                user = detail_page_bundle['user']
                article_dic[article.company.key] = user['screen_name']
            except Exception:
                pass

            article_dic[article.vote_up_count.key] = detail_page_bundle['attitudes_count']  # like count
            article_dic[article.scrabble_type.key] = 'weibo'  # article type
            article_dic[article.is_scrabbled.key] = 1  # marks this row as scraped data
            article_dic[article.identifier.key] = detail_page_bundle['id']  # id in the source system
            article_dic[article.target_id.key] = target.id
            article_dic[article.content_url.key] = WeiboRuler.url_status + detail_page_bundle['id']  # content link
            article_dic[article.publishStatus.key] = 1
            # article_dic[article.messageType.key] = random.randint(0, 1)

            """For replies/quotes, record the id of the quoted post"""
            try:
                retweeted_status = detail_page_bundle['retweeted_status']
                article_dic[article.identifier_re.key] = retweeted_status['id']
            except Exception:
                pass

            result_dic.update(article_dic)
            result_dic.update(news_dic)
        except Exception:
            import traceback
            LogGo.warning(traceback.format_exc())
            return None
        return result_dic

    """
    Build a paged request URL from the given home-page url, issue the request,
    and return the extracted response data.
    """
    def build_and_request(self, keys, base_url, url, page):
        we_par = base_url.split('?')[1]
        _we_pars = dict()
        for par in we_par.split('&'):
            tmp = par.split('=', 1)
            _we_pars[tmp[0]] = tmp[1]

        we_pars_dic = dict()
        we_pars_dic['uid'] = _we_pars['uid']
        we_pars_dic['luicode'] = _we_pars['luicode']
        we_pars_dic['type'] = 'uid'
        we_pars_dic['value'] = _we_pars['uid']
        we_pars_dic['lfid'] = _we_pars['lfid']
        we_pars_dic['containerid'] = '107603' + _we_pars['uid']
        # we_pars_dic['featurecode'] = '0'
        # we_pars_dic['retcode'] = '0'

        request_url = url + "?"

        """dest fixes the order in which parameters are concatenated"""
        dest = ['uid', 'luicode', 'lfid', 'type', 'value', 'containerid']
        # dest = ['uid', 'luicode', 'lfid', 'featurecode', 'retcode', 'type', 'value', 'containerid']
        for key in dest:
            request_url += key + "=" + str(we_pars_dic[key]) + "&"
        if int(page) > 1:
            request_url += 'page=' + str(page)

        """Fetch"""
        # raw = RequestHelper.get(request_url)
        raw = WeiboRuler.req._get(request_url)

        """Extract: start marker + scrape keys"""
        tup = ExtraJSON.extra_getindex_list(raw, keys)
        return tup
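# A hedged sketch of the same getIndex page-URL construction using the
# standard library instead of manual string concatenation. Parameter names
# mirror the ones build_and_request() extracts; whether the endpoint accepts
# re-encoded parameters in a different order has not been verified here.
from urllib.parse import urlencode, urlsplit, parse_qs

def build_getindex_url(base_url, page):
    src = {k: v[0] for k, v in parse_qs(urlsplit(base_url).query).items()}
    params = {
        'uid': src['uid'],
        'luicode': src['luicode'],
        'lfid': src['lfid'],
        'type': 'uid',
        'value': src['uid'],
        'containerid': '107603' + src['uid'],
    }
    if int(page) > 1:
        params['page'] = page
    return WeiboRuler.request_getindex + '?' + urlencode(params)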
def get_request():
    url = 'http://app.media-plus.cn/portal/search/updateIndex'
    RequestHelper.get(url)
class NewrankRuler(BaseRuler):
    jsons = ExtraJSON()
    req = RequestHelper()

    url = 'https://www.newrank.cn/xdnphb/detail/getAccountArticle'

    """Take the uuid (public account) from target, then extract."""
    """Returns a list of dicts, one dict per WeChat article."""
    def scan_list(self, target, exists):
        """Request parameters"""
        par = (['flag', 'true'], ['uuid', target.extra0])
        """Scrape keys"""
        keys = ['title', 'author', 'publicTime', 'url', 'clicksCount',
                'likeCount', 'publicTime', 'summary']

        list = []
        result_list = []
        try:
            raw = RequestHelper.post(NewrankRuler.url, par, file_cookie=Configs.newrank_cookie_file)
        except Exception:
            import traceback
            LogGo.warning(traceback.format_exc())
            return (-1, (target, None, None, None))
        try:
            list = ExtraJSON.extra_newrank_wechat_list(raw, keys)
        except Exception:
            return (-1, (target, None, None, None))

        if len(list) > 0:
            list.reverse()
            for item in list:
                if exists.count(item['title']) < 1:
                    result_list.append(item)
            LogGo.debug('newrank list length: ' + str(len(result_list)))
            if len(result_list) > 0:
                return (1, (target, list, None, None))
        return (-1, (target, None, None, None))

    def scan_detail(self, target, detail_page_bundle, order, content_ruler, encode):
        news = TBNews()
        article = TBArticle()
        # picture_dao = PictureDao()
        result_dic = dict()
        try:
            info = self.ready_info(detail_page_bundle['title'], detail_page_bundle['url'])
            LogGo.info(info)
            try:
                # tup = ExtraJSON.wechat_extra_content(detail_page_bundle['url'])
                tup = self.jsons.wechat_extra_content(detail_page_bundle['url'])
            except HttpConnectionFailedException as e:
                LogGo.warning(repr(e))
                return (-3, None)
            except AttributeError:
                LogGo.warning("Maybe a deleted msg, complete the code to detect this error")
                return (-2, None)
            except Exception:
                LogGo.warning("Error when getting detail message!")
                return (-2, None)

            raw_content = tup[1]
            content = tup[2]
            picture = tup[3]

            # Dict keys map to database column names, values to the data to store.
            news_dic = dict()
            article_dic = dict()

            ############################## NEWS ###############################
            """List image id"""
            # if picture is not None:
            #     picture_id = picture_dao.save_data(picture)
            #     news_dic[news.main_pic_id.key] = picture_id
            news_dic[news.text_not_format.key] = content  # body text with tags stripped
            # news_dic[news.text_blob.key] = raw_content  # raw body with tags
            news_dic[news.subscribe_time.key] = detail_page_bundle['publicTime']  # publication date
            news_dic[news.create_date.key] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # record creation time
            news_dic[news.subject.key] = detail_page_bundle['summary']  # abstract
            news_dic[news.valid.key] = 1
            news_dic[news.author.key] = detail_page_bundle['author']
            news_dic[news.title.key] = detail_page_bundle['title']  # article title
            news_dic[news.status.key] = 2
            order += 5
            news_dic[news.order_code.key] = order  # sort code

            ############################## ARTICLE ###############################
            article_dic[article.content_url.key] = detail_page_bundle['url']  # content link
            article_dic[article.fingerprint.key] = md5(detail_page_bundle['url'])  # fingerprint derived from the URL
            article_dic[article.company.key] = target.data_key  # owning account
            article_dic[article.target_id.key] = target.id
            article_dic[article.raw_click_count.key] = detail_page_bundle['clicksCount']  # read count
            article_dic[article.vote_up_count.key] = detail_page_bundle['likeCount']  # like count
            article_dic[article.scrabble_type.key] = 'wechat'  # article type, fixed to 'wechat'
            article_dic[article.is_scrabbled.key] = 1  # marks this row as scraped data
            article_dic[article.publishStatus.key] = 1
            # article_dic[article.messageType.key] = random.randint(0, 1)

            ############################## DIC ###############################
            result_dic.update(news_dic)
            result_dic.update(article_dic)
        except Exception:
            import traceback
            LogGo.warning(traceback.format_exc())
            return (-1, None)
        return (1, result_dic)
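# The rulers share an informal (status, payload) protocol: scan_list() returns
# (1, ...) when fresh items were found, (0, ...) when the scan ran but found
# nothing new, and (-1, ...) on failure; scan_detail() additionally uses -2
# for apparently deleted pages and -3 for connection errors. A minimal caller
# sketch under that reading (ruler/target/exists are placeholders):
def run_list_scan(ruler, target, exists):
    status, (tgt, items, _, _) = ruler.scan_list(target, exists)
    if status == 1:
        return items   # fresh articles to process
    if status == 0:
        return []      # scan ran, nothing new
    raise RuntimeError('scan failed for target ' + str(tgt.id))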
class GsdataRuler(BaseRuler):
    jsons = ExtraJSON()
    req = RequestHelper()

    url = 'http://www.gsdata.cn/rank/toparc?wxname={0}&wx={1}&sort=-1'

    def __init__(self):
        self.news = NewsDao()

    """Take the uuid (public account) from target, then extract."""
    """Returns a list of dicts, one dict per WeChat article."""
    def looper_js(self, result, raw, exists, ruler, captup=None):
        cap = captup
        if captup is not None:
            if captup.count(' ') == 2:
                cap = Sh.str_to_tup(captup)
        list = ExtraJSON.extra_any_json(raw, ruler, cap=cap)
        if len(list) > 0:
            for item in list:
                """Duplicate check for routine scans (currently disabled)"""
                if 1 > 0:  # if exists.count(item['link']) < 1:
                    result.append(item)
                else:
                    break

    def sort(self, list):
        # Selection-style sort: newest date first; for equal dates, smaller 'top' first.
        for i in range(0, len(list)):
            for j in range(i + 1, len(list)):
                if list[i]['date'] < list[j]['date'] or \
                        (list[i]['date'] == list[j]['date'] and list[i]['top'] > list[j]['top']):
                    list[i], list[j] = list[j], list[i]
        return list

    def scan_list(self, target, exists):
        list = []
        result_list = []
        cap = 'data'
        ruler = 'author:author;title:title;date:posttime;img:picurl;link:url;top:top;click:readnum_newest;vote_up:likenum_newest;subject:content'
        url = self.url.format(target.extra0, target.wx_hao)
        header = {'X-Requested-With': 'XMLHttpRequest'}
        raw = RequestHelper.get(url, header=header, file_cookie=Configs.gsdata_cookie_file)
        try:
            self.looper_js(list, raw, exists, ruler, cap)
        except Exception as e:
            E.out_err(e)
            return (-1, (target, None, None, None))

        if len(list) > 0:
            list = self.sort(list)
            list.reverse()
            for item in list:
                if exists.count(item['title']) < 1:
                    result_list.append(item)
            LogGo.debug('gsdata list length: ' + str(len(result_list)))
            if len(result_list) > 0:
                return (1, (target, list, None, None))
        return (-1, (target, None, None, None))

    def scan_detail(self, target, detail_page_bundle, order, content_ruler, encode):
        news = TBNews()
        article = TBArticle()
        # picture_dao = PictureDao()
        result_dic = dict()
        try:
            """Fingerprint derived from the URL"""
            signature = md5(detail_page_bundle['link'])
            info = self.ready_info(detail_page_bundle['title'], detail_page_bundle['link'])
            LogGo.info(info)
            try:
                tup = self.jsons.wechat_extra_content(detail_page_bundle['link'])
            except HttpConnectionFailedException as e:
                LogGo.warning(repr(e))
                return (-3, None)
            except AttributeError:
                LogGo.warning("Maybe a deleted msg, complete the code to detect this error")
                return (-2, None)

            raw_content = tup[1]
            content = tup[2]

            # Dict keys map to database column names, values to the data to store.
            news_dic = dict()
            article_dic = dict()

            ############################## NEWS ###############################
            """List image"""
            picture = detail_page_bundle['img']
            """List image id"""
            # if picture is not None:
            #     picture_id = picture_dao.save_data(picture)
            #     news_dic[news.main_pic_id.key] = picture_id
            order = order + 2
            news_dic[news.order_code.key] = order  # sort code
            news_dic[news.subject.key] = detail_page_bundle['subject']  # abstract
            news_dic[news.valid.key] = 1
            news_dic[news.create_date.key] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # record creation time
            news_dic[news.text_not_format.key] = content  # body text with tags stripped
            # news_dic[news.text_blob.key] = raw_content  # raw body with tags
            news_dic[news.title.key] = detail_page_bundle['title']  # article title
            news_dic[news.subscribe_time.key] = detail_page_bundle['date']  # publication date
            news_dic[news.status.key] = 2
            try:
                news_dic[news.author.key] = detail_page_bundle['author']
            except Exception:
                pass

            ############################## ARTICLE ###############################
            try:
                article_dic[article.raw_click_count.key] = int(detail_page_bundle['click'])  # read count
            except Exception:
                pass
            try:
                article_dic[article.vote_up_count.key] = int(detail_page_bundle['vote_up'])  # like count
            except Exception:
                pass
            article_dic[article.scrabble_type.key] = 'wechat'  # article type, fixed to 'wechat'
            article_dic[article.is_scrabbled.key] = 1  # marks this row as scraped data
            article_dic[article.fingerprint.key] = signature  # fingerprint derived from the URL
            article_dic[article.target_id.key] = target.id
            article_dic[article.company.key] = target.data_key  # owning account
            article_dic[article.content_url.key] = detail_page_bundle['link']  # content link
            article_dic[article.publishStatus.key] = 1
            # article_dic[article.messageType.key] = random.randint(0, 1)

            ############################## DIC ###############################
            result_dic.update(article_dic)
            result_dic.update(news_dic)
        except Exception:
            import traceback
            LogGo.warning(traceback.format_exc())
            return (-1, None)
        return (1, result_dic)
class WechatRuler:
    req = RequestHelper()

    """Scrape keys"""
    keys = ['author', 'content_url', 'cover', 'digest', 'title', 'datetime', 'fileid']

    """Take the uuid (public account) from target, then extract."""
    """Returns a list of dicts, one dict per WeChat article."""
    @Annoations.exe_time
    def ExtraList(self, target, existsUrls, order):
        # order = ScrappdeDataDao.get_max_order_code()  # sort code from the database
        result = []
        url = str(target.extra0)
        next_index = ""

        """Fetch"""
        raw = WechatRuler.req._get(url)
        try:
            trup = ExtraJSON.extraWechatList(raw, 'msgList', WechatRuler.keys)
            list = trup[0]
            next_index = str(trup[1])
        except Exception as e:
            print(e)
            print("ERROR")
            return result

        while True:
            try:
                print('>>> scanning id: ' + next_index)
                LogGo.info('>>> scanning id: ' + next_index)
                tup = self.loopToFail(url, next_index)
                re_list = tup[0]
                next_index = str(tup[1])
                is_continue = tup[2]
                if len(re_list) > 0:
                    for item in re_list:
                        list.append(item)
                else:
                    break
                if is_continue != 1:
                    break
            except Exception as e:
                print(e)
                break

        print('>>> list scanning completed')
        print('>>>')
        list.reverse()
        print('>>> Start Build SQL')
        result = self.build_base_dic(target, list, existsUrls, order)
        print('>>> Build SQL Success')
        print('>>>')
        return result

    def build_base_dic(self, target, list, existsUrls, order):
        news = TBNews()
        article = TBArticle()
        picture_dao = PictureDao()
        result = []
        article_result = []

        """Scrape article bodies"""
        for i in list:
            try:
                i['content_url'] = UrlHelper.unify(i['content_url'])  # StringHelper.unescape(i['content_url'])
                if existsUrls.count(i['content_url']) < 1:
                    LogGo.info(">>> file id: " + str(i['fileid']))
                    LogGo.info(">>> url: " + str(i['content_url']))
                    try:
                        tup = ExtraJSON.wechat_extra_content(i['content_url'])
                    except Exception as e:
                        print(e)
                        print(">>> extra content error.")
                        LogGo.info("extra content error.")
                        LogGo.info("possibly a deleted msg")
                        # LogGo.info("url: " + i['content_url'])
                        continue

                    raw_content = tup[1]
                    content = tup[2]

                    # Dict keys map to database column names, values to the data to store.
                    dic = dict()
                    article_dic = dict()

                    order = order + 5
                    dic[news.order_code.key] = order  # sort code
                    dic[news.create_date.key] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # record creation time
                    dic[news.valid.key] = 1
                    dic[news.text_not_format.key] = content  # body text with tags stripped
                    # dic[news.text_blob.key] = raw_content  # raw body with tags
                    dic[news.subscribe_time.key] = i['datetime']  # publication date
                    dic[news.author.key] = i['author']  # author
                    dic[news.title.key] = i['title']  # article title
                    dic[news.subject.key] = i['digest']  # abstract
                    dic[news.status.key] = 2

                    picture_id = picture_dao.save_data(i['cover'])
                    dic[news.main_pic_id.key] = picture_id  # list image id

                    article_dic[article.fingerprint.key] = md5(i['content_url'])  # fingerprint derived from the URL
                    article_dic[article.target_id.key] = target.id
                    article_dic[article.company.key] = target.data_key  # owning account
                    article_dic[article.content_url.key] = i['content_url']  # content link
                    article_dic[article.scrabble_type.key] = 'wechat'  # article type, fixed to 'wechat'
                    article_dic[article.is_scrabbled.key] = 1  # marks this row as scraped data

                    result.append(dic)
                    article_result.append(article_dic)
            except Exception as e:
                import traceback
                msg = traceback.format_exc()
                print(msg)
                LogGo.warning(repr(e))
                continue
        return result, article_result

    """Build getmasssendmsg JS request URLs and keep requesting until it fails."""
    def loopToFail(self, url, index):
        """Split the request url and extract its parameters for the follow-up requests."""
        we_par_header = url.split('?')[0]
        we_par = url.split('?')[1]
        we_pars = we_par.split('&')

        we_pars_dic = dict()
        we_pars_dic['count'] = 10
        we_pars_dic['f'] = 'json'
        we_pars_dic['x5'] = 0
        we_pars_dic['frommsgid'] = str(index)
        we_pars_dic['wxtoken'] = ''
        for par in we_pars:
            tmp = par.split('=', 1)
            we_pars_dic[tmp[0]] = tmp[1]

        request_url = we_par_header + "?"
        dest = ['__biz', 'uin', 'key', 'f',
                'frommsgid', 'count', 'uin', 'key', 'pass_ticket',
                'wxtoken', 'x5']
        for key in dest:
            request_url += key + "=" + str(we_pars_dic[key]) + "&"

        """Fetch"""
        # raw = RequestHelper.get(request_url)
        raw = WechatRuler.req._get(request_url)

        """Extract: start marker + scrape keys"""
        tup = ExtraJSON.extraGetMassList(raw, WechatRuler.keys)
        return tup
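# A hedged sketch of loopToFail()'s URL assembly using urllib.parse. Note the
# original `dest` list repeats 'uin' and 'key', so those parameters are sent
# twice; whether the endpoint requires that duplication is unknown, and this
# sketch keeps single occurrences.
from urllib.parse import urlencode, parse_qs

def build_getmass_url(url, index):
    base, query = url.split('?', 1)
    src = {k: v[0] for k, v in parse_qs(query).items()}
    params = {
        '__biz': src['__biz'],
        'uin': src['uin'],
        'key': src['key'],
        'f': 'json',
        'frommsgid': str(index),
        'count': 10,
        'pass_ticket': src['pass_ticket'],
        'wxtoken': '',
        'x5': 0,
    }
    return base + '?' + urlencode(params)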