Exemplo n.º 1
0
    def start_requests(self):
        """Build the first crawl request from the HTTP record queued in Redis.

        Reads the id stored under the ``__running_http_`` Redis key (deleting
        the key afterwards), loads the matching document from the ``https``
        Mongo collection and the task referenced by its ``taskid`` field, then
        copies the captured cookies and tokens into ``FakeLoadParams``.

        Side effects: sets ``self.http``, ``self.task`` and
        ``self.crawled_times``, and mutates the shared
        ``FakeLoadParams.cookies`` / ``FakeLoadParams.params`` mappings.

        Yields:
            One GET ``scrapy.Request`` for ``NORMAL_URLS.load`` followed by
            the generated query string, only when ``'running_in_http'`` is
            found in the task's ``task_status``; otherwise nothing is yielded.

        Raises:
            KeyError: when the captured Cookie header does not contain
                ``wap_sid2``, ``wxuin`` or ``version`` (assuming the record is
                a plain dict, a missing record field raises it as well).
        """
        httpid = redis_instance.get('__running_http_')
        redis_instance.delete('__running_http_')
        print('httpid %s' % httpid)
        http = mongo_instance.https.find_one(filter={'_id': ObjectId(httpid)})

        task_obj_id = http['taskid']
        print('taskid %s' % str(task_obj_id))
        task = mongo_instance.tasks.find_one(filter={'_id': task_obj_id})
        print('- task finded')
        print(task)
        self.http = http
        self.task = task

        cookie_str = http['actionhome']['REQUEST_HEADERS']['Cookie'].replace(
            ' ', '')
        # Cookie headers frequently end with ';' or contain fragments without
        # '='; those used to raise IndexError, so they are skipped here.
        cookies = {}
        for item in cookie_str.split(';'):
            name, sep, value = item.partition('=')
            if not sep:
                continue
            cookies[name] = value
        print('- cookies')
        print(cookies)

        FakeLoadParams.cookies['pass_ticket'] = http['pass_ticket']
        FakeLoadParams.cookies['wap_sid2'] = cookies['wap_sid2']
        FakeLoadParams.cookies['wxuin'] = cookies['wxuin']
        FakeLoadParams.cookies['version'] = cookies['version']

        FakeLoadParams.params['__biz'] = http['biz']
        FakeLoadParams.params['pass_ticket'] = http['pass_ticket']
        FakeLoadParams.params['appmsg_token'] = http['appmsg_token']

        url = NORMAL_URLS.load
        # Values are concatenated as-is (no URL-encoding), so every value in
        # FakeLoadParams.params must already be a string.
        queryString = '?' + '&'.join(
            key + '=' + val for key, val in FakeLoadParams.params.items())
        print(queryString)
        print('- FakeLoadParams cookies')
        print(FakeLoadParams.cookies)
        self.crawled_times = 1

        # Only start crawling while the task is flagged as running.
        if 'running_in_http' in self.task['task_status']:
            yield scrapy.Request(url=url + queryString,
                                 headers=FakeLoadParams.headers,
                                 cookies=FakeLoadParams.cookies,
                                 method='GET')
Exemplo n.º 2
0
 def add_nick_name(ordered_req_dict):
     """
     Annotate every entry of ``ordered_req_dict`` with nickname information.

     A lookup table is built from all redis keys matching ``*.nick_name``:
     the decoded value stored under such a key maps to the part of the key
     name before its first ``.``. Each entry of ``ordered_req_dict`` then
     receives three fields: ``nick_name`` (looked up by the entry's key),
     ``wxuin`` (the entry's key) and ``nickname`` (the value returned by
     ``TidyReqData.get_nickname()``).

     :param ordered_req_dict: mapping whose values support item assignment;
         it is modified in place
     :return: the same ``ordered_req_dict`` object
     :raises KeyError: if an entry's key has no match in the lookup table
     """
     current_nickname = TidyReqData.get_nickname()

     prefix_by_value = {}
     for redis_key in redis_instance.keys("*.nick_name"):
         key_prefix = redis_key.decode('utf8').split('.')[0]
         stored_value = redis_instance.get(redis_key).decode('utf8')
         prefix_by_value[stored_value] = key_prefix

     for req_key in ordered_req_dict:
         matched_prefix = prefix_by_value[req_key]
         entry = ordered_req_dict[req_key]
         entry['nick_name'] = matched_prefix
         entry['wxuin'] = req_key
         entry['nickname'] = current_nickname

     return ordered_req_dict
Exemplo n.º 3
0
 def get_all_req_data():
     """
     Collect every request record stored in redis under a ``*.req`` key.

     Each stored value is parsed as JSON. A value that cannot be parsed is
     kept as ``str(raw_value)`` instead of aborting the whole collection.

     :return: ``collections.OrderedDict`` mapping each UTF-8 decoded key to
         its data, ordered by plain string sort of the keys. Keys start with
         a timestamp, so this is chronological as long as the timestamps have
         the same number of digits, e.g.
         {'1523423421446.appmsg_comment.req': dict_file,
          '1532859863455.getappmsgext.req': dict_file}
     """
     unordered_req_dict = {}
     # Walk over every request record.
     for key in redis_instance.keys("*.req"):
         req_bin_data = redis_instance.get(key)
         try:
             req_dict_data = json.loads(req_bin_data)
         except (ValueError, TypeError):
             # ValueError covers malformed JSON and undecodable bytes;
             # TypeError covers a key that vanished (value is None).
             # NOTE(review): if redis returns bytes, str() yields the
             # "b'...'" repr rather than decoded text -- confirm callers
             # rely on that before switching to .decode('utf8').
             req_dict_data = str(req_bin_data)
         unordered_req_dict[key.decode('utf8')] = req_dict_data
     # Return the records ordered by key.
     return collections.OrderedDict(
         (name, unordered_req_dict[name])
         for name in sorted(unordered_req_dict))
Exemplo n.º 4
0
 def get_nickname():
     """Return the value of the ``current_nickname`` redis key as UTF-8 text."""
     raw_nickname = redis_instance.get('current_nickname')
     return raw_nickname.decode('utf8')
    def get_xcx_item_list(self, nickname, hand=False):
        """
        Drive the device through a mini program and capture its request data.

        Flow: flush the ``*.req`` records, search for the mini program, open
        its first column, swipe through the list until the
        ``xcx_get_list_stop`` redis flag appears, swipe back to the top, then
        open every not-yet-indexed item in turn. Finally the raw data is
        inserted into mongo, the index is rebuilt and a 5 hour TTL is applied
        to the redis data.

        :param nickname: name of the mini program to search for
        :param hand: manual-mode flag; not used by this method
        :return: None
        """
        index_name = "jqzt"
        mongo_target = "xcx_jqzt"

        print(nickname)
        TidyReqData.flush_data("*.req")
        self.home_to_search()
        self.search_xcx(nickname)

        # After entering the mini program through the first search result,
        # select the first column.
        # NOTE(review): eval() runs whatever expression the config holds;
        # if this is always a literal coordinate pair, ast.literal_eval
        # would be the safer choice.
        column_button = self.data['BTN']['JIU_QIAN_ZFJY']
        self.oap.tap(tuple(eval(column_button)))
        time.sleep(1)

        # Option 1: pull the complete article list first, then visit each
        # article. Option 2: capture the information while scrolling.
        swipe_count = 0
        while redis_instance.get("xcx_get_list_stop") is None:
            self.oap.swap([60, 1000], [60, 250])
            swipe_count += 1
            time.sleep(0.5)

        # Scroll back to the top by undoing every swipe.
        if redis_instance.get("xcx_get_list_stop"):
            for _ in range(swipe_count):
                self.oap.swap([60, 250], [60, 1000])

        # Fetch the list of mini program items that were captured.
        entries = TidyReqData.get_xcx_req_data("*._xcx")
        for entry in entries:
            title = entry['title']
            print("当前文档", title)

            # Already indexed: just scroll past it.
            if xcx.doc_exist(index_name, entry['id']):
                self.oap.swap([60, 500], [60, 250])
                continue

            # Locate the item on screen, open it, wait, then go back.
            tap_target = self.vc.click_by_words(title, tap=False)
            print(tap_target, "", title)
            self.oap.tap(tap_target)
            time.sleep(3)
            self.oap.key(self.data['KEY']['BACK_KEYEVENT'])

            # Detail limit reached: leave the loop.
            if redis_instance.get("xcx_get_detail_stop"):
                break

            # Scroll on to the next item.
            self.oap.swap([60, 500], [60, 250])
            time.sleep(1)

        # Press back twice.
        self.oap.key(self.data['KEY']['BACK_KEYEVENT'])
        self.oap.key(self.data['KEY']['BACK_KEYEVENT'])

        print("原始数据进入mongo %s" % (mongo_target))
        TidyReqData.insert_xcx_to_mongo(mongo_target)
        print("原始数据进入mongo %s 完成" % (mongo_target))

        print("正在为 %s 创建索引..." % (index_name))
        index_result = xcx.index_db_docs(index_name)
        print("索引完成", index_result)

        print("redis 相关数据设置缓存时间")
        # 60 * 60 * 5 seconds = 5 hours.
        TidyReqData.set_redis_ttl(60 * 60 * 5)
        print("redis 5小时失效时间设置完成")