Exemplo n.º 1
0
 def put_cookie_2_que(self, cookie):
     """Push a cookie onto the queue named by ``config.ssn_2_slv``.

     The cookie is decoded with ``loads_json`` and re-encoded with
     ``dumps_json`` before being left-pushed through ``redis_cli``.
     """
     target_queue = config.ssn_2_slv
     serialized = dumps_json(loads_json(cookie))
     redis_cli.lpush(target_queue, serialized)
Exemplo n.º 2
0
def get_shixin_each_info(data_list, http, ch) -> _data:
    """Fetch the detail record for every entry of the "shixin" list.

    For each row of ``data_list`` a GET request is sent through
    ``http.receive_and_request`` using the row's last element as ``id`` and
    its second-to-last element as ``caseCode``, together with the captcha
    code/id currently held by ``ch``.

    Returns the list of decoded, non-empty responses. Rows whose response
    decodes to a falsy value are left out, and a new captcha is requested
    from ``ch`` for them.
    """
    request_headers = cnf.headers_s_info
    detail_url = cnf.url_s_info
    details = []
    for row in data_list:
        # Work on a private copy so the shared template stays untouched.
        query = deepcopy(cnf.params_s_info)
        query.update(
            id=row[-1],
            caseCode=row[-2],
            pCode=ch.s_c_code,
            captchaId=ch.s_c_id,
        )
        raw = http.receive_and_request(url=detail_url,
                                       headers=request_headers,
                                       params=query,
                                       method='get')
        info = loads_json(raw)
        if info:
            details.append(info)
        else:
            # An empty result is treated as a captcha error (per the
            # original author's note).
            # TODO: refresh the captcha values used by later requests.
            ch.get_new_captcha('shixin')
    return details
Exemplo n.º 3
0
def get_zhixing_each_info(data_list, http, ch) -> _data:
    """Fetch the detail record for every entry of the "zhixing" list.

    For each row of ``data_list`` a GET request is sent through
    ``http.receive_and_request`` using the row's last element as ``id``,
    the captcha id/code currently held by ``ch`` and a millisecond
    timestamp in the ``_`` parameter.

    Returns the list of decoded, non-empty responses. A response that
    decodes to a falsy value is treated as a captcha error: a new captcha
    is requested from ``ch`` and the row is skipped, matching the behavior
    of ``get_shixin_each_info`` (previously the empty value was appended
    to the result anyway).
    """
    data = []
    headers = cnf.headers_z_info
    url_info = cnf.url_z_info
    for each in data_list:
        # Work on a private copy so the shared template stays untouched.
        params = deepcopy(cnf.params_z_info)
        params.update({
            'id': each[-1],
            'captchaId': ch.z_c_id,
            'j_captcha': ch.z_c_code,
            # Current time in milliseconds.
            '_': int(time.time() * 1000),
        })
        info = loads_json(
            http.receive_and_request(url=url_info,
                                     headers=headers,
                                     params=params,
                                     method='get'))
        if not info:
            # An empty result is treated as a captcha error; do not record it.
            ch.get_new_captcha('zhixing')
            continue
        data.append(info)
    return data
Exemplo n.º 4
0
    def receive_new_captcha(self, choice):
        """Pop the next captcha message and return it decoded.

        The message is taken from ``'captcha_z'`` when ``choice`` is
        ``'zhixing'`` and from ``'captcha_s'`` for any other value.
        """
        source = 'captcha_z' if choice == 'zhixing' else 'captcha_s'
        return loads_json(pop_msg(source))
Exemplo n.º 5
0
Arquivo: weibo.py Projeto: yamoi/bot48
def get_headers_with_cookie() -> dict:
    """Return a copy of the Firefox request headers with Host and Cookie set.

    ``Host`` is set to ``weibo.cn`` and ``Cookie`` is built from the
    ``name``/``value`` pairs of the entries loaded from ``COOKIES_JSON``.
    """
    headers = deepcopy(firefox_request_header)
    headers['Host'] = 'weibo.cn'
    cookie_pairs = []
    for entry in loads_json(COOKIES_JSON):
        cookie_pairs.append("{}={}".format(entry["name"], entry["value"]))
    headers['Cookie'] = "; ".join(cookie_pairs)
    return headers
Exemplo n.º 6
0
def parse_zhixing_shixin_list(data_list) -> _zs_list:
    """Parse a list response into rows plus the total page count.

    ``data_list`` is decoded with ``loads_json``; when that yields ``None``
    the result is ``([], 1)``. Otherwise every entry under ``'result'`` of
    the first decoded element becomes ``[name, date, caseCode, id]``:

    * name: ``'pname'``, falling back to ``'iname'`` when it is falsy;
    * date: ``'caseCreateTime'``, falling back to ``'regDate'``, read from
      the entry's decoded ``'jsonObject'`` (``''`` when that is falsy);
    * id: converted with ``str``.

    The second return value is the ``'totalPage'`` field of the first
    decoded element.
    """
    parsed = loads_json(data_list)
    if parsed is None:
        return [], 1

    rows = []
    for item in parsed[0].get('result'):
        person = item.get('pname') or item.get('iname')
        filed_on = ''
        extra = item.get('jsonObject')
        if extra:
            extra_dict = loads_json(extra)
            filed_on = (extra_dict.get('caseCreateTime')
                        or extra_dict.get('regDate'))
        rows.append(
            [person, filed_on, item.get('caseCode'), str(item.get('id'))])
    return rows, parsed[0].get('totalPage')
Exemplo n.º 7
0
def parse_json(html):
    """Parse car-series data out of a JSON response.

    Returns one ``[id, name, guochanhezijinkou]`` list per entry under the
    decoded payload's ``'data'`` key, or an empty list when ``loads_json``
    yields ``None``.
    """
    parsed = loads_json(html)
    if parsed is None:
        return []
    return [
        [entry.get('id'), entry.get('name'), entry.get('guochanhezijinkou')]
        for entry in parsed.get('data')
    ]
Exemplo n.º 8
0
 def take_out_a_seed(self):
     """Take the next seed from ``self.seeds_store`` and return it decoded.

     Seeds are handed out one at a time (the original author notes that the
     seed manager needs feedback and releases a fixed amount per round).

     Returns the seed decoded by ``loads_json``, or ``None`` when the
     iterator is exhausted or the seed could not be taken or decoded.
     """
     try:
         return loads_json(next(self.seeds_store))
     except StopIteration:
         # All seeds have been handed out.
         logger.info('种子派发完毕')
     except Exception as e:
         # Still best-effort (returns None), but no longer reported as
         # "exhausted" and no longer swallowing KeyboardInterrupt/SystemExit
         # the way the former bare ``except:`` did.
         logger.warning('取出种子失败\t{0}'.format(e))
     return None
Exemplo n.º 9
0
def login_weibo():
    """
    Log in to Weibo, either with stored cookies or with username/password.

    When the file at ``COOKIES_JSON`` exists:

    * if ``is_expiry_sub()`` is truthy, a browser session logs in with the
      credentials from ``Mine()`` and the resulting cookies are written
      back to ``COOKIES_JSON``;
    * otherwise the stored cookies are loaded and added to a browser
      session opened on https://weibo.cn/.

    NOTE(review): when the cookie file does not exist this function does
    nothing at all, so no first-time login happens here - confirm this is
    intended.

    :return: None
    """
    is_exists_cookies_json = is_path_exists(COOKIES_JSON)
    if is_exists_cookies_json:
        print("cookies json 已存在")
        is_expiry = is_expiry_sub()
        if is_expiry:
            # Cookies are about to expire: fetch fresh ones via a
            # username/password login.
            print("Cookies 即将过期 重新获取")
            print("帐号密码登录")
            driver_initial()
            driver = get_browser()
            try:
                print('准备登陆Weibo.cn网站...')
                driver.get(WEIBO_LOGIN_URL)
                # WebDriverWait(driver, 10).until(ec.presence_of_element_located((By.ID, "loginAction")))
                # (original author's note: the presence-based wait above did
                # not work, unlike the visibility-based wait below)
                WebDriverWait(driver, 10).until(
                    ec.visibility_of_element_located((By.ID, "loginAction")))
                elem_user = driver.find_element_by_id("loginName")
                elem_user.send_keys(Mine().username)  # user name
                elem_pwd = driver.find_element_by_id("loginPassword")
                elem_pwd.send_keys(Mine().password)  # password

                elem_sub = driver.find_element_by_id("loginAction")
                elem_sub.click()
                # After clicking login, a captcha may appear when logging in
                # repeatedly or from an unusual location; the wait below
                # fails after 20 s if the URL never contains 'm.weibo.cn'.
                WebDriverWait(driver, 20).until(ec.url_contains('m.weibo.cn'))

                sina_cookies = driver.get_cookies()  # list of cookie dicts
                # for cookie in sina_cookies:
                #     cookie['table'] = 'weibo_cookies'
                # with open(COOKIES_JSON, 'w', encoding="utf-8") as f:  # save the cookies
                #     f.write(json.dumps(sina_cookies, indent=4))
                dump_dict_to_json(sina_cookies, COOKIES_JSON)
                print('<登陆成功>')
                # NOTE(review): the driver is closed only on success; on any
                # exception it is left open.
                driver.close()
            except Exception as e:
                # Any failure in the login sequence is only reported.
                print("Error: <登录失败> {}".format(e))
        else:
            # Cookies are still valid: reuse them.
            print("Cookies 登录")
            weibo_cookies = loads_json(COOKIES_JSON)
            driver_initial()
            driver = get_browser()
            driver.delete_all_cookies()
            # Open the site first, then add the stored cookies.
            driver.get("https://weibo.cn/")
            for wc in weibo_cookies:
                # The stored 'domain' key is dropped before adding the
                # cookie; raises KeyError if an entry has no 'domain'.
                wc.pop('domain')
                driver.add_cookie(wc)
            # Reload the page now that the cookies are set.
            driver.get("https://weibo.cn/")
Exemplo n.º 10
0
 def parase_html(self, html):
     """Collect result records from a JSON response and persist each one.

     ``html`` is decoded with ``loads_json``. When the decoded payload's
     ``'data'`` value is a list, every record under each element's
     ``'result'`` key is appended to the returned list and written to
     ``self.baidu_list`` via ``write_2_file``.

     Returns ``(records, total_num)``, where ``total_num`` is the
     ``'dispNum'`` of the last element processed (default 1).
     """
     records, total_num = [], 1
     parsed = loads_json(html)
     if parsed is None:
         return records, total_num

     groups = parsed.get('data')
     if not isinstance(groups, list):
         return records, total_num

     for group in groups:
         for record in group.get('result'):
             records.append(record)
             # Persist the record.
             write_2_file(self.baidu_list, dumps_json(record))
         # Kept so the caller can decide whether there is a next page.
         total_num = group.get('dispNum', 1)
     return records, total_num
Exemplo n.º 11
0
 def __delete_cookie(self, user_id):
     """Remove one user's cookie by rewriting the user-info file.

     Every stored cookie is decoded; those whose ``'userid'`` differs from
     ``user_id`` are kept. The file at ``config.user_info_file`` is then
     re-initialised and the kept cookies are written back one by one.

     :param user_id: the user id whose cookie should be removed
     """
     survivors = [
         entry
         for entry in map(loads_json, self.load_cookies_list())
         if entry.get('userid') != user_id
     ]
     # Rewrite the file from scratch with the remaining cookies.
     initial_file(config.user_info_file)
     for entry in survivors:
         write_2_file(config.user_info_file, dumps_json(entry))
Exemplo n.º 12
0
def listn_the_psm_que():
    """Listen on ``psm_que`` forever and persist every message received.

    Before the loop starts, a ``{'psm': 'done'}`` message is pushed onto
    ``config.task_que_fb`` to report that this listener is up. The loop
    never returns.
    """
    print('持久化队列启动')
    feedback_queue = config.task_que_fb
    redis_cli.lpush(feedback_queue, dumps_json({'psm': 'done'}))

    while True:
        if not redis_cli.exists(psm_que):
            # Nothing queued; poll again shortly.
            time.sleep(0.1)
            continue

        token_set = make_set(token, blank='', index='')
        raw = redis_cli.rpop(psm_que)
        seed = loads_json(translate_2_json_dict(raw))
        stamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
        print('{0}\t收到数据'.format(stamp))
        # Hand the decoded message over for persistence.
        do_persistence(seed, token_set)
        time.sleep(0.1)
Exemplo n.º 13
0
def listen_task_que():
    """Claim a node number, then consume the task queue forever.

    Messages on ``config.task_que`` come in two shapes:

    * a seed, e.g. ``{"url": "xxxx", ...}`` - crawled by a
      ``SpiderHandler`` created with this node's number;
    * a command, ``{"command": "xxxx"}``, which switches this node's role:
      ``ssnm`` runs session management, ``sedm`` runs seed management and
      any other value starts the persistence listener.

    Node numbering (added 09-07 by the original author to keep the js
    files of different nodes apart): the current number is popped from
    ``config.mark_que``, incremented to become this node's number, and
    pushed back.

    Messages that do not decode to a dict are reported and skipped instead
    of crashing the listener. The function never returns.
    """
    task_que = config.task_que
    mark_que = config.mark_que

    # Wait on the mark queue first to obtain this node's number.
    while True:
        if redis_cli.exists(mark_que):
            msg = redis_cli.rpop(mark_que)
            if not msg:
                # The entry disappeared between exists() and rpop().
                continue
            mark = int(msg.decode()) + 1
            break
        time.sleep(random.random())
    # Publish our own number as the new current one.
    redis_cli.lpush(mark_que, mark)
    print('当前slave编号\t{0}'.format(mark))

    # Only now start listening on the task queue.
    while True:
        if redis_cli.exists(task_que):
            msg = redis_cli.rpop(task_que)
            if not msg:
                continue
            msg_dict = loads_json(msg.decode())
            print('{0}\t收到数据'.format(
                datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')))

            if not isinstance(msg_dict, dict):
                # loads_json may yield None (or a non-dict) for a malformed
                # message; skip it rather than fail on ``.get`` below.
                print('{0}\t无法解析的消息,已忽略'.format(
                    datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')))
            elif msg_dict.get('command'):
                command = msg_dict.get('command')
                if command == 'ssnm':
                    sm = SessionMangement()
                    sm.session_main_logic()
                elif command == 'sedm':
                    sm = SeedsMangement()
                    sm.seed_main_logic()
                else:
                    # Become the persistence module.
                    listn_the_psm_que()
            else:
                # A seed. Per the original author's notes, handling it
                # means: 1. request a cookie, 2. fetch the html, 3. put
                # the data on the psm queue, 4. report back to the
                # session/seed modules.
                time.sleep(random.random() * 10)
                seed = msg_dict
                # The spider is created with this node's number.
                sp = SpiderHandler(mark)
                sp.receive_seed_and_start_crawl(seed)
                # Done with this seed; drop the handler before the next one.
                del sp

        time.sleep(0.1)
Exemplo n.º 14
0
def download_img_and_ocr(type):
    """Download every pending captcha image of one kind and OCR it.

    Captcha ids are read line by line from the captcha file configured for
    ``type`` (``'zhixing'``, or anything else for "shixin"). Each image is
    downloaded, base64-encoded and posted to the OCR service at
    ``config.url_svm``. All ``[captcha_id, recognised_text]`` pairs are
    pushed as one JSON message onto the queue ``config.que.get(type)``.

    Changes from the previous version: the captcha file is closed after
    reading, blank lines are skipped, and the shared ``config`` header and
    parameter dicts are copied instead of being mutated in place.

    NOTE(review): the original docstring (2018-10-29) says the captcha file
    is emptied at the end of every run, but no code here does that.

    :param type: ``'zhixing'`` or any other value for "shixin" (the name
        shadows the builtin but is kept for caller compatibility)
    :return: True when at least one captcha was recognised, else False
    """
    is_go_on = False
    captcha_list = []

    # Copy so the Host header does not leak into the shared config dict.
    headers = dict(config.headers_CaptchaHandler)
    if type == 'zhixing':
        captcha = config.file_captcha_zhixing
        headers.update({'Host': config.host_zs.get('zhixing')})
        captcha_url = config.url_captcha_zhixing
    else:
        captcha = config.file_captcha_shixin
        headers.update({'Host': config.host_zs.get('shixin')})
        captcha_url = config.url_captcha_shixin

    with open(captcha, 'r', encoding='utf8') as captcha_file:
        for line in captcha_file:
            captcha_id = line.strip()
            if not captcha_id:
                # A blank line carries no captcha id to request.
                continue

            params = dict(config.params_captcha)
            params.update({'captchaId': captcha_id,
                           'random': random.random()})

            logger.info('下载验证码图片\t{0}\t{1}'.format(type, captcha_id))
            di = Download_img()
            img = di.receive_and_request(url=captcha_url,
                                         headers=headers,
                                         params=params,
                                         method='GET')
            if img == 'null_html':
                logger.warning('下载验证码图片失败\t{0}\t{1}'.format(
                    type, captcha_id))
                continue

            # Saving the image to disk was disabled by the original author:
            #   file_path = config.img_file.format(captcha)
            #   save_img(file_path=file_path, img=img)

            # Run OCR on the downloaded image.
            payloads = {'pic': base64.b64encode(img), 'type': 'pste'}
            result = di.receive_and_request(url=config.url_svm,
                                            payloads=payloads,
                                            method='POST')
            try:
                result_dict = loads_json(result)
                if result_dict.get('status_code') == 200:
                    captcha_list.append(
                        [captcha_id,
                         result_dict.get('data').get('captcha')])
                    logger.info('完成图片ocr\t{0}\t{1}'.format(
                        type, captcha_id))
                    is_go_on = True
            except Exception as e:
                # Best-effort: one bad OCR response must not abort the batch.
                logger.warning('ocr识别失败\t{0}'.format(e))

    # Only push to the queue when something was recognised.
    if captcha_list:
        que = config.que.get(type)
        logger.debug(captcha_list)
        push_2_que(que, dumps_json(captcha_list))
        logger.debug('ocr结果推入队列')

    return is_go_on
Exemplo n.º 15
0
def get_sub_expriy():
    """Return the ``expiry`` value of the stored cookie named ``SUB``.

    Cookies are loaded from ``COOKIES_JSON``; the first entry whose
    ``name`` is ``'SUB'`` is used. Raises ``IndexError`` when there is no
    such entry.
    """
    stored_cookies = loads_json(COOKIES_JSON)
    sub_matches = [
        entry for entry in stored_cookies if entry['name'] == 'SUB'
    ]
    return sub_matches[0]['expiry']
Exemplo n.º 16
0
 def verify_json_text(self, json_text):
     """Decode ``json_text`` with ``loads_json`` and return the result.

     The caller uses the returned value to judge whether the response was
     well-formed.
     """
     return loads_json(json_text)