示例#1
0
 def run_getter(self, cycle=GETTER_CYCLE):
     """Fetch cookies over and over, pausing ``cycle`` between rounds.

     A single ``Getter`` is built once and reused for every round. The loop
     has no exit condition, so this call does not return normally.

     :param cycle: value handed to ``time.sleep`` after each round.
     """
     cookie_getter = Getter()
     while True:
         logger.info('开始抓取cookies')
         cookie_getter.run()
         time.sleep(cycle)
示例#2
0
 def run_tester(self, cycle=TESTER_CYCLE):
     """Check the stored cookies over and over, pausing ``cycle`` between rounds.

     A single ``Tester`` is built once and reused for every round. The loop
     has no exit condition, so this call does not return normally.

     :param cycle: value handed to ``time.sleep`` after each round.
     """
     cookie_tester = Tester()
     while True:
         logger.info('开始检查')
         cookie_tester.run()
         time.sleep(cycle)
示例#3
0
    async def test_one_proxy(self, key, proxy):
        """Check whether one stored cookie is still accepted by the target site.

        The cookie is sent to ``TEST_URL``. It counts as usable when the
        response status is in ``TRUE_STATUS_CODE`` and the page contains
        non-empty text under a ``span`` with class ``ni-sp-name``. A usable
        cookie is passed to ``self.redis.max``; an unusable one to
        ``self.redis.decrease``. Every failure is logged and swallowed, so
        this coroutine never raises on a bad cookie or a network error.

        The request is made with the blocking ``requests`` library, so it is
        run in the event loop's default executor; calling it inline would
        freeze every other coroutine for up to the 30 second timeout.

        :param key: account identifier the cookie belongs to.
        :param proxy: the cookie string (the name is historical); its last
            character is dropped before it is sent.
        """
        import asyncio
        import functools

        try:
            headers = {
                "Accept":
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                "Accept-Encoding":
                "gzip, deflate, br",
                "Accept-Language":
                "zh-CN,zh;q=0.9",
                "Cache-Control":
                "max-age=0",
                "Connection":
                "keep-alive",
                "Cookie":
                proxy[:-1],
                "Host":
                "www.tianyancha.com",
                "Upgrade-Insecure-Requests":
                "1",
                "User-Agent":
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
            }
        except Exception as e:
            # Building the headers only fails when the stored cookie value is
            # not sliceable (for example None).
            logger.error(f'{key} 账号, 测试错误 - {e}')
            return

        try:
            # requests.get blocks, so hand it to a worker thread and await it.
            fetch = functools.partial(requests.get,
                                      TEST_URL,
                                      headers=headers,
                                      timeout=30)
            response = await asyncio.get_event_loop().run_in_executor(
                None, fetch)
            html = etree.HTML(response.text)
            # The logged-in user's display name; empty when the cookie is
            # not recognised.
            user = "".join(
                html.xpath('//span[@class="ni-sp-name"]//text()'))
            if response.status_code in TRUE_STATUS_CODE and user:
                # Cookie is usable.
                self.redis.max(key, proxy)
                logger.info(f'用户可用 - {user}')
            else:
                # Cookie is not usable.
                self.redis.decrease(key, proxy)
                logger.warning(f'{key} 账号, 状态吗错误')
        except Exception as e:
            logger.error(f'{key} 账号, 请求错误 - {e}')
示例#4
0
 def run(self):
     """Launch each enabled pool component in its own process.

     The tester, getter and API are started (in that order) only when their
     ``*_ENABLE`` switch is truthy. The processes are started but not
     joined, so this method returns right after launching them.
     """
     logger.info('cookies池开始运行')
     if TESTER_ENABLE:
         Process(target=self.run_tester).start()
     if GETTER_ENABLE:
         Process(target=self.run_getter).start()
     if API_ENABLE:
         Process(target=self.run_api).start()
示例#5
0
    def run(self):
        """Load ``self.url`` and, if a captcha page is served, try to pass it.

        :return: ``200`` when the first download already shows the logged-in
            marker (a ``span`` with class ``ni-sp-name``); the status code of
            the second download when the captcha was accepted and the marker
            is then present; ``503`` in every other case.
        """
        # A normal page never has the captcha title.
        first_resp = self.download(self.url)
        title = self.verify(first_resp.text)
        logger.info(f'判断网页名称 - {title}')
        logged_in = etree.HTML(first_resp.text).xpath('//span[@class="ni-sp-name"]')
        logger.info('页面正常')

        if logged_in:
            return 200

        if title != '天眼查校验':
            # Neither logged in nor a captcha page.
            return 503

        # Click captcha: verify_image asks the solving platform for the click
        # coordinates and submits them.
        if self.verify_image() != 'ok':
            # Verification failed; notify by email instead of retrying.
            self.email.run('Login verification failed, such as continuous occurrence of the representative Super Eagle is not enough!')
            return 503

        # Captcha accepted, so the same URL can be fetched again.
        retry_resp = self.download(self.url)
        if not etree.HTML(retry_resp.text).xpath('//span[@class="ni-sp-name"]'):
            return 503

        logger.info(f'验证成功 - {retry_resp.status_code}')
        return retry_resp.status_code
示例#6
0
    def slice(self, targetImage, bgImage):
        """Stitch the two captcha images together and get the click points.

        Both images are pasted onto one 320x130 canvas (``targetImage`` at the
        top, ``bgImage`` 30 pixels lower), the canvas is sent as PNG to the
        captcha-solving platform, and the ``x,y|x,y|...`` answer is parsed.

        :param targetImage: URL-safe base64 image showing the characters in
            the order they must be clicked.
        :param bgImage: URL-safe base64 image containing the characters to
            click.
        :return: list of ``{'x': int, 'y': int}`` dicts in the order the
            platform returned them, with ``y`` shifted back by 30 so it is
            relative to ``bgImage``. Empty when the platform returned no
            coordinates.
        """
        # Vertical offset at which bgImage sits on the stitched canvas.
        bg_offset = 30

        # Decode the base64 payloads into images.
        prompt_img = Image.open(BytesIO(base64.urlsafe_b64decode(targetImage)))
        board_img = Image.open(BytesIO(base64.urlsafe_b64decode(bgImage)))

        # canvas is the stitched picture sent to the platform.
        canvas = Image.new('RGB', (320, 130), 'red')
        canvas.paste(prompt_img, (0, 0))
        canvas.paste(board_img, (0, bg_offset))

        # NOTE(review): the platform account, password and software id are
        # hard-coded here; they should come from configuration instead.
        chaojiying = Chaojiying_Client("L54555", "Li891004", '90004')
        png_buffer = BytesIO()
        canvas.save(png_buffer, format='PNG')
        # 9004 is the captcha type code sent to the platform
        # (presumably a click-coordinates type; confirm against its price list).
        dict_data = chaojiying.PostPic(png_buffer.getvalue(), 9004)

        # A reply without 'pic_str' is treated like an empty answer rather
        # than crashing on None.split.
        pic_str = (dict_data.get('pic_str') or '').split('|')
        logger.info(f'坐标信息 - {pic_str}')

        lis = []
        if not pic_str[0]:
            return lis
        for point in pic_str:
            parts = point.split(',')
            lis.append({'x': int(parts[0]), 'y': int(parts[1]) - bg_offset})
        return lis
示例#7
0
    def verify_image(self):
        """Fetch a click captcha, have it solved, and submit the answer.

        Downloads the captcha payload, passes its two images to ``self.slice``
        to obtain the click coordinates, then posts them to the check
        endpoint.

        :return: the ``state`` field of the check response, or ``None`` when
            that field is absent or the captcha payload could not be fetched.
        """
        # One timestamp for both query parameters; '_' trails 't' by 100 ms.
        now_ms = int(datetime.now().timestamp() * 1000)
        url = "http://antirobot.tianyancha.com/captcha/getCaptcha.json?t={}&_={}".format(now_ms, now_ms - 100)
        data = self.download(url).json().get('data')
        if not data:
            # Without a payload there are no images to solve.
            logger.warning('验证码数据为空, 无法验证')
            return None

        targetImage = data.get('targetImage')  # characters in click order
        bgImage = data.get('bgImage')  # picture containing the characters
        captchaId = data.get('id')  # id of this captcha

        # Stitch the images; the solving platform is called inside slice().
        lis = self.slice(targetImage, bgImage)

        params = {
            'captchaId': captchaId,
            'clickLocs': json.dumps(lis),
            # Taken fresh: solving the captcha takes a while.
            't': str(int(datetime.now().timestamp() * 1000)),
        }
        resp = self.download("http://antirobot.tianyancha.com/captcha/checkCaptcha.json", params=params)
        # Parse the body once and reuse it for the log and the result.
        verdict = resp.json()
        logger.info(f'验证结果 - {verdict}')
        return verdict.get('state')
示例#8
0
 def run(self):
     """Log in every account that has no cookie yet and store the result.

     Accounts whose username is already among the keys returned by
     ``self.redis.get()`` are skipped, and no login is attempted while
     ``self.is_over_threshold()`` is true. A failed login is logged and the
     loop moves on to the next account.
     """
     accounts_usernames = self.accounts_db.usernames()
     keys = self.redis.get()
     for username in accounts_usernames:
         if username in keys:
             # This account already has an entry in the cookie pool.
             continue
         password = self.accounts_db.get_value(username)
         # The password is deliberately kept out of the log.
         logger.info(f'正在生成Cookies - 账号 {username}')
         if self.is_over_threshold():
             continue
         try:
             # Pause before each login attempt.
             time.sleep(5)
             cookie = self.crawler.crawl_main(username, password)
             if cookie:
                 self.redis.add(username, cookie)
                 logger.info(f"账号 {username} cookie有效")
             else:
                 logger.info("监控到cookie为空, 登录失败")
         except Exception as e:
             logger.warning(f'请求出错 - {e}')
示例#9
0
from run import Run
from util.configtion import logger

if __name__ == '__main__':
    # Script entry point: log the start-up, then hand control to the runner.
    logger.info('开始运行')
    Run().run()