def get_next_page(self, log):
     '''
     Page through the current search result list and store what was parsed.

     The number of pages is ``ceil(self.company_num / 20)``. Page 1 is never
     navigated to: it is parsed from whatever the driver is already showing.
     Every later page is opened by URL, followed by a 2 second pause.

     :param log: logger-like object; only ``info`` is called here directly.
     :return: None
     '''
     # Total page count, rounded up.
     total_pages = math.ceil(self.company_num / 20)
     log.info("总页数:{}".format(total_pages))

     last_page = int(total_pages)
     rows = []
     for page_no in range(1, last_page + 1):
         if page_no > 1:
             # Build the URL of the page and load it.
             page_url = "https://www.tianyancha.com/search/p{}?key={}".format(
                 page_no, self.label_name)
             self.driver.get(page_url)
             time.sleep(2)
         # Parse the page that is currently loaded.
         page_rows = self.parse_and_get_list_company(log)
         if page_rows:
             rows.extend(page_rows)

     if not rows:
         # Nothing parsed: leave the database untouched.
         return

     insert_db(rows, log)
     # Set flag 1 on this name's record once its rows have been inserted.
     name_results_coll.update_one({'_id': self._id}, {'$set': {"flag": 1}})
 def update_name_db(self, log):
     '''
     Write the current counts onto this name's ``name_results`` record.

     Sets ``name_num`` from ``self.name_num`` and ``company_numm`` from
     ``self.company_num`` on the document whose ``_id`` is ``self._id``.
     The key really is spelled ``company_numm``; the other writers in this
     file use the same spelling, so it is kept as is.

     The update is best-effort: any exception is logged and swallowed so the
     caller continues.

     :param log: logger-like object providing ``info`` and ``error``.
     :return: None
     '''
     try:
         # Update the stored counts.
         name_results_coll.update_one(
             {'_id': self._id}, {'$set': {"name_num": self.name_num, "company_numm": self.company_num}}
         )
     except Exception as e:
         # Record the cause too; the message alone says nothing about why
         # the write failed.
         log.error(e)
         log.info("数据无法更新...")
    def parse_company_pages(self, log):
        '''
        Read the company and person counts from the current search page and
        decide whether the result list should be paged through.

        Steps, in order:

        1. ``self.company_num`` is taken from the first number in the company
           header text. A count with a trailing ``+`` (for example ``5000+``)
           is kept as a string; otherwise it is an ``int``. On any failure it
           becomes ``0``.
        2. ``self.name_num`` is the first number in the person header text, or
           ``0`` on any failure.
        3. If the company count is a string, the name record is updated with
           ``company_numm: 0`` and ``flag: 2`` and the method returns.
        4. Otherwise the counts are stored via ``update_name_db`` and, when the
           company count is between 1 and 100 inclusive, ``get_next_page`` is
           called.

        Any exception escaping steps 3-4 is logged and swallowed.

        :param log: logger-like object providing ``info`` and ``error``.
        :return: None
        '''
        try:
            # Company count. The pattern keeps an optional trailing "+" so
            # that the string branch below can actually be reached; with a
            # plain (\d+) the captured text could never contain "+".
            try:
                header = self.driver.find_element_by_xpath(
                    '//span[contains(text(),"天眼查为你找到")]/..').text
                count_text = re.search(r'(\d+\+?)', header).group(1)
                if '+' in count_text:
                    self.company_num = count_text
                else:
                    self.company_num = int(count_text)
            except Exception as e:
                log.error(e)
                self.company_num = 0

            # Person ("boss") count.
            try:
                header = self.driver.find_element_by_xpath(
                    '//div[contains(text(),"天眼查为你找到")]/..').text
                self.name_num = int(re.search(r'(\d+)', header).group(1))
            except Exception as e:
                log.error(e)
                self.name_num = 0

            log.info('老板数量:{}'.format(self.name_num))
            log.info('公司数量:{}'.format(self.company_num))

            if isinstance(self.company_num, str):
                # Open-ended count ("N+"): record flag 2 and do not page.
                name_results_coll.update_one({'_id': self._id}, {
                    '$set': {
                        "name_num": self.name_num,
                        "company_numm": 0,
                        'flag': 2
                    }
                })
                return

            self.update_name_db(log)
            # Only lists of 1..100 companies are paged through.
            if self.company_num != 0 and self.company_num <= 100:
                self.get_next_page(log)
        except Exception as e:
            log.info(e)
            log.info("parse_company_pages异常")
    def handle_question(self, log):
        '''
        Work out which kind of page the driver is on and react to it.

        The original one-line description was "determine whether login
        succeeded". The method probes the page with a chain of element
        lookups; each lookup that raises falls through to the next probe:

        1. Home-page search box found: set account ``flag`` to 1, set
           ``self.flag = False``, return.
        2. Login-form input found: set account ``usable`` to 1, return.
        3. Company count readable from the result header: call
           ``parse_company_pages`` and reset ``self.check_count``.
        4. ``container``/``content`` div found: set account ``flag`` to 1 with
           a timestamp, set ``self.flag = True``, call ``self.login``.
        5. Page source contains the "100 companies / VIP 5000" notice: set
           account ``flag`` to 0.
        6. "No results" message found: set the name record to flag 4, bump
           ``self.check_count``; on the 4th time set account ``flag`` to 0
           and log in again.
        7. Otherwise compare the first body div against the "non-human
           behaviour" ban text; on a match set account ``flag`` to 0 and log
           in again.

        NOTE(review): the lookup in step 7 is not guarded, so an exception
        there propagates to the caller.

        :param log: logger-like object providing ``info`` and ``error``.
        :return: None
        '''
        try:
            # NOTE(review): ('proxy' or '503' or '500') evaluates to 'proxy',
            # so only 'proxy' is tested here; '503' and '500' are never
            # checked. A plain substring test for '500' would also match
            # unrelated page text, so the intended check needs confirming
            # before this is changed.
            if ('proxy' or '503' or '500') in self.driver.page_source:
                self.driver.refresh()
                sleep(3)
            # Reaching the home page means the login succeeded.
            home_pages = self.driver.find_element_by_xpath(
                "//input[@id='home-main-search']")
            if home_pages:
                # Mark the account record up front.
                account_results_coll.update_one({'_id': self.account_id},
                                                {'$set': {
                                                    "flag": 1
                                                }})
                self.flag = False
                return
        except Exception as e:
            # If the login-page element is found, the account is unusable and
            # the login failed.
            try:
                no_use = self.driver.find_element_by_xpath(
                    "//div[@class='pb30 position-rel']/input")
                if no_use:
                    # Mark the account record up front.
                    # Per the original comment: marked as unusable so it is
                    # not picked again next time.
                    account_results_coll.update_one({'_id': self.account_id},
                                                    {'$set': {
                                                        "usable": 1
                                                    }})
                    return
            except Exception as e:
                # Neither home page nor login form: check for a result list by
                # reading the company count from its header.
                try:
                    tmp = self.driver.find_element_by_xpath(
                        '//span[contains(text(),"天眼查为你找到")]/..').text
                    match = int(re.search(r'天眼查为你找到(\d+)家公司', tmp).group(1))
                except Exception as e:
                    log.error(e)
                    match = 0
                if match:
                    self.parse_company_pages(log)
                    self.check_count = 0
                # No count found: we did not land on the list page.
                else:
                    try:
                        self.driver.find_element_by_xpath(
                            "//div[@class='container']//div[@class='content']")
                        # self.image_handle(log)
                        # Mark the account record up front; per the original
                        # comment this flags that a captcha must be solved.
                        updateTime = str(int(time.time() * 1000))
                        account_results_coll.update_one(
                            {'_id': self.account_id},
                            {'$set': {
                                "flag": 1,
                                'updateTime': updateTime
                            }})
                        self.flag = True
                        self.login(log)
                    # Search scope too large, or the account is temporarily
                    # unavailable.
                    except Exception as e:
                        if '普通用户可查看100家公司,VIP会员可查看5000家公司' in self.driver.page_source:
                            # Mark the account record up front.
                            account_results_coll.update_one(
                                {'_id': self.account_id},
                                {'$set': {
                                    "flag": 0
                                }})
                            # self.search_name_from_mongo()
                            # self.get_name_to_search()
                        else:
                            try:
                                error_str = self.driver.find_element_by_xpath(
                                    "//div[@class='f24 mb40 mt40 sec-c1 ']"
                                ).text
                                if error_str == "抱歉,没有找到相关结果!":
                                    log.info("[ERROR]: 抱歉,没有找到相关结果!")
                                    # Move on to the next name: record zero
                                    # counts and flag 4 for this one.
                                    name_results_coll.update_one(
                                        {'_id': self._id}, {
                                            '$set': {
                                                "name_num": 0,
                                                "company_numm": 0,
                                                'flag': 4
                                            }
                                        })
                                    self.check_count += 1
                                    # On reaching 4 "no results" pages (the
                                    # counter is reset only when a list page
                                    # is parsed), reset the account and log
                                    # in again.
                                    if self.check_count >= 4:
                                        updateTime = str(
                                            int(time.time() * 1000))
                                        account_results_coll.update_one(
                                            {'_id': self.account_id}, {
                                                '$set': {
                                                    "flag": 0,
                                                    'updateTime': updateTime
                                                }
                                            })
                                        self.flag = True
                                        self.login(log)

                                    # Update the company count.
                                    # self.db.name_results.update_one({'_id': self._id},{'$set': {"company_numm": 0}})
                                    # self.get_name_to_search()
                            except Exception as e:
                                # Match the site's error banner text.
                                error_info = self.driver.find_element_by_xpath(
                                    "/html/body/div/div[1]").text
                                if error_info == "系统检测到您非人类行为,己被禁止访问天眼查,若有疑问请联系官方qq群 515982002":
                                    log.info("[ERROR]: {}".format(error_info))
                                    updateTime = str(int(time.time() * 1000))
                                    account_results_coll.update_one(
                                        {'_id': self.account_id}, {
                                            '$set': {
                                                "flag": 0,
                                                'updateTime': updateTime
                                            }
                                        })
                                    self.flag = True
                                    self.login(log)