Exemplo n.º 1
0
 def get_info(self, html):
     html_list = []
     page = 1
     h = Handel_html(html=html)
     result = h.pd_html()
     if not result:
         return ["无符合条件的数据"]
     total = h.get_total()  # 文书总条数
     t1 = int(total) % 5
     t2 = int(total) / 5
     if t1 != 0:
         total_page = int(t2) + 1
     else:
         total_page = int(t2)
     print(total_page)
     html_list.append(html)
     time.sleep(10)
Exemplo n.º 2
0
    def get_info(self):
        # self.driver.implicitly_wait(15) # 显性等待
        try:
            self.driver.get(self.cp_url)
            self.driver.find_element_by_xpath(
                '//input[@id="gover_search_key"]').click()
            self.driver.find_element_by_xpath(
                '//input[@id="gover_search_key"]').send_keys(self.company)
            self.driver.find_element_by_xpath(
                "//button[@class='head_search_btn']").click()
        except Exception as e:
            print("*" * 10, str(e))
            Log('log/cp_log.log', e=e)
            # self.driver.quit()
            return ["获取网页内容时出现异常"]

        time.sleep(10)
        html_list = []
        page = 1

        # [['(2016)京01民终7217号', '2016-12-19', '北京市第一中级人民法院', '魏巍等劳动争议二审民事判决书', ['民事', '二审'], 'http://wenshu.court.gov.cn/content/content?DocID=d4117425-9198-40a2-8a98-3711404e253b&KeyWord=深圳市中合银融资担保有限公司'], ['(2017)粤03民终2926号', '2017-05-12', '广东省深圳市中级人民法院', '江西中联建设集团有限公司与', ['民事', '二审'], 'http://wenshu.court.gov.cn/content/content?DocID=5d5cf5a2-2531-416b-9516-a85101011a03&KeyWord=深圳市中合银融资担保有限公司'], ['(2016)粤03民辖终3827、3828号', '2016-11-17', '广东省深圳市中级人民法院', '旭东电力集团有限公司与', ['民事', '二审'], 'http://wenshu.court.gov.cn/content/content?DocID=fe883891-d1a8-44a3-a23c-1f67a0349cb7&KeyWord=深圳市中合银融资担保有限公司'], ['(2016)粤0304民初18775-18776号', '2016-09-21', '深圳市福田区人民法院', '与旭东电力集团有限公司罗云富保证合同纠纷一审民事裁定书', ['民事', '一审'], 'http://wenshu.court.gov.cn/content/content?DocID=1c55cb08-667d-48eb-9dae-a7f4010e73cc&KeyWord=深圳市中合银融资担保有限公司'], ['(2016)粤0304民初18775-18776号之一', '2017-07-10', '深圳市福田区人民法院', '与旭东电力集团有限公司、罗云富保证合同纠纷一审民事裁定书', ['民事', '一审'], 'http://wenshu.court.gov.cn/content/content?DocID=197c2dfb-7520-4b7d-9598-a7f4010e73f2&KeyWord=深圳市中合银融资担保有限公司']]
        try:
            html = self.driver.page_source
            h = Handel_html(html=html)
            result = h.pd_html()
            if not result:
                self.driver.quit()
                return ["无符合条件的数据"]
            total = h.get_total()  # 文书总条数
            t1 = int(total) % 5
            t2 = int(total) / 5
            if t1 != 0:
                total_page = int(t2) + 1
            else:
                total_page = int(t2)
            print(total_page)

        except Exception as e:
            Log('log/cp_log.log', e=e)
            print("*" * 10, str(e))
            html = "获取网页内容时出现异常"
            html_list.append(html)
            total_page = 0
            self.driver.quit()

        html_list.append(html)

        while page < total_page:
            print("***********page", page)
            try:
                self.driver.implicitly_wait(15)
                self.driver.find_element_by_xpath('//a[@class="next"]').click()
            except Exception as e:
                print("*" * 10, str(e))
                # 将错误写入日志
                # print('发生异常')
                self.driver.quit()
                break
            time.sleep(10)
            page += 1
            try:
                html = self.driver.page_source
            except Exception as e:
                Log('log/cp_log.log', e=e)
                html = "获取网页内容时出现异常"
                html_list.append(html)
                self.driver.quit()
            html_list.append(html)

        self.driver.quit()

        return html_list