Python request_dis示例

编程语言: Python

命名空间/包名称: ktgg

方法/功能: request_dis

hotexamples.com的示例: 5

Python request_dis - 共找到5个示例。以下是从开源项目中提取的、评价最高的 ktgg.request_dis 真实 Python 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

显示文件

    def parse_html(self,links):
        """Fetch every linked page, extract its fields and pass them to ``parse_text``.

        For each entry in ``links`` the page URL is built from the host part of
        ``self.url`` plus the entry, the page is fetched with
        ``ktgg.request_dis`` and a record dict is filled in and handed to
        ``self.parse_text(d, db, cursor)``.

        Args:
            links: Iterable of URL path strings that are appended to the
                ``http://<host>`` prefix derived from ``self.url``.

        Raises:
            IndexError: If one of the regular expressions finds no match in
                ``self.url`` or in the fetched HTML.
        """
        # Open the database connection
        db,cursor = ktgg.con_mysql()

        try:
            for link in links:
                url = 'http://' + re.findall('//(.*?)/',self.url)[0] + link
                text,html= ktgg.request_dis(url)
                # Nothing to store when the fetched text is empty
                if text == '':
                    continue

                # Extract the record fields
                d = {}
                d['posttime'] = re.findall('发布时间(.*?)<',html)[0].replace('：','').strip()
                d['court'] = '长沙市望城区人民法院'
                d['source'] = self.url
                d['url'] = url
                d['title'] = re.findall("'b_title'>(.*?)<",html)[0]
                d['province'] = '湖南省'

                # Remove every substring listed in self.tihuan from the text
                for unwanted in self.tihuan:
                    text = text.replace(unwanted,'')
                # Guard against an empty body: fall back to the title
                d['body'] = text if text != '' else d['title']
                self.parse_text(d,db,cursor)
        finally:
            # Close the database connection even if a page failed to parse,
            # so an exception does not leak the connection
            ktgg.clo_mysql(db,cursor)

示例#2

显示文件

文件： beihu.py 项目： tianming1903/ktgg_hunan

    def parse_html(self,links):
        """Fetch every linked page, extract its fields and pass them to ``parse_text``.

        Each entry in ``links`` is appended to the fixed site prefix, fetched
        with ``ktgg.request_dis`` and turned into a record dict. The text, the
        HTML parsed with ``etree.HTML`` and the record are then handed to
        ``self.parse_text(text, tree, d, db, cursor)``.

        Args:
            links: Iterable of URL path strings appended to
                ``http://bhqfy.chinacourt.gov.cn``.

        Raises:
            IndexError: If one of the regular expressions finds no match in
                the fetched HTML.
        """
        # Open the database connection
        db,cursor = ktgg.con_mysql()

        try:
            for link in links:
                url = 'http://bhqfy.chinacourt.gov.cn' + link
                text,html= ktgg.request_dis(url)
                # Skip pages whose text comes back as 0
                # (presumably a fetch-failure marker -- confirm against ktgg.request_dis)
                if text == 0:
                    continue

                # Extract the record fields
                d = {}
                d['posttime'] = re.findall('发布时间(.*?)<',html)[0].replace('：','').strip()
                d['court'] = '北湖人民法院'
                d['source'] = self.url
                d['url'] = url
                d['title'] = re.findall("'b_title'>(.*?)<",html)[0]
                d['province'] = '湖南省'

                # Use the title in place of an empty text
                if text == '':
                    text = d['title']
                # Keep the parsed tree under its own name instead of rebinding `html`
                tree = etree.HTML(html)
                self.parse_text(text,tree,d,db,cursor)
        finally:
            # Close the database connection even if a page failed to parse,
            # so an exception does not leak the connection
            ktgg.clo_mysql(db,cursor)

示例#3

显示文件

    def parse_html(self, links):
        """Fetch every linked page, extract its fields and pass them to ``parse_text``.

        Each entry in ``links`` is appended to the fixed site prefix, fetched
        with ``ktgg.request_dis`` and turned into a record dict that is handed
        to ``self.parse_text(text, d, db, cursor)``. Two specific notices,
        recognised by their titles, are skipped.

        Args:
            links: Iterable of URL path strings appended to
                ``http://zzxfy.chinacourt.gov.cn``.

        Raises:
            IndexError: If one of the regular expressions finds no match in
                the fetched HTML.
        """
        # Open the database connection
        db, cursor = ktgg.con_mysql()

        try:
            for link in links:
                url = 'http://zzxfy.chinacourt.gov.cn' + link
                text, html = ktgg.request_dis(url)
                # Skip pages whose text comes back as 0
                # (presumably a fetch-failure marker -- confirm against ktgg.request_dis)
                if text == 0:
                    continue

                # Extract the record fields
                d = {}
                d['posttime'] = re.findall('发布时间(.*?)<',
                                           html)[0].replace('：', '').strip()
                d['court'] = '湖南省渌口区人民法院'
                d['source'] = self.url
                d['url'] = url
                d['title'] = re.findall("'b_title'>(.*?)<", html)[0]
                d['province'] = '湖南省'

                # Special case: drop these two notices (one is not a hearing
                # announcement, the other has its content laid out as a table)
                title = d['title']
                if '保护当事人的诉讼权利' in title or '2012年8月1日至8月31日' in title:
                    continue

                # When there is no text, use the title in its place
                if text == '':
                    text = title
                self.parse_text(text, d, db, cursor)
        finally:
            # Close the database connection even if a page failed to parse,
            # so an exception does not leak the connection
            ktgg.clo_mysql(db, cursor)

示例#4

显示文件

    def parse_html(self, links):
        """Fetch every linked page, extract its fields and pass them to ``parse_text``.

        Each entry in ``links`` is appended to the fixed site prefix, fetched
        with ``ktgg.request_dis`` and turned into a record dict that is handed
        to ``self.parse_text(text, d, db, cursor)``. When the regex-extracted
        title is empty, the title is read from the ``b_title`` div via XPath.

        Args:
            links: Iterable of URL path strings appended to
                ``http://hnyzfy.chinacourt.gov.cn``.

        Raises:
            IndexError: If one of the regular expressions, or the fallback
                XPath query, finds no match in the fetched HTML.
        """
        # Open the database connection
        db, cursor = ktgg.con_mysql()

        try:
            for link in links:
                url = 'http://hnyzfy.chinacourt.gov.cn' + link
                text, html = ktgg.request_dis(url)
                # Skip pages whose text comes back as 0
                # (presumably a fetch-failure marker -- confirm against ktgg.request_dis)
                if text == 0:
                    continue

                # Extract the record fields
                d = {}
                d['posttime'] = re.findall('发布时间(.*?)<',
                                           html)[0].replace('：', '').strip()
                d['court'] = '宜章人民法院'
                d['source'] = self.url
                d['url'] = url
                d['title'] = re.findall("'b_title'>(.*?)<", html)[0]
                # The regex can match an empty title; in that case read the
                # text of the span nested inside the b_title div instead
                if d['title'] == '':
                    tree = etree.HTML(html)
                    d['title'] = tree.xpath('//div[@class="b_title"]/span/text()')[0]
                d['province'] = '湖南省'
                self.parse_text(text, d, db, cursor)
        finally:
            # Close the database connection even if a page failed to parse,
            # so an exception does not leak the connection
            ktgg.clo_mysql(db, cursor)

示例#5

显示文件

    def parse_html(self, links):
        """Fetch every linked page, extract its fields and pass them to ``parse_text``.

        Each entry in ``links`` is appended to the fixed site prefix, fetched
        with ``ktgg.request_dis`` and turned into a record dict that is handed
        to ``self.parse_text(text, d, db, cursor)``. Pages with empty text are
        skipped.

        Args:
            links: Iterable of URL path strings appended to
                ``http://sfqfy.chinacourt.gov.cn``.

        Raises:
            IndexError: If one of the regular expressions finds no match in
                the fetched HTML.
        """
        # Open the database connection
        db, cursor = ktgg.con_mysql()
        try:
            for link in links:
                url = 'http://sfqfy.chinacourt.gov.cn' + link
                text, html = ktgg.request_dis(url)
                # Nothing to store when the fetched text is empty
                if text == '':
                    continue
                # Extract the record fields
                d = {}
                d['posttime'] = re.findall('发布时间：(.*?)<', html)[0].strip()
                d['court'] = '石峰区人民法院'
                d['source'] = self.url
                d['url'] = url
                d['title'] = re.findall("'b_title'>(.*?)<", html)[0]
                d['province'] = '湖南省'
                self.parse_text(text, d, db, cursor)
        finally:
            # Close the database connection even if a page failed to parse,
            # so an exception does not leak the connection
            ktgg.clo_mysql(db, cursor)