示例#1
0
    def content_parse(self, response):
        """Build a ``DongnanfumianItem`` from an article page response.

        Date, id, title and author are read from ``response.meta``; the
        ``S3d`` and ``Q1`` fields are extracted from the page with XPath
        ``string()`` queries; ``S5`` comes from
        ``helper.get_localtimestamp()``.

        Returns:
            The populated ``DongnanfumianItem``.
        """
        item = DongnanfumianItem()
        meta = response.meta

        field_values = (
            ('S6', meta['date']),
            ('S0', meta['id']),
            ('S1', response.url),
            ('S4', meta['title']),
            ('S3a', '文章'),
            ('G1', meta['author']),
            ('S3d', response.xpath(
                'string(//div[@class="fl m-home_href"])').extract_first()),
            ('S7', "PC"),
            ('S2', '维科号'),
            ('Q1', response.xpath(
                'string(//div[@class="main"])').extract_first()),
            ('S5', helper.get_localtimestamp()),
        )
        # Fill the item in the listed order so field insertion order is kept.
        for field, value in field_values:
            item[field] = value

        return item
示例#2
0
    def content_parse(self, response):
        """Build a ``DongnanfumianItem`` using only request metadata.

        Date, id, title and author come from ``response.meta``; nothing is
        extracted from the page body, so ``S3d`` and ``Q1`` are left as
        ``None``. ``S5`` comes from ``helper.get_localtimestamp()``.

        Returns:
            The populated ``DongnanfumianItem``.
        """
        item = DongnanfumianItem()
        meta = response.meta

        # Values carried over from the originating request.
        item['S6'] = meta['date']
        item['S0'] = meta['id']
        item['S1'] = response.url
        item['S4'] = meta['title']
        item['S3a'] = '文章'
        item['G1'] = meta['author']

        # No page-derived fields for this source.
        item['S3d'] = None
        item['S7'] = "APP"
        item['S2'] = '懂车帝APP'
        item['Q1'] = None

        item['S5'] = helper.get_localtimestamp()
        return item
示例#3
0
    def content_parse(self, response):
        """Build a ``DongnanfumianItem`` from a response whose body embeds the content.

        Date, id, title and author are read from ``response.meta``. ``Q1``
        is the text found between the literal markers ``content:`` and
        ``groupId:`` in ``response.text`` (first occurrence, non-greedy).

        Returns:
            The populated ``DongnanfumianItem``.

        Raises:
            IndexError: if ``response.text`` contains no
                ``content: ... groupId:`` span.
        """
        pipleitem = DongnanfumianItem()

        pipleitem['S6'] = response.meta['date']
        pipleitem['S0'] = response.meta['id']
        pipleitem['S1'] = response.url
        pipleitem['S4'] = response.meta['title']
        pipleitem['S3a'] = '文章'
        pipleitem['G1'] = response.meta['author']
        pipleitem['S3d'] = None
        pipleitem['S7'] = "APP"
        pipleitem['S2'] = '时代财经APP'
        # Raw string: ``\S`` / ``\s`` are regex classes, not string escapes.
        # ``[\S\s]`` matches any character including newlines.
        pipleitem['Q1'] = re.findall(
            r'content:([\S\s]*?)groupId:', response.text)[0]
        pipleitem['S5'] = helper.get_localtimestamp()

        return pipleitem
示例#4
0
    def content_parse(self, response):
        """Build a ``DongnanfumianItem`` whose fields all come from request metadata.

        Date, id, title, author and the content (``Q1``) are read from
        ``response.meta``; only the URL is taken from the response itself.
        ``S5`` comes from ``helper.get_localtimestamp()``.

        Returns:
            The populated ``DongnanfumianItem``.
        """
        item = DongnanfumianItem()
        meta = response.meta

        field_values = (
            ('S6', meta['date']),
            ('S0', meta['id']),
            ('S1', response.url),
            ('S4', meta['title']),
            ('S3a', '文章'),
            ('G1', meta['author']),
            ('S3d', None),
            ('S7', "APP"),
            ('S2', '腾讯新闻app'),
            # Content is supplied via request meta, not extracted from the page.
            ('Q1', meta['content']),
            ('S5', helper.get_localtimestamp()),
        )
        for field, value in field_values:
            item[field] = value

        return item
示例#5
0
    def content_parse(self, response):
        """Build a ``DongnanfumianItem`` from an article page response.

        Date, id, title and author are read from ``response.meta``. ``Q1``
        is the string value of the ``div`` with class ``content-box``.

        Returns:
            The populated ``DongnanfumianItem``.
        """
        pipleitem = DongnanfumianItem()

        pipleitem['S6'] = response.meta['date']
        pipleitem['S0'] = response.meta['id']
        pipleitem['S1'] = response.url
        pipleitem['S4'] = response.meta['title']
        pipleitem['S3a'] = '文章'
        pipleitem['G1'] = response.meta['author']
        pipleitem['S3d'] = None
        pipleitem['S7'] = "APP"
        pipleitem['S2'] = '天天快报'
        # XPath ``string()`` evaluates to a single value, so store that string
        # directly instead of the one-element list ``extract()`` would return.
        pipleitem['Q1'] = response.xpath(
            'string(//div[@class="content-box"])').extract_first()
        pipleitem['S5'] = helper.get_localtimestamp()

        return pipleitem
示例#6
0
    def content_parse(self, response):
        """Build a ``DongnanfumianItem`` from a forum-thread page response.

        Date, id, title and author are read from ``response.meta``. ``Q1``
        is the string value of the ``div`` with class ``threa-main-box``
        with all tab characters removed.

        Returns:
            The populated ``DongnanfumianItem``.
        """
        pipleitem = DongnanfumianItem()

        pipleitem['S6'] = response.meta['date']
        pipleitem['S0'] = response.meta['id']
        pipleitem['S1'] = response.url
        pipleitem['S4'] = response.meta['title']
        pipleitem['S3a'] = '文章'
        pipleitem['G1'] = response.meta['author']
        pipleitem['S3d'] = None
        pipleitem['S7'] = "PC"
        pipleitem['S2'] = '老司机'
        content = response.xpath(
            'string(//div[@class="threa-main-box"])').extract_first()
        # Truthiness guard: skips the empty string and also tolerates ``None``
        # instead of raising TypeError from ``len(None)``.
        if content:
            content = content.replace('\t', '')
        pipleitem['Q1'] = content
        pipleitem['S5'] = helper.get_localtimestamp()

        return pipleitem
示例#7
0
    def content_parse(self, response):
        """Build a ``DongnanfumianItem`` using only data found on the page.

        Unlike metadata-driven parsers, every field here is derived from the
        response: the id from the URL, the title from ``<head><title>``, the
        date and author from the ``.xd-b-b`` block, and the content from the
        ``xx_boxsing`` div.

        Returns:
            The populated ``DongnanfumianItem``.

        Raises:
            IndexError: if the URL contains no ``/`` followed by at least
                five digits, or the ``.xd-b-b`` paragraph text contains no
                four-digit run.
        """
        pipleitem = DongnanfumianItem()

        pipleitem['S6'] = response.css('.xd-b-b p span::text').extract_first()
        # Raw string so ``\d`` is a regex class, not an invalid string escape.
        # The id is the first run of 5+ digits that follows a slash in the URL.
        pipleitem['S0'] = re.findall(r'/(\d{5,})', response.url)[0]
        pipleitem['S1'] = response.url
        pipleitem['S4'] = response.css('head title::text').extract_first()
        pipleitem['S3a'] = '文章'
        byline = response.xpath(
            'string(//div[@class="xd-b-b"]/p)').extract_first()
        # Author is the text captured just before the first four-digit run
        # (presumably the year of the publish date; confirm against the page).
        author = re.findall(r'(.*?)\d{4}', byline)[0]
        pipleitem['G1'] = author
        pipleitem['S3d'] = None
        pipleitem['S7'] = "PC"
        pipleitem['S2'] = '经济观察网'
        pipleitem['Q1'] = response.xpath(
            'string(//div[@class="xx_boxsing"])').extract_first()
        pipleitem['S5'] = helper.get_localtimestamp()

        return pipleitem