def content_parse(self, response):
    """Build a DongnanfumianItem for a '维科号' page.

    Date, id, title and author are read from ``response.meta``; the two
    text fields are taken from the page with XPath ``string()`` queries.

    Args:
        response: Response whose ``meta`` holds the keys ``date``, ``id``,
            ``title`` and ``author``.

    Returns:
        The populated DongnanfumianItem.

    Raises:
        KeyError: If one of the expected ``meta`` keys is absent.
    """
    item = DongnanfumianItem()
    meta = response.meta

    # Values are listed in the order they are stored on the item.
    fields = {
        'S6': meta['date'],
        'S0': meta['id'],
        'S1': response.url,
        'S4': meta['title'],
        'S3a': '文章',
        'G1': meta['author'],
        'S3d': response.xpath(
            'string(//div[@class="fl m-home_href"])').extract_first(),
        'S7': "PC",
        'S2': '维科号',
        'Q1': response.xpath('string(//div[@class="main"])').extract_first(),
        'S5': helper.get_localtimestamp(),
    }
    for key, value in fields.items():
        item[key] = value
    return item
def content_parse(self, response):
    """Build a DongnanfumianItem for a '懂车帝APP' entry.

    Everything except the URL and the local timestamp comes from
    ``response.meta``; nothing is extracted from the page body, so the
    ``S3d`` and ``Q1`` fields are stored as ``None``.

    Args:
        response: Response whose ``meta`` holds the keys ``date``, ``id``,
            ``title`` and ``author``.

    Returns:
        The populated DongnanfumianItem.

    Raises:
        KeyError: If one of the expected ``meta`` keys is absent.
    """
    item = DongnanfumianItem()
    meta = response.meta

    # (field, value) pairs in the order they are stored on the item.
    pairs = [
        ('S6', meta['date']),
        ('S0', meta['id']),
        ('S1', response.url),
        ('S4', meta['title']),
        ('S3a', '文章'),
        ('G1', meta['author']),
        ('S3d', None),
        ('S7', "APP"),
        ('S2', '懂车帝APP'),
        ('Q1', None),
        ('S5', helper.get_localtimestamp()),
    ]
    for field, value in pairs:
        item[field] = value
    return item
def content_parse(self, response):
    """Build a DongnanfumianItem for a '时代财经APP' page.

    Date, id, title and author are read from ``response.meta``. The ``Q1``
    field is the text found between the literal markers ``content:`` and
    ``groupId:`` in the raw response text.

    Args:
        response: Response whose ``meta`` holds the keys ``date``, ``id``,
            ``title`` and ``author``.

    Returns:
        The populated DongnanfumianItem. ``Q1`` is ``None`` when the
        ``content:`` ... ``groupId:`` span is not present in the response.

    Raises:
        KeyError: If one of the expected ``meta`` keys is absent.
    """
    pipleitem = DongnanfumianItem()
    pipleitem['S6'] = response.meta['date']
    pipleitem['S0'] = response.meta['id']
    pipleitem['S1'] = response.url
    pipleitem['S4'] = response.meta['title']
    pipleitem['S3a'] = '文章'
    pipleitem['G1'] = response.meta['author']
    pipleitem['S3d'] = None
    pipleitem['S7'] = "APP"
    pipleitem['S2'] = '时代财经APP'

    # Raw string: "\S" and "\s" are invalid escape sequences in a plain
    # string literal. re.search yields the same first capture that
    # findall(...)[0] did, but lets a missing span be handled instead of
    # raising IndexError.
    content_match = re.search(r'content:([\S\s]*?)groupId:', response.text)
    pipleitem['Q1'] = content_match.group(1) if content_match else None

    pipleitem['S5'] = helper.get_localtimestamp()
    return pipleitem
def content_parse(self, response):
    """Build a DongnanfumianItem for a '腾讯新闻app' entry.

    All article data, including the body text, is read from
    ``response.meta``; only the URL comes from the response itself.

    Args:
        response: Response whose ``meta`` holds the keys ``date``, ``id``,
            ``title``, ``author`` and ``content``.

    Returns:
        The populated DongnanfumianItem.

    Raises:
        KeyError: If one of the expected ``meta`` keys is absent.
    """
    item = DongnanfumianItem()
    meta = response.meta

    # Values are listed in the order they are stored on the item.
    fields = {
        'S6': meta['date'],
        'S0': meta['id'],
        'S1': response.url,
        'S4': meta['title'],
        'S3a': '文章',
        'G1': meta['author'],
        'S3d': None,
        'S7': "APP",
        'S2': '腾讯新闻app',
        # The body text is passed in through meta rather than being
        # extracted from the page with an XPath query.
        'Q1': meta['content'],
        'S5': helper.get_localtimestamp(),
    }
    for key, value in fields.items():
        item[key] = value
    return item
def content_parse(self, response):
    """Build a DongnanfumianItem for a '天天快报' page.

    Date, id, title and author are read from ``response.meta``; the ``Q1``
    field is the string value of the ``content-box`` div.

    Args:
        response: Response whose ``meta`` holds the keys ``date``, ``id``,
            ``title`` and ``author``.

    Returns:
        The populated DongnanfumianItem.

    Raises:
        KeyError: If one of the expected ``meta`` keys is absent.
    """
    pipleitem = DongnanfumianItem()
    pipleitem['S6'] = response.meta['date']
    pipleitem['S0'] = response.meta['id']
    pipleitem['S1'] = response.url
    pipleitem['S4'] = response.meta['title']
    pipleitem['S3a'] = '文章'
    pipleitem['G1'] = response.meta['author']
    pipleitem['S3d'] = None
    pipleitem['S7'] = "APP"
    pipleitem['S2'] = '天天快报'
    # extract_first() stores the text itself; extract() would store a
    # one-element list wrapping that text.
    pipleitem['Q1'] = response.xpath(
        'string(//div[@class="content-box"])').extract_first()
    pipleitem['S5'] = helper.get_localtimestamp()
    return pipleitem
def content_parse(self, response):
    """Build a DongnanfumianItem for a '老司机' page.

    Date, id, title and author are read from ``response.meta``; the ``Q1``
    field is the string value of the ``threa-main-box`` div with tab
    characters removed.

    Args:
        response: Response whose ``meta`` holds the keys ``date``, ``id``,
            ``title`` and ``author``.

    Returns:
        The populated DongnanfumianItem.

    Raises:
        KeyError: If one of the expected ``meta`` keys is absent.
    """
    item = DongnanfumianItem()
    meta = response.meta

    item['S6'] = meta['date']
    item['S0'] = meta['id']
    item['S1'] = response.url
    item['S4'] = meta['title']
    item['S3a'] = '文章'
    item['G1'] = meta['author']
    item['S3d'] = None
    item['S7'] = "PC"
    item['S2'] = '老司机'

    body = response.xpath(
        'string(//div[@class="threa-main-box"])').extract_first()
    # Tabs are stripped only from non-empty text; empty text is stored as is.
    item['Q1'] = body.replace('\t', '') if len(body) != 0 else body

    item['S5'] = helper.get_localtimestamp()
    return item
def content_parse(self, response):
    """Build a DongnanfumianItem for a '经济观察网' page.

    Unlike callbacks that rely on ``response.meta``, every field here is
    derived from the response itself:

    * ``S6``: first text node matching ``.xd-b-b p span``.
    * ``S0``: first run of five or more digits that follows a ``/`` in
      the URL.
    * ``S4``: text of the document ``<title>``.
    * ``G1``: the part of the ``xd-b-b`` paragraph text that precedes the
      first four-digit run.
    * ``Q1``: string value of the ``xx_boxsing`` div.

    Args:
        response: The page response to parse.

    Returns:
        The populated DongnanfumianItem. ``S0`` and ``G1`` are ``None``
        when their pattern does not match.
    """
    pipleitem = DongnanfumianItem()
    pipleitem['S6'] = response.css('.xd-b-b p span::text').extract_first()

    # Raw strings: "\d" is an invalid escape sequence in a plain literal.
    # re.search returns the same first capture as findall(...)[0] while
    # allowing a missing match to be handled instead of raising IndexError.
    id_match = re.search(r'/(\d{5,})', response.url)
    pipleitem['S0'] = id_match.group(1) if id_match else None

    pipleitem['S1'] = response.url
    pipleitem['S4'] = response.css('head title::text').extract_first()
    pipleitem['S3a'] = '文章'

    byline = response.xpath(
        'string(//div[@class="xd-b-b"]/p)').extract_first() or ''
    author_match = re.search(r'(.*?)\d{4}', byline)
    pipleitem['G1'] = author_match.group(1) if author_match else None

    pipleitem['S3d'] = None
    pipleitem['S7'] = "PC"
    pipleitem['S2'] = '经济观察网'
    pipleitem['Q1'] = response.xpath(
        'string(//div[@class="xx_boxsing"])').extract_first()
    pipleitem['S5'] = helper.get_localtimestamp()
    return pipleitem