def parse(self, response):
    """Yield one ``jwcsysuItem`` per ``a[class=j_th_tit]`` anchor in *response*.

    For every matching anchor the item is filled with:

    * ``newTitle``   -- the hard-coded placeholder ``"wow"``
    * ``newContent`` -- the first value of the anchor's ``title`` attribute
    * ``newHref``    -- the first value of the anchor's ``href`` attribute

    Anchors that lack either attribute are skipped, so one malformed link
    no longer raises ``IndexError`` and drops the rest of the page.

    NOTE(review): ``response`` is presumably a Scrapy response; all this
    method needs is ``.css()`` returning selectors that support
    ``.xpath(...).extract()`` -- confirm against the enclosing spider.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('response')
    for anchor in response.css("a[class=j_th_tit] "):
        titles = anchor.xpath('@title').extract()
        hrefs = anchor.xpath('@href').extract()
        if not titles or not hrefs:
            # Nothing to index; an unguarded [0] here would raise IndexError
            # and terminate the generator for the remaining anchors.
            continue

        item = jwcsysuItem()
        item["newTitle"] = "wow"
        item["newContent"] = titles[0]
        item["newHref"] = hrefs[0]
        print(item)
        yield item
def parse(self, response):
    """Yield one ``jwcsysuItem`` per ``div[class=threadlist_lz]`` in *response*.

    For every matching ``div`` the item is filled with:

    * ``newTitle``   -- the hard-coded placeholder ``"wow"``
    * ``newContent`` -- list of ``title`` attribute values of the ``a``
      elements inside the div
    * ``newHref``    -- list of ``href`` attribute values of the ``a``
      elements inside the div

    Both attribute fields are lists (the raw ``.extract()`` result) and may
    be empty when the div contains no matching anchors.

    NOTE(review): ``response`` is presumably a Scrapy response; all this
    method needs is ``.css()`` returning selectors that support
    ``.css(...).extract()`` -- confirm against the enclosing spider.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('response')
    for div in response.css("div[class=threadlist_lz]"):
        item = jwcsysuItem()
        item["newTitle"] = "wow"
        print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        # "::attr(name)" is the pseudo-element form for reading an attribute;
        # the previous "a::(attr=title)" spelling is not valid selector syntax.
        item["newContent"] = div.css("a::attr(title)").extract()
        item["newHref"] = div.css("a::attr(href)").extract()
        print(item)
        yield item
def parse_item(self, response):
    """Build a ``jwcsysuItem`` from *response*, or return ``None`` on failure.

    Fields populated, in order:

    * ``newCatalog`` -- derived from the third ``a`` element under
      ``div[class=sec_art_list]``
    * ``newTitle``   -- first ``h1`` text under ``div[class=art_content]``
    * ``newContent`` -- serialized markup of the first ``div[class=content]``
    * ``newHref``    -- ``response.url``
    * ``newTime``    -- a fixed 10-character slice of the serialized
      ``div[class=art_property]``

    Extraction is best-effort: if any step raises (for example an expected
    element is missing), the page is skipped and ``None`` is returned. The
    reason is recorded at debug level rather than discarded.

    NOTE(review): ``response`` is presumably a Scrapy response -- confirm
    against the enclosing spider.
    """
    # Function-scope import: the file's top-level imports are not visible here.
    import logging

    item = jwcsysuItem()
    try:
        catalog_links = response.css("div[class=sec_art_list]").css("a").extract()
        # For a plain "<a ...>text</a>" string this split chain yields "text":
        # the second-to-last "<"-piece is the opening tag plus text, and the
        # part after its last ">" is the text itself.
        item["newCatalog"] = catalog_links[2].split("<")[-2].split(">")[-1]

        item["newTitle"] = response.css("div[class=art_content]").css("h1::text").extract()[0]
        item["newContent"] = response.css("div[class=content]").extract()[0]
        item["newHref"] = response.url

        # Fixed offsets from the end of the serialized element; presumably a
        # YYYY-MM-DD date -- TODO confirm against the page markup.
        property_html = response.css("div[class=art_property]").extract()[0]
        item["newTime"] = property_html[-62:-52]
        return item
    except Exception as exc:
        # Deliberately broad: any extraction failure means "not a usable
        # article page". Log it so skipped pages remain diagnosable.
        logging.getLogger(__name__).debug(
            "parse_item: skipping %s (%r)", response.url, exc)
        return None
def parse_item(self, response):
    """Build a ``jwcsysuItem`` from *response*, or return ``None`` on failure.

    Fields populated, in order:

    * ``newCatalog`` -- derived from the third ``a`` element under
      ``div[class=sec_art_list]``
    * ``newTitle``   -- first ``h1`` text under ``div[class=art_content]``
    * ``newContent`` -- serialized markup of the first ``div[class=content]``
    * ``newHref``    -- ``response.url``
    * ``newTime``    -- a fixed 10-character slice of the serialized
      ``div[class=art_property]``

    Extraction is best-effort: if any step raises (for example an expected
    element is missing), the page is skipped and ``None`` is returned. The
    reason is recorded at debug level rather than discarded.

    NOTE(review): ``response`` is presumably a Scrapy response -- confirm
    against the enclosing spider.
    """
    # Function-scope import: the file's top-level imports are not visible here.
    import logging

    item = jwcsysuItem()
    try:
        catalog_links = response.css("div[class=sec_art_list]").css("a").extract()
        # For a plain "<a ...>text</a>" string this split chain yields "text":
        # the second-to-last "<"-piece is the opening tag plus text, and the
        # part after its last ">" is the text itself.
        item["newCatalog"] = catalog_links[2].split("<")[-2].split(">")[-1]

        item["newTitle"] = response.css("div[class=art_content]").css("h1::text").extract()[0]
        item["newContent"] = response.css("div[class=content]").extract()[0]
        item["newHref"] = response.url

        # Fixed offsets from the end of the serialized element; presumably a
        # YYYY-MM-DD date -- TODO confirm against the page markup.
        property_html = response.css("div[class=art_property]").extract()[0]
        item["newTime"] = property_html[-62:-52]
        return item
    except Exception as exc:
        # Deliberately broad: any extraction failure means "not a usable
        # article page". Log it so skipped pages remain diagnosable.
        logging.getLogger(__name__).debug(
            "parse_item: skipping %s (%r)", response.url, exc)
        return None