def parse(self, response):
    """Build one ``HackernewsItem`` per ``<td class="title">`` cell in *response*.

    Args:
        response: The page object handed to ``HtmlXPathSelector``.

    Returns:
        A list of ``HackernewsItem`` objects, each with its ``"title"`` and
        ``"url"`` fields set to whatever ``.extract()`` returns for the
        anchor text and the anchor ``href`` attribute of that cell.
    """
    hxs = HtmlXPathSelector(response)
    titles = hxs.select('//td[@class="title"]')
    # Accumulator must have its own name; the loop variable ``item`` below
    # would otherwise overwrite it.
    items = []
    for title in titles:
        item = HackernewsItem()
        item["title"] = title.select("a/text()").extract()
        # ``@href`` selects the attribute; a bare ``href`` step would look for
        # a child *element* named href and match nothing.
        item["url"] = title.select("a/@href").extract()
        items.append(item)
    return items
def parse(self, response):
    """Yield a ``HackernewsItem`` for each matched table cell in *response*.

    The cells visited are the third ``<td>`` of every ``<tr class="athing">``
    row. For each one, the item's ``'title'`` is the extracted text of the
    direct ``<a>`` children that have an ``href`` attribute, and ``'url'`` is
    the extracted ``href`` of the direct ``<a>`` children.

    Args:
        response: The page object handed to ``Selector``.

    Yields:
        One populated ``HackernewsItem`` per matched cell.
    """
    page = Selector(response)
    for cell in page.xpath('//tr[@class="athing"]/td[3]'):
        entry = HackernewsItem()
        # Only anchors that actually carry an href contribute title text.
        link_text = cell.xpath("a[@href]/text()").extract()
        entry['title'] = link_text
        link_target = cell.xpath("a/@href").extract()
        entry['url'] = link_target
        yield entry
def parse(self, response):
    """Build one ``HackernewsItem`` per ``<td class="title">`` cell in *response*.

    Args:
        response: The page object handed to ``HtmlXPathSelector``.

    Returns:
        A list of ``HackernewsItem`` objects, each with its ``'title'`` and
        ``'url'`` fields set to whatever ``.extract()`` returns for the
        anchor text and the anchor ``href`` attribute of that cell.
    """
    hxs = HtmlXPathSelector(response)
    titles = hxs.select('//td[@class="title"]')
    items = []
    for title in titles:
        item = HackernewsItem()
        item['title'] = title.select('a/text()').extract()
        item['url'] = title.select('a/@href').extract()
        # Collect the populated item, not the raw selector it was built from.
        items.append(item)
    return items
def parse(self, response):
    """Collect a ``HackernewsItem`` for every ``<td class="title">`` cell.

    Args:
        response: The page object handed to ``Selector``.

    Returns:
        A list of ``HackernewsItem`` objects, one per matched cell, with
        ``"title"`` and ``"url"`` set from the cell's direct ``<a>`` children.
    """
    # Every <td> whose class attribute is exactly "title".
    title_cells = Selector(response).xpath('//td[@class="title"]')

    collected = []
    for cell in title_cells:
        record = HackernewsItem()
        # Text nodes of the <a> elements directly inside this cell.
        record["title"] = cell.xpath("a/text()").extract()
        # href attributes of the same <a> elements.
        record["url"] = cell.xpath("a/@href").extract()
        collected.append(record)
    return collected