def parse(self, response):
    """Build one ``HackernewsItem`` per ``<td class="title">`` cell.

    Args:
        response: Response object; it is wrapped in an
            ``HtmlXPathSelector`` so it can be queried with XPath.

    Returns:
        A list of ``HackernewsItem`` objects. Each item's ``title`` and
        ``url`` hold whatever ``extract()`` returns for the text and the
        ``href`` attribute of the cell's direct ``<a>`` children.
    """
    hxs = HtmlXPathSelector(response)
    titles = hxs.select('//td[@class="title"]')
    items = []
    for title in titles:
        item = HackernewsItem()
        item["title"] = title.select("a/text()").extract()
        # ``@href`` addresses the attribute; a bare ``href`` step would look
        # for a child *element* named href instead.
        item["url"] = title.select("a/@href").extract()
        items.append(item)
    return items
Пример #2
0
    def parse(self, response):
        """Yield a ``HackernewsItem`` for the third cell of each ``athing`` row.

        Every ``<tr class="athing">`` in ``response`` is located and its third
        ``<td>`` is queried. ``title`` receives the text of direct ``<a>``
        children that carry an ``href`` attribute; ``url`` receives the
        ``href`` values of direct ``<a>`` children. Both are stored exactly
        as returned by ``extract()``.
        """
        page = Selector(response)
        cells = page.xpath('//tr[@class="athing"]/td[3]')

        for cell in cells:
            entry = HackernewsItem()
            link_texts = cell.xpath("a[@href]/text()").extract()
            entry['title'] = link_texts
            link_targets = cell.xpath("a/@href").extract()
            entry['url'] = link_targets
            yield entry
Пример #3
0
 def parse(self, response):
     """Build one ``HackernewsItem`` per ``<td class="title">`` cell.

     Args:
         response: Response object; it is wrapped in an
             ``HtmlXPathSelector`` so it can be queried with XPath.

     Returns:
         A list of ``HackernewsItem`` objects. Each item's ``title`` and
         ``url`` hold whatever ``extract()`` returns for the text and the
         ``href`` attribute of the cell's direct ``<a>`` children.
     """
     hxs = HtmlXPathSelector(response)
     titles = hxs.select('//td[@class="title"]')
     items = []
     for title in titles:
         item = HackernewsItem()
         item['title'] = title.select('a/text()').extract()
         item['url'] = title.select('a/@href').extract()
         # Collect the populated item, not the raw selector it was built
         # from; otherwise the extracted fields are thrown away.
         items.append(item)
     return items
Пример #4
0
 def parse(self, response):
     """Return a list with one ``HackernewsItem`` per ``<td class="title">``.

     For each matching cell, ``title`` is set to the extracted text of its
     direct ``<a>`` children and ``url`` to their extracted ``href``
     attribute values.
     """
     page = Selector(response)

     def build_item(cell):
         # Fill a fresh item from the links directly inside this cell.
         entry = HackernewsItem()
         entry["title"] = cell.xpath("a/text()").extract()
         entry["url"] = cell.xpath("a/@href").extract()
         return entry

     # Select every <td> whose class attribute is exactly "title".
     return [build_item(cell) for cell in page.xpath('//td[@class="title"]')]