def parse_item(self, response):
    """Yield one ``SocrataItem`` per ``div[@itemscope="itemscope"]`` element.

    For every matched element the title text, the title link and the view
    count are looked up with XPaths relative to that element.  When a node
    is missing the corresponding field is set to ``None`` instead of raising
    ``IndexError``, so a single incomplete entry no longer aborts extraction
    of the remaining entries in the response.

    Args:
        response: The response to parse; it is wrapped in ``Selector``.

    Yields:
        SocrataItem: an item with ``text``, ``url`` and ``views`` populated
        (``views`` has surrounding whitespace stripped).
    """

    def first_or_none(node, query):
        # extract() returns a list; indexing [0] on an empty list would raise.
        found = node.xpath(query).extract()
        return found[0] if found else None

    title_link = './/div[@class="browse2-result-title"]/h2/a'
    views_query = './/div[@class="browse2-result-view-count-value"]/text()'

    for title in Selector(response).xpath('//div[@itemscope="itemscope"]'):
        item = SocrataItem()
        item["text"] = first_or_none(title, title_link + "/text()")
        item["url"] = first_or_none(title, title_link + "/@href")
        views = first_or_none(title, views_query)
        item["views"] = views.strip() if views is not None else None
        yield item
def parse(self, response):
    """Yield one ``SocrataItem`` per ``tr[@itemscope="itemscope"]`` row.

    Each field is filled with the list returned by ``extract()`` for an
    XPath evaluated relative to the row, so a field may be an empty list
    when nothing matches.

    Args:
        response: The response to parse; must provide ``xpath()``.

    Yields:
        SocrataItem: an item with ``text``, ``url`` and ``views`` populated.
    """
    for row in response.xpath('//tr[@itemscope="itemscope"]'):
        item = SocrataItem()
        # Rows are produced by response.xpath(), so query them with xpath()
        # as well; select() is only a deprecated alias of xpath().
        item["text"] = row.xpath("td[2]/div/span/text()").extract()
        item["url"] = row.xpath("td[2]/div/a/@href").extract()
        item["views"] = row.xpath("td[3]/span/text()").extract()
        yield item
def parse_items(self, response):
    """Return a list of ``SocrataItem`` objects, one per matched table row.

    Rows are the ``tr[@itemscope="itemscope"]`` elements found through an
    ``HtmlXPathSelector`` built from *response*.  Each item field holds the
    list returned by ``extract()`` for an XPath relative to the row.

    Args:
        response: The response to parse.

    Returns:
        list: the items, in the order the rows were matched.
    """
    selector = HtmlXPathSelector(response)

    def build_item(row):
        # Populate the fields in a fixed order: text, url, views.
        entry = SocrataItem()
        entry["text"] = row.select("td[2]/div/span/text()").extract()
        entry["url"] = row.select("td[2]/div/a/@href").extract()
        entry["views"] = row.select("td[3]/span/text()").extract()
        return entry

    rows = selector.select('//tr[@itemscope="itemscope"]')
    return [build_item(row) for row in rows]
def parse(self, response):
    """Yield one ``SocrataItem`` per ``schema.org/Dataset`` ``div``.

    Every field is extracted with an XPath relative to the dataset element
    (``.//``), so each item only contains values found inside its own
    element.  Fields hold the lists returned by ``extract()`` and may be
    empty when nothing matches.

    Args:
        response: The response to parse; it is wrapped in
            ``scrapy.Selector``.

    Yields:
        SocrataItem: an item with ``text``, ``url`` and ``views`` populated.
    """
    datasets = scrapy.Selector(response).xpath(
        "//div[@itemtype='http://schema.org/Dataset']")
    for dataset in datasets:
        item = SocrataItem()
        # A leading "//" is absolute even when called on a sub-selector and
        # would return matches from the whole document for every item.
        item['text'] = dataset.xpath(
            ".//a/span[@itemprop='name']/text()").extract()
        # contains() takes the haystack first; select the href attribute
        # itself rather than the serialized <a> element.
        item['url'] = dataset.xpath(
            ".//a[contains(@href,'https://opendata.socrata.com')]/@href"
        ).extract()
        item['views'] = dataset.xpath(
            ".//div[contains(@class,'view-count-value')]/text()").extract()
        yield item