示例#1
0
 def parse_item(self, response):
     """Yield one ``SocrataItem`` per ``itemscope`` result block on the page.

     Each item carries the first title text (``text``), the first title link
     (``url``) and the whitespace-stripped view count (``views``) found
     inside the block.  A field whose node is missing from a block is set to
     ``None`` instead of raising ``IndexError``, so one incomplete block no
     longer aborts extraction for the rest of the page.
     """

     def first(node, query):
         # First string matched by a relative XPath, or None when no match.
         matches = node.xpath(query).extract()
         return matches[0] if matches else None

     title_link = './/div[@class="browse2-result-title"]/h2/a'
     view_count = './/div[@class="browse2-result-view-count-value"]/text()'

     for result in Selector(response).xpath('//div[@itemscope="itemscope"]'):
         item = SocrataItem()
         item["text"] = first(result, title_link + "/text()")
         item["url"] = first(result, title_link + "/@href")
         views = first(result, view_count)
         # Only strip when a view-count node was actually present.
         item["views"] = views.strip() if views is not None else None
         yield item
示例#2
0
 def parse(self, response):
     """Yield one ``SocrataItem`` per ``<tr itemscope="itemscope">`` row.

     ``text``, ``url`` and ``views`` are each stored as the list of strings
     matched by the corresponding relative XPath (possibly empty).
     """
     rows = response.xpath('//tr[@itemscope="itemscope"]')
     for row in rows:
         item = SocrataItem()
         # Use .xpath() on the row selectors as well: .select() is the
         # deprecated alias, and the rows already come from response.xpath().
         item["text"] = row.xpath("td[2]/div/span/text()").extract()
         item["url"] = row.xpath("td[2]/div/a/@href").extract()
         item["views"] = row.xpath("td[3]/span/text()").extract()
         yield item
示例#3
0
 def parse_items(self, response):
     """Return a list with one ``SocrataItem`` per ``<tr itemscope="itemscope">`` row.

     ``text``, ``url`` and ``views`` are each stored as the list of strings
     matched by the corresponding relative XPath (possibly empty).
     """
     selector = HtmlXPathSelector(response)

     def build(row):
         # Populate a single item from one table row.
         entry = SocrataItem()
         entry["text"] = row.select("td[2]/div/span/text()").extract()
         entry["url"] = row.select("td[2]/div/a/@href").extract()
         entry["views"] = row.select("td[3]/span/text()").extract()
         return entry

     rows = selector.select('//tr[@itemscope="itemscope"]')
     return [build(row) for row in rows]
示例#4
0
    def parse(self, response):
        """Yield one ``SocrataItem`` per schema.org ``Dataset`` block.

        For each ``<div itemtype='http://schema.org/Dataset'>`` the item gets:

        * ``text``  -- list of dataset name strings found inside the block,
        * ``url``   -- list of ``href`` values, inside the block, that contain
          ``https://opendata.socrata.com``,
        * ``views`` -- list of view-count text nodes inside the block.

        Every list may be empty when the block has no matching node.
        """
        datasets = scrapy.Selector(response).xpath(
            "//div[@itemtype='http://schema.org/Dataset']")

        for dataset in datasets:
            item = SocrataItem()
            # The leading ".//" keeps each query inside this dataset block; a
            # bare "//" restarts at the document root and would hand every
            # item all matches from the whole page.
            item['text'] = dataset.xpath(
                ".//a/span[@itemprop='name']/text()").extract()
            # contains(@href, ...) tests the attribute itself, and the
            # trailing /@href yields the link target instead of the
            # serialized <a> element.
            item['url'] = dataset.xpath(
                ".//a[contains(@href,'https://opendata.socrata.com')]/@href"
            ).extract()
            item['views'] = dataset.xpath(
                ".//div[contains(@class,'view-count-value')]/text()").extract()
            yield item