Пример #1
0
    def parse(self, response):
        """Scrape job postings from a results page and follow pagination.

        Yields one loaded ``IndeedItem`` for every organic job block found
        in ``response``. When a next-page link is present, a
        ``scrapy.Request`` for it is yielded as well, with this same method
        as its callback.
        """
        for source in response.xpath("//div[@data-tn-component='organicJob']"):
            # The loader is bound to the job block, so the XPaths below are
            # relative to that block rather than to the whole page.
            item = CleanLoader(item=IndeedItem(), selector=source)
            item.add_value("source", u"indeed")
            item.add_xpath("title", "h2/a[@itemprop='title']/@title")
            # Take the first href and resolve it against the response URL.
            item.add_xpath("link", "h2/a[@itemprop='title']/@href", TakeFirst(), response.urljoin)
            item.add_xpath("company", "span/span[@itemprop='name']/text()")
            item.add_xpath("location", "span/span/span[@itemprop='addressLocality']/text()")
            # NOTE(review): this pattern captures a single digit and requires
            # at least one word character directly after it, so text such as
            # "5 days ago" does not match and "30+ days ago" captures "3".
            # Confirm that this is the intended behavior.
            item.add_xpath("date", "table//span[@class='date']/text()", re=r"(\d)\w+")
            # Emit the item; fields that matched nothing are simply absent,
            # so no field is indexed directly here.
            yield item.load_item()

        # First <a> following the <b> inside the pagination block
        # (presumably <b> marks the current page -- verify against the markup).
        next_page = response.xpath(
            "//div[@class='pagination']/b/following-sibling::a[1]/@href"
        ).extract_first()
        if next_page:
            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
Пример #2
0
 def parse(self, response):
     """Build a single ``ElancefItem`` from ``response`` and yield it.

     Every field is filled from a page-wide XPath through a ``CleanLoader``.
     The ``link`` field additionally goes through ``TakeFirst()`` and is
     restricted by a regex to the ``https://`` URL up to (not including)
     the first ``#``.
     """
     loader = CleanLoader(item=ElancefItem(), response=response)

     # (field name, xpath, extra processors, extra keyword arguments)
     field_specs = (
         ("name", "//a[@class='title-link']/text()", (), {}),
         ("link", "//a[@class='title-link']/@href", (TakeFirst(),), {"re": "https://[^#]*"}),
         ("tagline", "//div[@class='tagline']/text()", (), {}),
         ("location", "//span[@class='location']/text()", (), {}),
         # Several anchors may match here, so this field is expected to
         # hold multiple values (depends on CleanLoader's output processor).
         ("skills", "//div[@class='skills-bar left']/a/text()", (), {}),
     )
     for field_name, xpath, processors, options in field_specs:
         loader.add_xpath(field_name, xpath, *processors, **options)

     yield loader.load_item()