Exemplo n.º 1
0
    def parse(self, response):
        """Yield one detail-page request per matched trending-news container.

        For every ``div`` whose class is exactly
        ``swiper-wrapper trendNav orangeNav`` a ``SamacharItem`` is filled with
        the link text, the absolute URL, the link title and an image URL, and
        a ``scrapy.Request`` for that URL is yielded with the item attached
        under ``meta['item']`` for ``self.parse_detail_page``.

        Containers without a usable ``href`` are skipped: the original code
        would have failed on them when concatenating the site prefix with
        ``None``.

        :param response: the listing-page response handed over by Scrapy.
        :returns: generator of ``scrapy.Request`` objects.
        """
        containers = Selector(response).xpath(
            '//div[@class="swiper-wrapper trendNav orangeNav"]')
        for container in containers:
            href = container.xpath('div/a/@href').extract_first()
            if not href:
                # extract_first() returns None when nothing matched; without
                # a link there is no detail page to follow.
                continue

            item = SamacharItem()
            item['title'] = container.xpath('div/a/text()').extract_first()
            item['url'] = 'http://www.bhaskar.com' + href
            item['img_title'] = container.xpath(
                'div/a/@title').extract_first()
            # Prefer the plain ``src`` attribute and fall back to
            # ``data-original`` when ``src`` is missing or empty.
            item['img_urls'] = (
                container.xpath('a/img/@src').extract_first()
                or container.xpath('a/img/@data-original').extract_first())

            # The item travels with the request through ``meta``;
            # dont_filter=True keeps Scrapy's duplicate filter from dropping
            # the request.
            yield scrapy.Request(url=item['url'],
                                 callback=self.parse_detail_page,
                                 meta={'item': item},
                                 dont_filter=True)
Exemplo n.º 2
0
    def parse(self, response):
        """Yield one detail-page request per ``image-caption`` headline.

        Every ``h3`` directly under a ``div`` with class ``image-caption`` is
        turned into a ``SamacharItem`` carrying the whitespace-normalised link
        text and the absolute URL. A ``scrapy.Request`` for that URL is
        yielded with the item attached under ``meta['item']`` for
        ``self.parse_detail_page``.

        Headlines without a usable ``href`` are skipped: the original code
        would have failed on them when concatenating the site prefix with
        ``None``.

        :param response: the listing-page response handed over by Scrapy.
        :returns: generator of ``scrapy.Request`` objects.
        """
        headlines = Selector(response).xpath(
            "//div[@class='image-caption']/h3")
        # Debug dump of the matched selectors; the parenthesised form prints
        # the same text on Python 2 and is also valid on Python 3.
        print(headlines)
        for headline in headlines:
            href = headline.xpath("a/@href").extract_first()
            if not href:
                # extract_first() returns None when nothing matched; without
                # a link there is no detail page to follow.
                continue

            item = SamacharItem()
            item['title'] = headline.xpath(
                "normalize-space(a/text())").extract_first()
            item['url'] = "http://www.amarujala.com" + href

            # The item travels with the request through ``meta``;
            # dont_filter=True keeps Scrapy's duplicate filter from dropping
            # the request.
            yield scrapy.Request(url=item['url'],
                                 callback=self.parse_detail_page,
                                 meta={'item': item},
                                 dont_filter=True)
Exemplo n.º 3
0
    def parse(self, response):
        """Yield one detail-page request per ``figure-caption`` element.

        Every ``figcaption`` with class ``figure-caption`` is turned into a
        ``SamacharItem`` carrying the text of the link's ``span`` and the
        link's ``href`` (used as-is, no prefix is added). A
        ``scrapy.Request`` for that URL is yielded with the item attached
        under ``meta['item']`` for ``self.parse_detail_page``.

        Captions without a usable ``href`` are skipped. Previously such a
        caption either raised ``UnboundLocalError`` (first iteration) or
        re-yielded the previous iteration's request after overwriting its
        ``meta['item']``.

        :param response: the listing-page response handed over by Scrapy.
        :returns: generator of ``scrapy.Request`` objects.
        """
        captions = Selector(response).xpath(
            "//figcaption[@class='figure-caption']")
        # Debug dump of the matched selectors; the parenthesised form prints
        # the same text on Python 2 and is also valid on Python 3.
        print(captions)
        for caption in captions:
            href = caption.xpath("a/@href").extract_first()
            if not href:
                # extract_first() returns None when nothing matched; without
                # a link there is no detail page to follow.
                continue

            item = SamacharItem()
            item['title'] = caption.xpath("a/span/text()").extract_first()
            item['url'] = href

            # The item travels with the request through ``meta``;
            # dont_filter=True keeps Scrapy's duplicate filter from dropping
            # the request.
            yield scrapy.Request(url=item['url'],
                                 callback=self.parse_detail_page,
                                 meta={'item': item},
                                 dont_filter=True)
Exemplo n.º 4
0
    def parse(self, response):
        """Yield one detail-page request per ``topicList`` entry.

        Every ``li`` directly under a ``ul`` with class ``topicList`` is
        turned into a ``SamacharItem`` carrying the headline text (taken from
        the ``div.h3`` inside ``div.protxt.fr`` of the link) and the absolute
        URL. A ``scrapy.Request`` for that URL is yielded with the item
        attached under ``meta['item']`` for ``self.parse_detail_page``.

        Entries without a usable ``href`` are skipped: the original code
        would have failed on them when concatenating the site prefix with
        ``None``.

        :param response: the listing-page response handed over by Scrapy.
        :returns: generator of ``scrapy.Request`` objects.
        """
        entries = Selector(response).xpath("//ul[@class='topicList']/li")
        # Debug dump of the matched selectors; the parenthesised form prints
        # the same text on Python 2 and is also valid on Python 3.
        print(entries)
        for entry in entries:
            href = entry.xpath("a/@href").extract_first()
            if not href:
                # extract_first() returns None when nothing matched; without
                # a link there is no detail page to follow.
                continue

            item = SamacharItem()
            item['title'] = entry.xpath(
                "a/div[@class='protxt fr']/div[@class='h3']/text()"
            ).extract_first()
            item['url'] = 'https://www.jagran.com' + href

            # The item travels with the request through ``meta``;
            # dont_filter=True keeps Scrapy's duplicate filter from dropping
            # the request.
            yield scrapy.Request(url=item['url'],
                                 callback=self.parse_detail_page,
                                 meta={'item': item},
                                 dont_filter=True)