Example #1
0
    def content(self, response):
        """Yield one scraped item for this detail page, then follow pagination.

        The scraped images themselves are downloaded to a folder by the
        item pipeline, not here.
        """
        # Pull the title and every image URL out of the page body.
        name = response.css(".content h5::text").extract_first()
        img_urls = response.css(".content-pic img::attr(src)").extract()

        item = AoisolasItem()
        item['name'] = name
        item['ImgUrl'] = img_urls
        yield item
        # print(item['ImgUrl'])

        # The last .page-ch anchor is the next-page link, when one exists.
        next_url = response.css(".page-ch:last-child::attr(href)").extract_first()
        if next_url is None:
            return
        # Next page
        yield response.follow(next_url, callback=self.content)
Example #2
0
    def content(self, response):
        """Yield the scraped item for this page, then follow the '下一页' link.

        The gallery's group name arrives via ``response.meta['group']`` and is
        propagated into the follow-up request's meta so every page of a
        gallery carries the same group tag.
        """
        item = AoisolasItem()
        item['name'] = response.css(".bgff h1::text").extract_first()
        item['ImgUrl'] = response.css(".pic_tupian img::attr(src)").extract()
        item['group'] = response.meta['group']
        yield item
        # Images are saved to a folder by the item pipeline.
        # print(item['ImgUrl'])
        next_url = response.css(
            "a:contains('下一页')::attr(href)").extract_first()

        # BUG FIX: the original code joined BEFORE the None check, and
        # urljoin(response.url, None) returns response.url, so next_url was
        # never None — the spider re-requested the current page forever on the
        # last page. Only join and follow when a next-page href really exists.
        if next_url is not None:
            next_url = parse.urljoin(response.url, next_url)
            yield scrapy.Request(next_url,
                                 callback=self.content,
                                 meta={"group": item['group']})
Example #3
0
    def content(self, response):
        """Scrape the article title and showcase image from this page, yield
        them as one item, then follow the next-page link if present."""
        soup = BeautifulSoup(response.body, 'lxml')

        item = AoisolasItem()
        item['title'] = soup.find('h1', {'class': 'articleV4Tit'}).get_text()
        # The pipeline expects a list of URLs, even for a single image.
        item['ImgUrl'] = [soup.find('img', {'class': 'IMG_show'}).get('src')]
        yield item

        # Keep paginating while a next-page anchor exists.
        next_page = soup.find('a', {'class': 'next-page-a'})
        if next_page is None:
            return
        yield response.follow(next_page.get('href'), callback=self.content)