Пример #1
0
    def parse_two(self, response):
        # pass
        item = MziItem()
        item3 = response.meta['item2']
        # urlstr = response.url.split('/')
        # pagenum = urlstr[-1]
        # print 'pagenum is **************************' + pagenum

        # if pagenum=='1':
        #     print 'response.url is 1----------------------------------->' + response.url
        #     newurl = urlstr[0] +'//' + urlstr[2] + '/' + urlstr[3]
        #     URL = newurl.xpath('//div[@class="main-image"]//img/@src').extract()[0]
        # else:
        #     URL = response.xpath('//div[@class="main-image"]//img/@src').extract()[0]
        # if pagenum=='1':
        #     response.url = urlstr[0] +'//' + urlstr[2] + '/' + urlstr[3]

        URL = response.xpath(
            '//div[@class="main-image"]//img/@src').extract()[0]

        item['image_urls'] = URL
        item['path'] = item3['path']
        item['fileName'] = item3['fileName']
        # print 'result is :-->' + item['image_urls'],item['path'],item['fileName']

        yield item
Пример #2
0
    def parse(self, response):
        # pass
        pages = response.xpath('//*[@class="all"]//ul//li//p[@class="url"]/a')
        # pages = response.xpath('//*[@class="all"]//ul//li//p[@class="url"]/a')[:1]
        # pages = ['http://www.mzitu.com/104557']
        items = []
        
        for page in pages:                        
            item = MziItem()
            # item['fileName'] = page.xpath('./text()').extract()[0]
            item['siteURL'] = page.xpath('./@href').extract()[0]
            item['fileName'] = item['siteURL'].split('/')[-1]

            items.append(item)            

        for item in items:
            # yield item
            yield Request(url=item['siteURL'],callback=self.parse_one,meta={'item1':item})
Пример #3
0
    def parse_one(self,response):
        # pass
        item2 = response.meta['item1']        

        num = response.xpath('//div[@class="pagenavi"]/a/span/text()').extract()[-2]
        pageURL = response.url
        items = []        

        for i in range(1,int(num)+1):
            item = MziItem()
            # item['fileName'] = item2['fileName']
            # item['path'] = response.url.split('/')[-1]
            # item['path'] = item['fileName'] + '/'+ str(i) + '.jpg'
            item['pageURL'] = pageURL + '/' + str(i)
            item['isdown'] = '0'

            # yield item['pageURL']

            
            items.append(item)
           
        for item in items:
            # yield Request(url=item['pageURL'],dont_filter=True,meta={'item2':item},callback=self.parse_two)
            yield item
Пример #4
0
    def parse_one(self, response):
        # pass
        item2 = response.meta['item1']
        # source = requests.get(response.url)
        # html = source.text.encode('utf-8')

        num = response.xpath(
            '//div[@class="pagenavi"]/a/span/text()').extract()[-2]
        pageURL = response.url
        items = []
        # print 'num is :--->' + num
        # print 'pageURL:--->' + pageURL
        # print 'filename :--->' +item2['fileName']

        for i in range(1, int(num) + 1):
            item = MziItem()
            item['fileName'] = item2['fileName']
            # item['path'] = response.url.split('/')[-1]
            item['path'] = item['fileName'] + '/' + str(i) + '.jpg'
            item['pageURL'] = pageURL + '/' + str(i)

            # if int(i) == 1:
            #     item['pageURL'] = pageURL
            #     print 'pagenum is **************************' + item['pageURL']
            # else:
            #     item['pageURL'] = pageURL + '/' + str(i)

            items.append(item)
            # if int(i)==1:
            #     print 'pageURL is 1----------->' + item['pageURL']

        for item in items:
            yield Request(url=item['pageURL'],
                          dont_filter=True,
                          meta={'item2': item},
                          callback=self.parse_two)