Python FashionwebscrapingItem示例，fashionWebScraping.items.FashionwebscrapingItem Python示例

示例#1

0

显示文件

    def parse_product_pages(self, response):
        """Turn an LC Waikiki catalog JSON response into scraped items.

        The response text is parsed as JSON and every entry under
        ``['CatalogList']['Items']`` produces one FashionwebscrapingItem
        followed by one ImgData holding that entry's image URL.

        Args:
            response: Response whose ``text`` is the catalog JSON and whose
                ``meta['gender']`` is copied onto every item.

        Yields:
            A FashionwebscrapingItem and then an ImgData for each entry.

        Raises:
            ValueError: If ``response.text`` is not valid JSON.
            KeyError: If the JSON or ``response.meta`` lacks an expected key.
        """
        catalog = json.loads(response.text)

        for entry in catalog['CatalogList']['Items']:
            # Each product gets its own item instance so that items already
            # yielded are not overwritten when the next product is populated.
            item = FashionwebscrapingItem()

            item['productId'] = entry['ModelId']
            item['productName'] = entry['ProductDescription']

            item['priceOriginal'] = entry['OldPrice']
            item['priceSale'] = entry['Price']

            item['imageLink'] = entry['DefaultOptionImageUrl']
            # ModelUrl is concatenated directly onto the site root.
            item['productLink'] = "https://www.lcwaikiki.com" + entry['ModelUrl']

            item['company'] = "LCWAIKIKI"
            item['gender'] = response.meta['gender']

            yield item
            yield ImgData(image_urls=[item['imageLink']])

示例#2

0

显示文件

	def parse(self, response):
		"""Extract KOTON products from a listing page.

		Every ``<div class="product-item plp-large-images">`` that carries a
		product id produces one FashionwebscrapingItem followed by one
		ImgData; blocks without a product id are skipped.

		Args:
			response: Response whose ``body`` is the listing HTML and whose
				``meta['gender']`` is copied onto every item.

		Yields:
			A FashionwebscrapingItem and then an ImgData per product.
		"""
		sel = Selector(text=response.body)
		product_nodes = sel.xpath('//div[@class="product-item plp-large-images"]')

		for content in product_nodes:
			product_id = content.xpath('.//div[@class="my-fav-icon"]/@data-product').extract_first()
			if product_id is None:
				# Nothing is emitted for blocks without a product id, so skip
				# them before doing any further extraction.
				continue

			# Each product gets its own item instance so that items already
			# yielded are not overwritten when the next product is populated.
			item = FashionwebscrapingItem()
			item['productId'] = product_id
			item['company'] = "KOTON"
			item['gender'] = response.meta['gender']
			item['productName'] = content.xpath('@data-name').extract_first()

			image_link = content.xpath('.//div[@class="swiper-slide"]/img/@data-src').extract_first()
			item['imageLink'] = image_link

			# A missing href leaves the link as None instead of raising a
			# TypeError on str + None.
			href = content.xpath('.//a/@href').extract_first()
			item['productLink'] = ("https://www.koton.com" + href) if href is not None else None

			# A single "firstPrice" means the product is not discounted, so it
			# serves as both prices; otherwise read the struck-through and the
			# new price separately.
			first_price = content.xpath('.//span[@class="firstPrice"]/text()').extract_first()
			if first_price is not None:
				item['priceOriginal'] = first_price
				item['priceSale'] = first_price
			else:
				item['priceOriginal'] = content.xpath('.//span[@class="insteadPrice"]/s/text()').extract_first()
				item['priceSale'] = content.xpath('.//span[@class="newPrice"]/text()').extract_first()

			# Keep None out of image_urls; it is not a downloadable URL.
			image_urls = [] if image_link is None else [image_link]

			yield item
			yield ImgData(image_urls=image_urls)

示例#3

0

显示文件

文件： fashionMORHIPO.py 项目： yuriimed/FashionSearch

    def parse_product_pages(self, response):
        """Extract MORHIPO products from a listing page.

        Walks every ``<li>`` under ``<ol class="ProductList">`` and produces
        one FashionwebscrapingItem followed by one ImgData per product. The
        walk stops at the first ``<li>`` that has no product id.

        Args:
            response: Listing page response; ``meta['gender']`` is copied
                onto every item.

        Yields:
            A FashionwebscrapingItem and then an ImgData (with an empty
            ``image_urls`` list) per product.
        """
        listing = response.xpath('//ol[@class="ProductList"]')

        for product_content in listing.xpath('.//li'):
            product_id = product_content.xpath(
                './/input/@data-productid').extract_first()
            if product_id is None:
                # Stop at the first entry without a product id; checking
                # before the remaining extractions avoids failing on such
                # an entry.
                break

            # Each product gets its own item instance so that items already
            # yielded are not overwritten when the next product is populated.
            item = FashionwebscrapingItem()
            item['productId'] = product_id
            item['productName'] = product_content.xpath(
                './/img/@title').extract_first()

            price_sale = product_content.xpath(
                './/span[@class="text-danger"]/text()').extract_first()
            price_original = product_content.xpath(
                './/span[starts-with(@class,"act_price text-muted")]/s/text()'
            ).extract_first()

            # Fallback chain: struck-through price -> regular price block.
            if price_original is None:
                price_original = product_content.xpath(
                    './/span[starts-with(@class,"prd_price")]/strong/text()'
                ).extract_first()

            # Fallback chain: discount text -> badge price -> original price.
            if price_sale is None:
                price_sale = product_content.xpath(
                    './/span[@class="badge-price"]/text()').extract_first()
            if price_sale is None:
                price_sale = price_original

            item['priceSale'] = price_sale
            item['priceOriginal'] = price_original

            item['imageLink'] = product_content.xpath(
                './/img/@data-srcset').extract_first()

            # A missing href leaves the link as None instead of raising a
            # TypeError on str + None.
            href = product_content.xpath('.//a/@href').extract_first()
            item['productLink'] = (
                "https://www.morhipo.com" + href) if href is not None else None

            item['company'] = "MORHIPO"
            item['gender'] = response.meta['gender']

            yield item
            # No image URL is queued for this site.
            yield ImgData(image_urls=[])

示例#4

0

显示文件

    def parse_product_pages(self, response):
        """Extract DERIMOD products from a listing page.

        Walks the product cells inside
        ``<div class="list-content js-list-products three">`` and produces
        one FashionwebscrapingItem followed by one ImgData per product. The
        walk stops at the first cell that has no product id.

        Args:
            response: Listing page response; ``meta['gender']`` is copied
                onto every item.

        Yields:
            A FashionwebscrapingItem and then an ImgData (with an empty
            ``image_urls`` list) per product.
        """
        listing = response.xpath(
            '//div[@class="list-content js-list-products three"]')
        product_nodes = listing.xpath(
            './/div[@class="col-sm-4 col-xs-6 padding-lg list-content-product-item"]'
        )

        for product_content in product_nodes:
            product_id = product_content.xpath(
                './/div[@class="js-product-wrapper"]/@data-sku'
            ).extract_first()
            if product_id is None:
                # Stop at the first cell without a product id; checking
                # before the remaining extractions avoids failing on such
                # a cell.
                break

            # Each product gets its own item instance so that items already
            # yielded are not overwritten when the next product is populated.
            item = FashionwebscrapingItem()
            item['productId'] = product_id
            item['productName'] = product_content.xpath(
                './/span[@class="product-name"]/text()').extract_first()

            price_sale = product_content.xpath(
                './/span[@class="product-sale-price"]/text()').extract_first()
            price_original = product_content.xpath(
                './/span[@class="product-price line-through"]/text()'
            ).extract_first()
            # Without a struck-through price the sale price is the only price.
            if price_original is None:
                price_original = price_sale

            item['priceOriginal'] = price_original
            item['priceSale'] = price_sale

            item['imageLink'] = product_content.xpath(
                './/img/@src').extract_first()

            # A missing href leaves the link as None instead of raising a
            # TypeError on str + None.
            href = product_content.xpath('.//a/@href').extract_first()
            item['productLink'] = (
                "https://www.derimod.com.tr" + href) if href is not None else None

            item['company'] = "DERIMOD"
            item['gender'] = response.meta['gender']

            yield item
            # No image URL is queued for this site.
            yield ImgData(image_urls=[])

示例#5

0

显示文件

文件： fashionHM.py 项目： yuriimed/FashionSearch

    def parse_product_pages(self, response):
        """Extract H&M products from a listing page.

        Walks every ``<li class="product-item">`` inside
        ``<ul class="products-listing small">`` and produces one
        FashionwebscrapingItem followed by one ImgData per product. The walk
        stops at the first entry that has no article code.

        Args:
            response: Listing page response; ``meta['gender']`` is copied
                onto every item.

        Yields:
            A FashionwebscrapingItem and then an ImgData per product.
        """
        listing = response.xpath('//ul[@class="products-listing small"]')

        # ".//" keeps the search inside the selected <ul>; a leading "//"
        # would search the whole document once per selected node.
        for product_content in listing.xpath('.//li[@class="product-item"]'):
            product_id = product_content.xpath(
                './/article[@class="hm-product-item"]/@data-articlecode'
            ).extract_first()
            if product_id is None:
                # Stop at the first entry without an article code; checking
                # before the remaining extractions avoids failing on such
                # an entry.
                break

            # Each product gets its own item instance so that items already
            # yielded are not overwritten when the next product is populated.
            item = FashionwebscrapingItem()
            item['productId'] = product_id
            item['productName'] = product_content.xpath(
                './/a[@class="link"]/text()').extract_first()

            price_original = product_content.xpath(
                './/span[@class="price regular"]/text()').extract_first()
            price_sale = product_content.xpath(
                './/span[@class="price sale"]/text()').extract_first()
            # Products that are not on sale only expose the regular price.
            if price_sale is None:
                price_sale = price_original

            item['priceOriginal'] = price_original
            item['priceSale'] = price_sale

            # Missing attributes leave the links as None instead of raising a
            # TypeError on str + None.
            href = product_content.xpath(
                './/a[@class="link"]/@href').extract_first()
            item['productLink'] = (
                "https://www2.hm.com" + href) if href is not None else None

            # data-src is prefixed with the scheme to form the image URL.
            image_src = product_content.xpath(
                './/img/@data-src').extract_first()
            image_link = (
                "https:" + image_src) if image_src is not None else None
            item['imageLink'] = image_link

            item['company'] = "HM"
            item['gender'] = response.meta['gender']

            # Keep None out of image_urls; it is not a downloadable URL.
            image_urls = [] if image_link is None else [image_link]

            yield item
            yield ImgData(image_urls=image_urls)

示例#6

0

显示文件

	def parse_product_pages(self,response):
		"""Extract MODANISA products from a listing page.

		Walks every ``<li>`` under ``<ul id="productsList">`` and produces
		one FashionwebscrapingItem followed by one ImgData per product. The
		walk stops at the first ``<li>`` that has no product id.

		Args:
			response: Listing page response; ``meta['gender']`` is copied
				onto every item.

		Yields:
			A FashionwebscrapingItem and then an ImgData per product.
		"""
		listing = response.xpath('//ul[@id="productsList"]')

		for product_content in listing.xpath('.//li'):
			product_id = product_content.xpath('.//a/@data-product-id').extract_first()
			if product_id is None:
				# Stop at the first entry without a product id.
				break

			# Each product gets its own item instance so that items already
			# yielded are not overwritten when the next product is populated.
			item = FashionwebscrapingItem()
			item['productId'] = product_id
			item['productName'] = product_content.xpath('.//a/@data-product-name').extract_first()

			price_sale = product_content.xpath('.//a/@data-product-price').extract_first()
			price_original = product_content.xpath('.//p[@class="price"]/del/text()').extract_first()
			# Without a struck-through price the sale price is the only price.
			if price_original is None:
				price_original = price_sale

			item['priceSale'] = price_sale
			item['priceOriginal'] = price_original

			# Prefer data-original and fall back to the plain src attribute.
			image_link = product_content.xpath('.//img/@data-original').extract_first()
			if image_link is None:
				image_link = product_content.xpath('.//img/@src').extract_first()
			item['imageLink'] = image_link

			item['productLink'] = product_content.xpath('.//a[@class="productClickClass"]/@href').extract_first()

			item['company'] = "MODANISA"
			item['gender'] = response.meta['gender']

			# Keep None out of image_urls; it is not a downloadable URL.
			image_urls = [] if image_link is None else [image_link]

			yield item
			yield ImgData(image_urls=image_urls)

示例#7

0

显示文件

文件： fashionYARGICI.py 项目： yuriimed/FashionSearch

    def parse_product_pages(self, response):
        """Extract YARGICI products from a listing page.

        Walks the product grid cells found inside ``<div class="row">``
        elements and produces one FashionwebscrapingItem followed by one
        ImgData per product. The walk stops at the first cell that has no
        product id.

        Args:
            response: Listing page response; ``meta['gender']`` is copied
                onto every item.

        Yields:
            A FashionwebscrapingItem and then an ImgData per product.
        """
        rows = response.xpath('//div[@class="row"]')
        product_nodes = rows.xpath(
            './/div[@class="col-6 col-xs-6 col-sm-6 col-md-4 col-lg-4 col-xl-4 product-grid-item-container p-0"]'
        )

        for product_content in product_nodes:
            product_id = product_content.xpath(
                './/a/@data-fav-check').extract_first()
            if product_id is None:
                # Stop at the first cell without a product id; checking
                # before the remaining extractions avoids failing on such
                # a cell.
                break

            # Each product gets its own item instance so that items already
            # yielded are not overwritten when the next product is populated.
            item = FashionwebscrapingItem()
            item['productId'] = product_id
            item['productName'] = product_content.xpath(
                './/img/@alt').extract_first()

            price_original = product_content.xpath(
                './/li[@class="list-inline-item mr-0"]/s/text()'
            ).extract_first()
            price_sale = product_content.xpath(
                './/li[@class="list-inline-item"]/span/text()'
            ).extract_first()
            # Without a struck-through price the sale price is the only price.
            if price_original is None:
                price_original = price_sale

            item['priceOriginal'] = price_original
            item['priceSale'] = price_sale

            image_link = product_content.xpath(
                './/img/@data-original').extract_first()
            item['imageLink'] = image_link

            # A missing href leaves the link as None instead of raising a
            # TypeError on str + None.
            href = product_content.xpath('.//a/@href').extract_first()
            item['productLink'] = (
                "https://www.yargici.com" + href) if href is not None else None

            item['company'] = "YARGICI"
            item['gender'] = response.meta['gender']

            # Keep None out of image_urls; it is not a downloadable URL.
            image_urls = [] if image_link is None else [image_link]

            yield item
            yield ImgData(image_urls=image_urls)

示例#8

0

显示文件

	def parse_product_pages(self,response):
		"""Extract MATMAZEL products from a listing page.

		Walks every ``<div class="col col-4 col-sm-6 col-xs-12 productItem ease">``
		and produces one FashionwebscrapingItem followed by one ImgData per
		product. The walk stops at the first block that has no product id.

		Args:
			response: Listing page response; ``meta['gender']`` is copied
				onto every item.

		Yields:
			A FashionwebscrapingItem and then an ImgData per product.
		"""
		product_nodes = response.xpath('//div[@class="col col-4 col-sm-6 col-xs-12 productItem ease"]')

		for product_content in product_nodes:
			product_id = product_content.xpath('.//div[@class="variantOverlay"]/@data-id').extract_first()
			if product_id is None:
				# Stop at the first block without a product id; checking
				# before the remaining extractions avoids failing on such
				# a block.
				break

			# Each product gets its own item instance so that items already
			# yielded are not overwritten when the next product is populated.
			item = FashionwebscrapingItem()
			item['productId'] = product_id
			item['productName'] = product_content.xpath('.//a[@class="col col-12 productDescription detailLink"]/@title').extract_first()

			price_original = product_content.xpath('.//div[@class="discountedPrice"]/text()').extract_first()
			price_sale = product_content.xpath('.//div[@class="currentPrice"]/text()').extract_first()
			# When "discountedPrice" is absent the current price is used for both.
			if price_original is None:
				price_original = price_sale

			item['priceOriginal'] = price_original
			item['priceSale'] = price_sale

			image_link = product_content.xpath('.//span[@itemprop="image"]/@content').extract_first()
			item['imageLink'] = image_link

			# A missing href leaves the link as None instead of raising a
			# TypeError on str + None.
			href = product_content.xpath('.//a/@href').extract_first()
			item['productLink'] = ("https://www.matmazel.com" + href) if href is not None else None

			item['company'] = "MATMAZEL"
			item['gender'] = response.meta['gender']

			# Keep None out of image_urls; it is not a downloadable URL.
			image_urls = [] if image_link is None else [image_link]

			yield item
			yield ImgData(image_urls=image_urls)

示例#9

0

显示文件

	def parse_product_pages(self,response):
		"""Extract BOYNER products from a listing page.

		Walks every ``<div>`` whose class starts with ``product-list-item``
		and produces one FashionwebscrapingItem followed by one ImgData per
		product. The walk stops at the first block that has no product id.

		Args:
			response: Listing page response; ``meta['gender']`` is copied
				onto every item.

		Yields:
			A FashionwebscrapingItem and then an ImgData per product.
		"""
		product_nodes = response.xpath('//div[starts-with(@class,"product-list-item")]')

		for product_content in product_nodes:
			product_id = product_content.xpath('.//a/@data-id').extract_first()
			if product_id is None:
				# Stop at the first block without a product id; checking
				# before the remaining extractions avoids failing on such
				# a block.
				break

			# Each product gets its own item instance so that items already
			# yielded are not overwritten when the next product is populated.
			item = FashionwebscrapingItem()
			item['productId'] = product_id
			item['productName'] = product_content.xpath('.//img/@title').extract_first()

			price_sale = product_content.xpath('.//ins[@class="price-payable"]/text()').extract_first()
			price_original = product_content.xpath('.//del[@class="price-psfx"]/text()').extract_first()
			# Without a struck-through price the payable price is the only price.
			if price_original is None:
				price_original = price_sale

			item['priceSale'] = price_sale
			item['priceOriginal'] = price_original

			image_link = product_content.xpath('.//img/@data-original').extract_first()
			item['imageLink'] = image_link

			# A missing href leaves the link as None instead of raising a
			# TypeError on str + None.
			href = product_content.xpath('.//a/@href').extract_first()
			item['productLink'] = ("https://www.boyner.com.tr" + href) if href is not None else None

			item['company'] = "BOYNER"
			item['gender'] = response.meta['gender']

			# Keep None out of image_urls; it is not a downloadable URL.
			image_urls = [] if image_link is None else [image_link]

			yield item
			yield ImgData(image_urls=image_urls)

示例#10

0

显示文件

    def parse_product_pages(self, response):
        """Turn a Trendyol product-list JSON response into scraped items.

        The response text is parsed as JSON and every entry under
        ``['result']['products']`` produces one FashionwebscrapingItem
        followed by one ImgData holding that entry's first image URL.

        Args:
            response: Response whose ``text`` is the product JSON and whose
                ``meta['gender']`` is copied onto every item.

        Yields:
            A FashionwebscrapingItem and then an ImgData for each entry.

        Raises:
            ValueError: If ``response.text`` is not valid JSON.
            KeyError: If the JSON or ``response.meta`` lacks an expected key.
        """
        payload = json.loads(response.text)

        for entry in payload['result']['products']:
            # Each product gets its own item instance so that items already
            # yielded are not overwritten when the next product is populated.
            item = FashionwebscrapingItem()

            item['productId'] = entry['id']
            item['productName'] = entry['name']

            # Prices are stored as text with the " TL" suffix appended.
            price = entry['price']
            item['priceOriginal'] = str(price['originalPrice']) + " TL"
            item['priceSale'] = str(price['discountedPrice']) + " TL"

            # An empty image list leaves the link as None instead of raising
            # an IndexError on images[0].
            images = entry['images']
            image_link = (
                "https://www.trendyol.com" + images[0]) if images else None
            item['imageLink'] = image_link
            item['productLink'] = "https://www.trendyol.com" + entry['url']

            item['company'] = "TRENDYOL"
            item['gender'] = response.meta['gender']

            # Keep None out of image_urls; it is not a downloadable URL.
            image_urls = [] if image_link is None else [image_link]

            yield item
            yield ImgData(image_urls=image_urls)

示例#11

0

显示文件

文件： scraper.py 项目： yashmallik/Python-projects

 def parse_product_pages(self,response):
  """Create an empty FashionwebscrapingItem.

  Only the item construction is visible in this snippet; ``response`` is
  not used by the visible code and nothing is returned or yielded.
  """

  item=FashionwebscrapingItem()

示例#12

0

显示文件

文件： fashionHEPSIBURADA.py 项目： yuriimed/FashionSearch

    def parse_product_pages(self, response):
        """Extract HEPSIBURADA products from a search/listing page.

        Walks every ``<li>`` whose class is exactly
        ``search-item col lg-1 md-1 sm-1  custom-hover not-fashion-flex`` and
        produces one FashionwebscrapingItem followed by one ImgData per
        product. The walk stops at the first entry that has no product id.

        Args:
            response: Listing page response; ``meta['gender']`` is copied
                onto every item.

        Yields:
            A FashionwebscrapingItem and then an ImgData per product.
        """
        # Select the product entries straight from the document. Running an
        # absolute "//li" query from every <ul> on the page returns the full
        # match set once per <ul>, which duplicates every product.
        product_nodes = response.xpath(
            '//li[@class="search-item col lg-1 md-1 sm-1  custom-hover not-fashion-flex"]'
        )

        for product_content in product_nodes:
            product_id = product_content.xpath(
                './/a/@data-productid').extract_first()
            if product_id is None:
                # Stop at the first entry without a product id; checking
                # before the remaining extractions avoids failing on such
                # an entry.
                break

            # Each product gets its own item instance so that items already
            # yielded are not overwritten when the next product is populated.
            item = FashionwebscrapingItem()
            item['productId'] = product_id
            item['productName'] = product_content.xpath(
                './/h3/@title').extract_first()

            # The plain listed price is the fallback for both price fields.
            listed_price = product_content.xpath(
                './/span[@class="price product-price"]/text()'
            ).extract_first()

            price_original = product_content.xpath(
                './/del[@class="price old product-old-price"]/text()'
            ).extract_first()
            if price_original is None:
                price_original = listed_price
            item['priceOriginal'] = price_original

            # The "price-value" div is taken as serialized markup (no text()
            # step) and reduced to digits and separators below.
            price_sale = product_content.xpath(
                './/div[@class="price-value"]').extract_first()
            if price_sale is None:
                price_sale = listed_price
            if price_sale is not None:
                # Keep only digits and decimal/thousands separators; skipped
                # when no price was found so None is not iterated.
                price_sale = ''.join(
                    ch for ch in price_sale if ch in '0123456789,.')
            item['priceSale'] = price_sale

            image_link = product_content.xpath('.//img/@src').extract_first()
            item['imageLink'] = image_link

            # A missing href leaves the link as None instead of raising a
            # TypeError on str + None.
            href = product_content.xpath('.//a/@href').extract_first()
            item['productLink'] = (
                "https://www.hepsiburada.com" + href) if href is not None else None

            item['company'] = "HEPSIBURADA"
            item['gender'] = response.meta['gender']

            # Keep None out of image_urls; it is not a downloadable URL.
            image_urls = [] if image_link is None else [image_link]

            yield item
            yield ImgData(image_urls=image_urls)