def parse_product_pages(self, response):
    """Turn an LC Waikiki catalog JSON response into product and image items.

    The response text is decoded as JSON and every entry under
    ``CatalogList -> Items`` produces one ``FashionwebscrapingItem``
    followed by one ``ImgData`` holding that product's image URL.

    Args:
        response: Response whose text is the catalog JSON and whose
            ``meta['gender']`` value is copied onto every product item.

    Yields:
        For each catalog entry, the populated product item and then the
        image item.
    """
    catalog = json.loads(response.text)

    for entry in catalog['CatalogList']['Items']:
        # A fresh item per product: an item that has already been yielded
        # may still be travelling through the pipelines, so reusing and
        # mutating one shared instance could overwrite earlier products.
        item = FashionwebscrapingItem()
        item['productId'] = entry['ModelId']
        item['productName'] = entry['ProductDescription']
        item['priceOriginal'] = entry['OldPrice']
        item['priceSale'] = entry['Price']
        item['imageLink'] = entry['DefaultOptionImageUrl']
        # ModelUrl is appended to the site root as-is.
        item['productLink'] = "https://www.lcwaikiki.com" + entry['ModelUrl']
        item['company'] = "LCWAIKIKI"
        item['gender'] = response.meta['gender']

        yield item
        yield ImgData(image_urls=[item['imageLink']])
def parse(self, response):
    """Extract Koton products from a listing page.

    Every ``div`` with class ``product-item plp-large-images`` is treated
    as one product tile. Tiles without a ``data-product`` id are skipped.

    Args:
        response: Listing page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each tile with a product id, a ``FashionwebscrapingItem`` and
        then an ``ImgData`` with the tile's image URL (empty list when the
        tile has no image).
    """
    sel = Selector(text=response.body)

    for tile in sel.xpath('//div[@class="product-item plp-large-images"]'):
        # Check the id first so tiles that are not products are skipped
        # before any string concatenation on possibly missing attributes.
        product_id = tile.xpath(
            './/div[@class="my-fav-icon"]/@data-product').extract_first()
        if product_id is None:
            continue

        image_link = tile.xpath(
            './/div[@class="swiper-slide"]/img/@data-src').extract_first()
        href = tile.xpath('.//a/@href').extract_first()

        # A tile shows either a single price ("firstPrice") or a crossed-out
        # price plus a new price.
        first_price = tile.xpath(
            './/span[@class="firstPrice"]/text()').extract_first()
        if first_price is not None:
            price_original = first_price
            price_sale = first_price
        else:
            price_original = tile.xpath(
                './/span[@class="insteadPrice"]/s/text()').extract_first()
            price_sale = tile.xpath(
                './/span[@class="newPrice"]/text()').extract_first()

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['company'] = "KOTON"
        item['gender'] = response.meta['gender']
        item['productName'] = tile.xpath('@data-name').extract_first()
        item['imageLink'] = image_link
        # A missing href leaves the link empty instead of raising TypeError.
        item['productLink'] = (
            "https://www.koton.com" + href if href is not None else None
        )
        item['priceOriginal'] = price_original
        item['priceSale'] = price_sale
        item['productId'] = product_id

        yield item
        # Never hand a None URL to the image item.
        yield ImgData(
            image_urls=[image_link] if image_link is not None else [])
def parse_product_pages(self, response):
    """Extract Morhipo products from a listing page.

    Iterates over every ``li`` below ``ol.ProductList`` and stops at the
    first one that carries no ``data-productid``.

    Args:
        response: Listing page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each product, a ``FashionwebscrapingItem`` and then an
        ``ImgData`` whose ``image_urls`` list is empty (no image URL is
        forwarded for this site).
    """
    listing = response.xpath('//ol[@class="ProductList"]')

    for node in listing.xpath('.//li'):
        product_id = node.xpath('.//input/@data-productid').extract_first()
        if product_id is None:
            # NOTE(review): the first entry without an id ends the whole
            # page; confirm this should not be a skip instead.
            break

        price_sale = node.xpath(
            './/span[@class="text-danger"]/text()').extract_first()
        price_original = node.xpath(
            './/span[starts-with(@class,"act_price text-muted")]/s/text()'
        ).extract_first()
        if price_original is None:
            price_original = node.xpath(
                './/span[starts-with(@class,"prd_price")]/strong/text()'
            ).extract_first()
        if price_sale is None:
            price_sale = node.xpath(
                './/span[@class="badge-price"]/text()').extract_first()
        if price_sale is None:
            # No discount markup at all: sale price equals the list price.
            price_sale = price_original

        href = node.xpath('.//a/@href').extract_first()

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['productId'] = product_id
        item['productName'] = node.xpath('.//img/@title').extract_first()
        item['priceSale'] = price_sale
        item['priceOriginal'] = price_original
        item['imageLink'] = node.xpath('.//img/@data-srcset').extract_first()
        # A missing href leaves the link empty instead of raising TypeError.
        item['productLink'] = (
            "https://www.morhipo.com" + href if href is not None else None
        )
        item['company'] = "MORHIPO"
        item['gender'] = response.meta['gender']

        yield item
        yield ImgData(image_urls=[])
def parse_product_pages(self, response):
    """Extract Derimod products from a listing page.

    Iterates over the product tiles inside the
    ``list-content js-list-products three`` container and stops at the
    first tile that carries no ``data-sku``.

    Args:
        response: Listing page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each product, a ``FashionwebscrapingItem`` and then an
        ``ImgData`` whose ``image_urls`` list is empty (no image URL is
        forwarded for this site).
    """
    listing = response.xpath(
        '//div[@class="list-content js-list-products three"]')
    tiles = listing.xpath(
        './/div[@class="col-sm-4 col-xs-6 padding-lg list-content-product-item"]'
    )

    for tile in tiles:
        product_id = tile.xpath(
            './/div[@class="js-product-wrapper"]/@data-sku').extract_first()
        if product_id is None:
            # NOTE(review): the first tile without a SKU ends the whole
            # page; confirm this should not be a skip instead.
            break

        price_sale = tile.xpath(
            './/span[@class="product-sale-price"]/text()').extract_first()
        price_original = tile.xpath(
            './/span[@class="product-price line-through"]/text()'
        ).extract_first()
        if price_original is None:
            # No crossed-out price: the product is not discounted.
            price_original = price_sale

        href = tile.xpath('.//a/@href').extract_first()

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['productId'] = product_id
        item['productName'] = tile.xpath(
            './/span[@class="product-name"]/text()').extract_first()
        item['priceOriginal'] = price_original
        item['priceSale'] = price_sale
        item['imageLink'] = tile.xpath('.//img/@src').extract_first()
        # A missing href leaves the link empty instead of raising TypeError.
        item['productLink'] = (
            "https://www.derimod.com.tr" + href if href is not None else None
        )
        item['company'] = "DERIMOD"
        item['gender'] = response.meta['gender']

        yield item
        yield ImgData(image_urls=[])
def parse_product_pages(self, response):
    """Extract H&M products from a listing page.

    Iterates over the ``li.product-item`` elements inside
    ``ul.products-listing small`` and stops at the first one that carries
    no ``data-articlecode``.

    Args:
        response: Listing page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each product, a ``FashionwebscrapingItem`` and then an
        ``ImgData`` with the product's image URL (empty list when the
        entry has no image).
    """
    # One scoped query: a leading "//" on a sub-selector would search the
    # whole document again instead of staying inside the product list.
    entries = response.xpath(
        '//ul[@class="products-listing small"]//li[@class="product-item"]')

    for entry in entries:
        product_id = entry.xpath(
            './/article[@class="hm-product-item"]/@data-articlecode'
        ).extract_first()
        if product_id is None:
            # NOTE(review): the first entry without an article code ends
            # the whole page; confirm this should not be a skip instead.
            break

        price_original = entry.xpath(
            './/span[@class="price regular"]/text()').extract_first()
        price_sale = entry.xpath(
            './/span[@class="price sale"]/text()').extract_first()
        if price_sale is None:
            # No sale price shown: the product sells at its regular price.
            price_sale = price_original

        href = entry.xpath('.//a[@class="link"]/@href').extract_first()
        image_src = entry.xpath('.//img/@data-src').extract_first()
        # The data-src value is prefixed with the scheme; a missing value
        # stays None instead of raising TypeError.
        image_link = "https:" + image_src if image_src is not None else None

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['productId'] = product_id
        item['productName'] = entry.xpath(
            './/a[@class="link"]/text()').extract_first()
        item['priceOriginal'] = price_original
        item['priceSale'] = price_sale
        item['productLink'] = (
            "https://www2.hm.com" + href if href is not None else None
        )
        item['imageLink'] = image_link
        item['company'] = "HM"
        item['gender'] = response.meta['gender']

        yield item
        # Never hand a None URL to the image item.
        yield ImgData(
            image_urls=[image_link] if image_link is not None else [])
def parse_product_pages(self, response):
    """Extract Modanisa products from a listing page.

    Iterates over every ``li`` below ``ul#productsList`` and stops at the
    first one whose anchor carries no ``data-product-id``.

    Args:
        response: Listing page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each product, a ``FashionwebscrapingItem`` and then an
        ``ImgData`` with the product's image URL (empty list when the
        entry has no image).
    """
    listing = response.xpath('//ul[@id="productsList"]')

    for node in listing.xpath('.//li'):
        product_id = node.xpath('.//a/@data-product-id').extract_first()
        if product_id is None:
            # NOTE(review): the first entry without an id ends the whole
            # page; confirm this should not be a skip instead.
            break

        price_sale = node.xpath('.//a/@data-product-price').extract_first()
        price_original = node.xpath(
            './/p[@class="price"]/del/text()').extract_first()
        if price_original is None:
            # No crossed-out price: the product is not discounted.
            price_original = price_sale

        # Prefer the data-original attribute and fall back to src.
        image_link = node.xpath('.//img/@data-original').extract_first()
        if image_link is None:
            image_link = node.xpath('.//img/@src').extract_first()

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['productId'] = product_id
        item['productName'] = node.xpath(
            './/a/@data-product-name').extract_first()
        item['priceSale'] = price_sale
        item['priceOriginal'] = price_original
        item['imageLink'] = image_link
        # The href is stored exactly as found; no site prefix is added.
        item['productLink'] = node.xpath(
            './/a[@class="productClickClass"]/@href').extract_first()
        item['company'] = "MODANISA"
        item['gender'] = response.meta['gender']

        yield item
        # Never hand a None URL to the image item.
        yield ImgData(
            image_urls=[image_link] if image_link is not None else [])
def parse_product_pages(self, response):
    """Extract Yargici products from a listing page.

    Iterates over the ``product-grid-item-container`` tiles found below
    any ``div.row`` and stops at the first tile whose anchor carries no
    ``data-fav-check``.

    Args:
        response: Listing page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each product, a ``FashionwebscrapingItem`` and then an
        ``ImgData`` with the product's image URL (empty list when the
        tile has no image).
    """
    rows = response.xpath('//div[@class="row"]')
    tiles = rows.xpath(
        './/div[@class="col-6 col-xs-6 col-sm-6 col-md-4 col-lg-4 col-xl-4 product-grid-item-container p-0"]'
    )

    for tile in tiles:
        product_id = tile.xpath('.//a/@data-fav-check').extract_first()
        if product_id is None:
            # NOTE(review): the first tile without an id ends the whole
            # page; confirm this should not be a skip instead.
            break

        price_sale = tile.xpath(
            './/li[@class="list-inline-item"]/span/text()').extract_first()
        price_original = tile.xpath(
            './/li[@class="list-inline-item mr-0"]/s/text()'
        ).extract_first()
        if price_original is None:
            # No crossed-out price: the product is not discounted.
            price_original = price_sale

        image_link = tile.xpath('.//img/@data-original').extract_first()
        href = tile.xpath('.//a/@href').extract_first()

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['productId'] = product_id
        item['productName'] = tile.xpath('.//img/@alt').extract_first()
        item['priceOriginal'] = price_original
        item['priceSale'] = price_sale
        item['imageLink'] = image_link
        # A missing href leaves the link empty instead of raising TypeError.
        item['productLink'] = (
            "https://www.yargici.com" + href if href is not None else None
        )
        item['company'] = "YARGICI"
        item['gender'] = response.meta['gender']

        yield item
        # Never hand a None URL to the image item.
        yield ImgData(
            image_urls=[image_link] if image_link is not None else [])
def parse_product_pages(self, response):
    """Extract Matmazel products from a listing page.

    Every ``div`` with class ``col col-4 col-sm-6 col-xs-12 productItem
    ease`` is treated as one product tile. Iteration stops at the first
    tile that has no ``variantOverlay`` ``data-id``.

    Args:
        response: Listing page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each product, a ``FashionwebscrapingItem`` and then an
        ``ImgData`` with the product's image URL (empty list when the
        tile has no image).
    """
    tiles = response.xpath(
        '//div[@class="col col-4 col-sm-6 col-xs-12 productItem ease"]')

    for tile in tiles:
        product_id = tile.xpath(
            './/div[@class="variantOverlay"]/@data-id').extract_first()
        if product_id is None:
            # NOTE(review): the first tile without an id ends the whole
            # page; confirm this should not be a skip instead.
            break

        price_sale = tile.xpath(
            './/div[@class="currentPrice"]/text()').extract_first()
        # The "discountedPrice" element is stored as the original price.
        price_original = tile.xpath(
            './/div[@class="discountedPrice"]/text()').extract_first()
        if price_original is None:
            price_original = price_sale

        image_link = tile.xpath(
            './/span[@itemprop="image"]/@content').extract_first()
        href = tile.xpath('.//a/@href').extract_first()

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['productId'] = product_id
        item['productName'] = tile.xpath(
            './/a[@class="col col-12 productDescription detailLink"]/@title'
        ).extract_first()
        item['priceOriginal'] = price_original
        item['priceSale'] = price_sale
        item['imageLink'] = image_link
        # A missing href leaves the link empty instead of raising TypeError.
        item['productLink'] = (
            "https://www.matmazel.com" + href if href is not None else None
        )
        item['company'] = "MATMAZEL"
        item['gender'] = response.meta['gender']

        yield item
        # Never hand a None URL to the image item.
        yield ImgData(
            image_urls=[image_link] if image_link is not None else [])
def parse_product_pages(self, response):
    """Extract Boyner products from a listing page.

    Every ``div`` whose class starts with ``product-list-item`` is treated
    as one product tile. Iteration stops at the first tile whose anchor
    carries no ``data-id``.

    Args:
        response: Listing page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each product, a ``FashionwebscrapingItem`` and then an
        ``ImgData`` with the product's image URL (empty list when the
        tile has no image).
    """
    tiles = response.xpath('//div[starts-with(@class,"product-list-item")]')

    for tile in tiles:
        product_id = tile.xpath('.//a/@data-id').extract_first()
        if product_id is None:
            # NOTE(review): the first tile without an id ends the whole
            # page; confirm this should not be a skip instead.
            break

        price_sale = tile.xpath(
            './/ins[@class="price-payable"]/text()').extract_first()
        price_original = tile.xpath(
            './/del[@class="price-psfx"]/text()').extract_first()
        if price_original is None:
            # No crossed-out price: the product is not discounted.
            price_original = price_sale

        image_link = tile.xpath('.//img/@data-original').extract_first()
        href = tile.xpath('.//a/@href').extract_first()

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['productId'] = product_id
        item['productName'] = tile.xpath('.//img/@title').extract_first()
        item['priceSale'] = price_sale
        item['priceOriginal'] = price_original
        item['imageLink'] = image_link
        # A missing href leaves the link empty instead of raising TypeError.
        item['productLink'] = (
            "https://www.boyner.com.tr" + href if href is not None else None
        )
        item['company'] = "BOYNER"
        item['gender'] = response.meta['gender']

        yield item
        # Never hand a None URL to the image item.
        yield ImgData(
            image_urls=[image_link] if image_link is not None else [])
def parse_product_pages(self, response):
    """Turn a Trendyol product JSON response into product and image items.

    The response text is decoded as JSON and every entry under
    ``result -> products`` produces one ``FashionwebscrapingItem``
    followed by one ``ImgData`` holding that product's first image URL.

    Args:
        response: Response whose text is the product JSON and whose
            ``meta['gender']`` value is copied onto every product item.

    Yields:
        For each product entry, the populated product item and then the
        image item.
    """
    payload = json.loads(response.text)

    for product in payload['result']['products']:
        prices = product['price']

        # A fresh item per product: an item that has already been yielded
        # may still be travelling through the pipelines, so reusing and
        # mutating one shared instance could overwrite earlier products.
        item = FashionwebscrapingItem()
        item['productId'] = product['id']
        item['productName'] = product['name']
        # Prices are stored as text with the currency suffix appended.
        item['priceOriginal'] = str(prices['originalPrice']) + " TL"
        item['priceSale'] = str(prices['discountedPrice']) + " TL"
        # Only the first image of the product is used.
        item['imageLink'] = "https://www.trendyol.com" + product['images'][0]
        item['productLink'] = "https://www.trendyol.com" + product['url']
        item['company'] = "TRENDYOL"
        item['gender'] = response.meta['gender']

        yield item
        yield ImgData(image_urls=[item['imageLink']])
# Callback stub: creates an empty FashionwebscrapingItem and returns None
# without yielding anything.
# NOTE(review): the body looks truncated (no extraction loop is present
# here) - confirm whether the rest of this parser was lost.
def parse_product_pages(self,response): item=FashionwebscrapingItem()
def parse_product_pages(self, response):
    """Extract Hepsiburada products from a search result page.

    Every ``li`` with the ``search-item ... not-fashion-flex`` class that
    sits inside a ``ul`` is treated as one product. Iteration stops at the
    first entry whose anchor carries no ``data-productid``.

    Args:
        response: Search page response; ``meta['gender']`` is copied onto
            every product item.

    Yields:
        For each product, a ``FashionwebscrapingItem`` and then an
        ``ImgData`` with the product's image URL (empty list when the
        entry has no image).
    """
    # One document-level query returns each matching li exactly once.
    # Running an absolute "//li" query from every ul on the page would
    # return the full product list once per ul.
    entries = response.xpath(
        '//ul//li[@class="search-item col lg-1 md-1 sm-1 custom-hover not-fashion-flex"]'
    )

    for entry in entries:
        product_id = entry.xpath('.//a/@data-productid').extract_first()
        if product_id is None:
            # NOTE(review): the first entry without an id ends the whole
            # page; confirm this should not be a skip instead.
            break

        # Plain price text, used as the fallback for both price fields.
        listed_price = entry.xpath(
            './/span[@class="price product-price"]/text()').extract_first()

        price_original = entry.xpath(
            './/del[@class="price old product-old-price"]/text()'
        ).extract_first()
        if price_original is None:
            price_original = listed_price

        # The "price-value" div is extracted as markup (no text() step);
        # the character filter below reduces it to digits and separators.
        price_sale = entry.xpath('.//div[@class="price-value"]').extract_first()
        if price_sale is None:
            price_sale = listed_price
        if price_sale is not None:
            # Keep only digits and decimal/thousand separators. A missing
            # price stays None instead of raising TypeError.
            price_sale = ''.join(
                ch for ch in price_sale if ch in '0123456789,.')

        image_link = entry.xpath('.//img/@src').extract_first()
        href = entry.xpath('.//a/@href').extract_first()

        # A fresh item per product: an item that has already been yielded
        # may still be in the pipelines and must not be mutated again.
        item = FashionwebscrapingItem()
        item['productId'] = product_id
        item['productName'] = entry.xpath('.//h3/@title').extract_first()
        item['priceOriginal'] = price_original
        item['priceSale'] = price_sale
        item['imageLink'] = image_link
        # A missing href leaves the link empty instead of raising TypeError.
        item['productLink'] = (
            "https://www.hepsiburada.com" + href if href is not None else None
        )
        item['company'] = "HEPSIBURADA"
        item['gender'] = response.meta['gender']

        yield item
        # Never hand a None URL to the image item.
        yield ImgData(
            image_urls=[image_link] if image_link is not None else [])