def parse_product_size(self, response):
    """Parse a size-chart response and attach the table to its product.

    Reads ``uid``, ``product_id`` and ``product_size_url`` from
    ``response.meta``. The size table is looked up inside the text of the
    ``<script id="sizechart-modal">`` element, which is re-parsed as HTML.
    The result, a list of rows where each row is a list of cell texts, is
    stored in ``product['design_size']``.

    Returns whatever ``try_return_designer_if_last_product_detail_page``
    returns for this designer uid.
    """
    uid = response.meta['uid']
    designer = self.designer_info_dict[uid].designer
    product_size_url = response.meta['product_size_url']
    product_id = response.meta['product_id']
    self.logger.info(u'parse product size[%s] response, response status: %d', product_size_url, response.status)

    # A generator with next() works on Python 2 and 3 alike; the previous
    # ``filter(...)[0]`` fails on Python 3 where filter() returns an iterator.
    product = next((p for p in designer['products'] if p['uid'] == product_id), None)
    if product is None:
        # Do not abort the callback: the bookkeeping call below still has to
        # run for this designer.
        self.logger.warning(u'product[%s] not found for size page[%s]', product_id, product_size_url)
        return self.try_return_designer_if_last_product_detail_page(uid)

    # The size chart lives inside a script element, so its text has to be
    # parsed as a document of its own.
    size_chart_html = skutils.get_first(response.xpath('//script[@id="sizechart-modal"]/text()').extract())
    design_size = []
    # Presumably get_first() yields a falsy value when nothing matched (the
    # sibling callbacks guard its result the same way) -- skip parsing then.
    if size_chart_html:
        rows = Selector(text=size_chart_html).xpath('//table[@class="size-conversion-table"]//tr')
        design_size = [
            [skutils.get_first(cell.xpath('text()').extract()) for cell in row.xpath('td')]
            for row in rows
        ]

    product['design_size'] = design_size
    return self.try_return_designer_if_last_product_detail_page(uid)
def parse_designer(self, response):
    """Build a ``DesignerItem`` from a designer page.

    Extracts the designer id, name, description, avatar URL and nation
    from the page. The description is the concatenation of the text found
    directly in the info paragraph and the text of its nested ``<span>``.

    Returns the populated item when the page carries no designer id.
    Otherwise the designer is registered in ``self.designer_info_dict`` and
    the result of ``make_products_list_request(designer, 0, 0)`` is
    returned.
    """
    self.logger.info('Hi, this is designer page! %s', response.url)
    designer = DesignerItem()

    uid = skutils.get_first(response.xpath('//ul[@class="dropdown-menu"]/li/@data-designer-id').extract())
    name = skutils.get_first(response.xpath('//div[@class="designer-info-wrap"]/h1/text()').extract())
    desc_part1 = skutils.get_first(response.xpath('//div[@class="designer-info-wrap"]/p/text()').extract())
    desc_part2 = skutils.get_first(
        response.xpath('//div[@class="designer-info-wrap"]/p/span/text()').extract())
    # Each half must be parenthesised: without the parentheses the
    # conditional expression binds looser than "+", so the second half was
    # silently dropped whenever the first half was present.
    desc = (desc_part1.strip() if desc_part1 else "") + (desc_part2.strip() if desc_part2 else "")
    img_url = skutils.get_first(response.xpath('//div[@class="designer-avatar"]/img/@src').extract())
    nation = skutils.get_first(response.xpath('//div[@class="designer-avatar"]/div/text()').extract())

    designer['uid'] = uid.strip() if uid else ""
    designer['name'] = name.strip() if name else ""
    designer['url'] = response.url
    designer['desc'] = desc
    designer['img_url'] = (CarnetSpider.DOMAIN_PREFIX + img_url.strip()) if img_url else ""
    designer['nation'] = nation.strip() if nation else ""
    designer['product_detail_urls'] = []
    designer['products'] = []
    # NOTE(review): when no avatar was found this list holds one empty
    # string -- confirm the download step tolerates that.
    designer['file_urls'] = [designer['img_url']]  # for download

    uid = designer['uid']
    if not uid:
        # No designer id on the page: handled as a designer without products.
        return designer

    self.designer_info_dict[uid] = DesignerInfo(uid, designer)
    return self.make_products_list_request(designer, 0, 0)
def parse_product_detail(self, response):
    """Parse a product detail page into a ``ProductItem``.

    Reads ``uid`` and ``detail_url`` from ``response.meta``, extracts name,
    prices, size options, description, size table and image URLs, and
    appends the product to the designer's ``products`` list. The image URLs
    are also added to the designer's ``file_urls``. ``stock`` and ``uid``
    of the product are copied from ``designer_info.products[detail_url]``.

    Returns whatever ``try_return_designer_if_last_product_detail_page``
    returns for this designer uid.
    """
    designer_info = self.designer_info_dict[response.meta['uid']]
    designer = designer_info.designer
    detail_url = response.meta['detail_url']
    self.logger.info('parse product detail[%s] response, response status: %d', detail_url, response.status)

    product = ProductItem()
    name = skutils.get_first(
        response.xpath('//div[@class="product-info"]/h1[@class="hidden-xs"]/text()').extract())
    price = skutils.get_first(
        response.xpath('//span[@class="price hidden-xs"]/span[@class="cdm-price-1"]/text()').extract())
    if not price:
        # Fall back to the alternative price element.
        price = skutils.get_first(
            response.xpath('//span[@class="price hidden-xs"]/span[@class="cdm-price-2"]/text()').extract())
    original_price = skutils.get_first(
        response.xpath('//span[@class="bottom-price"]/span[@class="real-price cdm-price-3"]/text()').extract())

    size_info = []
    for option in response.xpath('//select[@id="size-select"]/option'):
        size_text = skutils.get_first(option.xpath('text()').extract())
        size_info.append({
            # An <option> without text no longer crashes on ``.strip()``.
            'size': size_text.strip() if size_text else "",
            'product_id': skutils.get_first(option.xpath('@data-product-id').extract()),
            'stock': skutils.get_first(option.xpath('@data-stock').extract()),
            'selected': (skutils.get_first(option.xpath('@selected').extract()) == "selected"),
        })

    desc = response.xpath('//div[@class="panel-collapse in hidden-xs"]/div[@class="panel-body"]//text()').extract()
    design_size = skutils.get_first(response.xpath('//table[@class="table table-bordered"]').extract())
    img_url = response.xpath('//a[@data-image]/@data-image').extract()

    product['uri'] = detail_url
    product['name'] = name.strip() if name else ""
    product['price'] = price.strip() if price else ""
    product['original_price'] = original_price.strip() if original_price else ""
    product['size_info'] = size_info
    # next() with a default replaces ``filter(...)[0]``: it works on Python 3
    # and yields "" instead of raising when no option is marked as selected.
    product['current_size'] = next((entry['size'] for entry in size_info if entry.get('selected')), "")
    product['desc'] = " ".join(desc).strip()
    product['design_size'] = skutils.remove_html_attributes(design_size)
    product['img_url'] = [CarnetSpider.DOMAIN_PREFIX + x.strip() for x in img_url]

    listed_product = designer_info.products[detail_url]
    product['stock'] = listed_product['stock']
    product['uid'] = listed_product['uid']

    designer['file_urls'].extend(product['img_url'])  # for download
    designer['products'].append(product)
    return self.try_return_designer_if_last_product_detail_page(response.meta['uid'])
def parse_designer(self, response):
    """Build a ``SsenseDesignerItem`` and schedule its product detail pages.

    The designer receives the next value of ``self.index`` as its uid and
    is registered in ``self.designer_info_dict``. When the page lists
    product links, one request per entry of
    ``designer['product_detail_urls']`` is yielded; otherwise the designer
    item itself is yielded.
    """
    self.logger.info(u'Hi, this is designer page! %s', response.url)

    raw_name = skutils.get_first(
        response.xpath('//div[contains(@class, "browsing-designer-header-content")]/h1/text()').extract())
    raw_desc = skutils.get_first(
        response.xpath('//div[contains(@class, "browsing-designer-header-content")]/p/text()').extract())

    designer = SsenseDesignerItem()
    self.index += 1
    uid = self.index

    designer['uid'] = uid
    if raw_name:
        designer['name'] = raw_name.strip()
    else:
        designer['name'] = ""
    designer['url'] = response.url
    designer['desc'] = raw_desc
    designer['product_detail_urls'] = []
    designer['products'] = []
    designer['file_urls'] = []

    designer_info = DesignerInfo(uid, designer)
    self.designer_info_dict[uid] = designer_info

    hrefs = response.xpath('//div[@class="browsing-product-item"]/a/@href').extract()
    if not hrefs:
        # The designer has no products: emit the item right away.
        yield designer
        return

    candidate_urls = [SsenseSpider.DOMAIN_PREFIX + href for href in hrefs]
    # filter_product is expected to fill designer['product_detail_urls'];
    # its exact behaviour is not visible here.
    # NOTE(review): if it can leave that list empty, nothing is yielded for
    # this designer -- confirm that is intended.
    SsenseSpider.filter_product(designer, candidate_urls)
    accepted_urls = designer['product_detail_urls']
    designer_info.remain_detail_page = len(accepted_urls)
    for detail_url in accepted_urls:
        yield self.make_products_detail_request(detail_url, designer)
def parse(self, response):
    """Parse a product page into a ``PortraitProductItem``.

    The product is attached to the designer stored under key ``1`` of
    ``self.designer_info_dict``. Its uid is the next value of
    ``self.index`` as a string and its uri is the last path segment of the
    response URL. Image sources from the main and the bottom image
    containers are collected, in that order.

    Returns whatever ``try_return_designer_if_last_product_detail_page(1)``
    returns.
    """
    designer = self.designer_info_dict[1].designer
    url = response.url
    # Last "/"-separated segment of the URL.
    detail_url = url.rsplit('/', 1)[-1]
    self.logger.info(u'parse product detail[%s] response, response status: %d', url, response.status)

    product = PortraitProductItem()
    name = skutils.get_first(response.xpath('//div[@class="product-name"]/h1/text()').extract())

    img_url = list(response.xpath('//div[@class="product-image"]//img/@src').extract())
    img_url += response.xpath('//div[@class="product-image-bottom"]//img/@src').extract()

    self.index += 1
    product['uid'] = str(self.index)
    product['uri'] = detail_url
    product['name'] = name
    product['img_url'] = img_url

    designer['file_urls'].extend(img_url)  # for download
    designer['products'].append(product)
    return self.try_return_designer_if_last_product_detail_page(1)
def parse_product_detail(self, response):
    """Parse a product detail page into a ``SsenseProductItem``.

    Reads ``uid`` and ``detail_url`` from ``response.meta``. Product id,
    name, sku, category id and price come from ``data-product-*``
    attributes of the description container. Size options are read from
    the ``size`` select, skipping its first option. The product is appended
    to the designer's ``products`` and its image URLs to the designer's
    ``file_urls``.

    Returns the result of ``make_products_size_request`` for this product.
    """
    designer = self.designer_info_dict[response.meta['uid']].designer
    detail_url = response.meta['detail_url']
    self.logger.info(u'parse product detail[%s] response, response status: %d', detail_url, response.status)

    product = SsenseProductItem()
    container = response.xpath('//div[@class="product-description-container"]')
    uid = skutils.get_first(container.xpath('@data-product-id').extract())
    name = skutils.get_first(container.xpath('@data-product-name').extract())
    sku = skutils.get_first(container.xpath('@data-product-sku').extract())
    category_id = skutils.get_first(container.xpath('@data-product-category-id').extract())
    price = skutils.get_first(container.xpath('@data-product-price').extract())

    size_info = []
    for option in response.xpath('//select[@id="size"]/option[position()>1]'):
        label = skutils.get_first(option.xpath('text()').extract()).strip()
        disabled = skutils.get_first(option.xpath('@disabled').extract()) == 'disabled'
        # A disabled option is recorded with stock '0'; otherwise the stock
        # is left as None.
        size_info.append({'size': label, 'stock': '0' if disabled else None})

    desc = response.xpath('//p[contains(@class, "product-description-text")]//text()').extract()
    img_url = response.xpath('//div[@class="image-wrapper"]//img/@data-src').extract()

    product['uri'] = detail_url
    if name:
        product['name'] = name.strip()
    else:
        product['name'] = ""
    if price:
        product['price'] = "$" + price.strip()
    else:
        product['price'] = ""
    product['size_info'] = size_info
    product['desc'] = " ".join(desc).strip()
    product['img_url'] = img_url
    product['uid'] = uid
    product['sku'] = sku
    product['category_id'] = category_id

    designer['file_urls'].extend(img_url)  # for download
    designer['products'].append(product)
    return self.make_products_size_request(designer, uid, category_id)