Exemplo n.º 1
0
    def parse_designer(self, response):
        """Build the designer item for a designer page and queue its products.

        Reads the designer id from the ``designerid`` parameter of
        ``response.url`` and the designer name from the page, registers a
        ``DesignerInfo`` for it in ``self.designer_info_dict``, records every
        product link found on the page, and then hands over to
        ``start_request_product_detail_page``.

        :param response: response for the designer listing page.
        :return: whatever ``start_request_product_detail_page`` returns.
        """
        self.logger.info('Hi, this is designer page! %s', response.url)
        item = CeremonyDesignerItem()

        designer_uid = skutils.retrieve_url_param(response.url, 'designerid')
        raw_name = response.xpath('//div[@class="productName"]/a/text()').extract_first()

        item['uid'] = designer_uid
        if raw_name:
            item['name'] = raw_name.strip()
        else:
            item['name'] = ""
        item['url'] = response.url
        # The page offers no separate description, so the name doubles as one.
        item['desc'] = item['name']
        # Each list field starts out as its own empty list.
        for list_field in ('product_detail_urls', 'products', 'file_urls'):
            item[list_field] = []

        info = DesignerInfo(designer_uid, item)
        self.designer_info_dict[designer_uid] = info

        # Parse the product list: every thumbnail link, minus its first character.
        hrefs = response.xpath('//div[@class="productThumb"]/a/@href').extract()
        product_uris = []
        for href in hrefs:
            product_uris.append(href[1:])

        # Index the products by uri; a repeated uri keeps the last entry.
        products_by_uri = {}
        for product_uri in product_uris:
            products_by_uri[product_uri] = {
                'uri': product_uri,
                'uid': skutils.retrieve_url_param(product_uri, 'productid'),
            }

        item['product_detail_urls'] = product_uris
        info.products.update(products_by_uri)

        return self.start_request_product_detail_page(response, info)
Exemplo n.º 2
0
    def parse_product_detail(self, response):
        """Parse one product detail page and attach the product to its designer.

        The designer is looked up in ``self.designer_info_dict`` by
        ``response.meta['uid']``. The parsed product is appended to the
        designer's ``products`` list and its image urls are added to the
        designer's ``file_urls``.

        Missing page fragments (price, original price, size class, small-font
        description) no longer abort the parse: the affected field is left
        empty or ``None`` instead.

        :param response: response for the product detail page; its ``meta``
            must carry ``uid`` (designer id) and ``detail_url``.
        :return: whatever ``try_return_designer_if_last_product_detail_page``
            returns for this designer.
        :raises KeyError: if ``meta`` lacks ``uid``/``detail_url`` or the
            designer is not registered in ``self.designer_info_dict``.
        """
        designer_uid = response.meta['uid']
        designer_info = self.designer_info_dict[designer_uid]
        designer = designer_info.designer
        detail_url = response.meta['detail_url']
        self.logger.info('parse product detail[%s] response, response status: %d', detail_url, response.status)
        product = CeremonyProductItem()

        uid = skutils.retrieve_url_param(response.url, 'productid')
        product_nodes = response.xpath('//div[@class="product_right_info"]')
        name = product_nodes.xpath('span[@class="pname"]/text()').extract_first()

        # extract_first() returns None when nothing matches, so guard before
        # calling str methods on the result.
        price = (product_nodes.xpath('div[@class="productprice"]/text()').extract_first() or '').strip()
        original_price = None
        if price == '$':
            # A bare '$' means the real prices sit in two <span> children:
            # the first is the original price, the second the current one.
            price = product_nodes.xpath('div[@class="productprice"]/span[2]/text()').extract_first()
            struck_price = product_nodes.xpath('div[@class="productprice"]/span[1]/text()').extract_first()
            if struck_price is not None:
                original_price = '$ ' + struck_price

        # NOTE(review): the '//'-prefixed XPaths below are evaluated against
        # the whole document, not relative to the node they are called on.
        # Left unchanged because the page layout cannot be confirmed here.
        size_lis = product_nodes.xpath('//ul[@class="ul_SizesColors"]/li')

        # Group the size/colour attributes by the product id each <li> carries.
        variants = {}
        for li in size_lis:
            css_class = li.xpath('@class').extract_first() or ''
            class_match = re.search(r'^li_(\w+) li', css_class)
            # An unexpected class yields attr_name None instead of a crash.
            attr_name = class_match.group(1) if class_match else None
            attr_value = li.xpath('@title').extract_first()
            product_id = li.xpath('span[@class="productid"]/text()').extract_first()
            if product_id not in variants:
                variants[product_id] = {'product_id': product_id, 'attrs': []}
            variants[product_id]['attrs'].append((attr_name, attr_value))
        # Materialize as a list: on Python 3 dict.values() is only a view.
        size_info = list(variants.values())

        desc_node = product_nodes.xpath('//div[@class="plproducttab plproductdetails"]')
        desc = '\n'.join(desc_node.xpath('text()').extract()).strip()
        small_print = desc_node.xpath('//span[@class="smallfont"]/text()').extract_first()
        if small_print is not None:
            desc += '\n' + small_print.strip()

        design_size = '\n'.join(product_nodes.xpath('//div[@class="plproducttab plproductdescription"]/p/text()').extract())

        img_url = response.xpath('//div[@class="pili"]/img/@src').extract()

        product['uri'] = detail_url
        product['name'] = name.strip() if name else ""
        product['price'] = price
        product['original_price'] = original_price
        product['size_info'] = size_info
        product['desc'] = desc
        product['design_size'] = design_size
        # Dropping 'menu_' from each thumbnail url gives the url that is downloaded.
        product['img_url'] = [x.replace('menu_', '') for x in img_url]
        product['uid'] = uid

        designer['file_urls'].extend(product['img_url'])  # for download

        designer['products'].append(product)

        return self.try_return_designer_if_last_product_detail_page(designer_uid)