Exemplo n.º 1
0
def childes(
    selector: SelectorList,
    parent_tag: str,
) -> SelectorList:
    """Collect the children of ``parent_tag`` one sibling position at a time.

    Runs the CSS query ``<parent_tag> > :nth-child(k)`` against ``selector``
    for k = 1, 2, 3, ... and stops at the first position whose query result
    is empty.

    Args:
        selector: Object whose ``css`` method is used to run each query.
        parent_tag: CSS selector text identifying the parent element(s).

    Returns:
        A ``SelectorList`` with one entry per non-empty position, in position
        order. Each entry is the whole result of that position's query,
        appended as a unit rather than flattened.

    Raises:
        TypeError: If ``parent_tag`` is not a ``str``.
    """
    if not isinstance(parent_tag, str):
        raise TypeError('Given `parent_tag` is not `str` object.')

    nth_child_query = parent_tag + ' > :nth-child({i})'
    collected = SelectorList()
    position = 1
    while True:
        matched = selector.css(nth_child_query.format(i=position))
        if not matched:
            # First empty position marks the end of the children.
            return collected
        collected.append(matched)
        position += 1
Exemplo n.º 2
0
    def parse_product(self, response):
        """Parse a product page and yield at most one ``MTSGetdataItem``.

        Reads the thumbnail images, the attributes block, the sale-property
        ``<li>`` options and the third ``<script>`` under ``div.tm-clear``
        from ``response``, then builds one colour entry per image-bearing
        option with its pricing list.

        ``response.meta`` must provide ``price``, ``product_url``,
        ``item_id`` and ``title``.

        Yields:
            The populated item, only when at least one colour ended up with a
            non-empty pricing list.

        Raises:
            IndexError: If an expected node (attributes block, setup script,
                option label/title, or a fallback thumbnail) is missing.
        """
        pic = response.xpath('//ul[@id="J_UlThumb"]/li/a/img/@src').extract()
        detail = response.xpath('//div[@class="attributes" and @id="attributes"]').extract()
        # Options that carry an image (class contains "tb-img") are treated as
        # colours; `total_list` holds every sale-property option.
        color_pics = response.xpath('//dd/ul[contains(@class,"tm-clear J_TSaleProp tb-img")]/li')
        total_list = response.xpath('//dd/ul[contains(@class,"tm-clear J_TSaleProp")]/li')
        json_getter = response.xpath('//div[@class="tm-clear"]/script[3]').extract()
        # NOTE(review): an unfinished `extract_pic = ` assignment (it had no
        # right-hand side) and the debug print of its length were dropped;
        # nothing else in this method used that value.

        # Every option whose label is not a colour label goes into size_list.
        # The colour labels are extracted once instead of once per option.
        color_labels = set(color_pics.xpath('a/span/text()').extract())
        size_list = SelectorList()
        for option in total_list:
            if option.xpath('a/span/text()').extract()[0] not in color_labels:
                size_list.append(option)

        # Map each option's visible label to its `data-value` attribute.
        str_val_map = {}
        for option in total_list:
            v = option.xpath('@data-value').extract()
            n = option.xpath('a/span/text()').extract()
            if len(n) > 0 and len(v) > 0:
                str_val_map[n[0]] = v[0]
            else:
                print("Value Error")

        # Everything after 'TShop.Setup(' in the script text is handed to
        # python_getter, whose result is indexed like a nested dict below.
        st = json_getter[0].split('TShop.Setup(')[1]
        info_dict = self.python_getter(st)
        skuMap = info_dict["valItemInfo"]["skuMap"]

        product_img = [self.resize_pic(src) for src in pic]

        color_set = []
        for i in color_pics:
            color = {}
            color['color'] = i.xpath('@title').extract()[0]
            try:
                style = i.xpath('a/@style').extract()[0]
            except IndexError:
                # No inline style on this option: use the first product image.
                color['image_url'] = product_img[0]
            else:
                # Keep the text between '(' + 3 characters and the last ')'
                # (presumably skipping a leading "(//" of a url(...) value;
                # confirm against real pages), then cut everything from the
                # last '_' onwards.
                style = style[style.find('(') + 3:style.rfind(')')]
                color['image_url'] = style[0:style.rfind('_')]

            color['alternative_image_urls'] = product_img
            color['pricing_list'] = self.get_pricing(skuMap, str_val_map, i, size_list, response.meta['price'])
            if len(color['pricing_list']) > 0:
                color_set.append(color)

        item = MTSGetdataItem()
        item['product_url'] = response.meta['product_url']
        item['item_id'] = response.meta['item_id']
        item['title'] = response.meta['title']
        item['brand'] = 'Midi'
        item['merchant'] = 'Tmall'
        item['product_description'] = ''
        item['product_detail'] = detail[0]
        item['colors'] = color_set
        item['categories'] = info_dict['itemDO']['categoryId']
        if len(item['colors']) > 0:
            yield item