def parse_lefeng_item(self, reponse):
        """Parse a Lefeng product page and attach its parameters to the item.

        Reads the HTML from ``reponse.body``, collects the product title,
        the rows of the detail table and the two prices into one dict, and
        stores that dict under ``item['other_parameter']``.

        Args:
            reponse: the response object for the product page. It must
                expose ``body`` (the page HTML) and ``meta['item']`` (the
                item being filled in). The parameter name keeps its
                original spelling so existing callers are unaffected.

        Yields:
            The item taken from ``reponse.meta['item']`` with
            ``other_parameter`` set.

        Raises:
            KeyError: if ``reponse.meta`` has no ``'item'`` entry.
            AttributeError: if one of the expected elements is missing from
                the page (``find`` returns ``None`` in that case and the
                next lookup on it fails).
        """
        soup = BeautifulSoup(reponse.body, "html5lib")
        item = reponse.meta['item']
        pro_parameter_dic = {}

        # Title: the <h1> inside div.bigProduct-c. A direct <i> child, when
        # present, is removed first so its text is not part of the title.
        title_tag = soup.find('div', class_="bigProduct-c").find('h1')
        title_i_tag = title_tag.find('i', recursive=False)
        if title_i_tag is not None:
            title_i_tag.extract()
        # presumably get_no_space_string strips whitespace -- helper is not
        # visible here, confirm against Parse_Util.
        pro_parameter_dic['title'] = Parse_Util.get_no_space_string(
            title_tag.text)
        # Debug trace kept from the original; the single-argument
        # parenthesised form prints the same text on Python 2 and 3.
        print('zzzzzzzzzzz------------- %s' % pro_parameter_dic)

        # Detail table: every <tr> under the table's direct <tbody> is
        # handed to the helper together with the u':' separator.
        detail_info_tag = soup.find('table', class_='detail-info-table')
        detail_tbody_tag = detail_info_tag.find('tbody', recursive=False)
        detail_rows = detail_tbody_tag.find_all('tr')
        pro_detail_parameter_dic = Parse_Util.structure_parameter_dic(
            detail_rows, u':')

        # update() instead of dict(a, **b): same precedence (detail entries
        # win over existing keys) but it does not require string keys.
        pro_parameter_dic.update(pro_detail_parameter_dic)

        # NOTE(review): the trailing space in the class name is kept exactly
        # as in the original; whether it matches depends on how the
        # installed BeautifulSoup compares class attributes -- verify.
        price_c_tag = soup.find('div', class_='dity-price-c ')
        promotion_price_tag = price_c_tag.find('strong')
        origin_price_tag = price_c_tag.find('b', class_='marketPrice-s')

        # The market price has the currency sign and spaces stripped from
        # both ends; the promotion price text is stored as-is.
        pro_parameter_dic['price'] = origin_price_tag.text.strip(u'¥ ')
        pro_parameter_dic['promotion_price'] = promotion_price_tag.text

        item['other_parameter'] = pro_parameter_dic

        yield item
示例#2
0
    def parse_js_item(self, response):
        """Parse a JS-delivered product fragment and extend the item.

        Decodes ``response.body`` as UTF-8, parses it as HTML, and merges
        the price, the product-number paragraph and the SKU info block
        into the parameters already stored on the item.

        Args:
            response: the response object for the fragment. It must expose
                ``body`` (UTF-8 encoded bytes) and ``meta['item']``, an
                item that already has an ``'other_parameter'`` dict.

        Yields:
            The item from ``response.meta['item']`` with
            ``other_parameter`` replaced by the merged dict.

        Raises:
            UnicodeDecodeError: if the body is not valid UTF-8.
            KeyError: if ``'item'`` or ``'other_parameter'`` is missing.
            AttributeError: if the ``p.proPrice`` element is missing
                (``find`` returns ``None`` and the next lookup fails).
        """
        js_data = response.body.decode('UTF-8')
        soup = BeautifulSoup(js_data, "html5lib")
        item = response.meta['item']
        parameter_dic = item['other_parameter']

        # Price: text of the <span> that is a direct child of p.proPrice.
        # This writes into the item's existing dict, as the original did.
        pro_price_tag = soup.find('p', class_='proPrice')
        price_span_tag = pro_price_tag.find('span', recursive=False)
        parameter_dic['price'] = price_span_tag.text

        # Product number: the p.proItem tag is passed as a one-element list
        # with the u':' separator (helper semantics not visible here).
        pro_num_tag = soup.find('p', class_='proItem')
        item_no_dic = Parse_Util.structure_parameter_dic([pro_num_tag], u':')

        # Remaining parameters come from the div with id "skuInfo".
        skuinfo_tag = soup.find('div', id='skuInfo')
        other_parameter_dic = Parse_Util.make_up_dic(skuinfo_tag)

        # Merge into a fresh dict. Later updates win, so the order matches
        # the original: existing < SKU info < product number. update() is
        # used instead of dict(a, **b) so non-string keys are accepted too.
        pro_all_parameter_dic = dict(parameter_dic)
        pro_all_parameter_dic.update(other_parameter_dic)
        pro_all_parameter_dic.update(item_no_dic)
        item['other_parameter'] = pro_all_parameter_dic

        yield item