def parse_lefeng_item(self, reponse):
    """Parse a Lefeng product page and attach the scraped fields to the item.

    Collects the product title, the rows of the detail table and the two
    price elements from the page, stores them in a single dict under
    ``item['other_parameter']`` and yields the item.

    Args:
        reponse: Response for the product page. (The parameter name is
            misspelled but kept so existing callers keep working.) Its
            ``body`` is parsed with html5lib and ``meta['item']`` supplies
            the item to fill in.

    Yields:
        The item from ``reponse.meta['item']`` with ``other_parameter`` set.

    Raises:
        AttributeError: If one of the expected elements is not present on
            the page, since the lookups are chained without ``None`` checks.
    """
    import logging

    data = reponse.body
    soup = BeautifulSoup(data, "html5lib")
    item = reponse.meta['item']

    # Title: remove an <i> that is a direct child of the <h1> (if present)
    # so its text is not included in the title.
    title_tag = soup.find('div', class_="bigProduct-c").find('h1')
    title_i_tag = title_tag.find('i', recursive=False)
    if title_i_tag is not None:
        title_i_tag.extract()

    pro_parameter_dic = {
        'title': Parse_Util.get_no_space_string(title_tag.text),
    }
    # Debug-level log instead of a print statement, so the callback does not
    # write to stdout and the code is not tied to Python 2 print syntax.
    logging.getLogger(__name__).debug(
        'zzzzzzzzzzz------------- %s', pro_parameter_dic)

    # Detail table: the <tr> rows of the table's direct <tbody> child are
    # handed to the project helper together with a full-width colon.
    # NOTE(review): presumably the helper splits each row's text on that
    # separator into key/value pairs -- confirm against Parse_Util.
    detail_info_tag = soup.find('table', class_='detail-info-table')
    detail_tbody_tag = detail_info_tag.find('tbody', recursive=False)
    detail_tags = detail_tbody_tag.find_all('tr')
    pro_detail_parameter_dic = Parse_Util.structure_parameter_dic(
        detail_tags, u':')
    # update() instead of dict(a, **b): keyword unpacking raises TypeError
    # for keys that are not strings; detail entries still override 'title'
    # on a key clash, exactly as before.
    pro_parameter_dic.update(pro_detail_parameter_dic)

    # Prices: 'price' comes from the market-price <b> with the yen sign and
    # spaces stripped from both ends; 'promotion_price' is the <strong> text.
    # NOTE(review): the class string below ends with a space; whether that
    # matches depends on how the installed Beautiful Soup stores the class
    # attribute -- verify against a live page before changing it.
    price_c_tag = soup.find('div', class_='dity-price-c ')
    price_tag = price_c_tag.find('strong')
    origin_pirce_tag = price_c_tag.find('b', class_='marketPrice-s')
    pro_parameter_dic['price'] = origin_pirce_tag.text.strip(u'¥ ')
    pro_parameter_dic['promotion_price'] = price_tag.text

    item['other_parameter'] = pro_parameter_dic
    yield item
def parse_js_item(self, response):
    """Parse a JS-delivered product fragment and merge it into the item.

    Decodes the response body as UTF-8, reads the price, the ``proItem``
    paragraph and the ``skuInfo`` block from it, and merges the results
    into the dict already stored in ``item['other_parameter']``.

    Args:
        response: Response whose ``body`` holds the UTF-8 encoded markup
            and whose ``meta['item']`` is the item to update. The item must
            already carry an ``other_parameter`` dict.

    Yields:
        The item from ``response.meta['item']`` with ``other_parameter``
        replaced by the merged dict.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        AttributeError: If the ``proPrice`` paragraph is missing, since the
            lookup is chained without a ``None`` check.
    """
    js_data = response.body.decode('UTF-8')
    soup = BeautifulSoup(js_data, "html5lib")
    item = response.meta['item']
    parameter_dic = item['other_parameter']

    # The price is the <span> that is a direct child of <p class="proPrice">.
    # This writes into the dict that is already attached to the item.
    pro_price_tag = soup.find('p', class_='proPrice')
    price_span_tag = pro_price_tag.find('span', recursive=False)
    parameter_dic['price'] = price_span_tag.text

    # NOTE(review): both helpers receive whatever find() returned, which may
    # be None when the element is absent -- confirm Parse_Util tolerates it.
    pro_num_tag = soup.find('p', class_='proItem')
    item_no_dic = Parse_Util.structure_parameter_dic([pro_num_tag], u':')
    skuinfo_tag = soup.find('div', id='skuInfo')
    other_parameter_dic = Parse_Util.make_up_dic(skuinfo_tag)

    # Merge with the original precedence: existing parameters first, then
    # the skuInfo data, then the proItem data (later entries win on a key
    # clash). update() is used instead of dict(a, **b) because keyword
    # unpacking raises TypeError for keys that are not strings.
    pro_all_parameter_dic = dict(parameter_dic)
    pro_all_parameter_dic.update(other_parameter_dic)
    pro_all_parameter_dic.update(item_no_dic)

    item['other_parameter'] = pro_all_parameter_dic
    yield item