def get_list_info_one_page(url_one_page):
    """Download one search-result page and extract the parts listed on it.

    Args:
        url_one_page: URL of the result page to fetch.

    Returns:
        A list in one of three shapes:

        * ``['timeout']`` -- the download helper returned ``'timeout'``.
        * ``[]`` -- the page matched the "no results" pattern.
        * ``[dict0, ..., dictN, page_html]`` -- one dict per result block
          returned by ``get_div_10`` (an empty dict stands in for a block
          that could not be parsed), followed by the raw page source as
          the last element.
    """
    fun = 'function get_list_info_one_page'
    hc = get_html_urllib2(url_one_page, 1)

    if hc == 'timeout':
        # The documented contract (and the sibling cart-price lookup) report
        # a timeout as ['timeout']; the old code had no branch for this case
        # and never produced that value.
        return ['timeout']

    list_info = []
    if p_noresults.findall(hc):
        # The page says there are no results.
        return list_info

    # get_div_10 yields the per-result blocks (the original note calls them
    # "the 10 div layers").
    for one_div_10 in get_div_10(hc):
        one_info = {}
        try:
            res_mfr = p_mfr.findall(one_div_10)
            res_url_partno = p_url_partno.findall(one_div_10)
            one_info[keys_future[0]] = del_rnt_jkh(res_mfr[0])
            partno_url = res_url_partno[0][0]
            one_info[keys_future[2]] = url_future_qz + partno_url
            partno_name = res_url_partno[0][1]
            one_info[keys_future[3]] = partno_name
            list_info.append(one_info)
        except Exception as e:
            # Best effort: a block that fails to parse is logged and kept as
            # an empty placeholder so the remaining blocks are still returned.
            # A single pre-formatted string keeps the output identical to the
            # old comma-separated print statement.
            print('%s line 177 error url is:\n%s \nerror e is: %s'
                  % (fun, url_one_page, e))
            list_info.append({})

    # The raw page source travels with the results as the final element.
    list_info.append(hc)
    return list_info
def get_list_cartprice(self, partno, qty, **dict_args):
    """Fetch the shopping-cart price information for a part number.

    Args:
        partno: Part number to look up. Also compared (case-insensitively)
            against the manufacturer part numbers on the multi-match page.
        qty: Quantity passed through to ``get_carthtml``.
        **dict_args: Optional ``mouser_partno``; when it is truthy the cart
            page is requested by that value instead of ``partno``.

    Returns:
        A list whose first element is a status flag, optionally followed by
        the values produced by ``get_one_cartprice``:

        * ``['timeout']`` -- the cart page or the multi-match page timed out.
        * ``['right', ...]`` -- multi-match (zero or more price entries),
          no-match (flag only) or single-match (exactly one price entry).
        * ``['error']`` -- none of the three match patterns was found.
    """
    requested_mouser_partno = dict_args.get('mouser_partno', '')
    if requested_mouser_partno:
        html = self.get_carthtml('', qty, mouser_partno=requested_mouser_partno)
    else:
        html = self.get_carthtml(partno, qty)

    # (flag, description) pairs; only the flag ends up in the result.
    list_flag = [
        ('timeout', '请求异常'),
        ('right', '多匹配/单匹配/不匹配'),
        ('error', '发生了未知状况,需要写入日志文件'),
    ]
    flag_timeout = list_flag[0][0]
    flag_right = list_flag[1][0]
    flag_error = list_flag[2][0]

    if html == 'timeout':
        return [flag_timeout]

    hits_multi = p_cart_multi.findall(html)
    hits_none = p_cart_no.findall(html)
    hits_single = p_cart_one.findall(html)

    # Debugging aid: a quick way to see which kind of match this part
    # number produced (the print itself is disabled).
    match_overview = [
        (u'该型号购物车对应 多匹配', hits_multi),
        (u'该型号购物车对应 不匹配', hits_none),
        (u'该型号购物车对应 单匹配', hits_single),
    ]
    for label, hits in match_overview:
        if hits:
            # print 174, label
            break

    if hits_multi:
        # Multi-match: read the exact Mouser part numbers from the
        # disambiguation page, then price each one that matches.
        multi_url = hits_multi[0].replace('..', 'http://cn.mouser.com')
        multi_html = get_html_urllib2(multi_url, 1)
        if multi_html == 'timeout':
            # Timeout or error while fetching the selection page: the whole
            # result collapses to ['timeout'].
            return [flag_timeout]

        prices = [flag_right]
        for mouser_name, mfr_name in p_partno_mouser_mfr.findall(multi_html):
            if mfr_name.upper() != partno.upper():
                continue
            # A fresh GetCartPrice is created for every candidate so that
            # cart entries do not pile up across iterations.
            fresh_cart = GetCartPrice()
            candidate_html = fresh_cart.get_carthtml(
                '', qty, mouser_partno=mouser_name)
            if candidate_html == 'timeout':
                # Timed-out candidates are skipped on purpose.
                continue
            prices.append(fresh_cart.get_one_cartprice(candidate_html))
        return prices

    if hits_none:
        return [flag_right]

    if hits_single:
        return [flag_right, self.get_one_cartprice(html)]

    # print 181, list_cartprice
    return [flag_error]
def get_one_info_more(url, dict_info):
    """Enrich a part record with data scraped from its detail page.

    Per the original note, the site was redesigned on 2011-08-25 and the
    full price list was no longer available elsewhere, so the detail page
    is scraped instead.

    Args:
        url: Detail-page URL to download.
        dict_info: Record collected so far; the original note describes it
            as ``{'partno_url': ..., 'partno_name': ..., 'mfr': ...}``.
            It is updated in place when a price table is found.

    Returns:
        * ``dict_info`` untouched when the download returns ``''`` or
          ``'timeout'``, or when the info block contains no price table.
        * ``{}`` when the page has no info block at all.
        * otherwise the result of ``change_key`` applied to the enriched
          ``dict_info``.
    """
    fun = 'fun get_one_info_more'
    hc = get_html_urllib2(url, 1)
    record = dict_info  # alias: the caller's dict is modified directly

    if hc == '' or hc == 'timeout':
        return record

    info_blocks = p_div_info.findall(hc)  # outer block holding the details
    if not info_blocks:
        return {}
    info_block = info_blocks[0]

    price_tables = p_table_price.findall(info_block)  # price table inside it
    if not price_tables:
        return record

    # Price table found: encode every row as "<qty>:<price>", joined by "|||".
    price_rows = p_price_num_save.findall(price_tables[0])
    record[keys_mouser_tt[10][1]] = '|||'.join(
        [del_jjh(row[0]) + ':' + row[1] for row in price_rows])

    # Stock on this page can differ slightly from the search-result page;
    # this page is treated as authoritative, so Qty In Stock is re-read here
    # (original note dated 2010-09-02).
    qty_in_stock = p_qis.findall(info_block)
    if qty_in_stock:
        record[keys_mouser_tt[5][1]] = qty_in_stock[0].replace(',', '')

    reserve_stock = p_rs.findall(info_block)
    if reserve_stock:
        record[keys_future[11]] = reserve_stock[0].replace(',', '')

    description = p_desc.findall(info_block)
    if description:
        record[keys_future[1]] = description[0].strip()

    packaging = p_Packaging.findall(info_block)
    if packaging:
        record[keys_future[4]] = packaging[0].strip()

    # print 279, record
    record[keys_future[12]] = hc  # keep the raw page source with the record
    return change_key(record)