def operate_cxjg_page(self, list_string, similar_partno, hc):
    """Split the rows of a search-result page into exact and similar matches.

    Every ``(url, partno)`` pair that ``self.p_url_partno`` finds in ``hc``
    is compared with ``self.mmp``:

    * exact match - the row's page is fetched, its URL is appended to
      ``self.list_url_exact`` and, unless the filtered page text equals
      ``'timeout'``, that text is appended to ``list_string``;
    * anything else - ``[url, partno]`` is appended to ``similar_partno``.

    Args:
        list_string: list receiving the filtered text of each exact-match
            page, i.e. ``[hc0, hc1, ...]``.
        similar_partno: list receiving ``[url, partno]`` entries for rows
            whose part number differs from ``self.mmp``.
        hc: text of the result page currently being processed.

    Both lists are modified in place; the method returns ``None``.
    """
    for url, partno in self.p_url_partno.findall(hc):
        # Both branches need the URL produced by get_url_whole for this row.
        url_info = get_url_whole(self.url_search, url)

        if partno != self.mmp:
            # Part number is only similar, not exact.
            similar_partno.append([url_info, partno])
            continue

        if debug:
            # Single-argument form: same output under the Python 2 print
            # statement, and still valid syntax on Python 3.
            print('%s %s' % (url, partno))
        self.list_url_exact.append(url_info)

        # gethtml may return None; filter_html is always handed a string.
        temp = gethtml(url_info)
        if temp is None:
            temp = ''
        hc_info = filter_html(temp)

        if hc_info != 'timeout':
            # Write to the list the caller passed in (as documented) instead
            # of reaching for self.list_string behind the caller's back.
            list_string.append(hc_info)
def get_list_string_mmp(self):
    """Fetch the search page for this part number and classify the result.

    The detail page does not need to be opened separately; everything is
    taken from the page the query lands on.

    The outcome is stored in ``self.list_string`` and returned.  Its first
    element tags what was found:

    * ``['timeout', 'timeout']`` - the filtered page text was ``'timeout'``.
    * ``['exact', page0, page1, ...]`` - one or more exact-match pages.
    * ``['similar', search_page, similar_partno]`` - no exact match;
      ``similar_partno`` is ``self.similar_partno``.
    * ``['no_result', search_page]`` - ``self.p_wjg`` matched the page.
    * ``[None, page]`` - the page matched none of the expected patterns.

    Side effects: ``self.list_string`` and ``self.similar_partno`` are reset,
    ``self.string`` and ``self.hc_cxym`` are set to the filtered text of the
    queried page, and ``self.list_url_exact`` may be appended to.  When the
    module-level ``debug`` flag is true the accumulated trace is printed.
    """

    def fetch_filtered(url):
        # gethtml may return None; filter_html is always handed a string.
        page = gethtml(url)
        if page is None:
            page = ''
        return filter_html(page)

    def tag_matches():
        # Prefix self.list_string with its tag and return
        # (exact_count, similar_count) for the trace message.
        if self.list_string:
            exact_total = len(self.list_string)
            self.list_string.insert(0, 'exact')
            return exact_total, 0
        similar_total = len(self.similar_partno)
        self.list_string = ['similar', self.string, self.similar_partno]
        return 0, similar_total

    def finish(message):
        # Single-argument print: same output under the Python 2 print
        # statement, and still valid syntax on Python 3.
        if debug:
            print(message)
        return self.list_string

    fun = 'function get_list_string_mmp of %s' % self.name
    self.list_string = []
    self.similar_partno = []
    tishi = u'in %s ' % fun

    if self.boolean_iu:
        # The detail-page URL itself is used as the query.
        tishi += u'\n查询页面: %s ' % self.info_url
        self.string = fetch_filtered(self.info_url)
        self.hc_cxym = self.string
        if self.string == 'timeout':
            tishi += u'\n 页面超时 '
            self.list_string = ['timeout', 'timeout']
        elif self.p_xxxx.findall(self.string):
            tishi += u'\n 页面正常进入详细信息页面 '
            self.list_string = ['exact', self.string]
            # NOTE(review): the page fetched was self.info_url, yet the URL
            # recorded is self.url_search - confirm this is intended.
            self.list_url_exact.append(self.url_search)
        else:
            tishi += u'\n 页面异常 原本应该正常进入详细信息页面 '
            self.list_string = [None, self.string]
        # Return straight away; the search-page handling below is skipped.
        return finish(tishi)

    self.string = fetch_filtered(self.url_search)
    self.hc_cxym = self.string
    tishi += u'\n查询页面: %s ' % self.url_search

    # The order of these checks is significant: the first pattern that
    # matches decides how the page is interpreted.
    if self.string == 'timeout':
        tishi += u'\n 页面超时'
        self.list_string = ['timeout', 'timeout']
    elif self.p_cxjg.findall(self.string):
        # Search-result table: rows are either exact or merely similar.
        tishi += u'\n 进入查询结果页面 '
        self.operate_cxjg_page(self.list_string, self.similar_partno, self.string)
        count_exact, count_similar = tag_matches()
        tishi += u'\n 得到 %s 个精确匹配页面 %s 个相似型号页面 ' % (count_exact, count_similar)
    elif self.p_xxxx.findall(self.string):
        # Only one result: the query was redirected to the detail page.
        tishi += u'\n 进入详细信息页面 '
        self.list_string = ['exact', self.string]
        self.list_url_exact.append(self.url_search)
    elif self.p_wjg.findall(self.string):
        # "No results" notice page.
        tishi += u'\n 进入无结果页面 '
        self.list_string = ['no_result', self.string]
    elif self.p_jgfl.findall(self.string):
        # Result-category page: every listed category has to be visited.
        tishi += u'\n 进入结果分类页面 '
        for jgfl_url, jgfl_count in self.p_jgfl_url_count.findall(self.string):
            url_whole = get_url_whole(self.url_search, jgfl_url)
            hc = fetch_filtered(url_whole)
            if int(jgfl_count) == 1:
                # Count of one: the link is treated as a detail page and is
                # kept only if it contains the exact part number.
                if str('>%s<' % self.mmp) in hc:
                    self.list_url_exact.append(url_whole)
                    self.list_string.append(hc)
            else:
                # Otherwise it is a result table with exact/similar rows.
                self.operate_cxjg_page(self.list_string, self.similar_partno, hc)
        count_exact, count_similar = tag_matches()
        tishi += u'\n 得到 %s 个精确匹配页面 %s 个相似型号页面 ' % (count_exact, count_similar)
    else:
        tishi += u'\n 发生异常 进入未知页面 '
        self.list_string = [None, self.string]

    return finish(tishi)