示例#1
0
 def operate_cxjg_page(self,list_string,similar_partno,hc):
     """Split the part links found on one result page into exact and similar hits.

     Every (url, partno) pair that ``self.p_url_partno`` finds in ``hc`` is
     compared with ``self.mmp``:

     * equal     -> the linked page is fetched with ``gethtml``, passed
                    through ``filter_html`` and appended to ``list_string``;
                    the link is also appended to ``self.list_url_exact``.
     * not equal -> ``[url, partno]`` is appended to ``similar_partno``.

     In both cases the stored url is the result of
     ``get_url_whole(self.url_search, url)``.

     Parameters:
         list_string:    list that receives the filtered page text of each
                         exact match, i.e. [hc0, hc1, ...]
         similar_partno: list that receives [url, partno] for each link
                         whose part number differs from ``self.mmp``
         hc:             text of the page currently being processed

     Returns None; the results are delivered through the two list arguments
     and ``self.list_url_exact``.
     """
     for url, partno in self.p_url_partno.findall(hc):
         url_whole = get_url_whole(self.url_search, url)

         if partno != self.mmp:
             # Part number differs from the one searched for: similar only.
             similar_partno.append([url_whole, partno])
             continue

         # Single-argument print call: same output under Python 2 and 3.
         if debug: print('%s %s' % (url, partno))

         # The url is recorded before the fetch, so it stays in
         # list_url_exact even when the page below turns out to be a timeout.
         self.list_url_exact.append(url_whole)

         page = gethtml(url_whole)
         if page is None:
             # gethtml may hand back None; filter_html is always given a string.
             page = ''
         page_text = filter_html(page)

         # Pages whose filtered text is the marker 'timeout' are not kept.
         if page_text != 'timeout':
             # Append to the caller-supplied list (the original ignored this
             # argument and always wrote to self.list_string).
             list_string.append(page_text)
示例#2
0
    def get_list_string_mmp(self):
        """Find the page(s) matching ``self.mmp`` and return their text.

        When ``self.boolean_iu`` is true the page at ``self.info_url`` is
        fetched and checked directly; otherwise ``self.url_search`` is fetched
        and the kind of page is recognised with the ``self.p_*`` patterns.

        Besides ``self.list_string`` the method also sets ``self.string`` and
        ``self.hc_cxym`` (filtered text of the first page fetched), resets
        ``self.similar_partno`` and may append to ``self.list_url_exact``.

        Returns ``self.list_string``, whose first element tags the outcome:
            ['timeout', 'timeout']                    the page timed out
            ['exact', page, ...]                      exact match page(s)
            ['similar', page, self.similar_partno]    only similar part numbers
            ['no_result', page]                       "no result" page
            [None, page]                              unrecognised page
        """
        fun = 'function get_list_string_mmp of %s' % self.name
        self.list_string        = []
        self.similar_partno     = []

        def fetch(url):
            # gethtml may return None; filter_html is always given a string.
            raw = gethtml(url)
            return filter_html('' if raw is None else raw)

        def tag_matches():
            # Tag the collected pages as 'exact', or fall back to the similar
            # part numbers; returns the summary line for the debug message.
            if self.list_string:
                counts = (len(self.list_string), 0)
                self.list_string.insert(0,'exact')
            else:
                counts = (0, len(self.similar_partno))
                self.list_string = ['similar',self.string,self.similar_partno]
            return u'\n 得到 %s 个精确匹配页面 %s 个相似型号页面 ' % counts

        tishi = u'in %s ' % fun
        if self.boolean_iu:
            # The detail-page url itself is used as the search condition.
            tishi += u'\n查询页面: %s ' % self.info_url
            self.string = fetch(self.info_url)
            self.hc_cxym = self.string

            if self.string == 'timeout':
                tishi += u'\n 页面超时 '
                self.list_string = ['timeout','timeout']
            elif self.p_xxxx.findall(self.string):
                tishi += u'\n 页面正常进入详细信息页面 '
                self.list_string = ['exact',self.string]
                # NOTE(review): the page fetched here was self.info_url but
                # self.url_search is what gets recorded - confirm both hold
                # the same url in this mode.
                self.list_url_exact.append(self.url_search)
            else:
                tishi += u'\n 页面异常 原本应该正常进入详细信息页面 '
                self.list_string = [None,self.string]

            # Return straight away; the search-page handling is skipped.
            if debug: print(tishi)
            return self.list_string

        self.string = fetch(self.url_search)
        self.hc_cxym = self.string

        tishi += u'\n查询页面: %s ' % self.url_search
        if self.string == 'timeout':
            tishi += u'\n 页面超时'
            self.list_string = ['timeout','timeout']
        elif self.p_cxjg.findall(self.string):
            # Result-table page: rows are either exact or similar matches.
            tishi += u'\n 进入查询结果页面 '
            self.operate_cxjg_page(self.list_string,self.similar_partno,self.string)
            tishi += tag_matches()
        elif self.p_xxxx.findall(self.string):
            # Detail page (per the original comment: a single search result
            # jumps straight to it).
            tishi += u'\n 进入详细信息页面 '
            self.list_string = ['exact',self.string]
            self.list_url_exact.append(self.url_search)
        elif self.p_wjg.findall(self.string):
            # "No result" page.
            tishi += u'\n 进入无结果页面 '
            self.list_string = ['no_result',self.string]
        elif self.p_jgfl.findall(self.string):
            # Result-category page: every category link is fetched in turn.
            tishi += u'\n 进入结果分类页面 '
            for jgfl_url,jgfl_count in self.p_jgfl_url_count.findall(self.string):
                url_whole = get_url_whole(self.url_search,jgfl_url)
                hc = fetch(url_whole)
                if int(jgfl_count) == 1:
                    # A count of 1 is treated as a detail page; it is kept
                    # only when it contains '>' + self.mmp + '<'.
                    if str('>%s<' % self.mmp) in hc:
                        self.list_url_exact.append(url_whole)
                        self.list_string.append(hc)
                else:
                    # Any other count is treated as a result-table page.
                    self.operate_cxjg_page(self.list_string,self.similar_partno,hc)
            tishi += tag_matches()
        else:
            tishi += u'\n 发生异常 进入未知页面 '
            self.list_string = [None,self.string]

        # Single-argument print call: same output under Python 2 and 3.
        if debug: print(tishi)
        return self.list_string