Пример #1
0
def skin(self, html):
    """Parse joke entries out of ``html`` and store each one as a NewJoke.

    The page is fed to a ``JokePage`` parser; every item the parser collected
    in its ``l`` attribute is read through ``.get`` using the keys ``'jid'``,
    ``'jokecontent'`` and ``'img'``. An entity is looked up under the key
    name ``'j' + jid`` and created when it does not exist yet (new entities
    take their date from the ``DefaultDate`` entity stored under ``'date'``).

    Entries that carry an image are saved with ``type`` 2 and the image URL
    rewritten from ``/small/`` to ``/big/``; entries without one get
    ``type`` 3.

    A failure while handling one entry is logged and does not stop the
    remaining entries from being processed.
    """
    defaultDate = DefaultDate.get_by_key_name('date')
    jokeskin = JokePage()
    jokeskin.clearHtml()
    jokeskin.feed(str(html))

    for j in jokeskin.l:
        try:
            # A missing 'jid' makes this concatenation raise TypeError,
            # which is reported by the handler below.
            key_name = 'j' + j.get('jid')
            joke = NewJoke.get_by_key_name(key_name)
            if not joke:
                joke = NewJoke(key_name=key_name)
                joke.date = defaultDate.date
            # NOTE(review): .decode() assumes the parser yields byte strings
            # (Python 2 str) -- confirm before running on Python 3.
            joke.joke = j.get('jokecontent', '').strip().decode('utf-8')
            img = j.get('img', '')
            if img:
                joke.img = img.replace('/small/', '/big/')
                joke.type = 2
            else:
                joke.type = 3
            joke.put()
        except Exception as e:
            # Lazy %-formatting keeps a badly encoded exception message from
            # raising inside the handler; logging.exception also records the
            # traceback at ERROR level.
            logging.exception('111:%s', e)
Пример #2
0
def skin0(self, html):
    """Extract jokes and thumbnails from ``html`` with regexes and store them.

    Jokes are matched as ``<p class="block joke-item" id="joke-N">...</p>``
    and thumbnails as ``<a ... id="thumbnail-N"...><img src="...">``. For
    every joke id not yet present in ``self.jokeset`` the id is added to that
    set, the text of the first inner ``<p ...>...</p>`` element of the
    HTML-unescaped match is taken as the joke body, and a ``NewJoke`` entity
    keyed ``'j' + id`` is fetched or created and saved. Entities with a
    matching thumbnail get ``type`` 2 and the image URL rewritten from
    ``/small/`` to ``/big/``; the others get ``type`` 3.

    A joke whose unescaped text contains no inner ``<p ...>`` element is
    logged and skipped instead of aborting the whole page.

    Returns:
        A tuple ``(haha, imgmap)``: the list of ``(id, raw_text)`` pairs
        matched in ``html`` and a dict mapping thumbnail id to the big
        image URL.
    """
    import logging

    defaultDate = DefaultDate.get_by_key_name('date')
    haha = re.findall(
        r'(?i)<p class="block joke-item" id="joke-(\d+)"[^>]*>(.*?)</p>', html)
    hahaimg = re.findall(
        r'(?i)<a [^>]*id="thumbnail-(\d+)"[^>]*>[^<]*?<img src="(.*?)"[^>]*>',
        html)

    imgmap = {}
    for i, src in hahaimg:
        imgmap[i] = src.replace('/small/', '/big/')

    for idn, txt in haha:
        if idn in self.jokeset:
            continue
        self.jokeset.add(idn)

        # Parse before touching the datastore so a malformed entry costs no
        # read and cannot take the rest of the page down with it.
        body = re.search(r'(?i)<p [^>]*>(.*?)</p>', html_parser.unescape(txt))
        if body is None:
            logging.error('skin0: no inner <p> element in joke %s', idn)
            continue

        joke = NewJoke.get_by_key_name('j' + idn)
        if not joke:
            joke = NewJoke(key_name='j' + idn)
            joke.date = defaultDate.date
        joke.joke = body.group(1)
        if idn in imgmap:
            joke.img = imgmap[idn]
            joke.type = 2
        else:
            joke.type = 3
        joke.put()

    return haha, imgmap