def get(self):
    """Stamp today's date on the shared ``DefaultDate`` record and start a crawl.

    The ``DefaultDate`` entity stored under key name ``'date'`` is created when
    it does not exist yet, its ``date`` is set to the current local time
    formatted as ``YYYYMMDD``, and it is saved.  ``self.jokeset`` is reset to
    an empty set before crawling.

    When the request carries a non-empty ``start`` parameter, the "year"
    listing is passed to ``self.searchRSS`` together with ``int(start)``;
    otherwise the "day" listing is passed on its own.
    """
    record = DefaultDate.get_by_key_name('date')
    if not record:
        record = DefaultDate(key_name='date')
    record.date = datetime.datetime.now().strftime("%Y%m%d")
    record.put()

    # Fresh set of joke ids for this request (skin0 consults it).
    self.jokeset = set()

    start = self.request.get('start')
    if not start:
        self.searchRSS('http://www.haha.mx/good/day/')
        return
    self.searchRSS('http://www.haha.mx/good/year/', int(start))
def get(self,date=None,page=1,limit=20):
    """Render one page of the jokes stored for ``date``.

    Args:
        date: Day to show as a ``YYYYMMDD`` string.  When ``None`` the date of
            the ``DefaultDate`` record with key name ``'date'`` is used and no
            "next day" link is produced.
        page: 1-based page number; converted with ``int()`` when truthy.
        limit: Jokes per page; converted with ``int()`` when truthy.

    Returns:
        The result of ``self.flashhtml`` when rendered HTML is already cached,
        otherwise ``None`` (the page is rendered, cached and flushed, or the
        request is redirected to page 1 of the previous day when no cached
        page of jokes exists for this date/page/limit).

    Two caches are involved: ``memcache_get``/``memcache_set`` hold rendered
    HTML under ``date...`` keys, and ``memcache`` holds the paged query
    results under ``query:date...`` keys.
    """
    one_day = datetime.timedelta(hours=24)
    stamp = '%Y%m%d'

    def cache_key(prefix, page_no):
        # ``date`` and ``limit`` are read at call time, so keys built after
        # the int() conversions below use the converted values.
        return prefix + date + 'page' + str(page_no) + 'limit' + str(limit)

    use_default = date is None
    if use_default:
        date = DefaultDate.get_by_key_name('date').date

    # Fast path: rendered HTML is already cached (key uses the raw arguments).
    cached_html = memcache_get(cache_key('date', page))
    if cached_html:
        return self.flashhtml(cached_html)

    current = datetime.datetime.strptime(date, stamp)
    perdate = (current - one_day).strftime(stamp)
    # The default (latest) date gets no link to a following day.
    afterdate = '' if use_default else (current + one_day).strftime(stamp)

    if page:
        page = int(page)
    if limit:
        limit = int(limit)

    hahamap = memcache.get(cache_key('query:date', page))
    if not hahamap:
        # Cache miss: load every joke of the day and cache it page by page.
        jokes = [joke for joke in
                 NewJoke.all().filter('date =', date).order('-updateTime')]
        total = int(len(jokes) / limit)
        last = total - 1
        for index in range(total):
            # The final page also takes the remainder that does not fill a page.
            stop = None if index == last else (index + 1) * limit
            memcache.set(cache_key('query:date', index + 1),
                         {'hahalist': jokes[index * limit:stop],
                          'total': len(jokes)},
                         3600 * 72)
        hahamap = memcache.get(cache_key('query:date', page))
    if not hahamap:
        self.redirect('/%s/1/%s' % (perdate, limit))
        return

    page_count = int(hahamap.get('total', 0) / limit)
    nowpage = None
    pagelist = []
    for index in range(page_count):
        link = '/%s/%s/%s' % (date, index + 1, limit)
        if index == page - 1:
            nowpage = link
        pagelist.append({'page': link, 'pagenum': index + 1})

    if perdate == date:
        perdate = ''

    context = {
        'hahalist': hahamap.get('hahalist', []),
        'pagelist': pagelist,
        'nowpage': nowpage,
        'pagenum': page,
        'limit': limit,
        'nowdate': date,
        'perdate': perdate,
        'afterdate': afterdate,
    }
    html = self.obj2str('templates/jokeindex.html', context)
    # Third argument grows with the page number (presumably a lifetime in
    # seconds -- confirm against memcache_set).
    memcache_set(cache_key('date', page), html, 3600 * 5 * page)
    self.flashhtml(html)
def skin(self,html):
    """Parse a crawled page with ``JokePage`` and store jokes not saved yet.

    Args:
        html: Page markup; it is passed through ``str()`` before being fed to
            the ``JokePage`` parser.

    Each entry of the parser's ``l`` list is read as a mapping with the keys
    ``jid``, ``jokecontent`` and (optionally) ``img``.  A ``NewJoke`` keyed
    ``'j' + jid`` is created only when none exists; it gets the date of the
    ``DefaultDate`` record, the stripped UTF-8-decoded text, and type 2 with
    the big-image URL when an image is present, type 3 otherwise.

    A failure on one entry is logged and does not stop the remaining entries.
    """
    defaultDate = DefaultDate.get_by_key_name('date')
    jokeskin = JokePage()
    jokeskin.clearHtml()
    jokeskin.feed(str(html))
    for j in jokeskin.l:
        try:
            key_name = 'j' + j.get('jid')
            # NOTE(review): the original indentation was lost; this assumes
            # only jokes that are not stored yet are populated and saved --
            # confirm against the original file.
            if NewJoke.get_by_key_name(key_name):
                continue
            joke = NewJoke(key_name=key_name)
            joke.date = defaultDate.date
            joke.joke = j.get('jokecontent', '').strip().decode('utf-8')
            img = j.get('img', '')
            if img:
                # Thumbnails live under /small/; store the full-size variant.
                joke.img = img.replace('/small/', '/big/')
                joke.type = 2
            else:
                joke.type = 3
            joke.put()
        except Exception as e:
            # Lazy formatting keeps message building inside the logging
            # machinery (an eager '111:'+str(e) could itself raise here and
            # abort the loop); logging.exception also records the traceback.
            logging.exception('111:%s', e)
def skin0(self,html):
    """Extract jokes and thumbnails from listing markup and store new jokes.

    Args:
        html: Page markup to scan.

    Returns:
        A tuple ``(haha, imgmap)``: ``haha`` is the list of ``(id, fragment)``
        pairs matched from ``<p class="block joke-item" id="joke-N">``
        elements, and ``imgmap`` maps a joke id to its image URL (with
        ``/small/`` replaced by ``/big/``) taken from
        ``<a ... id="thumbnail-N"><img src="...">``.

    Ids already present in ``self.jokeset`` are skipped; every other id is
    added to it.  A ``NewJoke`` keyed ``'j' + id`` is created only when none
    exists: its text is the first ``<p ...>...</p>`` found in the unescaped
    fragment, its date comes from the ``DefaultDate`` record, and its type is
    2 when an image is known for the id, 3 otherwise.  A fragment without an
    inner paragraph is logged and skipped instead of aborting the whole page.
    """
    defaultDate = DefaultDate.get_by_key_name('date')
    haha = re.findall(
        r'(?i)<p class="block joke-item" id="joke-(\d+)"[^>]*>(.*?)</p>', html)
    hahaimg = re.findall(
        r'(?i)<a [^>]*id="thumbnail-(\d+)"[^>]*>[^<]*?<img src="(.*?)"[^>]*>',
        html)

    imgmap = {}
    for i, src in hahaimg:
        imgmap[i] = src.replace('/small/', '/big/')

    for idn, txt in haha:
        if idn in self.jokeset:
            continue
        self.jokeset.add(idn)

        key_name = 'j' + idn
        # NOTE(review): the original indentation was lost; this assumes only
        # jokes that are not stored yet are populated and saved -- confirm.
        if NewJoke.get_by_key_name(key_name):
            continue

        paragraphs = re.findall(r'(?i)<p [^>]*>(.*?)</p>',
                                html_parser.unescape(txt))
        if not paragraphs:
            # Previously an IndexError here stopped processing of the page.
            logging.warning('skin0: no joke text found for joke %s', idn)
            continue

        joke = NewJoke(key_name=key_name)
        joke.date = defaultDate.date
        joke.joke = paragraphs[0]
        if idn in imgmap:
            joke.img = imgmap[idn]
            joke.type = 2
        else:
            joke.type = 3
        joke.put()
    return haha, imgmap