def UpdateLastDelivered(self, title, num):
    """Record ``num`` as the last delivered chapter of ``title`` for this user.

    Updates the existing ``LastDelivered`` entity for (user name, title) if
    the query finds one, otherwise creates a new entity, then saves it with
    ``put()``.  As a side effect ``self.last_delivered_volume`` is set to the
    label that is also stored in the entity's ``record`` field.

    Args:
        title: book name, stored and queried as ``bookname``.
        num: chapter number to store; formatted with ``%d`` into the label.
    """
    userName = self.UserName()
    # NOTE(review): the middle of this query chain was unreadable in the
    # reviewed copy; it is reconstructed from the visible 'username = ' /
    # 'bookname = ' filters and the constructor call below -- confirm.
    dbItem = (LastDelivered.all()
              .filter('username = ', userName)
              .filter('bookname = ', title)
              .get())

    self.last_delivered_volume = u' 第%d话' % num

    # UTC shifted by TIMEZONE hours; computed once and used by both branches.
    now = datetime.datetime.utcnow() + datetime.timedelta(hours=TIMEZONE)

    if dbItem:
        dbItem.num = num
        dbItem.record = self.last_delivered_volume
        dbItem.datetime = now
    else:
        dbItem = LastDelivered(username=userName,
                               bookname=title,
                               num=num,
                               record=self.last_delivered_volume,
                               datetime=now)
    dbItem.put()
def ParseFeedUrls(self):
    """Collect image URLs for the next undelivered chapters of every feed.

    For each ``(title, url, ...)`` entry in ``self.feeds`` the method:

    1. reads the stored ``LastDelivered.num`` for (user name, title), using 0
       when there is no entity yet;
    2. takes the comic id from the path segment that follows ``id`` in the
       lower-cased feed URL;
    3. queues the images of up to five chapters starting at index ``num`` of
       ``self.getChapterList(comic_id)`` and calls ``UpdateLastDelivered``
       after each queued chapter.

    When delivery starts at chapter index 0 only that one chapter is queued.

    Returns:
        A list of ``(title, img, img, None)`` tuples, one per image.
    """
    maxChapters = 5  # upper bound of chapters queued per feed in one run
    urls = []  # return value
    userName = self.UserName()

    for item in self.feeds:
        title, url = item[0], item[1]

        # NOTE(review): part of this query and the logging call below were
        # unreadable in the reviewed copy; reconstructed as a username +
        # bookname lookup logged through self.log.info -- confirm.
        lastCount = (LastDelivered.all()
                     .filter('username = ', userName)
                     .filter('bookname = ', title)
                     .get())
        if not lastCount:
            self.log.info('These is no log in db LastDelivered for name: %s, set to 0' % title)
            oldNum = 0
        else:
            oldNum = lastCount.num

        # The comic id is the path segment right after "id".
        comic_id = ""
        urlpaths = urlparse.urlsplit(url.lower()).path.split("/")
        if u"id" in urlpaths:
            idPos = urlpaths.index(u"id") + 1
            if idPos < len(urlpaths):
                comic_id = urlpaths[idPos]

        # isdigit() is False for the empty string, so this also covers the
        # "no id found" case.
        if not comic_id.isdigit():
            self.log.warn('can not get comic id: %s' % url)
            # Skip only this feed; one malformed URL must not prevent the
            # remaining feeds from being processed.
            continue

        chapterList = self.getChapterList(comic_id)
        for deliverCount in range(maxChapters):
            newNum = oldNum + deliverCount
            if newNum >= len(chapterList):
                # Later indexes are even larger, nothing more to deliver.
                break

            imgList = self.getImgList(chapterList[newNum], comic_id)
            urls.extend((title, img, img, None) for img in imgList)
            # Stored number is one past the index just queued.
            self.UpdateLastDelivered(title, newNum + 1)

            if newNum == 0:
                # Starting from the very first chapter: queue just this one.
                break

    return urls
def GetNewComic(self):
    """Find, for every feed, the first listed volume newer than the last one delivered.

    For each ``(title, url, ...)`` entry in ``self.feeds`` the index page at
    ``url`` is fetched and decoded, and the ``<a target="_blank">`` links
    inside ``<table width="688">`` elements are scanned in document order.
    A link counts as a volume when its text, split on single spaces, has
    more than two parts, the second part is all digits, and it has an
    ``href``.  The first such volume whose number is greater than the stored
    ``LastDelivered.num`` (0 when nothing is stored) is queued, and scanning
    of that feed stops.

    Feeds whose index page does not return HTTP 200 are logged and skipped.

    Returns:
        A list of ``(title, num, href)`` tuples, at most one per feed;
        an empty list when ``self.feeds`` is empty.
    """
    urls = []
    if not self.feeds:
        return []

    userName = self.UserName()
    decoder = AutoDecoder(isfeed=False)

    for item in self.feeds:
        title, url = item[0], item[1]

        # NOTE(review): part of this query and the logging call below were
        # unreadable in the reviewed copy; reconstructed as a username +
        # bookname lookup logged through self.log.info -- confirm.
        lastCount = (LastDelivered.all()
                     .filter('username = ', userName)
                     .filter('bookname = ', title)
                     .get())
        if not lastCount:
            self.log.info('These is no log in db LastDelivered for name: %s, set to 0' % title)
            oldNum = 0
        else:
            oldNum = lastCount.num

        opener = URLOpener(self.host, timeout=60)
        result = opener.open(url)
        if result.status_code != 200:
            self.log.warn('fetch index page for %s failed[%s] : %s' % (title, URLOpener.CodeMap(result.status_code), url))
            continue

        content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding,
                                         opener.realurl, result.headers)
        soup = BeautifulSoup(content, 'lxml')

        # Lazily walk every candidate link of every matching table so a
        # single break below ends the scan for this comic.
        volumes = (volume
                   for comicTable in soup.find_all('table', {'width': '688'})
                   for volume in comicTable.find_all('a', {'target': '_blank'}))
        for volume in volumes:
            texts = volume.text.split(' ')
            href = volume.get('href')
            if len(texts) > 2 and texts[1].isdigit() and href:
                num = int(texts[1])
                if num > oldNum:
                    urls.append((title, num, self.urljoin(self.host, href)))
                    # Deliver only one volume at a time (a single volume
                    # may already contain many images).
                    break

    return urls