Python EPUBOutput примеры использования

Язык программирования: Python

Пространство имен/Пакет: lib.makeoeb

Класс/Тип: EPUBOutput

Примеров на hotexamples.com: 5

Python EPUBOutput - 5 примеров найдено. Это лучшие примеры Python кода для lib.makeoeb.EPUBOutput, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

EPUBOutput(2)

convert(2)

Пример #1

Показать файл

Файл: handlemail.py Проект: Iam42/KindleEar

 def receive(self, message):
     """Process one inbound mail and forward its content to the user's Kindle.

     The local part of the first recipient address picks the account and the
     action: ``name__action`` selects account ``name`` (``admin`` when the
     name part is empty); an address without ``__`` uses a fixed default
     account name. The sender has to match the account's whitelist (``*``,
     the full address, or ``@host``), otherwise the mail is rejected as spam.

     The mail is then handled in one of three ways:

     * recipient ``trigger``: delegate to ``self.TrigDeliver``;
     * exactly one link and less text than ``WORDCNT_THRESHOLD_FOR_APMAIL``:
       queue a ``/url2book`` task for that link;
     * anything else: send the body itself, converted to EPUB/MOBI when an
       attachment has a type known to ``MimeFromFilename`` or the account's
       book type is ``epub``, otherwise as a plain HTML file.

     Args:
         message: the inbound mail; ``to``, ``sender``, ``bodies()`` and,
             when present, ``subject`` and ``attachments`` are read.

     Returns:
         The result of ``self.TrigDeliver`` for trigger mails, otherwise
         ``None``. A short status text is written to ``self.response`` on
         most paths.
     """
     # If there are several recipients, only the first one is interpreted.
     to = parseaddr(message.to)[1]
     to = to.split('@')[0] if to and '@' in to else 'xxx'
     if '__' in to:
         listto = to.split('__')
         username = listto[0] if listto[0] else 'admin'
         to = listto[1]
     else:
         username = '******'
         
     user = KeUser.all().filter('name = ', username).get()
     if not user:
         username = '******'
         user = KeUser.all().filter('name = ', username).get()
     
     if not user or not user.kindle_email:
         self.response.out.write('No account or no email configured!')
         return
     
     sender = parseaddr(message.sender)[1]
     mailhost = sender.split('@')[1] if sender and '@' in sender else None
     if (not sender or not mailhost) or \
         (not user.whitelist.filter('mail = ', '*').get()
         and not user.whitelist.filter('mail = ', sender.lower()).get()
         and not user.whitelist.filter('mail = ', '@' + mailhost.lower()).get()):
         self.response.out.write("Spam mail!")
         default_log.warn('Spam mail from : %s' % sender)
         return
     
     if hasattr(message, 'subject'):
         subject = decode_subject(message.subject)
     else:
         subject = u"NoSubject"
     
     # A mail sent to "trigger" starts a "deliver now" run.
     if to.lower() == 'trigger':
         return self.TrigDeliver(subject, username)
         
     # R is the regular expression that decides whether a string is a link.
     R = r"""^(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>???“”‘’]))"""
     txt_bodies = message.bodies('text/plain')
     html_bodies = message.bodies('text/html')
     try:
         allBodies = [body.decode() for ctype, body in html_bodies]
     except Exception:
         default_log.warn('Decode html bodies of mail failed.')
         allBodies = []
     if not allBodies: # this mail is plain text only
         default_log.info('no html body, use text body.')
         try:
             allBodies = [body.decode() for ctype, body in txt_bodies]
         except Exception:
             default_log.warn('Decode text bodies of mail failed.')
             allBodies = []
         bodies = u''.join(allBodies)
         if not bodies:
             return
             
         M = re.match(R, bodies) # does the text start with a link?
         if M is not None:
             link = M.group()
             if len(bodies[len(link):]) < WORDCNT_THRESHOLD_FOR_APMAIL:
                 bodies = '<a href="%s">%s</a>' % (link,link)
         bodies = u"""<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
                 <title>%s</title></head><body>%s</body></html>""" %(subject,bodies)
         allBodies = [bodies.encode('utf-8')]
     
     soup = BeautifulSoup(allBodies[0], 'lxml')
     h = soup.find('head')
     if not h:
         h = soup.new_tag('head')
         soup.html.insert(0, h)
     t = soup.find('title')
     if not t:
         t = soup.new_tag('title')
         t.string = subject
         soup.html.head.insert(0, t)
     
     # Merge the remaining HTML parts into the first document.
     for o in allBodies[1:]:
         so = BeautifulSoup(o, 'lxml')
         b = so.find('body')
         if not b:
             continue
         # append() moves the node out of b.contents, so iterate a snapshot;
         # looping over the live list would skip every other child.
         for c in list(b.contents):
             soup.body.append(c)
     
     # Exactly one link plus very little text means the linked page should
     # be fetched; otherwise the mail body itself is forwarded.
     links = list(soup.body.find_all('a',attrs={'href':True}))
     link = links[0]['href'] if links else ''
     text = ' '.join([s for s in soup.body.stripped_strings])
     M = re.match(R, text)
     if M is not None:
         link = M.group()
         links = [link]
         text = text.replace(link, '')
     elif len(links) == 1:
         text = text.replace(link, '') # drop the link's own text, if shown
         
     if len(links) == 1 and len(text) < WORDCNT_THRESHOLD_FOR_APMAIL:
         # Decide between downloading a file and converting a web page.
         isbook = (to.lower() in ('book', 'file', 'download')
                   or link.lower().endswith(('.mobi', '.epub', '.docx',
                                             '.pdf', '.txt', '.doc', '.rtf')))
         
         param = {'u':username,
                  'url':link,
                  'type':'Download' if isbook else user.book_type,
                  'to':user.kindle_email,
                  'tz':user.timezone,
                  'subject':subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                  'lng':user.ownfeeds.language,
                  'keepimage':'1' if user.ownfeeds.keep_image else '0'
                 }
         taskqueue.add(url='/url2book',queue_name="deliverqueue1",method='GET',
             params=param)
     else: # forward the mail body directly
         # First check whether the mail carries any image.
         from lib.makeoeb import MimeFromFilename
         hasimage = False
         if hasattr(message, 'attachments'):
             for f,c in message.attachments:
                 if MimeFromFilename(f):
                     hasimage = True
                     break
         
         # With images a MOBI or EPUB has to be generated. Duokan mailboxes
         # do not accept HTML pushes either, so convert to EPUB first.
         if hasimage or (user.book_type == "epub"):
             from main import local_time
             from lib.makeoeb import (getOpts, CreateOeb, setMetaData,
                                 ServerContainer, byteStringIO, 
                                 EPUBOutput, MOBIOutput)
             
             # Mimic Amazon's conversion server: strip the CSS.
             if DELETE_CSS_FOR_APPSPOTMAIL:
                 tag = soup.find('style', attrs={'type':'text/css'})
                 if tag:
                     tag.extract()
                 for tag in soup.find_all(attrs={'style':True}):
                     del tag['style']
             
             # Turn "cid:" image references into plain file names.
             for img in soup.find_all('img',attrs={'src':True}):
                 if img['src'].lower().startswith('cid:'):
                     img['src'] = img['src'][4:]
             
             opts = getOpts()
             oeb = CreateOeb(default_log, None, opts)
             
             setMetaData(oeb, subject[:SUBJECT_WORDCNT_FOR_APMAIL], 
                 user.ownfeeds.language, local_time(tz=user.timezone), 
                 pubtype='book:book:KindleEar')
             oeb.container = ServerContainer(default_log)
             item_id, href = oeb.manifest.generate(id='page', href='page.html')
             item = oeb.manifest.add(item_id, href, 'application/xhtml+xml', data=unicode(soup))
             oeb.spine.add(item, False)
             oeb.toc.add(subject, href)
             
             if hasattr(message, 'attachments'):
                 for filename,content in message.attachments:
                     mimetype = MimeFromFilename(filename)
                     if mimetype:
                         try:
                             content = content.decode()
                         except Exception:
                             # Best effort: skip this attachment, but leave a trace.
                             default_log.warn('Decode attachment of mail failed : %s' % filename)
                         else:
                             item_id, href = oeb.manifest.generate(id='img', href=filename)
                             oeb.manifest.add(item_id, href, mimetype, data=content)
             
             oIO = byteStringIO()
             o = EPUBOutput() if user.book_type == "epub" else MOBIOutput()
             o.convert(oeb, oIO, opts, default_log)
             BaseHandler.SendToKindle(username, user.kindle_email, 
                 subject[:SUBJECT_WORDCNT_FOR_APMAIL], 
                 user.book_type, str(oIO.getvalue()), user.timezone)
         else: # no images: push the HTML file directly, which reads better
             m = soup.find('meta', attrs={"http-equiv":"Content-Type"})
             if not m:
                 m = soup.new_tag('meta', content="text/html; charset=utf-8")
                 m["http-equiv"] = "Content-Type"
                 soup.html.head.insert(0,m)
             else:
                 m['content'] = "text/html; charset=utf-8"
             
             html = unicode(soup).encode('utf-8')
             BaseHandler.SendToKindle(username, user.kindle_email, 
                 subject[:SUBJECT_WORDCNT_FOR_APMAIL], 'html', html, user.timezone, False)
     self.response.out.write('Done')

Пример #2

Показать файл

Файл: handlemail.py Проект: xujun05/kindleear

    def receive(self, message):
        """Process one inbound mail and forward its content to the user's Kindle.

        The local part of the first recipient address picks the account and
        the action: ``name__action`` selects account ``name`` (``admin`` when
        the name part is empty); an address without ``__`` uses a fixed
        default account name. The sender has to match the account's whitelist
        (``*``, the full address, or ``@host``), otherwise the mail is
        rejected as spam.

        Subject markers: ``!links`` forces link extraction, ``!article``
        forces converting the mail text itself and ignores links.

        The mail is then handled in one of three ways:

        * recipient ``trigger``: delegate to ``self.TrigDeliver``;
        * links were found: queue a ``/url2book`` task carrying all of them
          (type ``Debug`` for recipient ``debug``, ``Download`` for book
          files, else the account's book type);
        * otherwise: send the body itself, converted to EPUB/MOBI when an
          attachment has a type known to ``MimeFromFilename`` or the
          account's book type is ``epub``, otherwise as a plain HTML file.

        Args:
            message: the inbound mail; ``to``, ``sender``, ``bodies()`` and,
                when present, ``subject`` and ``attachments`` are read.

        Returns:
            The result of ``self.TrigDeliver`` for trigger mails, otherwise
            ``None``. A short status text is written to ``self.response`` on
            most paths.
        """
        # If there are several recipients, only the first one is interpreted.
        to = parseaddr(message.to)[1]
        to = to.split('@')[0] if to and '@' in to else 'xxx'
        if '__' in to:
            listto = to.split('__')
            username = listto[0] if listto[0] else 'admin'
            to = listto[1]
        else:
            username = '******'

        user = KeUser.all().filter('name = ', username).get()
        if not user:
            username = '******'
            user = KeUser.all().filter('name = ', username).get()

        if not user or not user.kindle_email:
            self.response.out.write('No account or no email configured!')
            return

        sender = parseaddr(message.sender)[1]
        mailhost = sender.split('@')[1] if sender and '@' in sender else None
        if (not sender or not mailhost) or \
            (not user.whitelist.filter('mail = ', '*').get()
            and not user.whitelist.filter('mail = ', sender.lower()).get()
            and not user.whitelist.filter('mail = ', '@' + mailhost.lower()).get()):
            self.response.out.write("Spam mail!")
            log.warn('Spam mail from : %s' % sender)
            return

        if hasattr(message, 'subject'):
            subject = decode_subject(message.subject).strip()
        else:
            subject = u"NoSubject"

        # A trailing "!links" in the subject forces extracting the links in
        # the mail and building the e-book from them.
        forceToLinks = False
        forceToArticle = False
        if subject.endswith('!links'):
            subject = subject.replace('!links', '').rstrip()
            forceToLinks = True
        elif subject.find(' !links ') >= 0:
            subject = subject.replace(' !links ', '')
            forceToLinks = True

        # A trailing "!article" forces converting the mail content itself
        # into an e-book, ignoring any links in it.
        if not forceToLinks:
            if subject.endswith('!article'):
                subject = subject.replace('!article', '').rstrip()
                forceToArticle = True
            elif subject.find(' !article ') >= 0:
                subject = subject.replace(' !article ', '')
                forceToArticle = True

        # A mail sent to "trigger" starts a "deliver now" run.
        if to.lower() == 'trigger':
            return self.TrigDeliver(subject, username)

        # Fetch and decode the mail content.
        txt_bodies = message.bodies('text/plain')
        html_bodies = message.bodies('text/html')
        try:
            allBodies = [body.decode() for ctype, body in html_bodies]
        except Exception:
            log.warn('Decode html bodies of mail failed.')
            allBodies = []

        # This mail is plain text only.
        if not allBodies:
            log.info('no html body, use text body.')
            try:
                allBodies = [body.decode() for ctype, body in txt_bodies]
            except Exception:
                log.warn('Decode text bodies of mail failed.')
                allBodies = []
            bodies = u''.join(allBodies)
            if not bodies:
                return
            # Collect the leading run of lines that are links; stop at the
            # first non-empty line that is not one.
            bodyurls = []
            for line in bodies.split('\n'):
                line = line.strip()
                if not line:
                    continue
                url = IsHyperLink(line)
                if url:
                    bodyurls.append('<a href="%s">%s</a><br />' % (url, url))
                else:
                    break

            bodies = u"""<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
              <title>%s</title></head><body>%s</body></html>""" % (
                subject, ''.join(bodyurls) if bodyurls else bodies)
            allBodies = [bodies.encode('utf-8')]

        # Start processing the mail content.
        soup = BeautifulSoup(allBodies[0], 'lxml')

        # Merge the remaining body parts into the first document.
        for o in allBodies[1:]:
            so = BeautifulSoup(o, 'lxml')
            b = so.find('body')
            if not b:
                continue
            # append() moves the node out of b.contents, so iterate a
            # snapshot; looping over the live list would skip every other
            # child.
            for c in list(b.contents):
                soup.body.append(c)

        # Decide whether the content is text or links (possibly several).
        links = []
        body = soup.body if soup.find('body') else soup
        if not forceToArticle:  # forced article mode skips link analysis
            for s in body.stripped_strings:
                url = IsHyperLink(s)
                if url:
                    if url not in links:
                        links.append(url)
                # Several links must come one per line with nothing in
                # between, unless link extraction is forced. This drops the
                # advertising link some mail clients append to a mail.
                elif not forceToLinks:
                    break

        # No link among the visible strings: fall back to the <a> tags.
        if not links and not forceToArticle:
            links = [
                anchor['href']
                for anchor in soup.find_all('a', attrs={'href': True})
            ]

            text = ' '.join([s for s in body.stripped_strings])

            # If there are relative paths, take the first absolute link as
            # the base and resolve the others against it.
            hasRelativePath = False
            fullPath = ''
            for link in links:
                text = text.replace(link, '')
                if not link.startswith('http'):
                    hasRelativePath = True
                if not fullPath and link.startswith('http'):
                    fullPath = link

            if hasRelativePath and fullPath:
                # urljoin lives in urlparse on Python 2 (urllib.parse on
                # Python 3); the stdlib urllib module has no urljoin.
                try:
                    from urlparse import urljoin
                except ImportError:
                    from urllib.parse import urljoin
                for idx, link in enumerate(links):
                    if not link.startswith('http'):
                        links[idx] = urljoin(fullPath, link)

            # Too much text (or not exactly one link) means the mail body
            # itself should be pushed.
            if not forceToLinks and (len(links) != 1 or
                                     len(text) > WORDCNT_THRESHOLD_FOR_APMAIL):
                links = []

        if links:
            # Decide between downloading a file and converting web pages.
            # Judge by the last collected link rather than by a leftover
            # loop variable, which is falsy once the scan above has stopped
            # on a non-link line.
            lastLink = links[-1].lower()
            isBook = (to.lower() in ('book', 'file', 'download')
                      or lastLink.endswith(('.mobi', '.epub', '.docx',
                                            '.pdf', '.txt', '.doc', '.rtf')))
            isDebug = bool(to.lower() == 'debug')

            if isDebug:
                bookType = 'Debug'
            elif isBook:
                bookType = 'Download'
            else:
                bookType = user.book_type

            param = {
                'u': username,
                'urls':
                base64.urlsafe_b64encode(zlib.compress('|'.join(links), 9)),
                'type': bookType,
                'to': user.kindle_email,
                'tz': user.timezone,
                'subject': subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                'lng': user.ownfeeds.language,
                'keepimage': '1' if user.ownfeeds.keep_image else '0'
            }
            taskqueue.add(url='/url2book',
                          queue_name="deliverqueue1",
                          method='GET',
                          params=param,
                          target='worker')
        else:  # forward the mail body directly
            # First check whether the mail carries any image.
            from lib.makeoeb import MimeFromFilename
            hasimage = False
            if hasattr(message, 'attachments'):
                for f, c in message.attachments:
                    if MimeFromFilename(f):
                        hasimage = True
                        break

            # Repair non-conforming HTML mails first.
            h = soup.find('head')
            if not h:
                h = soup.new_tag('head')
                soup.html.insert(0, h)
            t = soup.head.find('title')
            if not t:
                t = soup.new_tag('title')
                t.string = subject
                soup.head.insert(0, t)

            # With images a MOBI or EPUB has to be generated. Duokan
            # mailboxes do not accept HTML pushes either, so convert to EPUB
            # first.
            if hasimage or (user.book_type == "epub"):
                from main import local_time
                from lib.makeoeb import (getOpts, CreateOeb, setMetaData,
                                         ServerContainer, byteStringIO,
                                         EPUBOutput, MOBIOutput)

                # Mimic Amazon's conversion server: strip the CSS.
                if DELETE_CSS_FOR_APPSPOTMAIL:
                    tag = soup.find('style', attrs={'type': 'text/css'})
                    if tag:
                        tag.extract()
                    for tag in soup.find_all(attrs={'style': True}):
                        del tag['style']

                # Turn "cid:" image references into plain file names.
                for img in soup.find_all('img', attrs={'src': True}):
                    if img['src'].lower().startswith('cid:'):
                        img['src'] = img['src'][4:]

                opts = getOpts()
                oeb = CreateOeb(log, None, opts)

                setMetaData(oeb,
                            subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                            user.ownfeeds.language,
                            local_time(tz=user.timezone),
                            pubtype='book:book:KindleEar')
                oeb.container = ServerContainer(log)
                id_, href = oeb.manifest.generate(id='page', href='page.html')
                item = oeb.manifest.add(id_,
                                        href,
                                        'application/xhtml+xml',
                                        data=unicode(soup))
                oeb.spine.add(item, False)
                oeb.toc.add(subject, href)

                if hasattr(message, 'attachments'):
                    for filename, content in message.attachments:
                        mimetype = MimeFromFilename(filename)
                        if mimetype:
                            try:
                                content = content.decode()
                            except Exception:
                                # Best effort: skip this attachment, but
                                # leave a trace.
                                log.warn('Decode attachment of mail failed : %s' % filename)
                            else:
                                id_, href = oeb.manifest.generate(
                                    id='img', href=filename)
                                oeb.manifest.add(id_,
                                                 href,
                                                 mimetype,
                                                 data=content)

                oIO = byteStringIO()
                o = EPUBOutput() if user.book_type == "epub" else MOBIOutput()
                o.convert(oeb, oIO, opts, log)
                BaseHandler.SendToKindle(username, user.kindle_email,
                                         subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                                         user.book_type, str(oIO.getvalue()),
                                         user.timezone)
            else:  # no images: push the HTML file directly, which reads better
                m = soup.find('meta', attrs={"http-equiv": "Content-Type"})
                if not m:
                    m = soup.new_tag('meta',
                                     content="text/html; charset=utf-8")
                    m["http-equiv"] = "Content-Type"
                    soup.html.head.insert(0, m)
                else:
                    m['content'] = "text/html; charset=utf-8"

                html = unicode(soup).encode('utf-8')
                BaseHandler.SendToKindle(username, user.kindle_email,
                                         subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                                         'html', html, user.timezone, False)
        self.response.out.write('Done')

Пример #3

Показать файл

Файл: handlemail.py Проект: Chansie/KindleEar

    def receive(self, message):
        """Process one inbound mail and forward its content to the user's Kindle.

        The local part of the first recipient address picks the account and
        the action: ``name__action`` selects account ``name`` (``admin`` when
        the name part is empty); an address without ``__`` uses a fixed
        default account name. The sender has to match the account's whitelist
        (``*``, the full address, or ``@host``), otherwise the mail is
        rejected as spam.

        Subject markers: ``!links`` forces link extraction, ``!article``
        forces converting the mail text itself and ignores links.

        The mail is then handled in one of three ways:

        * recipient ``trigger``: delegate to ``self.TrigDeliver``;
        * links were found: queue a ``/url2book`` task carrying all of them
          (type ``Download`` for book files, else the account's book type);
        * otherwise: send the body itself, converted to EPUB/MOBI when an
          attachment has a type known to ``MimeFromFilename`` or the
          account's book type is ``epub``, otherwise as a plain HTML file.

        Args:
            message: the inbound mail; ``to``, ``sender``, ``bodies()`` and,
                when present, ``subject`` and ``attachments`` are read.

        Returns:
            The result of ``self.TrigDeliver`` for trigger mails, otherwise
            ``None``. A short status text is written to ``self.response`` on
            most paths.
        """
        # If there are several recipients, only the first one is interpreted.
        to = parseaddr(message.to)[1]
        to = to.split("@")[0] if to and "@" in to else "xxx"
        if "__" in to:
            listto = to.split("__")
            username = listto[0] if listto[0] else "admin"
            to = listto[1]
        else:
            username = "******"

        user = KeUser.all().filter("name = ", username).get()
        if not user:
            username = "******"
            user = KeUser.all().filter("name = ", username).get()

        if not user or not user.kindle_email:
            self.response.out.write("No account or no email configured!")
            return

        sender = parseaddr(message.sender)[1]
        mailhost = sender.split("@")[1] if sender and "@" in sender else None
        if (not sender or not mailhost) or (
            not user.whitelist.filter("mail = ", "*").get()
            and not user.whitelist.filter("mail = ", sender.lower()).get()
            and not user.whitelist.filter("mail = ", "@" + mailhost.lower()).get()
        ):
            self.response.out.write("Spam mail!")
            log.warn("Spam mail from : %s" % sender)
            return

        if hasattr(message, "subject"):
            subject = decode_subject(message.subject).strip()
        else:
            subject = u"NoSubject"

        # A trailing "!links" in the subject forces extracting the links in
        # the mail and building the e-book from them.
        forceToLinks = False
        forceToArticle = False
        if subject.endswith("!links"):
            subject = subject.replace("!links", "").rstrip()
            forceToLinks = True
        elif subject.find(" !links ") >= 0:
            subject = subject.replace(" !links ", "")
            forceToLinks = True

        # "!article" forces converting the mail content itself; when set,
        # link analysis below is skipped.
        if subject.endswith("!article"):
            subject = subject.replace("!article", "").rstrip()
            forceToArticle = True
        elif subject.find(" !article ") >= 0:
            subject = subject.replace(" !article ", "")
            forceToArticle = True

        # A mail sent to "trigger" starts a "deliver now" run.
        if to.lower() == "trigger":
            return self.TrigDeliver(subject, username)

        # Fetch and decode the mail content.
        txt_bodies = message.bodies("text/plain")
        html_bodies = message.bodies("text/html")
        try:
            allBodies = [body.decode() for ctype, body in html_bodies]
        except Exception:
            log.warn("Decode html bodies of mail failed.")
            allBodies = []

        # This mail is plain text only.
        if not allBodies:
            log.info("no html body, use text body.")
            try:
                allBodies = [body.decode() for ctype, body in txt_bodies]
            except Exception:
                log.warn("Decode text bodies of mail failed.")
                allBodies = []
            bodies = u"".join(allBodies)
            if not bodies:
                return
            # Collect the leading run of lines that are links; stop at the
            # first non-empty line that is not one.
            bodyurls = []
            for line in bodies.split("\n"):
                line = line.strip()
                if not line:
                    continue
                url = IsHyperLink(line)
                if url:
                    bodyurls.append('<a href="%s">%s</a><br />' % (url, url))
                else:
                    break

            bodies = u"""<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
              <title>%s</title></head><body>%s</body></html>""" % (
                subject,
                "".join(bodyurls) if bodyurls else bodies,
            )
            allBodies = [bodies.encode("utf-8")]

        # Start processing the mail content.
        soup = BeautifulSoup(allBodies[0], "lxml")

        # Merge the remaining body parts into the first document.
        for o in allBodies[1:]:
            so = BeautifulSoup(o, "lxml")
            b = so.find("body")
            if not b:
                continue
            # append() moves the node out of b.contents, so iterate a
            # snapshot; looping over the live list would skip every other
            # child.
            for c in list(b.contents):
                soup.body.append(c)

        # Decide whether the content is text or links (possibly several).
        links = []
        body = soup.body if soup.find("body") else soup
        if not forceToArticle:
            for s in body.stripped_strings:
                url = IsHyperLink(s)
                if url:
                    if url not in links:
                        links.append(url)
                elif not forceToLinks:  # several links must come one per line, unless forced
                    break

        # No link among the visible strings: fall back to the <a> tags.
        if not links and not forceToArticle:
            links = [anchor["href"] for anchor in soup.find_all("a", attrs={"href": True})]

            text = " ".join([s for s in body.stripped_strings])

            # If there are relative paths, take the first absolute link as
            # the base and resolve the others against it.
            hasRelativePath = False
            fullPath = ""
            for link in links:
                text = text.replace(link, "")
                if not link.startswith("http"):
                    hasRelativePath = True
                if not fullPath and link.startswith("http"):
                    fullPath = link

            if hasRelativePath and fullPath:
                # urljoin lives in urlparse on Python 2 (urllib.parse on
                # Python 3); the stdlib urllib module has no urljoin.
                try:
                    from urlparse import urljoin
                except ImportError:
                    from urllib.parse import urljoin
                for idx, link in enumerate(links):
                    if not link.startswith("http"):
                        links[idx] = urljoin(fullPath, link)

            # Too much text (or not exactly one link) means the mail body
            # itself should be pushed.
            if not forceToLinks and (len(links) != 1 or len(text) > WORDCNT_THRESHOLD_FOR_APMAIL):
                links = []

        if links:
            # Decide between downloading a file and converting web pages.
            # Judge by the last collected link rather than by a leftover
            # loop variable, which is falsy once the scan above has stopped
            # on a non-link line.
            lastLink = links[-1].lower()
            isbook = to.lower() in ("book", "file", "download") or lastLink.endswith(
                (".mobi", ".epub", ".docx", ".pdf", ".txt", ".doc", ".rtf")
            )

            param = {
                "u": username,
                "urls": base64.urlsafe_b64encode(zlib.compress("|".join(links), 9)),
                "type": "Download" if isbook else user.book_type,
                "to": user.kindle_email,
                "tz": user.timezone,
                "subject": subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                "lng": user.ownfeeds.language,
                "keepimage": "1" if user.ownfeeds.keep_image else "0",
            }
            taskqueue.add(url="/url2book", queue_name="deliverqueue1", method="GET", params=param, target="worker")
        else:  # forward the mail body directly
            # First check whether the mail carries any image.
            from lib.makeoeb import MimeFromFilename

            hasimage = False
            if hasattr(message, "attachments"):
                for f, c in message.attachments:
                    if MimeFromFilename(f):
                        hasimage = True
                        break

            # Repair non-conforming HTML mails first.
            h = soup.find("head")
            if not h:
                h = soup.new_tag("head")
                soup.html.insert(0, h)
            t = soup.head.find("title")
            if not t:
                t = soup.new_tag("title")
                t.string = subject
                soup.head.insert(0, t)

            # With images a MOBI or EPUB has to be generated. Duokan
            # mailboxes do not accept HTML pushes either, so convert to EPUB
            # first.
            if hasimage or (user.book_type == "epub"):
                from main import local_time
                from lib.makeoeb import (
                    getOpts,
                    CreateOeb,
                    setMetaData,
                    ServerContainer,
                    byteStringIO,
                    EPUBOutput,
                    MOBIOutput,
                )

                # Mimic Amazon's conversion server: strip the CSS.
                if DELETE_CSS_FOR_APPSPOTMAIL:
                    tag = soup.find("style", attrs={"type": "text/css"})
                    if tag:
                        tag.extract()
                    for tag in soup.find_all(attrs={"style": True}):
                        del tag["style"]

                # Turn "cid:" image references into plain file names.
                for img in soup.find_all("img", attrs={"src": True}):
                    if img["src"].lower().startswith("cid:"):
                        img["src"] = img["src"][4:]

                opts = getOpts()
                oeb = CreateOeb(log, None, opts)

                setMetaData(
                    oeb,
                    subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                    user.ownfeeds.language,
                    local_time(tz=user.timezone),
                    pubtype="book:book:KindleEar",
                )
                oeb.container = ServerContainer(log)
                item_id, href = oeb.manifest.generate(id="page", href="page.html")
                item = oeb.manifest.add(item_id, href, "application/xhtml+xml", data=unicode(soup))
                oeb.spine.add(item, False)
                oeb.toc.add(subject, href)

                if hasattr(message, "attachments"):
                    for filename, content in message.attachments:
                        mimetype = MimeFromFilename(filename)
                        if mimetype:
                            try:
                                content = content.decode()
                            except Exception:
                                # Best effort: skip this attachment, but
                                # leave a trace.
                                log.warn("Decode attachment of mail failed : %s" % filename)
                            else:
                                item_id, href = oeb.manifest.generate(id="img", href=filename)
                                oeb.manifest.add(item_id, href, mimetype, data=content)

                oIO = byteStringIO()
                o = EPUBOutput() if user.book_type == "epub" else MOBIOutput()
                o.convert(oeb, oIO, opts, log)
                BaseHandler.SendToKindle(
                    username,
                    user.kindle_email,
                    subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                    user.book_type,
                    str(oIO.getvalue()),
                    user.timezone,
                )
            else:  # no images: push the HTML file directly, which reads better
                m = soup.find("meta", attrs={"http-equiv": "Content-Type"})
                if not m:
                    m = soup.new_tag("meta", content="text/html; charset=utf-8")
                    m["http-equiv"] = "Content-Type"
                    soup.html.head.insert(0, m)
                else:
                    m["content"] = "text/html; charset=utf-8"

                html = unicode(soup).encode("utf-8")
                BaseHandler.SendToKindle(
                    username,
                    user.kindle_email,
                    subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                    "html",
                    html,
                    user.timezone,
                    False,
                )
        self.response.out.write("Done")

Пример #4

Показать файл

    def receive(self, message):
        """Handle one inbound mail and deliver its content for the admin user.

        Processing steps, in order:

        1. Reject the mail unless the sender matches a ``WhiteList`` entry
           (``*``, the full address, or ``@domain``).
        2. Look up the ``admin`` user; abort when it is missing or has no
           ``kindle_email``.
        3. Decode the HTML bodies, falling back to the plain-text bodies
           wrapped in a minimal HTML document.
        4. If the body contains exactly one ``<a href>`` and fewer than 100
           characters of other text, queue a ``/url2book`` task for that
           link. Otherwise forward the mail body itself: as MOBI/EPUB when
           an attachment is recognised by ``MimeFromFilename``, else as a
           plain HTML document.

        Args:
            message: the inbound mail object; this method reads ``sender``,
                ``subject``, ``bodies()`` and, when present, ``attachments``.

        Returns:
            None. Status text is written to ``self.response.out``.
        """
        sender = parseaddr(message.sender)[1]
        mailhost = sender.split('@')[1] if sender and '@' in sender else None
        if (not sender or not mailhost) or \
            (not WhiteList.all().filter('mail = ', '*').get()
            and not WhiteList.all().filter('mail = ', sender.lower()).get()
            and not WhiteList.all().filter('mail = ', '@' + mailhost.lower()).get()):
            self.response.out.write("Spam mail!")
            default_log.warn('Spam mail from : %s' % sender)
            return

        if hasattr(message, 'subject'):
            subject = decode_subject(message.subject)
        else:
            subject = u"NoSubject"

        admin = KeUser.all().filter('name = ', 'admin').get()
        if not admin or not admin.kindle_email:
            self.response.out.write('No admin account or no email configured!')
            return

        txt_bodies = message.bodies('text/plain')
        html_bodies = message.bodies('text/html')
        try:
            allBodies = [body.decode() for ctype, body in html_bodies]
        except Exception:
            default_log.warn('Decode html bodies of mail failed.')
            allBodies = []
        if not allBodies:  # no HTML part: treat the mail as plain text
            try:
                allBodies = [body.decode() for ctype, body in txt_bodies]
            except Exception:
                default_log.warn('Decode text bodies of mail failed.')
                allBodies = []
            bodies = u''.join(allBodies)
            if not bodies:
                return
            if len(bodies) < 100:  # short enough that it may be just a link
                R = r"""^(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>???“”‘’]))"""
                M = re.match(R, bodies)
                if M:
                    bodies = '<a href="%s">%s</a>' % (M.group(), M.group())
            bodies = u"""<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
                    <title>%s</title></head><body>%s</body></html>""" % (
                subject, bodies)
            allBodies = [bodies.encode('utf-8')]

        # Make sure the document has a <head> and a <title>.
        soup = BeautifulSoup(allBodies[0], 'lxml')
        h = soup.find('head')
        if not h:
            h = soup.new_tag('head')
            soup.html.insert(0, h)
        t = soup.find('title')
        if not t:
            t = soup.new_tag('title')
            t.string = subject
            soup.html.head.insert(0, t)

        # Merge the remaining HTML parts into the first document's <body>.
        if len(allBodies) > 1:
            for o in allBodies[1:]:
                so = BeautifulSoup(o, 'lxml')
                b = so.find('body')
                if not b:
                    continue
                # Appending a node moves it out of b.contents, so iterate
                # over a snapshot; otherwise every other node is skipped.
                for c in list(b.contents):
                    soup.body.append(c)

        # Exactly one link and very little other text means the linked page
        # should be fetched; otherwise forward the mail body itself.
        links = list(soup.body.find_all('a', attrs={'href': True}))
        link = links[0]['href'] if links else ''
        if len(links) == 1 and \
            len(''.join([s for s in soup.body.stripped_strings if not s.endswith(link)])) < 100:
            param = {
                'u': 'admin',
                'url': link,
                'type': admin.book_type,
                'to': admin.kindle_email,
                'tz': admin.timezone,
                'subject': subject[:15],
                'lng': admin.ownfeeds.language,
                'keepimage': '1' if admin.ownfeeds.keep_image else '0'
            }
            taskqueue.add(url='/url2book',
                          queue_name="deliverqueue1",
                          method='GET',
                          params=param)
        else:  # forward the mail body directly
            # First find out whether any attachment is an image.
            # NOTE(review): MimeFromFilename presumably returns a MIME type
            # only for supported image file names - confirm in lib.makeoeb.
            from lib.makeoeb import MimeFromFilename
            # Default to False up front so that a mail whose attachments
            # contain no image does not leave the flag unbound.
            hasimage = False
            if hasattr(message, 'attachments'):
                for f, c in message.attachments:
                    if MimeFromFilename(f):
                        hasimage = True
                        break

            if hasimage:  # with images a MOBI or EPUB has to be generated
                from main import local_time
                from lib.makeoeb import (getOpts, CreateOeb, setMetaData,
                                         ServerContainer, byteStringIO,
                                         EPUBOutput, MOBIOutput)

                # Mimic Amazon's conversion service: strip the CSS.
                if DELETE_CSS_FOR_APPSPOTMAIL:
                    tag = soup.find('style', attrs={'type': 'text/css'})
                    if tag:
                        tag.extract()
                    for tag in soup.find_all(attrs={'style': True}):
                        del tag['style']

                # Point inline images at the attachment file names by
                # dropping the "cid:" prefix from their src.
                for img in soup.find_all('img', attrs={'src': True}):
                    if img['src'].lower().startswith('cid:'):
                        img['src'] = img['src'][4:]

                opts = getOpts()
                oeb = CreateOeb(default_log, None, opts)

                setMetaData(oeb,
                            subject[:15],
                            admin.ownfeeds.language,
                            local_time(tz=admin.timezone),
                            pubtype='book:book:KindleEar')
                oeb.container = ServerContainer(default_log)
                item_id, href = oeb.manifest.generate(id='page',
                                                      href='page.html')
                item = oeb.manifest.add(item_id,
                                        href,
                                        'application/xhtml+xml',
                                        data=unicode(soup))
                oeb.spine.add(item, False)
                oeb.toc.add(subject, href)

                for filename, content in message.attachments:
                    mimetype = MimeFromFilename(filename)
                    if not mimetype:
                        continue
                    try:
                        content = content.decode()
                    except Exception:
                        # Best effort: skip an attachment that cannot be
                        # decoded, but leave a trace in the log.
                        default_log.warn('Decode attachment %s failed.' % filename)
                        continue
                    item_id, href = oeb.manifest.generate(id='img',
                                                          href=filename)
                    oeb.manifest.add(item_id, href, mimetype, data=content)

                oIO = byteStringIO()
                o = EPUBOutput() if admin.book_type == "epub" else MOBIOutput()
                o.convert(oeb, oIO, opts, default_log)
                BaseHandler.SendToKindle('admin', admin.kindle_email,
                                         subject[:15], admin.book_type,
                                         str(oIO.getvalue()), admin.timezone)
            else:  # no images: push the HTML file as is, it reads better
                m = soup.find('meta', attrs={"http-equiv": "Content-Type"})
                if not m:
                    m = soup.new_tag('meta',
                                     content="text/html; charset=utf-8")
                    m["http-equiv"] = "Content-Type"
                    soup.html.head.insert(0, m)
                else:
                    m['content'] = "text/html; charset=utf-8"

                html = unicode(soup).encode('utf-8')
                BaseHandler.SendToKindle('admin', admin.kindle_email,
                                         subject[:15], 'html', html,
                                         admin.timezone, False)
        self.response.out.write('Done')

Пример #5

Показать файл

Файл: handlemail.py Проект: liujianpc/KindleEar

    def receive(self, message):
        """Handle one inbound mail and deliver its content to a user's Kindle.

        Processing steps, in order:

        1. Take the local part of the recipient address. When it contains
           ``__`` the part before it is the user name and the part after it
           is the target keyword.
        2. Look up the user; abort when there is none or it has no
           ``kindle_email``.
        3. Reject the mail unless the sender matches the user's whitelist
           (``*``, the full address, or ``@domain``).
        4. A ``trigger`` target delegates to ``self.TrigDeliver``.
        5. Decode the HTML bodies, falling back to the plain-text bodies
           wrapped in a minimal HTML document.
        6. If the content is a list of links (one per line), or a single
           ``<a href>`` with little surrounding text, queue a ``/url2book``
           task. Otherwise forward the mail body itself: as MOBI/EPUB when
           an attachment is recognised by ``MimeFromFilename`` or the
           user's book type is ``epub``, else as a plain HTML document.

        Args:
            message: the inbound mail object; this method reads ``to``,
                ``sender``, ``subject``, ``bodies()`` and, when present,
                ``attachments``.

        Returns:
            None, or whatever ``self.TrigDeliver`` returns for a trigger
            mail. Status text is written to ``self.response.out``.
        """
        # With several recipients only the first one is interpreted.
        to = parseaddr(message.to)[1]
        to = to.split('@')[0] if to and '@' in to else 'xxx'
        if '__' in to:
            listto = to.split('__')
            username = listto[0] if listto[0] else 'admin'
            to = listto[1]
        else:
            username = '******'
            
        user = KeUser.all().filter('name = ', username).get()
        if not user:
            username = '******'
            user = KeUser.all().filter('name = ', username).get()
        
        if not user or not user.kindle_email:
            self.response.out.write('No account or no email configured!')
            return
        
        sender = parseaddr(message.sender)[1]
        mailhost = sender.split('@')[1] if sender and '@' in sender else None
        if (not sender or not mailhost) or \
            (not user.whitelist.filter('mail = ', '*').get()
            and not user.whitelist.filter('mail = ', sender.lower()).get()
            and not user.whitelist.filter('mail = ', '@' + mailhost.lower()).get()):
            self.response.out.write("Spam mail!")
            log.warn('Spam mail from : %s' % sender)
            return
        
        if hasattr(message, 'subject'):
            subject = decode_subject(message.subject)
        else:
            subject = u"NoSubject"
        
        # A mail to the "trigger" address starts a "deliver now" run.
        if to.lower() == 'trigger':
            return self.TrigDeliver(subject, username)
        
        # Fetch and decode the mail content.
        txt_bodies = message.bodies('text/plain')
        html_bodies = message.bodies('text/html')
        try:
            allBodies = [body.decode() for ctype, body in html_bodies]
        except Exception:
            log.warn('Decode html bodies of mail failed.')
            allBodies = []
        
        # No HTML part: treat the mail as plain text.
        if not allBodies:
            log.info('no html body, use text body.')
            try:
                allBodies = [body.decode() for ctype, body in txt_bodies]
            except Exception:
                log.warn('Decode text bodies of mail failed.')
                allBodies = []
            bodies = u''.join(allBodies)
            if not bodies:
                return
            # Collect the leading run of lines that are hyperlinks; the scan
            # stops at the first non-empty line that is not a link.
            bodyurls = []
            for l in bodies.split('\n'):
                l = l.strip()
                if not l:
                    continue
                link = IsHyperLink(l)
                if link:
                    bodyurls.append('<a href="%s">%s</a><br />' % (link,link))
                else:
                    break

            bodies = u"""<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
              <title>%s</title></head><body>%s</body></html>""" %(subject,
              ''.join(bodyurls) if bodyurls else bodies)
            allBodies = [bodies.encode('utf-8')]
        
        # Start processing the mail content.
        soup = BeautifulSoup(allBodies[0], 'lxml')
        
        # Merge the remaining mail parts into the first document's <body>.
        if len(allBodies) > 1:
            for o in allBodies[1:]:
                so = BeautifulSoup(o, 'lxml')
                b = so.find('body')
                if not b:
                    continue
                # Appending a node moves it out of b.contents, so iterate
                # over a snapshot; otherwise every other node is skipped.
                for c in list(b.contents):
                    soup.body.append(c)
        
        # Decide whether the content is text or links (possibly several).
        links = []
        body = soup.body if soup.find('body') else soup
        for s in body.stripped_strings:
            link = IsHyperLink(s)
            if link:
                links.append(link)
            else:  # several links must come one per line
                break
        if not links:  # no link found in the text: look at the <a> tags
            anchors = soup.find_all('a', attrs={'href': True})
            link = anchors[0]['href'] if anchors else ''
            text = ' '.join(body.stripped_strings)
            text = text.replace(link, '')
            # Only a single link with little surrounding text is fetched;
            # with more text the mail body itself is pushed. Store the href
            # string (not the Tag) so that links can be joined below.
            if len(anchors) == 1 and len(text) <= WORDCNT_THRESHOLD_FOR_APMAIL:
                links = [link]
            
        if links:
            # Decide between downloading a file and converting a page.
            # Inspect an entry of links rather than the leftover loop
            # variable, which holds a non-URL value after the scan above
            # stops on a non-link string.
            last_link = links[-1].lower()
            isbook = (to.lower() in ('book', 'file', 'download')
                      or last_link.endswith(('.mobi', '.epub', '.docx',
                                             '.pdf', '.txt', '.doc', '.rtf')))
            
            param = {'u':username,
                     'urls':base64.urlsafe_b64encode(zlib.compress('|'.join(links), 9)),
                     'type':'Download' if isbook else user.book_type,
                     'to':user.kindle_email,
                     'tz':user.timezone,
                     'subject':subject[:SUBJECT_WORDCNT_FOR_APMAIL],
                     'lng':user.ownfeeds.language,
                     'keepimage':'1' if user.ownfeeds.keep_image else '0'
                    }
            taskqueue.add(url='/url2book',queue_name="deliverqueue1",method='GET',
                params=param,target='worker')
        else:  # forward the mail body directly
            # First find out whether any attachment is an image.
            # NOTE(review): MimeFromFilename presumably returns a MIME type
            # only for supported image file names - confirm in lib.makeoeb.
            from lib.makeoeb import MimeFromFilename
            hasimage = False
            if hasattr(message, 'attachments'):
                for f,c in message.attachments:
                    if MimeFromFilename(f):
                        hasimage = True
                        break
                        
            # Repair non-conforming HTML mails: ensure <head> and <title>.
            h = soup.find('head')
            if not h:
                h = soup.new_tag('head')
                soup.html.insert(0, h)
            t = soup.head.find('title')
            if not t:
                t = soup.new_tag('title')
                t.string = subject
                soup.head.insert(0, t)
            
            # With images a MOBI or EPUB has to be generated. The Duokan
            # mailbox does not accept HTML pushes either, so epub users also
            # get a converted book.
            if hasimage or (user.book_type == "epub"):
                from main import local_time
                from lib.makeoeb import (getOpts, CreateOeb, setMetaData,
                                    ServerContainer, byteStringIO, 
                                    EPUBOutput, MOBIOutput)
                
                # Mimic Amazon's conversion service: strip the CSS.
                if DELETE_CSS_FOR_APPSPOTMAIL:
                    tag = soup.find('style', attrs={'type':'text/css'})
                    if tag:
                        tag.extract()
                    for tag in soup.find_all(attrs={'style':True}):
                        del tag['style']
                
                # Point inline images at the attachment file names by
                # dropping the "cid:" prefix from their src.
                for img in soup.find_all('img',attrs={'src':True}):
                    if img['src'].lower().startswith('cid:'):
                        img['src'] = img['src'][4:]
                
                opts = getOpts()
                oeb = CreateOeb(log, None, opts)
                
                setMetaData(oeb, subject[:SUBJECT_WORDCNT_FOR_APMAIL], 
                    user.ownfeeds.language, local_time(tz=user.timezone), 
                    pubtype='book:book:KindleEar')
                oeb.container = ServerContainer(log)
                item_id, href = oeb.manifest.generate(id='page', href='page.html')
                item = oeb.manifest.add(item_id, href, 'application/xhtml+xml', data=unicode(soup))
                oeb.spine.add(item, False)
                oeb.toc.add(subject, href)
                
                if hasattr(message, 'attachments'):
                    for filename,content in message.attachments:
                        mimetype = MimeFromFilename(filename)
                        if not mimetype:
                            continue
                        try:
                            content = content.decode()
                        except Exception:
                            # Best effort: skip an attachment that cannot be
                            # decoded, but leave a trace in the log.
                            log.warn('Decode attachment %s failed.' % filename)
                            continue
                        item_id, href = oeb.manifest.generate(id='img', href=filename)
                        oeb.manifest.add(item_id, href, mimetype, data=content)
                
                oIO = byteStringIO()
                o = EPUBOutput() if user.book_type == "epub" else MOBIOutput()
                o.convert(oeb, oIO, opts, log)
                BaseHandler.SendToKindle(username, user.kindle_email, 
                    subject[:SUBJECT_WORDCNT_FOR_APMAIL], 
                    user.book_type, str(oIO.getvalue()), user.timezone)
            else:  # no images: push the HTML file as is, it reads better
                m = soup.find('meta', attrs={"http-equiv":"Content-Type"})
                if not m:
                    m = soup.new_tag('meta', content="text/html; charset=utf-8")
                    m["http-equiv"] = "Content-Type"
                    soup.html.head.insert(0,m)
                else:
                    m['content'] = "text/html; charset=utf-8"
                
                html = unicode(soup).encode('utf-8')
                BaseHandler.SendToKindle(username, user.kindle_email, 
                    subject[:SUBJECT_WORDCNT_FOR_APMAIL], 'html', html, user.timezone, False)
        self.response.out.write('Done')