Exemplo n.º 1
0
 def json_ld(self):
     """Serialize this entry as a schema.org ``BlogPosting`` JSON-LD string.

     Entries whose value is ``None`` (``image`` when the object has no
     truthy ``image`` attribute) are left out of the serialized document.
     """
     site = self.settings["SITEURL"]

     def absolute(path):
         # Prefix a site-relative path with the configured site URL.
         return "%s/%s" % (site, path)

     thumbnail = None
     if getattr(self, "image", None):
         # Everything before the last "." of the image name.
         stem = self.image.rpartition(".")[0]
         thumbnail = {
             "@type": "ImageObject",
             "url": "%s/images/thumb/%s-1012x422.jpg" % (site, stem),
             "width": 1012,
             "height": 422,
         }

     # Ordered (key, value) pairs; ``None`` values are filtered out below.
     entries = [
         ("@context", "http://schema.org"),
         ("@type", "BlogPosting"),
         ("headline", striptags(self.title)),
         ("image", thumbnail),
         ("keywords", ", ".join(sorted(map(str, self.tags)))),
         ("url", absolute(self.url)),
         ("datePublished", self.date.isoformat()),
         # Falls back to the publication date when there is no ``modified``.
         ("dateModified", getattr(self, "modified", self.date).isoformat()),
         ("description", striptags(self.summary)),
         ("publisher", {"@type": "Person", "name": "Markus Holtermann"}),
         ("mainEntityOfPage", absolute(self.url)),
         ("author", [
             {
                 "@type": "Person",
                 "name": str(writer),
                 "url": absolute(writer.url),
             }
             for writer in self.authors
         ]),
     ]
     document = {key: value for key, value in entries if value is not None}
     return json.dumps(document)
Exemplo n.º 2
0
 def json_ld(self):
     """Return the JSON-LD (schema.org ``BlogPosting``) text for this entry.

     The ``image`` key is only present when the object has a truthy
     ``image`` attribute; any other key whose value is ``None`` is dropped
     as well.
     """
     base_url = self.settings['SITEURL']

     picture = None
     if getattr(self, 'image', None):
         # Image name up to (not including) its last '.'.
         name_without_ext, _, _ = self.image.rpartition('.')
         thumb_url = '%s/images/thumb/%s-1012x422.jpg' % (
             base_url, name_without_ext
         )
         picture = {
             '@type': 'ImageObject',
             'url': thumb_url,
             'width': 1012,
             'height': 422,
         }

     # Build the document key by key, in the order it is serialized.
     document = {}
     document['@context'] = 'http://schema.org'
     document['@type'] = 'BlogPosting'
     document['headline'] = striptags(self.title)
     document['image'] = picture
     document['keywords'] = ', '.join(sorted(map(str, self.tags)))
     document['url'] = '%s/%s' % (base_url, self.url)
     document['datePublished'] = self.date.isoformat()
     # Use the publication date when no ``modified`` attribute exists.
     last_change = getattr(self, 'modified', self.date)
     document['dateModified'] = last_change.isoformat()
     document['description'] = striptags(self.summary)
     document['publisher'] = {
         '@type': 'Person',
         'name': 'Markus Holtermann',
     }
     document['mainEntityOfPage'] = '%s/%s' % (base_url, self.url)

     people = []
     for author in self.authors:
         people.append({
             '@type': 'Person',
             'name': str(author),
             'url': '%s/%s' % (base_url, author.url),
         })
     document['author'] = people

     present = {
         key: value
         for key, value in document.items()
         if value is not None
     }
     return json.dumps(present)
def get_posts(domain, post_type="text", offset=0, limit=200):
    """Yield the posts of a blog, fetching further pages recursively.

    One request is made per call with ``API_URL_POSTS`` formatted from the
    arguments and the module-level ``API_URL`` / ``API_KEY``.  Every post
    dict of the response is normalized in place before it is yielded:

    * ``utcdate`` is the parsed ``date`` field and ``date`` is replaced by
      that value converted to the local timezone;
    * a blank ``slug`` is replaced by the post id as a string;
    * ``title`` defaults to the tag-stripped ``caption`` (or ``""``);
    * ``new_slug`` defaults to ``slug``;
    * posts of type ``"photo"`` get the tag ``"照片"`` appended.

    :param domain: blog domain substituted into the API URL.
    :param post_type: post type substituted into the API URL.
    :param offset: offset of the first post to request.
    :param limit: sent to the API as ``limit`` and also used as the upper
        bound for pagination (the offset advances by 20 per request).
    :raises AssertionError: if the response's ``meta.status`` is not 200.
    """
    url = API_URL_POSTS.format(url_base=API_URL, domain=domain,
                               post_type=post_type, api_key=API_KEY,
                               offset=offset, limit=limit)
    response = requests.get(url)
    # ``Response.json`` was a property in requests < 1.0 and has been a
    # method since; accept both so ``data`` is always the decoded body.
    data = response.json
    if callable(data):
        data = data()

    meta = data["meta"]
    if meta["status"] != 200:
        # Raised explicitly (instead of via ``assert``) so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError(repr(meta))
    posts = data["response"]["posts"]

    for post in posts:
        post["utcdate"] = dateutil.parser.parse(post["date"])
        post["date"] = post["utcdate"].astimezone(dateutil.tz.tzlocal())
        if not post["slug"].strip():
            post["slug"] = str(post["id"])
        post.setdefault("title", striptags(post.get("caption", "")))
        post.setdefault("new_slug", post.get("slug", ""))
        if post["type"] == "photo":
            post["tags"].append("照片")
        yield post

    #: paginate
    if limit - offset > 20:
        for post in get_posts(domain, post_type, offset + 20, limit):
            yield post
Exemplo n.º 4
0
def extract_text(message):
    """Return the decoded, whitespace-stripped text of an email message.

    For a multipart message the first ``text/plain`` part is used, falling
    back to the first ``text/html`` part.  A non-multipart message is used
    as is.  The payload is decoded according to its
    ``Content-Transfer-Encoding`` and then to text using the declared
    charset (UTF-8 when none is declared).  HTML content is passed through
    ``striptags``.

    :param message: an ``email.message.Message``-like object.
    :returns: the extracted text with leading/trailing whitespace removed.
    :raises ValueError: if a multipart message has neither a plain-text nor
        an HTML part, or if the transfer encoding is not one of 7bit, 8bit,
        binary, quoted-printable or base64.
    """
    if message.is_multipart():
        parts = message.get_payload()
        chosen = None
        for wanted_type in ('text/plain', 'text/html'):
            chosen = next(
                (p for p in parts if p.get_content_type() == wanted_type),
                None)
            if chosen is not None:
                break
        # Compare with None explicitly: ``len()`` of a Message is its
        # header count, so a part without headers is falsy even though it
        # is a perfectly usable (default ``text/plain``) part.
        if chosen is None:
            raise ValueError('need plain text or html message')
    else:
        chosen = message

    # The encoding name is case-insensitive, so normalize before comparing.
    transfer_encoding = chosen.get('Content-Transfer-Encoding')
    if transfer_encoding is not None:
        transfer_encoding = str(transfer_encoding).lower()
    known_encodings = (
        None, '7bit', '8bit', 'binary', 'quoted-printable', 'base64')
    if transfer_encoding not in known_encodings:
        raise ValueError('unknown transfer encoding in message')

    # Let the email package undo the transfer encoding.  This replaces the
    # hand-rolled decoder table that relied on ``base64.decodestring``
    # (removed in Python 3.9) and yields a byte string on Python 2 and 3.
    payload = chosen.get_payload(decode=True)

    # decodes to unicode; fall back to UTF-8 when no charset is declared
    content_charset = chosen.get_content_charset() or 'utf-8'
    if isinstance(payload, bytes):
        text = payload.decode(content_charset)
    else:
        text = payload

    # strip html tags
    if chosen.get_content_type() == 'text/html':
        text = striptags(text)

    return text.strip()
Exemplo n.º 5
0
def html_summary(html):
    """Return *html* passed through ``striptags``, without truncation.

    :param html: markup as text or as a UTF-8 encoded byte string.
    :returns: whatever ``striptags`` returns for the (decoded) markup.
    """
    # ``bytes`` is ``str`` on Python 2 and the binary type on Python 3, so
    # this check works on both; the Python 2-only name ``unicode`` raises
    # NameError on Python 3.
    if isinstance(html, bytes):
        html = html.decode('utf-8')
    return striptags(html)
Exemplo n.º 6
0
def html_summary(html):
    """Return *html* run through ``striptags`` and then ``truncate``.

    ``truncate`` is called with ``length=160``.
    """
    stripped = striptags(html)
    return truncate(stripped, length=160)
Exemplo n.º 7
0
Arquivo: models.py Projeto: svven/web
 def clean_title(self):
     """Return the title passed through ``striptags``, or ``self.site``.

     ``self.site`` is returned when the title is falsy or when the
     stripped title is falsy.
     """
     if self.title:
         stripped = striptags(self.title)
         if stripped:
             return stripped
     return self.site
Exemplo n.º 8
0
 def get_text(self, tpl=None):
     """Return ``self.get_html(tpl)`` passed through ``striptags``.

     An empty string is returned, without rendering anything, when *tpl*
     is falsy.
     """
     if tpl:
         rendered = self.get_html(tpl)
         return striptags(rendered)
     return ''
Exemplo n.º 9
0
Arquivo: models.py Projeto: svven/web
 def clean_title(self):
     """Return the tag-stripped title, falling back to ``self.site``.

     The fallback applies both when there is no title and when
     ``striptags`` returns a falsy value for it.
     """
     title = self.title
     cleaned = striptags(title) if title else title
     if cleaned:
         return cleaned
     return self.site