def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False):
    """Renders the content of an ActivityStreams object as HTML.

    Includes tags, mentions, and non-note/article attachments. (Note/article
    attachments are converted to mf2 children in object_to_json and then
    rendered in json_to_html.)

    Note that the returned HTML is included in Atom as well as HTML documents,
    so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

    Args:
      obj: decoded JSON ActivityStreams object
      include_location: whether to render location, if provided
      synthesize_content: whether to generate synthetic content if the object
        doesn't have its own, e.g. 'likes this.' or 'shared this.'
      render_attachments: whether to render the object's non-note/article
        attachments and article tags, plus, for shares, the attachments of
        the shared object(s)

    Returns:
      string, rendered HTML
    """
    # a null 'content' value is handled the same way as a missing one
    content = obj.get('content') or ''

    # extract tags. preserve order but de-dupe, ie don't include a tag more
    # than once.
    seen_ids = set()
    mentions = []
    tags = {}  # maps string objectType to list of tag objects
    for tag in obj.get('tags', []):
        tag_id = tag.get('id')
        if tag_id:
            if tag_id in seen_ids:
                continue
            seen_ids.add(tag_id)

        # only tags with a position *and* a url can be linkified in place;
        # anything else falls through to the generic tag rendering below.
        if 'startIndex' in tag and 'length' in tag and 'url' in tag:
            mentions.append(tag)
        else:
            tags.setdefault(source.object_type(tag), []).append(tag)

    # linkify embedded mention tags inside content.
    if mentions:
        mentions.sort(key=lambda t: t['startIndex'])
        # indices in the tags refer to positions in the original content
        orig = util.WideUnicode(content)
        pieces = []
        last_end = 0
        for tag in mentions:
            start = tag['startIndex']
            end = start + tag['length']
            pieces.append('%s<a href="%s">%s</a>' % (
                orig[last_end:start], tag['url'], orig[start:end]))
            last_end = end
        pieces.append(orig[last_end:])
        content = ''.join(pieces)

    if not obj.get('content_is_html'):
        # convert newlines to <br>s
        # do this *after* linkifying tags so we don't have to shuffle indices
        # over
        content = content.replace('\n', '<br />\n')

    # linkify embedded links. ignore the "mention" tags that we added
    # ourselves.
    # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
    # uncomment this.
    # if content:
    #     content = util.linkify(content)

    # attachments, e.g. links (aka articles)
    # TODO: use oEmbed? http://oembed.com/ ,
    # http://code.google.com/p/python-oembed/
    if render_attachments:
        atts = [a for a in obj.get('attachments', [])
                if a.get('objectType') not in ('note', 'article')]
        content += _render_attachments(atts + tags.pop('article', []), obj)

    # generate share/like contexts if the activity does not have content
    # of its own
    obj_type = source.object_type(obj)
    verb = {
        'favorite': 'Favorites',
        'like': 'Likes',
        'share': 'Shared',
    }.get(obj_type)
    targets = []
    if synthesize_content and verb and 'object' in obj and 'content' not in obj:
        targets = get_list(obj, 'object')

    if targets:
        # only include the first context in the content (if there are
        # others, they'll be included as separate properties)
        target = targets[0]
        # sometimes likes don't have enough content to render anything
        # interesting
        if 'url' in target and set(target) <= {'url', 'objectType'}:
            content += '<a href="%s">%s this.</a>' % (
                target.get('url'), verb.lower())
        else:
            author = target.get('author', target.get('actor', {}))
            # special case for twitter RT's
            if (obj_type == 'share' and 'url' in obj and
                    re.search(r'^https?://(?:www\.|mobile\.)?twitter\.com/',
                              obj.get('url'))):
                content += 'RT <a href="%s">@%s</a> ' % (
                    target.get('url', '#'), author.get('username'))
            else:
                # image looks bad in the simplified rendering
                author = {k: v for k, v in author.items() if k != 'image'}
                content += '%s <a href="%s">%s</a> by %s' % (
                    verb,
                    target.get('url', '#'),
                    target.get('displayName', target.get('title', 'a post')),
                    hcard_to_html(
                        object_to_json(author, default_object_type='person')),
                )
            content += render_content(
                target, include_location=include_location,
                synthesize_content=synthesize_content)

    if render_attachments and obj.get('verb') == 'share':
        # 'object' may hold a single object or a list of them
        atts = [att
                for shared in get_list(obj, 'object')
                for att in shared.get('attachments', [])
                if att.get('objectType') not in ('note', 'article')]
        content += _render_attachments(atts, obj)

    # location
    loc = obj.get('location')
    if include_location and loc:
        content += '\n<p>%s</p>' % hcard_to_html(
            object_to_json(loc, default_object_type='place'),
            parent_props=['p-location'])

    # these are rendered manually in json_to_html()
    for skipped_type in ('like', 'share', 'react', 'person'):
        tags.pop(skipped_type, None)

    # render the rest
    content += tags_to_html(tags.pop('hashtag', []), 'p-category')
    content += tags_to_html(tags.pop('mention', []), 'u-mention')
    content += tags_to_html(sum(tags.values(), []), 'tag')

    return content
def status_to_object(self, status):
    """Converts a status to an object.

    Args:
      status: dict, a decoded JSON status

    Returns:
      an ActivityStreams object dict, ready to be JSON-encoded. Empty dict
      if the status has no id.
    """
    status_id = status.get('id')
    if not status_id:
        return {}

    obj = {
        'objectType': 'note',
        'id': self.tag_uri(status_id),
        'url': status.get('url'),
        'published': status.get('created_at'),
        'author': self.user_to_actor(status.get('account') or {}),
        'attachments': [],
    }

    # for a reblog (boost), the text and emoji come from the reblogged status
    reblog = status.get('reblog')
    base_status = reblog if reblog else status

    # media! into attachments.
    # NOTE(review): this reads media from the outer status, not base_status;
    # confirm that is intended for reblogs.
    for media in status.get('media_attachments', []):
        media_type = media.get('type')
        att = {
            'id': self.tag_uri(media.get('id')),
            'objectType': MEDIA_TYPES.get(media_type),
            'displayName': media.get('description'),
        }
        url = media.get('url')
        if media_type == 'image':
            att['image'] = {'url': url}
        elif media_type in ('gifv', 'video'):
            att.update({
                'stream': {'url': url},
                'image': {'url': media.get('preview_url')},
            })
        obj['attachments'].append(att)

    # promote the first attachment to the object's own stream/image
    if obj['attachments']:
        first = obj['attachments'][0]
        if first['objectType'] == 'video':
            obj['stream'] = first.get('stream')
        else:
            obj['image'] = first.get('image')

    # tags
    obj['tags'] = [{
        'objectType': 'person',
        'id': self.tag_uri(t.get('id')),
        'url': t.get('url'),
        'displayName': t.get('username'),
    } for t in status.get('mentions', [])] + [{
        'objectType': 'hashtag',
        'url': t.get('url'),
        'displayName': t.get('name'),
    } for t in status.get('tags', [])]

    card = status.get('card')
    if card:
        obj['tags'].append({
            'objectType': 'article',
            'url': card.get('url'),
            'displayName': card.get('title'),
            'content': card.get('description'),
            'image': {'url': card.get('image')},
        })

    # content: insert images for custom emoji
    # https://docs.joinmastodon.org/api/entities/#emoji
    content = base_status.get('content') or ''
    for emoji in base_status.get('emojis', []):
        shortcode = emoji.get('shortcode')
        url = emoji.get('url')
        if shortcode and url:
            img = '<img alt="%s" src="%s" style="height: 1em">' % (
                shortcode, url)
            # escape the shortcode, and substitute via a function, so that
            # neither regex metacharacters in the shortcode nor backslashes
            # in the URL are interpreted by re.
            content = re.sub(
                r'(^|[^\w]):%s:([^\w]|$)' % re.escape(shortcode),
                lambda m, img=img: m.group(1) + img + m.group(2),
                content)

    # content: add 'Boosted @username:' prefix for reblogs
    # NOTE(review): this condition was reconstructed from a corrupted span in
    # the reviewed source (only the trailing "'content'):" survived); confirm
    # against version control.
    if reblog and reblog.get('content'):
        reblog_account = reblog.get('account') or {}
        content = 'Boosted <a href="%s">@%s</a>: ' % (
            reblog_account.get('url'), reblog_account.get('username')) + content

    obj['content'] = util.WideUnicode(content)

    # inReplyTo
    reply_to_id = status.get('in_reply_to_id')
    if reply_to_id:
        obj['inReplyTo'] = [{
            'id': self.tag_uri(reply_to_id),
            # Mastodon's in_reply_to_id is str, Pixelfed's is int.
            'url': urllib.parse.urljoin(
                self.instance, '/web/statuses/' + str(reply_to_id)),
        }]

    # to (ie visibility)
    visibility = status.get('visibility')
    if visibility:
        obj['to'] = [{
            'objectType': 'group',
            'alias': '@' + visibility,
        }]

    return self.postprocess_object(obj)
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False, render_image=False,
                   white_space_pre=True):
    """Renders the content of an ActivityStreams object as HTML.

    Includes tags, mentions, and non-note/article attachments. (Note/article
    attachments are converted to mf2 children in object_to_json and then
    rendered in json_to_html.)

    Note that the returned HTML is included in Atom as well as HTML documents,
    so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

    Args:
      obj: decoded JSON ActivityStreams object
      include_location: boolean, whether to render location, if provided
      synthesize_content: boolean, whether to generate synthetic content if
        the object doesn't have its own, e.g. 'likes this.' or 'shared this.'
      render_attachments: boolean, whether to render attachments, eg links,
        images, audio, and video
      render_image: boolean, whether to render the object's image(s)
      white_space_pre: boolean, whether to wrap in CSS white-space: pre. If
        False, newlines will be converted to <br> tags instead. Background:
        https://indiewebcamp.com/note#Indieweb_whitespace_thinking

    Returns:
      string, rendered HTML
    """
    # a null 'content' value is handled the same way as a missing one
    content = obj.get('content') or ''

    # extract tags. preserve order but de-dupe, ie don't include a tag more
    # than once.
    seen_ids = set()
    mentions = []
    tags = {}  # maps string objectType to list of tag objects
    for tag in obj.get('tags', []):
        tag_id = tag.get('id')
        if tag_id:
            if tag_id in seen_ids:
                continue
            seen_ids.add(tag_id)

        # only tags with a position *and* a url can be linkified in place
        if 'startIndex' in tag and 'length' in tag and 'url' in tag:
            mentions.append(tag)
        else:
            tags.setdefault(source.object_type(tag), []).append(tag)

    # linkify embedded mention tags inside content.
    if mentions:
        mentions.sort(key=lambda t: t['startIndex'])
        # indices in the tags refer to positions in the original content
        orig = util.WideUnicode(content)
        pieces = []
        last_end = 0
        for tag in mentions:
            start = tag['startIndex']
            end = start + tag['length']
            pieces.append('%s<a href="%s">%s</a>' % (
                orig[last_end:start], tag['url'], orig[start:end]))
            last_end = end
        pieces.append(orig[last_end:])
        content = ''.join(pieces)

    # is whitespace in this content meaningful? standard heuristic: if there
    # are no HTML tags in it, and it has a newline, then assume yes.
    # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
    # https://github.com/snarfed/granary/issues/80
    if content and not obj.get('content_is_html') and '\n' in content:
        if white_space_pre:
            content = '<div style="white-space: pre">%s</div>' % content
        else:
            content = content.replace('\n', '<br />\n')

    # linkify embedded links. ignore the "mention" tags that we added
    # ourselves.
    # TODO: fix the bug in test_linkify_broken() in
    # webutil/tests/test_util.py, then uncomment this.
    # if content:
    #     content = util.linkify(content)

    # the image field. may be multiply valued.
    rendered_urls = set()
    if render_image:
        urls = get_urls(obj, 'image')
        content += _render_attachments([{
            'objectType': 'image',
            'image': {'url': url},
        } for url in urls], obj)
        rendered_urls = set(urls)

    # attachments, e.g. links (aka articles)
    # TODO: use oEmbed? http://oembed.com/ ,
    # http://code.google.com/p/python-oembed/
    if render_attachments:
        # skip attachments whose image was already rendered above
        atts = [a for a in obj.get('attachments', [])
                if a.get('objectType') not in ('note', 'article')
                and get_url(a, 'image') not in rendered_urls]
        content += _render_attachments(atts + tags.pop('article', []), obj)

    # generate share/like contexts if the activity does not have content
    # of its own
    obj_type = source.object_type(obj)
    verb = {
        'favorite': 'Favorites',
        'like': 'Likes',
        'share': 'Shared',
    }.get(obj_type)
    targets = []
    if synthesize_content and verb and 'object' in obj and 'content' not in obj:
        targets = get_list(obj, 'object')

    if targets:
        # only include the first context in the content (if there are
        # others, they'll be included as separate properties)
        target = targets[0]
        # sometimes likes don't have enough content to render anything
        # interesting
        if 'url' in target and set(target) <= {'url', 'objectType'}:
            content += '<a href="%s">%s this.</a>' % (
                target.get('url'), verb.lower())
        else:
            author = target.get('author', target.get('actor', {}))
            # special case for twitter RT's
            if (obj_type == 'share' and 'url' in obj and
                    re.search(r'^https?://(?:www\.|mobile\.)?twitter\.com/',
                              obj.get('url'))):
                content += 'RT <a href="%s">@%s</a> ' % (
                    target.get('url', '#'), author.get('username'))
            else:
                # image looks bad in the simplified rendering
                author = {k: v for k, v in author.items() if k != 'image'}
                content += '%s <a href="%s">%s</a> by %s' % (
                    verb,
                    target.get('url', '#'),
                    target.get('displayName', target.get('title', 'a post')),
                    hcard_to_html(
                        object_to_json(author, default_object_type='person')),
                )
            content += render_content(
                target, include_location=include_location,
                synthesize_content=synthesize_content,
                white_space_pre=white_space_pre)

    if render_attachments and obj.get('verb') == 'share':
        # 'object' may hold a single object or a list of them
        atts = [att
                for shared in util.get_list(obj, 'object')
                for att in shared.get('attachments', [])
                if att.get('objectType') not in ('note', 'article')]
        content += _render_attachments(atts, obj)

    # location
    loc = obj.get('location')
    if include_location and loc:
        content += '\n<p>%s</p>' % hcard_to_html(
            object_to_json(loc, default_object_type='place'),
            parent_props=['p-location'])

    # these are rendered manually in json_to_html()
    for skipped_type in ('like', 'share', 'react', 'person'):
        tags.pop(skipped_type, None)

    # render the rest
    content += tags_to_html(tags.pop('hashtag', []), 'p-category')
    content += tags_to_html(tags.pop('mention', []), 'u-mention', visible=False)
    content += tags_to_html(sum(tags.values(), []), 'tag')

    return content