Exemplo n.º 1
0
def render_content(obj,
                   include_location=True,
                   synthesize_content=True,
                   render_attachments=False):
    """Renders the content of an ActivityStreams object as HTML.

    Includes tags, mentions, and non-note/article attachments. (Note/article
    attachments are converted to mf2 children in object_to_json and then
    rendered in json_to_html.)

    Note that the returned HTML is included in Atom as well as HTML documents,
    so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

    Args:
      obj: decoded JSON ActivityStreams object
      include_location: whether to render location, if provided
      synthesize_content: whether to generate synthetic content if the object
        doesn't have its own, e.g. 'likes this.' or 'shared this.'
      render_attachments: whether to render the object's non-note/article
        attachments and 'article' tags, plus, for shares, the attachments of
        the shared object(s)

    Returns:
      string, rendered HTML
    """
    content = obj.get('content', '')

    # extract tags. preserve order but de-dupe, ie don't include a tag more than
    # once. tags without an id can't be de-duped, so they're always kept.
    seen_ids = set()
    mentions = []
    tags = {}  # maps string objectType to list of tag objects
    for tag in obj.get('tags', []):
        tag_id = tag.get('id')
        if tag_id:
            if tag_id in seen_ids:
                continue
            seen_ids.add(tag_id)

        # only tags that have both a text range and a link target can be
        # linkified inline; everything else is rendered after the content.
        if 'startIndex' in tag and 'length' in tag and 'url' in tag:
            mentions.append(tag)
        else:
            tags.setdefault(source.object_type(tag), []).append(tag)

    # linkify embedded mention tags inside content.
    if mentions:
        mentions.sort(key=lambda t: t['startIndex'])
        last_end = 0
        orig = util.WideUnicode(content)
        content = util.WideUnicode('')
        for tag in mentions:
            start = tag['startIndex']
            end = start + tag['length']
            content = util.WideUnicode(
                '%s%s<a href="%s">%s</a>' %
                (content, orig[last_end:start], tag['url'], orig[start:end]))
            last_end = end

        content += orig[last_end:]

    if not obj.get('content_is_html'):
        # convert newlines to <br>s
        # do this *after* linkifying tags so we don't have to shuffle indices over
        content = content.replace('\n', '<br />\n')

    # linkify embedded links. ignore the "mention" tags that we added ourselves.
    # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
    # uncomment this.
    # if content:
    #   content = util.linkify(content)

    # attachments, e.g. links (aka articles)
    # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
    if render_attachments:
        atts = [
            a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')
        ]
        content += _render_attachments(atts + tags.pop('article', []), obj)

    # generate share/like contexts if the activity does not have content
    # of its own
    obj_type = source.object_type(obj)
    for as_type, verb in (('favorite', 'Favorites'), ('like', 'Likes'),
                          ('share', 'Shared')):
        if (not synthesize_content or obj_type != as_type
                or 'object' not in obj or 'content' in obj):
            continue

        targets = get_list(obj, 'object')
        if not targets:
            continue

        for target in targets:
            # sometimes likes don't have enough content to render anything
            # interesting
            if 'url' in target and set(target) <= {'url', 'objectType'}:
                content += '<a href="%s">%s this.</a>' % (target.get('url'),
                                                          verb.lower())

            else:
                author = target.get('author', target.get('actor', {}))
                # special case for twitter RT's
                if obj_type == 'share' and 'url' in obj and re.search(
                        r'^https?://(?:www\.|mobile\.)?twitter\.com/',
                        obj.get('url')):
                    content += 'RT <a href="%s">@%s</a> ' % (target.get(
                        'url', '#'), author.get('username'))
                else:
                    # image looks bad in the simplified rendering
                    author = {k: v for k, v in author.items() if k != 'image'}
                    content += '%s <a href="%s">%s</a> by %s' % (
                        verb,
                        target.get('url', '#'),
                        target.get('displayName', target.get(
                            'title', 'a post')),
                        hcard_to_html(
                            object_to_json(author,
                                           default_object_type='person')),
                    )
                # the target's own attachments are not rendered here;
                # render_attachments is left at its default for the recursion.
                content += render_content(
                    target,
                    include_location=include_location,
                    synthesize_content=synthesize_content)
            # only include the first context in the content (if there are
            # others, they'll be included as separate properties)
            break
        break

    if render_attachments and obj.get('verb') == 'share':
        # 'object' may be a single object or a list of them, so normalize with
        # get_list instead of assuming a dict.
        atts = [
            att
            for shared in get_list(obj, 'object')
            for att in shared.get('attachments', [])
            if att.get('objectType') not in ('note', 'article')
        ]
        content += _render_attachments(atts, obj)

    # location
    loc = obj.get('location')
    if include_location and loc:
        content += '\n<p>%s</p>' % hcard_to_html(
            object_to_json(loc, default_object_type='place'),
            parent_props=['p-location'])

    # these are rendered manually in json_to_html()
    for rendered_elsewhere in ('like', 'share', 'react', 'person'):
        tags.pop(rendered_elsewhere, None)

    # render the rest
    content += tags_to_html(tags.pop('hashtag', []), 'p-category')
    content += tags_to_html(tags.pop('mention', []), 'u-mention')
    content += tags_to_html(sum(tags.values(), []), 'tag')

    return content
Exemplo n.º 2
0
    def status_to_object(self, status):
        """Converts a status to an object.

        For reblogs, the content and custom emoji are taken from the reblogged
        status and prefixed with a 'Boosted @username: ' link; everything else
        comes from the outer status.

        Args:
          status: dict, a decoded JSON status

        Returns:
          an ActivityStreams object dict, ready to be JSON-encoded. Empty dict
          if the status has no id.
        """
        status_id = status.get('id')
        if not status_id:
            return {}

        obj = {
            'objectType': 'note',
            'id': self.tag_uri(status_id),
            'url': status.get('url'),
            'published': status.get('created_at'),
            'author': self.user_to_actor(status.get('account') or {}),
            'attachments': [],
        }

        reblog = status.get('reblog')
        base_status = reblog if reblog else status

        # media! into attachments.
        for media in status.get('media_attachments', []):
            media_type = media.get('type')
            att = {
                'id': self.tag_uri(media.get('id')),
                'objectType': MEDIA_TYPES.get(media_type),
                'displayName': media.get('description'),
            }
            url = media.get('url')
            if media_type == 'image':
                att['image'] = {'url': url}
            elif media_type in ('gifv', 'video'):
                att.update({
                    'stream': {
                        'url': url
                    },
                    'image': {
                        'url': media.get('preview_url')
                    },
                })
            obj['attachments'].append(att)

        # promote the first attachment's media to the object itself
        if obj['attachments']:
            first = obj['attachments'][0]
            if first['objectType'] == 'video':
                obj['stream'] = first.get('stream')
            else:
                obj['image'] = first.get('image')

        # tags
        obj['tags'] = [{
            'objectType': 'person',
            'id': self.tag_uri(t.get('id')),
            'url': t.get('url'),
            'displayName': t.get('username'),
        } for t in status.get('mentions', [])] + [{
            'objectType': 'hashtag',
            'url': t.get('url'),
            'displayName': t.get('name'),
        } for t in status.get('tags', [])]

        card = status.get('card')
        if card:
            obj['tags'].append({
                'objectType': 'article',
                'url': card.get('url'),
                'displayName': card.get('title'),
                'content': card.get('description'),
                'image': {
                    'url': card.get('image')
                },
            })

        # content: insert images for custom emoji
        # https://docs.joinmastodon.org/api/entities/#emoji
        content = base_status.get('content') or ''
        for emoji in base_status.get('emojis', []):
            shortcode = emoji.get('shortcode')
            url = emoji.get('url')
            if shortcode and url:
                # escape the shortcode so that it's always matched literally
                content = re.sub(
                    r'(^|[^\w]):%s:([^\w]|$)' % re.escape(shortcode),
                    r'\1<img alt="%s" src="%s" style="height: 1em">\2' %
                    (shortcode, url), content)

        # content: add 'Boosted @username: ' prefix if this is a reblog
        # NOTE(review): this guard was reconstructed from a garbled source
        # line; confirm the exact condition against upstream.
        if reblog and reblog.get('content'):
            reblog_account = reblog.get('account') or {}
            content = 'Boosted <a href="%s">@%s</a>: ' % (
                (reblog_account.get('url'),
                 reblog_account.get('username'))) + content

        obj['content'] = util.WideUnicode(content)

        # inReplyTo
        reply_to_id = status.get('in_reply_to_id')
        if reply_to_id:
            obj['inReplyTo'] = [{
                'id':
                self.tag_uri(reply_to_id),
                # Mastodon's in_reply_to_id is str, Pixelfed's is int.
                'url':
                urllib.parse.urljoin(self.instance,
                                     '/web/statuses/' + str(reply_to_id)),
            }]

        # to (ie visibility)
        visibility = status.get('visibility')
        if visibility:
            obj['to'] = [{
                'objectType': 'group',
                'alias': '@' + visibility,
            }]

        return self.postprocess_object(obj)
Exemplo n.º 3
0
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False, render_image=False,
                   white_space_pre=True):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then rendered
  in json_to_html.)

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: boolean, whether to render location, if provided
    synthesize_content: boolean, whether to generate synthetic content if the
      object doesn't have its own, e.g. 'likes this.' or 'shared this.'
    render_attachments: boolean, whether to render attachments, eg links,
      images, audio, and video
    render_image: boolean, whether to render the object's image(s)
    white_space_pre: boolean, whether to wrap in CSS white-space: pre. If False,
      newlines will be converted to <br> tags instead. Background:
      https://indiewebcamp.com/note#Indieweb_whitespace_thinking

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once. tags without an id can't be de-duped, so they're always kept.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for tag in obj.get('tags', []):
    tag_id = tag.get('id')
    if tag_id:
      if tag_id in seen_ids:
        continue
      seen_ids.add(tag_id)

    # only tags with a text range *and* a link target can be linkified inline
    if 'startIndex' in tag and 'length' in tag and 'url' in tag:
      mentions.append(tag)
    else:
      tags.setdefault(source.object_type(tag), []).append(tag)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = util.WideUnicode(content)
    content = util.WideUnicode('')
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
        content, orig[last_end:start], tag['url'], orig[start:end]))
      last_end = end

    content += orig[last_end:]

  # is whitespace in this content meaningful? standard heuristic: if there are
  # no HTML tags in it, and it has a newline, then assume yes.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  # https://github.com/snarfed/granary/issues/80
  if content and not obj.get('content_is_html') and '\n' in content:
    if white_space_pre:
      content = '<div style="white-space: pre">%s</div>' % content
    else:
      content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # the image field. may be multiply valued.
  rendered_urls = set()
  if render_image:
    urls = get_urls(obj, 'image')
    content += _render_attachments([{
      'objectType': 'image',
      'image': {'url': url},
    } for url in urls], obj)
    # remember these so the same image isn't rendered again as an attachment
    rendered_urls = set(urls)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  if render_attachments:
    atts = [a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')
            and get_url(a, 'image') not in rendered_urls]
    content += _render_attachments(atts + tags.pop('article', []), obj)

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in (
      ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= {'url', 'objectType'}:
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())

      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
                r'^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        # render_attachments and render_image are left at their defaults for
        # the recursion, so the target's media isn't rendered here.
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content,
                                  white_space_pre=white_space_pre)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  if render_attachments and obj.get('verb') == 'share':
    # 'object' may be single or multiply valued, so collect attachments from
    # every shared object.
    atts = [att for att in itertools.chain.from_iterable(
              o.get('attachments', []) for o in util.get_list(obj, 'object'))
            if att.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts, obj)

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n<p>%s</p>' % hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for rendered_elsewhere in ('like', 'share', 'react', 'person'):
    tags.pop(rendered_elsewhere, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention', visible=False)
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content