def send_webmentions(handler, activity, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Args:
    handler: RequestHandler
    activity: dict, AS1 activity
    response_props: passed through to the newly created Responses
  """
  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    # NOTE(review): error() presumably raises/aborts the request — TODO confirm,
    # since the code below assumes execution stops here for unsupported verbs.
    error(handler, '%s activities are not supported yet.' % verb)

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  # reply targets come from the activity itself and, if present, its inner object
  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    if not source:
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))
  if verb in ('like', 'share'):
    # likes/shares target the liked/shared object itself
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error(handler, "Couldn't find original post URL")
  if not targets:
    error(handler, "Couldn't find target URLs (inReplyTo or object)")

  # send webmentions and store Responses
  errors = []
  for target in targets:
    if not target:
      continue

    # store a Response record for this source/target pair before sending
    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()

    # for likes/shares, send the webmention from our proxy page (which renders
    # the like/share), not from the original source URL
    wm_source = response.proxy_url() if verb in ('like', 'share') else source
    logging.info('Sending webmention from %s to %s', wm_source, target)

    wm = send.WebmentionSend(wm_source, target)
    if wm.send(headers=HEADERS):
      logging.info('Success: %s', wm.response)
      response.status = 'complete'
    else:
      logging.warning('Failed: %s', wm.error)
      errors.append(wm.error)
      response.status = 'error'
    # persist final status (second put; first created the record)
    response.put()

  if errors:
    msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
    error(handler, msg, status=errors[0].get('http_status'))
def append_in_reply_to(before, after):
  """Merges before's inReplyTo values into after, modifying after in place.

  Operates on the inner 'object' of each argument when present, otherwise on
  the argument itself. Duplicate URLs are collapsed.

  Args:
    before, after: dicts, ActivityStreams activities or objects
  """
  src = before.get('object', before)
  dest = after.get('object', after)
  if not (src and dest):
    return

  combined = util.get_list(dest, 'inReplyTo') + util.get_list(src, 'inReplyTo')
  dest['inReplyTo'] = util.dedupe_urls(combined)
def base_object(self, obj):
  """Returns the 'base' Mastodon object that an object operates on.

  If the object is a reply, boost, or favorite of a Mastodon post - on any
  instance - this returns that post object. The id in the returned object is
  the id of that remote post *on the local instance*. (As a Mastodon style id,
  ie an integer in a string, *not* a tag URI.)

  Uses Mastodon's search API on the local instance to determine whether a URL
  is a Mastodon post, and if it is, to find or generate an id for it on the
  local instance.

  Discovered via https://mastodon.social/@jkreeftmeijer/101245063526942536

  Args:
    obj: ActivityStreams object

  Returns:
    dict, minimal ActivityStreams object. Usually has at least id; may
    also have url, author, etc.
  """
  for field in ('inReplyTo', 'object', 'target'):
    for base in util.get_list(obj, field):
      url = util.get_url(base)
      # fix: guard against a missing URL; the original called
      # url.startswith() unconditionally, which raises on None
      if not url:
        continue

      # first, check if it's on local instance
      if url.startswith(self.instance):
        return self._postprocess_base_object(base)

      # nope; try mastodon's search API
      try:
        results = self._get(API_SEARCH, params={'q': url, 'resolve': True})
      except requests.RequestException as e:
        # fix: the original log message ended with a dangling ':' and
        # never included the exception; log it so failures are debuggable
        logging.info("%s URL %s doesn't look like Mastodon: %s", field, url, e)
        continue

      for status in results.get('statuses', []):
        if url in (status.get('url'), status.get('uri')):
          # found it!
          base = self.status_to_object(status)
          base['id'] = status['id']
          return self._postprocess_base_object(base)

  return {}
def base_object(self, obj):
  """Returns the 'base' silo object that an object operates on.

  For example, if the object is a comment, this returns the post that it's a
  comment on. If it's an RSVP, this returns the event. The id in the returned
  object is silo-specific, ie not a tag URI.

  Subclasses may override this.

  Args:
    obj: ActivityStreams object

  Returns:
    dict, minimal ActivityStreams object. Usually has at least id; may
    also have url, author, etc.
  """
  # gather every candidate referent. in-reply-tos first, then objects (for
  # likes and reposts). technically the AS 'object' field should be singular,
  # but microformats2.json_to_object() sometimes returns list values, e.g.
  # likes or reposts of multiple objects.
  candidates = [cand for field in ('inReplyTo', 'object', 'target')
                for cand in util.get_list(obj, field)]

  # find the first candidate whose domain matches this silo
  match = None
  for cand in candidates:
    parsed_id = util.parse_tag_uri(cand.get('id', ''))
    domain = (parsed_id[0] if parsed_id
              else util.domain_from_link(cand.get('url', '')))
    if domain == self.DOMAIN:
      match = cand
      break

  if match is None:
    return {}

  # work on a copy so we don't mutate the caller's object
  base = copy.deepcopy(match)
  base_id = base.get('id')
  base_url = base.get('url')

  # convert a tag URI id to the silo-specific id, or derive one from the URL
  if base_id:
    parsed = util.parse_tag_uri(base_id)
    if parsed:
      base['id'] = parsed[1]
  elif base_url:
    base['id'] = self.base_id(base_url)

  return base
def to_as1(obj, use_type=True):
  """Converts an ActivityStreams 2 activity or object to ActivityStreams 1.

  Args:
    obj: dict, AS2 activity or object
    use_type: boolean, whether to include objectType and verb

  Returns:
    dict, AS1 activity or object
  """
  if not obj:
    return {}
  elif isinstance(obj, str):
    # bare URL string
    return {'url': obj}
  elif not isinstance(obj, dict):
    raise ValueError('Expected dict, got %r' % obj)

  # deep copy so the pops below don't mutate the caller's dict
  obj = copy.deepcopy(obj)

  obj.pop('@context', None)
  type = obj.pop('type', None)
  if use_type:
    obj['objectType'] = TYPE_TO_OBJECT_TYPE.get(type)
    obj['verb'] = TYPE_TO_VERB.get(type)
    # notes/articles with inReplyTo are comments in AS1
    if obj.get('inReplyTo') and obj['objectType'] in ('note', 'article'):
      obj['objectType'] = 'comment'
    elif obj['verb'] and not obj['objectType']:
      obj['objectType'] = 'activity'

  def url_or_as1(val):
    # string values are treated as bare URLs
    return {'url': val} if isinstance(val, str) else to_as1(val)

  def all_to_as1(field):
    # pops the field, so order relative to obj.update() below matters
    return [to_as1(elem) for elem in util.pop_list(obj, field)]

  images = []
  # icon first since e.g. Mastodon uses icon for profile picture,
  # image for featured photo.
  for as2_img in util.pop_list(obj, 'icon') + util.pop_list(obj, 'image'):
    as1_img = to_as1(as2_img, use_type=False)
    if as1_img not in images:
      images.append(as1_img)

  # inner objects
  inner_objs = all_to_as1('object')
  actor = to_as1(obj.get('actor', {}))

  # Create activities: propagate the actor as the inner objects' author
  if type == 'Create':
    for inner_obj in inner_objs:
      inner_obj.setdefault('author', {}).update(actor)

  if len(inner_objs) == 1:
    inner_objs = inner_objs[0]

  obj.update({
    'displayName': obj.pop('name', None),
    'username': obj.pop('preferredUsername', None),
    'actor': actor,
    'attachments': all_to_as1('attachment'),
    'image': images,
    'inReplyTo': [url_or_as1(orig) for orig in util.get_list(obj, 'inReplyTo')],
    'location': url_or_as1(obj.get('location')),
    'object': inner_objs,
    'tags': all_to_as1('tag'),
  })

  # media
  if type in ('Audio', 'Video'):
    duration = util.parse_iso8601_duration(obj.pop('duration', None))
    if duration:
      duration = duration.total_seconds()

    obj['stream'] = {
      'url': obj.pop('url', None),
      # file size in bytes. nonstandard, not in AS1 proper
      'size': obj.pop('size', None),
      'duration': duration or None,
    }
  elif type == 'Mention':
    obj['url'] = obj.pop('href', None)

  # object author. AS1 only allows a single author.
  attrib = util.pop_list(obj, 'attributedTo')
  if attrib:
    if len(attrib) > 1:
      logging.warning('ActivityStreams 1 only supports single author; '
                      'dropping extra attributedTo values: %s' % attrib[1:])
    obj.setdefault('author', {}).update(to_as1(attrib[0]))

  return util.trim_nulls(obj)
def to_as1(obj, use_type=True):
  """Converts an ActivityStreams 2 activity or object to ActivityStreams 1.

  Args:
    obj: dict, AS2 activity or object
    use_type: boolean, whether to include objectType and verb

  Returns:
    dict, AS1 activity or object
  """
  if not obj:
    return {}
  elif isinstance(obj, basestring):
    # bare URL string
    return {'url': obj}
  elif not isinstance(obj, dict):
    raise ValueError('Expected dict, got %r' % obj)

  # deep copy so the pops below don't mutate the caller's dict
  obj = copy.deepcopy(obj)
  obj.pop('@context', None)

  type = obj.pop('type', None)
  if use_type:
    obj['objectType'] = TYPE_TO_OBJECT_TYPE.get(type)
    obj['verb'] = TYPE_TO_VERB.get(type)
    # notes/articles with inReplyTo are comments in AS1
    if obj.get('inReplyTo') and obj['objectType'] in ('note', 'article'):
      obj['objectType'] = 'comment'
    elif obj['verb'] and not obj['objectType']:
      obj['objectType'] = 'activity'

  def url_or_as1(val):
    # string values are treated as bare URLs
    return {'url': val} if isinstance(val, basestring) else to_as1(val)

  def all_to_as1(field):
    # pops the field, so order relative to obj.update() below matters
    return [to_as1(elem) for elem in util.pop_list(obj, field)]

  images = []
  # icon first since e.g. Mastodon uses icon for profile picture,
  # image for featured photo.
  for as2_img in util.pop_list(obj, 'icon') + util.pop_list(obj, 'image'):
    as1_img = to_as1(as2_img, use_type=False)
    if as1_img not in images:
      images.append(as1_img)

  obj.update({
    'displayName': obj.pop('name', None),
    'actor': to_as1(obj.get('actor')),
    'attachments': all_to_as1('attachment'),
    'image': images,
    'inReplyTo': [url_or_as1(orig) for orig in util.get_list(obj, 'inReplyTo')],
    'location': url_or_as1(obj.get('location')),
    'object': to_as1(obj.get('object')),
    'tags': all_to_as1('tag'),
  })

  # media: move the top-level url into an AS1 stream object
  if type in ('Audio', 'Video'):
    obj['stream'] = {'url': obj.pop('url', None)}

  # object author. AS1 only allows a single author.
  attrib = util.pop_list(obj, 'attributedTo')
  if attrib:
    if len(attrib) > 1:
      logging.warning('ActivityStreams 1 only supports single author; '
                      'dropping extra attributedTo values: %s' % attrib[1:])
    obj['author'] = to_as1(attrib[0])

  return util.trim_nulls(obj)
def _create(self, obj, preview=None, include_link=False, ignore_formatting=False):
  """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

  https://dev.twitter.com/docs/api/1.1/post/statuses/update
  https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
  https://dev.twitter.com/docs/api/1.1/post/favorites/create

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: boolean
    ignore_formatting: boolean

  Returns:
    a CreationResult

    If preview is True, the content will be a unicode string HTML snippet.
    If False, it will be a dict with 'id' and 'url' keys for the newly
    created Twitter object.
  """
  assert preview in (False, True)
  type = obj.get('objectType')
  verb = obj.get('verb')

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  is_reply = type == 'comment' or 'inReplyTo' in obj
  image_urls = [image.get('url') for image in util.get_list(obj, 'image')]
  video_url = util.get_first(obj, 'stream', {}).get('url')
  has_media = (image_urls or video_url) and (type in ('note', 'article') or is_reply)
  lat = obj.get('location', {}).get('latitude')
  lng = obj.get('location', {}).get('longitude')

  # prefer displayName over content for articles.
  # fix: the original recomputed type = obj.get('objectType') and called
  # self.base_object(obj) a second time here; both values are unchanged since
  # they were computed above, so reuse them.
  prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                    or obj.get('inReplyTo')))
  content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                     prefer_name=not prefer_content,
                                     strip_first_video_tag=bool(video_url))
  if not content:
    if type == 'activity':
      content = verb
    elif has_media:
      content = ''
    else:
      return source.creation_result(
        abort=False,  # keep looking for things to publish,
        error_plain='No content text found.',
        error_html='No content text found.')

  if is_reply and base_url:
    # extract username from in-reply-to URL so we can @-mention it, if it's
    # not already @-mentioned, since Twitter requires that to make our new
    # tweet a reply.
    # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
    # TODO: this doesn't handle an in-reply-to username that's a prefix of
    # another username already mentioned, e.g. in reply to @foo when content
    # includes @foobar.
    parsed = urlparse.urlparse(base_url)
    parts = parsed.path.split('/')
    if len(parts) < 2 or not parts[1]:
      raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
    mention = '@' + parts[1]
    if mention.lower() not in content.lower():
      content = mention + ' ' + content

    # the embed URL in the preview can't start with mobile. or www., so just
    # hard-code it to twitter.com. index #1 is netloc.
    parsed = list(parsed)
    parsed[1] = self.DOMAIN
    base_url = urlparse.urlunparse(parsed)

  # need a base_url with the tweet id for the embed HTML below. do this
  # *after* checking the real base_url for in-reply-to author username.
  if base_id and not base_url:
    base_url = 'https://twitter.com/-/statuses/' + base_id

  if is_reply and not base_url:
    return source.creation_result(
      abort=True,
      error_plain='Could not find a tweet to reply to.',
      error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
      'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
      'link a Twitter URL or to an original post that publishes a '
      '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

  # truncate and ellipsize content if it's over the character
  # count. URLs will be t.co-wrapped, so include that when counting.
  include_url = obj.get('url') if include_link else None
  content = self._truncate(content, include_url, has_media)

  # linkify defaults to Twitter's link shortening behavior
  preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

  if type == 'activity' and verb == 'like':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to like.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
        'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      return source.creation_result(
        description='<span class="verb">favorite</span> <a href="%s">'
                    'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
    else:
      data = urllib.urlencode({'id': base_id})
      self.urlopen(API_POST_FAVORITE, data=data)
      resp = {'type': 'like'}

  elif type == 'activity' and verb == 'share':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to retweet.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
        'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      return source.creation_result(
        description='<span class="verb">retweet</span> <a href="%s">'
                    'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
    else:
      data = urllib.urlencode({'id': base_id})
      resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
      resp['type'] = 'repost'

  elif type in ('note', 'article') or is_reply:  # a tweet
    content = unicode(content).encode('utf-8')
    data = {'status': content}

    if is_reply:
      description = \
        '<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:\n%s' % (
          base_url, self.embed_post(base_obj))
      data['in_reply_to_status_id'] = base_id
    else:
      description = '<span class="verb">tweet</span>:'

    if video_url:
      preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                          'this video</a></video>' % (video_url, video_url))
      if not preview:
        ret = self.upload_video(video_url)
        if isinstance(ret, source.CreationResult):
          return ret
        data['media_ids'] = ret

    elif image_urls:
      num_urls = len(image_urls)
      if num_urls > MAX_MEDIA:
        image_urls = image_urls[:MAX_MEDIA]
        logging.warning('Found %d photos! Only using the first %d: %r',
                        num_urls, MAX_MEDIA, image_urls)
      preview_content += '<br /><br />' + ' '.join(
        '<img src="%s" />' % url for url in image_urls)
      if not preview:
        data['media_ids'] = ','.join(self.upload_images(image_urls))

    if lat and lng:
      preview_content += (
        '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
        '%s, %s</a></div>' % (lat, lng, lat, lng))
      data['lat'] = lat
      data['long'] = lng

    if preview:
      return source.creation_result(content=preview_content,
                                    description=description)
    else:
      resp = self.urlopen(API_POST_TWEET, data=urllib.urlencode(data))
      resp['type'] = 'comment' if is_reply else 'post'

  elif (verb and verb.startswith('rsvp-')) or verb == 'invite':
    return source.creation_result(
      abort=True,
      error_plain='Cannot publish RSVPs to Twitter.',
      error_html='This looks like an <a href="http://indiewebcamp.com/rsvp">RSVP</a>. '
      'Publishing events or RSVPs to Twitter is not supported.')

  else:
    return source.creation_result(
      abort=False,
      error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
      error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

  id_str = resp.get('id_str')
  if id_str:
    resp.update({'id': id_str, 'url': self.tweet_url(resp)})
  elif 'url' not in resp:
    resp['url'] = base_url

  return source.creation_result(resp)
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    # RSVPs and reactions reply to the event/post they target
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or [entry_class]
             if isinstance(entry_class, basestring)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': dedupe_urls(get_urls(attachments, 'image', 'image') +
                           get_urls(primary, 'image')),
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
        'value': xml.sax.saxutils.unescape(primary.get('content', '')),
        'html': render_content(primary, include_location=False,
                               synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [
        object_to_json(author, trim_nulls=False, default_object_type='person')
      ],
      'location': [
        object_to_json(primary.get('location', {}), trim_nulls=False,
                       default_object_type='place')
      ],
      'comment': [
        object_to_json(c, trim_nulls=False, entry_class='h-cite')
        for c in obj.get('replies', {}).get('items', [])
      ],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': [
      object_to_json(a, trim_nulls=False,
                     entry_class=['u-quotation-of', 'h-cite'])
      for a in attachments['note'] + attachments['article']
    ]
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    # e.g. 'rsvp-yes' -> 'yes'
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in ('favorite', 'like'), ('like', 'like'), ('share', 'repost'):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs
      ]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type
      ]

  # latitude & longitude. prefer an ISO 6709 'position' string, then the
  # separate latitude/longitude fields.
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False, render_image=False,
                   white_space_pre=True):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then
  rendered in json_to_html.)

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: boolean, whether to render location, if provided
    synthesize_content: boolean, whether to generate synthetic content if the
      object doesn't have its own, e.g. 'likes this.' or 'shared this.'
    render_attachments: boolean, whether to render attachments, eg links,
      images, audio, and video
    render_image: boolean, whether to render the object's image(s)
    white_space_pre: boolean, whether to wrap in CSS white-space: pre. If
      False, newlines will be converted to <br> tags instead. Background:
      https://indiewebcamp.com/note#Indieweb_whitespace_thinking

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    id = t.get('id')
    if id and id in seen_ids:
      continue
    seen_ids.add(id)
    # tags with an index range into the content are inline mentions
    if 'startIndex' in t and 'length' in t and 'url' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = util.WideUnicode(content)
    content = util.WideUnicode('')
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
        content, orig[last_end:start], tag['url'], orig[start:end]))
      last_end = end
    content += orig[last_end:]

  # is whitespace in this content meaningful? standard heuristic: if there are
  # no HTML tags in it, and it has a newline, then assume yes.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  # https://github.com/snarfed/granary/issues/80
  if content and not obj.get('content_is_html') and '\n' in content:
    if white_space_pre:
      content = '<div style="white-space: pre">%s</div>' % content
    else:
      content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # the image field. may be multiply valued.
  rendered_urls = set()
  if render_image:
    urls = get_urls(obj, 'image')
    content += _render_attachments([{
      'objectType': 'image',
      'image': {'url': url},
    } for url in urls], obj)
    rendered_urls = set(urls)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  if render_attachments:
    # skip note/article attachments (rendered as mf2 children elsewhere) and
    # images we already rendered above
    atts = [a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')
            and get_url(a, 'image') not in rendered_urls]
    content += _render_attachments(atts + tags.pop('article', []), obj)

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in (
      ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
    if (not synthesize_content or obj_type != as_type or 'object' not in obj
        or 'content' in obj):
      continue

    targets = get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())

      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
                '^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content,
                                  white_space_pre=white_space_pre)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  # render attachments of the shared object(s) for shares/reposts
  if render_attachments and obj.get('verb') == 'share':
    atts = [att for att in itertools.chain.from_iterable(
              o.get('attachments', []) for o in util.get_list(obj, 'object'))
            if att.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts, obj)

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n<p>%s</p>' % hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for type in 'like', 'share', 'react', 'person':
    tags.pop(type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention', visible=False)
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

  https://dev.twitter.com/docs/api/1.1/post/statuses/update
  https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
  https://dev.twitter.com/docs/api/1.1/post/favorites/create

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: string
    ignore_formatting: boolean

  Returns:
    a CreationResult

    If preview is True, the content will be a unicode string HTML snippet.
    If False, it will be a dict with 'id' and 'url' keys for the newly
    created Twitter object.
  """
  assert preview in (False, True)
  type = obj.get('objectType')
  verb = obj.get('verb')

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  is_reply = type == 'comment' or 'inReplyTo' in obj
  image_urls = [image.get('url') for image in util.get_list(obj, 'image')]
  video_url = util.get_first(obj, 'stream', {}).get('url')
  has_media = (image_urls or video_url) and (type in ('note', 'article') or is_reply)
  lat = obj.get('location', {}).get('latitude')
  lng = obj.get('location', {}).get('longitude')

  # prefer displayName over content for articles
  # NOTE(review): type and base_url are recomputed here even though they were
  # just computed above (including a second self.base_object() call); looks
  # redundant — candidate for cleanup.
  type = obj.get('objectType')
  base_url = self.base_object(obj).get('url')
  prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                    or obj.get('inReplyTo')))
  content = self._content_for_create(
    obj, ignore_formatting=ignore_formatting, prefer_name=not prefer_content,
    strip_first_video_tag=bool(video_url))

  if not content:
    if type == 'activity':
      content = verb
    elif has_media:
      content = ''
    else:
      return source.creation_result(
        abort=False,  # keep looking for things to publish,
        error_plain='No content text found.',
        error_html='No content text found.')

  if is_reply and base_url:
    # Twitter *used* to require replies to include an @-mention of the
    # original tweet's author
    # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
    # ...but now we use the auto_populate_reply_metadata query param instead:
    # https://dev.twitter.com/overview/api/upcoming-changes-to-tweets

    # the embed URL in the preview can't start with mobile. or www., so just
    # hard-code it to twitter.com. index #1 is netloc.
    parsed = urlparse.urlparse(base_url)
    parts = parsed.path.split('/')
    if len(parts) < 2 or not parts[1]:
      raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
    # strip a leading @-mention of the in-reply-to author, if any, since
    # auto_populate_reply_metadata adds it automatically
    reply_to_prefix = '@%s ' % parts[1].lower()
    if content.lower().startswith(reply_to_prefix):
      content = content[len(reply_to_prefix):]

    parsed = list(parsed)
    parsed[1] = self.DOMAIN
    base_url = urlparse.urlunparse(parsed)

  # need a base_url with the tweet id for the embed HTML below. do this
  # *after* checking the real base_url for in-reply-to author username.
  if base_id and not base_url:
    base_url = 'https://twitter.com/-/statuses/' + base_id

  if is_reply and not base_url:
    return source.creation_result(
      abort=True,
      error_plain='Could not find a tweet to reply to.',
      error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
      'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
      'link a Twitter URL or to an original post that publishes a '
      '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

  # truncate and ellipsize content if it's over the character
  # count. URLs will be t.co-wrapped, so include that when counting.
  content = self._truncate(
    content, obj.get('url'), include_link, type)

  # linkify defaults to Twitter's link shortening behavior
  preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

  if type == 'activity' and verb == 'like':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to like.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
        'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      return source.creation_result(
        description='<span class="verb">favorite</span> <a href="%s">'
                    'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
    else:
      data = urllib.urlencode({'id': base_id})
      self.urlopen(API_POST_FAVORITE, data=data)
      resp = {'type': 'like'}

  elif type == 'activity' and verb == 'share':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to retweet.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
        'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      return source.creation_result(
        description='<span class="verb">retweet</span> <a href="%s">'
                    'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
    else:
      data = urllib.urlencode({'id': base_id})
      resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
      resp['type'] = 'repost'

  elif type in ('note', 'article') or is_reply:  # a tweet
    content = unicode(content).encode('utf-8')
    data = {'status': content}

    if is_reply:
      description = \
        '<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:\n%s' % (
          base_url, self.embed_post(base_obj))
      data.update({
        'in_reply_to_status_id': base_id,
        'auto_populate_reply_metadata': 'true',
      })
    else:
      description = '<span class="verb">tweet</span>:'

    if video_url:
      preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                          'this video</a></video>' % (video_url, video_url))
      if not preview:
        ret = self.upload_video(video_url)
        if isinstance(ret, source.CreationResult):
          # upload failed; propagate the error result
          return ret
        data['media_ids'] = ret

    elif image_urls:
      num_urls = len(image_urls)
      if num_urls > MAX_MEDIA:
        image_urls = image_urls[:MAX_MEDIA]
        logging.warning('Found %d photos! Only using the first %d: %r',
                        num_urls, MAX_MEDIA, image_urls)
      preview_content += '<br /><br />' + ' '.join(
        '<img src="%s" />' % url for url in image_urls)
      if not preview:
        ret = self.upload_images(image_urls)
        if isinstance(ret, source.CreationResult):
          # upload failed; propagate the error result
          return ret
        data['media_ids'] = ','.join(ret)

    if lat and lng:
      preview_content += (
        '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
        '%s, %s</a></div>' % (lat, lng, lat, lng))
      data['lat'] = lat
      data['long'] = lng

    if preview:
      return source.creation_result(content=preview_content,
                                    description=description)
    else:
      resp = self.urlopen(API_POST_TWEET, data=urllib.urlencode(data))
      resp['type'] = 'comment' if is_reply else 'post'

  elif (verb and verb.startswith('rsvp-')) or verb == 'invite':
    return source.creation_result(
      abort=True,
      error_plain='Cannot publish RSVPs to Twitter.',
      error_html='This looks like an <a href="http://indiewebcamp.com/rsvp">RSVP</a>. '
      'Publishing events or RSVPs to Twitter is not supported.')

  else:
    return source.creation_result(
      abort=False,
      error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
      error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

  id_str = resp.get('id_str')
  if id_str:
    resp.update({'id': id_str, 'url': self.tweet_url(resp)})
  elif 'url' not in resp:
    resp['url'] = base_url

  return source.creation_result(resp)
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  # replies may live on the activity or on its wrapping context
  in_reply_tos = obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    # RSVPs and reactions reply to the event/post they target
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
    # NOTE(review): due to operator precedence this parses as
    # `(AS_TO_MF2_TYPE.get(obj_type) or [entry_class]) if isinstance(...)
    # else list(entry_class)`, i.e. a non-string entry_class always wins over
    # the AS_TO_MF2_TYPE mapping. The 'children' construction below passes
    # list-valued entry_class and appears to depend on that — confirm before
    # "fixing" the grouping.
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, basestring)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
        'value': xml.sax.saxutils.unescape(primary.get('content', '')),
        'html': render_content(primary, include_location=False,
                               synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      # author and location are themselves converted recursively, untrimmed
      # so that their structure survives until the final trim_nulls below
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      # (attachments with startIndex are inline links, not standalone quotes,
      # so they are excluded.)
      [object_to_json(a, trim_nulls=False, entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # photos, including alt text. dedupe by URL while preserving order.
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name} if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    # tag-of posts put the tag values in the object field
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions. (note: the loop variable shadows the builtin
  # `type`; kept as-is.)
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude: prefer an ISO 6709 'position' string, then the
  # explicit latitude/longitude fields
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def original_post_discovery(activity, domains=None, cache=None,
                            include_redirect_sources=True, **kwargs):
  """Discovers original post links.

  This is a variation on http://indiewebcamp.com/original-post-discovery . It
  differs in that it finds multiple candidate links instead of one, and it
  doesn't bother looking for MF2 (etc) markup because the silos don't let you
  input it. More background:
  https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

  Original post candidates come from the upstreamDuplicates, attachments, and
  tags fields, as well as links and permashortlinks/permashortcitations in the
  text content.

  Args:
    activity: activity dict
    domains: optional sequence of domains. If provided, only links to these
      domains will be considered original and stored in upstreamDuplicates.
      (Permashortcitations are exempt.)
    include_redirect_sources: boolean, whether to include URLs that redirect as
      well as their final destination URLs
    cache: deprecated, unused
    kwargs: passed to requests.head() when following redirects

  Returns:
    ([string original post URLs], [string mention URLs]) tuple
  """
  obj = activity.get('object') or activity
  content = obj.get('content', '').strip()

  # gather candidate URLs from linkable attachments/tags, links in the text,
  # upstreamDuplicates, and explicit targetUrls
  linkable_types = ('article', 'mention', 'note', None)
  candidates = [att.get('url')
                for att in obj.get('attachments', []) + obj.get('tags', [])
                if att.get('objectType') in linkable_types]
  candidates.extend(util.extract_links(content))
  candidates.extend(obj.get('upstreamDuplicates', []))
  candidates.extend(util.get_list(obj, 'targetUrl'))

  # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
  # references to canonical copies of a given (usually syndicated) post, of
  # the form (DOMAIN PATH). We consider them an explicit original post link.
  candidates.extend(
    psc.expand(r'http://\1/\2')
    for psc in Source._PERMASHORTCITATION_RE.finditer(content))

  # clean and dedupe.
  # heuristic: ellipsized URLs are probably incomplete, so omit them.
  candidates = set(util.dedupe_urls(
    util.clean_url(u) for u in candidates
    if u and not u.endswith('...') and not u.endswith('…')))

  # follow redirects; remember which final URL each redirecting URL led to
  redirects = {}  # maps final URL to the original (redirecting) URL
  for u in candidates:
    resolved = util.follow_redirects(u, **kwargs)
    mime = resolved.headers.get('content-type', '')
    if resolved.url != u and mime.startswith('text/html'):
      redirects[resolved.url] = u

  candidates |= set(redirects.keys())

  # classify each URL as an original post link or a mention, by domain
  originals = set()
  mentions = set()
  redirect_sources = set(redirects.values())
  for u in util.dedupe_urls(candidates):
    if u in redirect_sources:
      # this is a redirected original URL. postpone and handle it when we hit
      # its final URL so that we know the final domain.
      continue
    is_original = (not domains or
                   util.domain_or_parent_in(util.domain_from_link(u), domains))
    bucket = originals if is_original else mentions
    bucket.add(u)
    came_from = redirects.get(u)
    if came_from and include_redirect_sources:
      bucket.add(came_from)

  logging.info('Original post discovery found original posts %s, mentions %s',
               originals, mentions)
  return originals, mentions
def _prepare_activity(a, reader=True):
  """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place: adds 'rendered_content', 'title', and
  'rendered_children' fields, normalizes actor/attachment image and stream
  fields to single values, and coerces published/updated to strict RFC 3339.

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
  act_type = source.object_type(a)
  obj = util.get_first(a, 'object', default={})
  # for bare posts the object carries the content; for other verbs the
  # activity itself does
  primary = obj if (not act_type or act_type == 'post') else a

  # Render content as HTML; escape &s
  obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
    primary, include_location=reader, render_attachments=True))

  # Make sure every activity has the title field, since Atom <entry> requires
  # the title element.
  if not a.get('title'):
    a['title'] = util.ellipsize(_encode_ampersands(
      a.get('displayName') or a.get('content') or obj.get('title') or
      obj.get('displayName') or obj.get('content') or 'Untitled'))

  # strip HTML tags. the Atom spec says title is plain text:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text(''))

  children = []
  image_urls_seen = set()
  image_atts = []

  # normalize actor images to a single value
  for elem in a, obj:
    actor = elem.get('actor')
    if actor:
      actor['image'] = util.get_first(actor, 'image')

  # normalize attachments, render attached notes/articles
  attachments = a.get('attachments') or obj.get('attachments') or []
  for att in attachments:
    att['stream'] = util.get_first(att, 'stream')
    type = att.get('objectType')

    if type == 'image':
      # image attachments get rendered separately below
      att['image'] = util.get_first(att, 'image')
      image_atts.append(att['image'])
      continue

    # remember image URLs that render_content will already have emitted
    image_urls_seen |= set(util.get_urls(att, 'image'))
    if type in ('note', 'article'):
      html = microformats2.render_content(att, include_location=reader,
                                          render_attachments=True)
      author = att.get('author')
      if author:
        name = microformats2.maybe_linked_name(
          microformats2.object_to_json(author).get('properties') or {})
        html = '%s: %s' % (name.strip(), html)
      children.append(html)

  # render image(s) that we haven't already seen
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue
    url = image.get('url')
    parsed = urllib.parse.urlparse(url)
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    # matches an <img src> for this URL whether it's absolute,
    # scheme-relative, or path-only, to avoid rendering a duplicate
    img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
                            (re.escape(parsed.netloc), re.escape(rest)))
    if (url and url not in image_urls_seen and
        not img_src_re.search(obj['rendered_content'])):
      children.append(microformats2.img(url))
      image_urls_seen.add(url)

  obj['rendered_children'] = [_encode_ampersands(child) for child in children]

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in 'published', 'updated':
    val = obj.get(prop)
    if val:
      obj[prop] = util.maybe_iso8601_to_rfc3339(val)
      # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
      # time zone unaware. They must have either an offset or the Z suffix.
      # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
      if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
        obj[prop] += 'Z'
def _create(self, obj, preview, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates or previews creating for the previous two methods.

  Handles photo/video uploads, comments, favorites, and tags on Flickr.

  https://www.flickr.com/services/api/upload.api.html
  https://www.flickr.com/services/api/flickr.photos.comments.addComment.html
  https://www.flickr.com/services/api/flickr.favorites.add.html
  https://www.flickr.com/services/api/flickr.photos.people.add.html

  Args:
    obj: ActivityStreams object
    preview: boolean, if True return a preview instead of publishing
    include_link: string
    ignore_formatting: boolean

  Return:
    a CreationResult
  """
  # photo, comment, or like
  type = source.object_type(obj)
  logging.debug('publishing object type %s to Flickr', type)
  link_text = '(Originally published at: %s)' % obj.get('url')

  image_url = util.get_first(obj, 'image', {}).get('url')
  video_url = util.get_first(obj, 'stream', {}).get('url')
  content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                     strip_first_video_tag=bool(video_url))

  # photo/video upload branch: notes and articles with media attached
  if (video_url or image_url) and type in ('note', 'article'):
    name = obj.get('displayName')
    people = self._get_person_tags(obj)
    hashtags = [t.get('displayName') for t in obj.get('tags', [])
                if t.get('objectType') == 'hashtag' and t.get('displayName')]
    lat = obj.get('location', {}).get('latitude')
    lng = obj.get('location', {}).get('longitude')

    # if name does not represent an explicit title, then we'll just
    # use it as the title and wipe out the content
    if name and content and not mf2util.is_name_a_title(name, content):
      name = content
      content = None

    # add original post link
    if include_link == source.INCLUDE_LINK:
      content = ((content + '\n\n') if content else '') + link_text

    if preview:
      # assemble an HTML preview of what would be posted
      preview_content = ''
      if name:
        preview_content += '<h4>%s</h4>' % name
      if content:
        preview_content += '<div>%s</div>' % content
      if hashtags:
        preview_content += '<div> %s</div>' % ' '.join('#' + t for t in hashtags)
      if people:
        preview_content += '<div> with %s</div>' % ', '.join(
          ('<a href="%s">%s</a>' % (
            p.get('url'), p.get('displayName') or 'User %s' % p.get('id'))
           for p in people))
      if lat and lng:
        preview_content += '<div> at <a href="https://maps.google.com/maps?q=%s,%s">%s, %s</a></div>' % (lat, lng, lat, lng)
      if video_url:
        preview_content += ('<video controls src="%s"><a href="%s">this video'
                            '</a></video>' % (video_url, video_url))
      else:
        preview_content += '<img src="%s" />' % image_url
      return source.creation_result(content=preview_content, description='post')

    # build multipart upload params; Flickr expects a list of pairs
    params = []
    if name:
      params.append(('title', name))
    if content:
      params.append(('description', content.encode('utf-8')))
    if hashtags:
      # tags with spaces must be quoted
      params.append(('tags', ','.join(('"%s"' % t if ' ' in t else t)
                                      for t in hashtags)))

    file = util.urlopen(video_url or image_url)
    try:
      resp = self.upload(params, file)
    except requests.exceptions.ConnectionError as e:
      if str(e.args[0]).startswith('Request exceeds 10 MiB limit'):
        msg = 'Sorry, photos and videos must be under 10MB.'
        return source.creation_result(error_plain=msg, error_html=msg)
      else:
        raise

    photo_id = resp.get('id')
    resp.update({
      'type': 'post',
      'url': self.photo_url(self.path_alias() or self.user_id(), photo_id),
    })
    if video_url:
      resp['granary_message'] = \
        "Note that videos take time to process before they're visible."

    # add person tags
    for person_id in sorted(p.get('id') for p in people):
      self.call_api_method('flickr.photos.people.add', {
        'photo_id': photo_id,
        'user_id': person_id,
      })

    # add location
    if lat and lng:
      self.call_api_method('flickr.photos.geo.setLocation', {
        'photo_id': photo_id,
        'lat': lat,
        'lon': lng,
      })

    return source.creation_result(resp)

  # all remaining branches act on an existing Flickr photo
  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  # maybe a tag-of post: add tags to an existing photo?
  if type == 'tag':
    if not base_id:
      # NOTE(review): the indiewebcamp.com/https://indieweb.org/... URL below
      # looks malformed (two schemes), but it's a runtime string — confirm
      # against upstream before changing.
      return source.creation_result(
        abort=True,
        error_plain='Could not find a photo to tag.',
        error_html='Could not find a photo to <a href="http://indiewebcamp.com/tag-reply">tag</a>. '
        'Check that your post has a <a href="http://indiewebcamp.com/https://indieweb.org/tag-reply#How_to_post_a_tag-reply">tag-of</a> '
        'link to a Flickr photo or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')

    tags = sorted(set(util.trim_nulls(t.get('displayName', '').strip()
                                      for t in util.get_list(obj, 'object'))))
    if not tags:
      return source.creation_result(
        abort=True,
        error_plain='No tags found (with p-category) in tag-of post.',
        error_html='No <a href="https://indieweb.org/tags">tags</a> found (with p-category) in <a href="https://indieweb.org/tag-reply#How_to_post_a_tag-reply">tag-of post</a>.')

    if preview:
      return source.creation_result(
        content=content,
        description='add the tag%s %s to <a href="%s">this photo</a>.' %
          ('s' if len(tags) > 1 else '',
           ', '.join('<em>%s</em>' % tag for tag in tags), base_url))

    resp = self.call_api_method('flickr.photos.addTags', {
      'photo_id': base_id,
      # multiply valued fields are space separated. not easy to find in the
      # Flickr API docs, this is the closest I found:
      # https://www.flickr.com/services/api/upload.api.html#yui_3_11_0_1_1606756373916_317
      'tags': ' '.join(tags),
    })
    if not resp:
      resp = {}
    resp.update({
      'type': 'tag',
      'url': '%s#tagged-by-%s' % (base_url, self.user_id()),
      'tags': tags,
    })
    return source.creation_result(resp)

  # maybe a comment on a flickr photo?
  if type == 'comment' or obj.get('inReplyTo'):
    if not base_id:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a photo to comment on.',
        error_html='Could not find a photo to <a href="http://indiewebcamp.com/reply">comment on</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
        'link to a Flickr photo or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')

    if include_link == source.INCLUDE_LINK:
      content += '\n\n' + link_text
    if preview:
      return source.creation_result(
        content=content,
        description='comment on <a href="%s">this photo</a>.' % base_url)

    resp = self.call_api_method('flickr.photos.comments.addComment', {
      'photo_id': base_id,
      'comment_text': content.encode('utf-8'),
    })
    resp = resp.get('comment', {})
    resp.update({
      'type': 'comment',
      'url': resp.get('permalink'),
    })
    return source.creation_result(resp)

  # favorite an existing photo
  if type == 'like':
    if not base_id:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a photo to favorite.',
        error_html='Could not find a photo to <a href="http://indiewebcamp.com/like">favorite</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/like">like-of</a> '
        'link to a Flickr photo or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')
    if preview:
      return source.creation_result(
        description='favorite <a href="%s">this photo</a>.' % base_url)

    # this method doesn't return any data
    self.call_api_method('flickr.favorites.add', {
      'photo_id': base_id,
    })
    # TODO should we canonicalize the base_url (e.g. removing trailing path
    # info like "/in/contacts/")
    return source.creation_result({
      'type': 'like',
      'url': '%s#favorited-by-%s' % (base_url, self.user_id()),
    })

  return source.creation_result(
    abort=False,
    error_plain='Cannot publish type=%s to Flickr.' % type,
    error_html='Cannot publish type=%s to Flickr.' % type)
def postprocess_as2(activity, target=None, key=None):
  """Prepare an AS2 object to be served or sent via ActivityPub.

  Modifies activity in place (normalizing actors, inReplyTo, object ids, and
  audience) and returns the trimmed result. Articles and Notes are wrapped in
  a Create activity.

  Args:
    activity: dict, AS2 object or activity
    target: dict, AS2 object, optional. The target of activity's inReplyTo or
      Like/Announce/etc object, if any.
    key: MagicKey, optional. populated into publicKey field if provided.

  Returns:
    dict, the processed activity
  """
  type = activity.get('type')

  # actor objects
  if type == 'Person':
    postprocess_as2_actor(activity)
    if not activity.get('publicKey'):
      # underspecified, inferred from this issue and Mastodon's implementation:
      # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
      # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
      activity['publicKey'] = {
        'publicKeyPem': key.public_pem(),
      }
    return activity

  for actor in (util.get_list(activity, 'attributedTo') +
                util.get_list(activity, 'actor')):
    postprocess_as2_actor(actor)

  # inReplyTo: singly valued, prefer id over url
  target_id = target.get('id') if target else None
  in_reply_to = activity.get('inReplyTo')
  if in_reply_to:
    if target_id:
      activity['inReplyTo'] = target_id
    elif isinstance(in_reply_to, list):
      if len(in_reply_to) > 1:
        # bug fix: this previously referenced the undefined name
        # `in_reply_tos`, raising NameError whenever the warning fired.
        # also use lazy %-args per logging best practice.
        logging.warning(
          "AS2 doesn't support multiple inReplyTo URLs! "
          'Only using the first: %s', in_reply_to[0])
      activity['inReplyTo'] = in_reply_to[0]

  # activity objects (for Like, Announce, etc): prefer id over url
  obj = activity.get('object', {})
  if obj:
    if isinstance(obj, dict) and not obj.get('id'):
      obj['id'] = target_id or obj.get('url')
    elif obj != target_id:
      # obj is a bare id/url string that differs from the target's id
      activity['object'] = target_id

  # id is required for most things. default to url if it's not set.
  if not activity.get('id'):
    activity['id'] = activity.get('url')
  assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

  # cc public and target's author(s) and recipients
  # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
  # https://w3c.github.io/activitypub/#delivery
  if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
    recips = [AS2_PUBLIC_AUDIENCE]
    if target:
      recips += itertools.chain(*(util.get_list(target, field) for field in
                                  ('actor', 'attributedTo', 'to', 'cc')))
    activity['cc'] = util.dedupe_urls(util.get_url(recip) for recip in recips)

  # wrap articles and notes in a Create activity
  if type in ('Article', 'Note'):
    activity = {
      '@context': as2.CONTEXT,
      'type': 'Create',
      'object': activity,
    }

  return util.trim_nulls(activity)
def send_webmentions(handler, activity_wrapped, proxy=None, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Unwraps the activity, determines the source URL and target URLs (from
  inReplyTo, the object, and mention tags), then sends one webmention per
  target and stores a Response entity for each.

  Args:
    handler: RequestHandler
    activity_wrapped: dict, AS1 activity
    proxy: if truthy, send webmentions from the Response proxy URL rather than
      from the original source URL
    response_props: passed through to the newly created Responses
  """
  activity = common.redirect_unwrap(activity_wrapped)

  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    error(handler, '%s activities are not supported yet.' % verb)

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    # for create/update style verbs the object, not the activity, is the post
    if not source or verb in ('create', 'post', 'update'):
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))

  # mention tags that point at this service also become webmention targets.
  # NOTE(review): this calls bare redirect_unwrap() while the top of the
  # function calls common.redirect_unwrap() — presumably both resolve to the
  # same function via imports; verify.
  tags = util.get_list(activity_wrapped, 'tags')
  obj_wrapped = activity_wrapped.get('object')
  if isinstance(obj_wrapped, dict):
    tags.extend(util.get_list(obj_wrapped, 'tags'))
  for tag in tags:
    if tag.get('objectType') == 'mention':
      url = tag.get('url')
      if url and url.startswith(appengine_config.HOST_URL):
        targets.append(redirect_unwrap(url))

  # likes/shares/follows target the object itself
  if verb in ('follow', 'like', 'share'):
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error(handler, "Couldn't find original post URL")
  if not targets:
    error(handler, "Couldn't find any target URLs in inReplyTo, object, or mention tags")

  # send webmentions and store Responses
  errors = []
  for target in targets:
    if util.domain_from_link(target) == util.domain_from_link(source):
      # same-domain webmentions would be self-mentions; skip them
      logging.info('Skipping same-domain webmention from %s to %s',
                   source, target)
      continue

    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()
    # likes/shares/follows (and proxied sends) use the stored Response's proxy
    # page as the webmention source so the target can find the mention
    wm_source = (response.proxy_url()
                 if verb in ('follow', 'like', 'share') or proxy
                 else source)
    logging.info('Sending webmention from %s to %s', wm_source, target)

    wm = send.WebmentionSend(wm_source, target)
    if wm.send(headers=HEADERS):
      logging.info('Success: %s', wm.response)
      response.status = 'complete'
    else:
      logging.warning('Failed: %s', wm.error)
      errors.append(wm.error)
      response.status = 'error'
    response.put()

  if errors:
    # report all failures; use the first error's HTTP status for the response
    msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
    error(handler, msg, status=errors[0].get('http_status'))
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates a new issue or comment.

  When creating a new issue, if the authenticated user is a collaborator on
  the repo, tags that match existing labels are converted to those labels and
  included.

  https://developer.github.com/v4/guides/forming-calls/#about-mutations
  https://developer.github.com/v4/mutation/addcomment/
  https://developer.github.com/v4/mutation/addreaction/
  https://developer.github.com/v3/issues/#create-an-issue

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: string
    ignore_formatting: boolean

  Returns:
    a CreationResult

    If preview is True, the contents will be a unicode string HTML snippet.
    If False, it will be a dict with 'id' and 'url' keys for the newly created
    GitHub object.
  """
  assert preview in (False, True)

  type = source.object_type(obj)
  if type and type not in ('issue', 'comment', 'activity', 'note', 'article',
                           'like', 'tag'):
    return source.creation_result(
      abort=False, error_plain='Cannot publish %s to GitHub' % type)

  # the in-reply-to GitHub URL determines what we act on
  base_obj = self.base_object(obj)
  base_url = base_obj.get('url')
  if not base_url:
    return source.creation_result(
      abort=True,
      error_plain='You need an in-reply-to GitHub repo, issue, PR, or comment URL.')

  content = orig_content = html.escape(
    self._content_for_create(obj, ignore_formatting=ignore_formatting),
    quote=False)
  url = obj.get('url')
  if include_link == source.INCLUDE_LINK and url:
    content += '\n\n(Originally published at: %s)' % url

  # parse owner/repo[/issues|pull/number] out of the URL path
  parsed = urllib.parse.urlparse(base_url)
  path = parsed.path.strip('/').split('/')
  owner, repo = path[:2]
  if len(path) == 4:
    number = path[3]

  # an #issuecomment-NNN fragment targets a specific comment
  comment_id = re.match(r'^issuecomment-([0-9]+)$', parsed.fragment)
  if comment_id:
    comment_id = comment_id.group(1)
  elif parsed.fragment:
    return source.creation_result(
      abort=True,
      error_plain='Please remove the fragment #%s from your in-reply-to URL.' %
        parsed.fragment)

  if type == 'comment':  # comment or reaction
    if not (len(path) == 4 and path[2] in ('issues', 'pull')):
      return source.creation_result(
        abort=True,
        error_plain='GitHub comment requires in-reply-to issue or PR URL.')

    # content that is exactly a reaction emoji becomes a reaction, not a comment
    is_reaction = orig_content in REACTIONS_GRAPHQL
    if preview:
      if comment_id:
        comment = self.rest(REST_API_COMMENT % (owner, repo, comment_id)).json()
        target_link = '<a href="%s">a comment on %s/%s#%s, <em>%s</em></a>' % (
          base_url, owner, repo, number, util.ellipsize(comment['body']))
      else:
        resp = self.graphql(GRAPHQL_ISSUE_OR_PR, locals())
        issue = (resp.get('repository') or {}).get('issueOrPullRequest')
        target_link = '<a href="%s">%s/%s#%s%s</a>' % (
          base_url, owner, repo, number,
          (', <em>%s</em>' % issue['title']) if issue else '')

      if is_reaction:
        preview_content = None
        desc = u'<span class="verb">react %s</span> to %s.' % (
          orig_content, target_link)
      else:
        preview_content = self.render_markdown(content, owner, repo)
        desc = '<span class="verb">comment</span> on %s:' % target_link
      return source.creation_result(content=preview_content, description=desc)
    else:  # create
      # we originally used the GraphQL API to create issue comments and
      # reactions, but it often gets rejected against org repos due to access
      # controls. oddly, the REST API works fine in those same cases.
      # https://github.com/snarfed/bridgy/issues/824
      if is_reaction:
        if comment_id:
          api_url = REST_API_COMMENT_REACTIONS % (owner, repo, comment_id)
          reacted = self.rest(api_url, data={
            'content': REACTIONS_REST.get(orig_content),
          }).json()
          url = base_url
        else:
          api_url = REST_API_REACTIONS % (owner, repo, number)
          reacted = self.rest(api_url, data={
            'content': REACTIONS_REST.get(orig_content),
          }).json()
          url = '%s#%s-by-%s' % (base_url, reacted['content'].lower(),
                                 reacted['user']['login'])

        return source.creation_result({
          'id': reacted.get('id'),
          'url': url,
          'type': 'react',
        })
      else:
        try:
          api_url = REST_API_COMMENTS % (owner, repo, number)
          commented = self.rest(api_url, data={'body': content}).json()
          return source.creation_result({
            'id': commented.get('id'),
            'url': commented.get('html_url'),
            'type': 'comment',
          })
        except ValueError as e:
          return source.creation_result(abort=True, error_plain=str(e))

  elif type == 'like':  # star
    if not (len(path) == 2 or (len(path) == 3 and path[2] == 'issues')):
      return source.creation_result(
        abort=True,
        error_plain='GitHub like requires in-reply-to repo URL.')

    if preview:
      return source.creation_result(
        description='<span class="verb">star</span> <a href="%s">%s/%s</a>.' %
          (base_url, owner, repo))
    else:
      issue = self.graphql(GRAPHQL_REPO, locals())
      resp = self.graphql(GRAPHQL_ADD_STAR, {
        'starrable_id': issue['repository']['id'],
      })
      return source.creation_result({
        'url': base_url + '/stargazers',
      })

  elif type == 'tag':  # add label
    if not (len(path) == 4 and path[2] in ('issues', 'pull')):
      return source.creation_result(
        abort=True,
        error_plain='GitHub tag post requires tag-of issue or PR URL.')

    tags = set(util.trim_nulls(t.get('displayName', '').strip()
                               for t in util.get_list(obj, 'object')))
    if not tags:
      return source.creation_result(
        abort=True, error_plain='No tags found in tag post!')

    # only labels that already exist on the repo can be applied
    existing_labels = self.existing_labels(owner, repo)
    labels = sorted(tags & existing_labels)
    issue_link = '<a href="%s">%s/%s#%s</a>' % (base_url, owner, repo, number)
    if not labels:
      return source.creation_result(
        abort=True,
        error_html="No tags in [%s] matched %s's existing labels [%s]." %
          (', '.join(sorted(tags)), issue_link,
           ', '.join(sorted(existing_labels))))

    if preview:
      return source.creation_result(
        description='add label%s <span class="verb">%s</span> to %s.' % (
          ('s' if len(labels) > 1 else ''), ', '.join(labels), issue_link))
    else:
      resp = self.rest(REST_API_ISSUE_LABELS % (owner, repo, number),
                       labels).json()
      return source.creation_result({
        'url': base_url,
        'type': 'tag',
        'tags': labels,
      })

  else:  # new issue
    if not (len(path) == 2 or (len(path) == 3 and path[2] == 'issues')):
      return source.creation_result(
        abort=True,
        error_plain='New GitHub issue requires in-reply-to repo URL')

    title = util.ellipsize(obj.get('displayName') or obj.get('title') or
                           orig_content)
    tags = set(util.trim_nulls(t.get('displayName', '').strip()
                               for t in util.get_list(obj, 'tags')))
    labels = sorted(tags & self.existing_labels(owner, repo))

    if preview:
      preview_content = '<b>%s</b><hr>%s' % (
        title, self.render_markdown(content, owner, repo))
      preview_labels = ''
      if labels:
        preview_labels = ' and attempt to add label%s <span class="verb">%s</span>' % (
          's' if len(labels) > 1 else '', ', '.join(labels))
      return source.creation_result(content=preview_content, description="""\
<span class="verb">create a new issue</span> on <a href="%s">%s/%s</a>%s:""" %
        (base_url, owner, repo, preview_labels))
    else:
      resp = self.rest(REST_API_CREATE_ISSUE % (owner, repo), {
        'title': title,
        'body': content,
        'labels': labels,
      }).json()
      resp['url'] = resp.pop('html_url')
      return source.creation_result(resp)

  # NOTE(review): unreachable — every branch of the if/elif/else above
  # returns. Kept for safety / historical reasons, presumably.
  return source.creation_result(
    abort=False,
    error_plain="%s doesn't look like a GitHub repo, issue, or PR URL." % base_url)
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string, the mf2 class that entries should be given (e.g.
      'h-cite' when parsing a reference to a foreign entry). defaults to
      'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  # NOTE(review): when obj['inReplyTo'] exists, this is the *same* list object,
  # so the extend() below mutates the caller's activity in place — confirm
  # that's intended (cf. append_in_reply_to, which rebuilds the list instead).
  in_reply_tos = obj.get(
    'inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': (['h-card'] if obj_type == 'person'
             else ['h-card', 'p-location'] if obj_type == 'place'
             else [entry_class]),
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': [image.get('url') for image in
                (util.get_list(obj, 'image') or
                 util.get_list(primary, 'image'))],
      'video': [obj.get('stream', primary.get('stream', {})).get('url')],
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
        'value': xml.sax.saxutils.unescape(primary.get('content', '')),
        'html': render_content(primary, include_location=False,
                               synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      # NOTE(review): latitude/longitude are scalars here, unlike every other
      # mf2 property in this dict, which is a list — verify downstream
      # consumers expect that.
      'latitude': primary.get('latitude'),
      'longitude': primary.get('longitude'),
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
    },
    # note/article attachments become nested h-cite children
    'children': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                 for c in primary.get('attachments', [])
                 if c.get('objectType') in ('note', 'article')],
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      cls = 'u-category h-card'
    elif tag.get('objectType') == 'hashtag':
      cls = 'u-category'
    else:
      continue
    ret['properties']['category'].append(object_to_json(tag, entry_class=cls))

  # rsvp
  if is_rsvp:
    # e.g. 'rsvp-yes' -> 'yes'
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = util.get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
  """Converts ActivityStreams activites to an Atom feed.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.

  Returns:
    unicode string with Atom XML
  """
  # Strip query params from URLs so that we don't include access tokens, etc
  host_url = (_remove_query_params(host_url) if host_url
              else 'https://github.com/snarfed/granary')
  if request_url is None:
    request_url = host_url

  # NOTE(review): this loop mutates the input activities in place ('object',
  # 'title', attachment 'image' fields are rewritten) — callers should not
  # expect their dicts back unchanged.
  for a in activities:
    act_type = source.object_type(a)
    if not act_type or act_type == 'post':
      primary = a.get('object', {})
    else:
      primary = a
    obj = a.setdefault('object', {})

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
      primary, include_location=reader))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
      a['title'] = util.ellipsize(_encode_ampersands(
        a.get('displayName') or a.get('content') or obj.get('title') or
        obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    # NOTE(review): BeautifulSoup is called without an explicit parser, so the
    # parser (and exact text extraction) depends on what's installed — consider
    # pinning one, e.g. BeautifulSoup(a['title'], 'html.parser').
    a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text(''))

    # Normalize attachments.image to always be a list.
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
      att['image'] = util.get_list(att, 'image')

    # render note/article attachments as nested HTML snippets
    obj['rendered_children'] = []
    for att in attachments:
      if att.get('objectType') in ('note', 'article'):
        html = microformats2.render_content(att, include_location=reader)
        author = att.get('author')
        if author:
          name = microformats2.maybe_linked_name(
            microformats2.object_to_json(author).get('properties', []))
          html = '%s: %s' % (name.strip(), html)
        obj['rendered_children'].append(_encode_ampersands(html))

  # Emulate Django template behavior that returns a special default value that
  # can continue to be referenced when an attribute or item lookup fails. Helps
  # avoid conditionals in the template itself.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      # recursively wrap nested dicts so chained lookups also default
      super(Defaulter, self).__init__(Defaulter, **{
        k: (Defaulter(**v) if isinstance(v, dict) else v)
        for k, v in kwargs.items()})

    # NOTE(review): __unicode__ is a Python 2 protocol method; it is never
    # called by Python 3 itself — verify it's still needed.
    def __unicode__(self):
      return super(Defaulter, self).__unicode__() if self else u''

  env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                           autoescape=True)
  if actor is None:
    actor = {}
  return env.get_template(ATOM_TEMPLATE_FILE).render(
    items=[Defaulter(**a) for a in activities],
    host_url=host_url,
    request_url=request_url,
    xml_base=xml_base,
    title=title or 'User feed for ' + source.Source.actor_name(actor),
    updated=activities[0]['object'].get('published', '') if activities else '',
    actor=Defaulter(**actor),
    rels=rels or {},
  )
def render_content(obj, include_location=True, synthesize_content=True):
  """Renders the content of an ActivityStreams object.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then
  rendered in json_to_html.)

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: whether to render location, if provided
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    id = t.get('id')
    if id and id in seen_ids:
      continue
    seen_ids.add(id)

    # fix: require 'url' too, since the linkify loop below reads tag['url'] and
    # would KeyError otherwise. matches the render_attachments variant of this
    # function elsewhere in the file.
    if 'startIndex' in t and 'length' in t and 'url' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = content
    content = ''
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content += orig[last_end:start]
      content += '<a href="%s">%s</a>' % (tag['url'], orig[start:end])
      last_end = end
    content += orig[last_end:]

  # convert newlines to <br>s
  # do this *after* linkifying tags so we don't have to shuffle indices over
  content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  attachments = [a for a in obj.get('attachments', [])
                 if a.get('objectType') not in ('note', 'article')]

  for tag in attachments + tags.pop('article', []):
    name = tag.get('displayName', '')
    open_a_tag = False
    if tag.get('objectType') == 'video':
      video = util.get_first(tag, 'stream') or util.get_first(obj, 'stream')
      poster = util.get_first(tag, 'image', {})
      if video and video.get('url'):
        content += '\n<p>%s' % vid(video['url'], poster.get('url'), 'thumbnail')
    else:
      content += '\n<p>'
      url = tag.get('url') or obj.get('url')
      if url:
        content += '\n<a class="link" href="%s">' % url
        open_a_tag = True
      image = util.get_first(tag, 'image') or util.get_first(obj, 'image')
      if image and image.get('url'):
        content += '\n' + img(image['url'], 'thumbnail', name)
    if name:
      content += '\n<span class="name">%s</span>' % name
    if open_a_tag:
      content += '\n</a>'
    summary = tag.get('summary')
    if summary and summary != name:
      content += '\n<span class="summary">%s</span>' % summary
    content += '\n</p>'

  # generate share/like contexts if the activity does not have content
  # of its own
  for as_type, verb in [('share', 'Shared'), ('like', 'Likes')]:
    obj_type = source.object_type(obj)
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = util.get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())
      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
            '^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          # fix: dict.iteritems() is Python 2 only and raises AttributeError on
          # Python 3; use items(), consistent with the rest of this file.
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n' + hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for type in 'like', 'share', 'react', 'person':
    tags.pop(type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention')
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then
  rendered in json_to_html.)

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: whether to render location, if provided
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    id = t.get('id')
    if id and id in seen_ids:
      continue
    seen_ids.add(id)

    # only tags with an explicit content range *and* a url can be linkified
    if 'startIndex' in t and 'length' in t and 'url' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    # WideUnicode so that startIndex/length offsets count code points, not
    # UTF-16 code units, on narrow builds
    orig = util.WideUnicode(content)
    content = util.WideUnicode('')
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
        content, orig[last_end:start], tag['url'], orig[start:end]))
      last_end = end
    content += orig[last_end:]

  if not obj.get('content_is_html'):
    # convert newlines to <br>s
    # do this *after* linkifying tags so we don't have to shuffle indices over
    content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  if render_attachments:
    atts = [a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts + tags.pop('article', []), obj)

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in (
      ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())
      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
            '^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  # render attachments of the shared (inner) object too
  if render_attachments and obj.get('verb') == 'share':
    atts = [att for att in itertools.chain.from_iterable(
              o.get('attachments', []) for o in util.get_list(obj, 'object'))
            if att.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts, obj)

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n<p>%s</p>' % hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for type in 'like', 'share', 'react', 'person':
    tags.pop(type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention')
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates or previews a status (aka toot), reply, boost (aka reblog), or favorite.

  https://docs.joinmastodon.org/api/rest/statuses/

  Based on :meth:`Twitter._create`.

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: string
    ignore_formatting: boolean

  Returns:
    CreationResult. If preview is True, the content will be a unicode string
    HTML snippet. If False, it will be a dict with 'id' and 'url' keys for the
    newly created object.
  """
  assert preview in (False, True)

  type = obj.get('objectType')
  verb = obj.get('verb')

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  is_reply = type == 'comment' or obj.get('inReplyTo')
  is_rsvp = (verb and verb.startswith('rsvp-')) or verb == 'invite'
  atts = obj.get('attachments', [])
  images = util.dedupe_urls(
    util.get_list(obj, 'image') +
    [a for a in atts if a.get('objectType') == 'image'])
  # obj itself is included here, presumably because a video post carries its
  # stream on the object — TODO confirm
  videos = util.dedupe_urls(
    [obj] + [a for a in atts if a.get('objectType') == 'video'],
    key='stream')
  has_media = (images or videos) and (type in ('note', 'article') or is_reply)

  # prefer displayName over content for articles
  #
  # TODO: handle activities as well as objects? ie pull out ['object'] here if
  # necessary?
  type = obj.get('objectType')
  prefer_content = type == 'note' or (base_url and is_reply)
  preview_description = ''
  content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                     prefer_name=not prefer_content)

  if not content:
    if type == 'activity' and not is_rsvp:
      content = verb
    elif has_media:
      content = ''
    else:
      return source.creation_result(
        abort=False,  # keep looking for things to publish,
        error_plain='No content text found.',
        error_html='No content text found.')

  post_label = '%s %s' % (self.NAME, self.TYPE_LABELS['post'])
  if is_reply and not base_url:
    return source.creation_result(
      abort=True,
      error_plain='Could not find a %s to reply to.' % post_label,
      error_html='Could not find a %s to <a href="http://indiewebcamp.com/reply">reply to</a>. Check that your post has the right <a href="http://indiewebcamp.com/comment">in-reply-to</a> link.' % post_label)

  # truncate and ellipsize content if necessary
  # TODO: don't count domains in remote mentions.
  # https://docs.joinmastodon.org/usage/basics/#text
  content = self.truncate(content, obj.get('url'), include_link, type)

  # linkify user mentions
  def linkify_mention(match):
    # @user or @user@instance; bare mentions default to our own instance
    split = match.group(1).split('@')
    username = split[0]
    instance = ('https://' + split[1]) if len(split) > 1 else self.instance
    url = urllib.parse.urljoin(instance, '/@' + username)
    return '<a href="%s">@%s</a>' % (url, username)

  preview_content = MENTION_RE.sub(linkify_mention, content)

  # linkify (defaults to twitter's behavior)
  preview_content = util.linkify(preview_content, pretty=True,
                                 skip_bare_cc_tlds=True)
  tags_url = urllib.parse.urljoin(self.instance, '/tags')
  preview_content = HASHTAG_RE.sub(
    r'\1<a href="%s/\2">#\2</a>' % tags_url, preview_content)

  # switch on activity type
  if type == 'activity' and verb == 'like':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a %s to %s.' %
          (post_label, self.TYPE_LABELS['like']),
        error_html='Could not find a %s to <a href="http://indiewebcamp.com/like">%s</a>. Check that your post has the right <a href="http://indiewebcamp.com/like">u-like-of link</a>.' %
          (post_label, self.TYPE_LABELS['like']))

    if preview:
      preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
        self.TYPE_LABELS['like'], base_url, self.TYPE_LABELS['post'],
        self.embed_post(base_obj))
      return source.creation_result(description=preview_description)
    else:
      resp = self._post(API_FAVORITE % base_id)
      resp['type'] = 'like'

  elif type == 'activity' and verb == 'share':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a %s to %s.' %
          (post_label, self.TYPE_LABELS['repost']),
        error_html='Could not find a %s to <a href="http://indiewebcamp.com/repost">%s</a>. Check that your post has the right <a href="http://indiewebcamp.com/repost">repost-of</a> link.' %
          (post_label, self.TYPE_LABELS['repost']))

    if preview:
      preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
        self.TYPE_LABELS['repost'], base_url, self.TYPE_LABELS['post'],
        self.embed_post(base_obj))
      return source.creation_result(description=preview_description)
    else:
      resp = self._post(API_REBLOG % base_id)
      resp['type'] = 'repost'

  elif type in ('note', 'article') or is_reply or is_rsvp:  # a post
    data = {'status': content}

    if is_reply:
      preview_description += 'add a <span class="verb">%s</span> to <a href="%s">this %s</a>: %s' % (
        self.TYPE_LABELS['comment'], base_url, self.TYPE_LABELS['post'],
        self.embed_post(base_obj))
      data['in_reply_to_id'] = base_id
    else:
      preview_description += '<span class="verb">%s</span>:' % self.TYPE_LABELS['post']

    # cap total attached media; videos take priority over images
    num_media = len(videos) + len(images)
    if num_media > MAX_MEDIA:
      videos = videos[:MAX_MEDIA]
      images = images[:max(MAX_MEDIA - len(videos), 0)]
      logging.warning('Found %d media! Only using the first %d: %r',
                      num_media, MAX_MEDIA, videos + images)

    if preview:
      media_previews = [
        '<video controls src="%s"><a href="%s">%s</a></video>' %
          (util.get_url(vid, key='stream'), util.get_url(vid, key='stream'),
           vid.get('displayName') or 'this video')
        for vid in videos
      ] + [
        '<img src="%s" alt="%s" />' %
          (util.get_url(img), img.get('displayName') or '')
        for img in images
      ]
      if media_previews:
        preview_content += '<br /><br />' + ' '.join(media_previews)
      return source.creation_result(content=preview_content,
                                    description=preview_description)
    else:
      ids = self.upload_media(videos + images)
      if ids:
        data['media_ids'] = ids
      resp = self._post(API_STATUSES, json=data)

  else:
    return source.creation_result(
      abort=False,
      error_plain='Cannot publish type=%s, verb=%s to Mastodon' % (type, verb),
      error_html='Cannot publish type=%s, verb=%s to Mastodon' % (type, verb))

  if 'url' not in resp:
    resp['url'] = base_url

  return source.creation_result(resp)
def _prepare_activity(a, reader=True):
  """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place.

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
  act_type = source.object_type(a)
  if not act_type or act_type == 'post':
    primary = a.get('object', {})
  else:
    primary = a
  obj = a.setdefault('object', {})

  # Render content as HTML; escape &s
  obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
    primary, include_location=reader, render_attachments=True))

  # Make sure every activity has the title field, since Atom <entry> requires
  # the title element.
  if not a.get('title'):
    a['title'] = util.ellipsize(_encode_ampersands(
      a.get('displayName') or a.get('content') or obj.get('title') or
      obj.get('displayName') or obj.get('content') or 'Untitled'))

  # strip HTML tags. the Atom spec says title is plain text:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text(''))

  children = []
  image_urls_seen = set()
  image_atts = []

  # normalize attachments, render attached notes/articles
  attachments = a.get('attachments') or obj.get('attachments') or []
  for att in attachments:
    att['stream'] = util.get_first(att, 'stream')
    type = att.get('objectType')

    if type == 'image':
      image_atts.append(util.get_first(att, 'image'))
      continue

    image_urls_seen |= set(util.get_urls(att, 'image'))
    if type in ('note', 'article'):
      html = microformats2.render_content(att, include_location=reader,
                                          render_attachments=True)
      author = att.get('author')
      if author:
        name = microformats2.maybe_linked_name(
          microformats2.object_to_json(author).get('properties', []))
        html = '%s: %s' % (name.strip(), html)
      children.append(html)

  # render image(s) that we haven't already seen
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue
    # NOTE(review): assumes image is a dict; confirm util.get_list/get_first
    # never yield bare URL strings here.
    url = image.get('url')
    # fix: bail out before parsing. previously urlparse() ran first, and
    # urllib.parse.urlparse(None) raises on an image dict with no 'url'.
    # for truthy URLs this is equivalent to the old combined condition.
    if not url or url in image_urls_seen:
      continue
    parsed = urllib.parse.urlparse(url)
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    # match an existing <img src=...> for this URL, with or without scheme/host
    img_src_re = re.compile(
      r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
      (re.escape(parsed.netloc), re.escape(rest)))
    if not img_src_re.search(obj['rendered_content']):
      children.append(microformats2.img(url))
      image_urls_seen.add(url)

  obj['rendered_children'] = [_encode_ampersands(child) for child in children]

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in 'published', 'updated':
    val = obj.get(prop)
    if val:
      obj[prop] = util.maybe_iso8601_to_rfc3339(val)
      # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
      # time zone unaware. They must have either an offset or the Z suffix.
      # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
      if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
        obj[prop] += 'Z'
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo') or []
  if not in_reply_tos:
    context = obj.get('context')
    if context and isinstance(context, dict):
      in_reply_tos = context.get('inReplyTo') or []

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # prefer duration and size from object's stream, then first video, then first
  # audio
  stream = {}
  for candidate in [obj] + attachments['video'] + attachments['audio']:
    for stream in get_list(candidate, 'stream'):
      if stream:
        break

  duration = stream.get('duration')
  if duration is not None:
    if util.is_int(duration):
      duration = str(duration)
    else:
      # fix: this previously called the logging *module* as a function
      # (TypeError at runtime) and passed only one arg for two %-placeholders.
      logging.warning('Ignoring duration %r; expected int, got %s',
                      duration, duration.__class__)
      duration = None

  sizes = []
  size = stream.get('size')
  if size:
    sizes = [str(size)]

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, str)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'duration': [duration],
      'size': sizes,
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False,
                      entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # content. emulate e- vs p- microformats2 parsing: e- if there are HTML tags,
  # otherwise p-.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  text = xml.sax.saxutils.unescape(primary.get('content', ''))
  html = render_content(primary, include_location=False,
                        synthesize_content=synthesize_content)
  if '<' in html:
    ret['properties']['content'] = [{'value': text, 'html': html}]
  else:
    ret['properties']['content'] = [text]

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name}
                                          if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    # e.g. 'rsvp-yes' -> 'yes'
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude: prefer an ISO 6709 'position' string, fall back to
  # explicit latitude/longitude fields
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def render_content(obj, include_location=True, synthesize_content=True):
  """Renders the content of an ActivityStreams object.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then
  rendered in json_to_html.)

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: whether to render location, if provided
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    id = t.get('id')
    if id and id in seen_ids:
      continue
    seen_ids.add(id)
    if 'startIndex' in t and 'length' in t:
      # tags with indices are embedded mentions, linkified in place below
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = content
    content = ''
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content += orig[last_end:start]
      content += '<a href="%s">%s</a>' % (tag['url'], orig[start:end])
      last_end = end
    content += orig[last_end:]

  # convert newlines to <br>s
  # do this *after* linkifying tags so we don't have to shuffle indices over
  content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  attachments = [a for a in obj.get('attachments', [])
                 if a.get('objectType') not in ('note', 'article')]

  for tag in attachments + tags.pop('article', []):
    name = tag.get('displayName', '')
    open_a_tag = False
    if tag.get('objectType') == 'video':
      video = util.get_first(tag, 'stream') or util.get_first(obj, 'stream')
      poster = util.get_first(tag, 'image', {})
      if video and video.get('url'):
        content += '\n<p>%s' % vid(video['url'], poster.get('url'), 'thumbnail')
    else:
      content += '\n<p>'
      url = tag.get('url') or obj.get('url')
      if url:
        content += '\n<a class="link" href="%s">' % url
        open_a_tag = True
      image = util.get_first(tag, 'image') or util.get_first(obj, 'image')
      if image and image.get('url'):
        content += '\n' + img(image['url'], 'thumbnail', name)
    if name:
      content += '\n<span class="name">%s</span>' % name
    if open_a_tag:
      content += '\n</a>'
    summary = tag.get('summary')
    if summary and summary != name:
      content += '\n<span class="summary">%s</span>' % summary
    content += '\n</p>'

  # generate share/like contexts if the activity does not have content
  # of its own
  for as_type, verb in [('share', 'Shared'), ('like', 'Likes')]:
    obj_type = source.object_type(obj)
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = util.get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())
      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        # NOTE: raw string so the \. escapes are literal regex escapes, not
        # (invalid) Python string escapes.
        if obj_type == 'share' and 'url' in obj and re.search(
            r'^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          # (fixed: dict.iteritems() doesn't exist on Python 3; items() is
          # equivalent here.)
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target,
                                  include_location=include_location,
                                  synthesize_content=synthesize_content)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n' + hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for type in 'like', 'share', 'react', 'person':
    tags.pop(type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention')
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
def to_as1(obj, use_type=True): """Converts an ActivityStreams 2 activity or object to ActivityStreams 1. Args: obj: dict, AS2 activity or object use_type: boolean, whether to include objectType and verb Returns: dict, AS1 activity or object """ if not obj: return {} elif isinstance(obj, basestring): return {'url': obj} elif not isinstance(obj, dict): raise ValueError('Expected dict, got %r' % obj) obj = copy.deepcopy(obj) obj.pop('@context', None) type = obj.pop('type', None) if use_type: obj['objectType'] = TYPE_TO_OBJECT_TYPE.get(type) obj['verb'] = TYPE_TO_VERB.get(type) if obj.get('inReplyTo') and obj['objectType'] in ('note', 'article'): obj['objectType'] = 'comment' elif obj['verb'] and not obj['objectType']: obj['objectType'] = 'activity' def url_or_as1(val): return {'url': val} if isinstance(val, basestring) else to_as1(val) def all_to_as1(field): return [to_as1(elem) for elem in util.pop_list(obj, field)] images = [] # icon first since e.g. Mastodon uses icon for profile picture, # image for featured photo. for as2_img in util.pop_list(obj, 'icon') + util.pop_list(obj, 'image'): as1_img = to_as1(as2_img, use_type=False) if as1_img not in images: images.append(as1_img) inner_objs = all_to_as1('object') if len(inner_objs) == 1: inner_objs = inner_objs[0] obj.update({ 'displayName': obj.pop('name', None), 'actor': to_as1(obj.get('actor')), 'attachments': all_to_as1('attachment'), 'image': images, 'inReplyTo': [url_or_as1(orig) for orig in util.get_list(obj, 'inReplyTo')], 'location': url_or_as1(obj.get('location')), 'object': inner_objs, 'tags': all_to_as1('tag'), }) if type in ('Audio', 'Video'): obj['stream'] = {'url': obj.pop('url', None)} elif type == 'Mention': obj['url'] = obj.pop('href', None) attrib = util.pop_list(obj, 'attributedTo') if attrib: if len(attrib) > 1: logging.warning('ActivityStreams 1 only supports single author; ' 'dropping extra attributedTo values: %s' % attrib[1:]) obj['author'] = to_as1(attrib[0]) return util.trim_nulls(obj)
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string, the mf2 class that entries should be given (e.g.
      'h-cite' when parsing a reference to a foreign entry). defaults to
      'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj:
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    # RSVPs and reactions reply to their object
    # NOTE(review): when obj['inReplyTo'] exists, extend() mutates that list
    # inside the caller's obj — confirm this in-place mutation is intended.
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': (['h-card'] if obj_type == 'person'
             else ['h-card', 'p-location'] if obj_type == 'place'
             else [entry_class]),
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': [image.get('url') for image in
                (util.get_list(obj, 'image') or util.get_list(primary, 'image'))],
      'video': [obj.get('stream', primary.get('stream', {})).get('url')],
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
          # emulate e- vs p- parsing: raw text value plus rendered HTML
          'value': xml.sax.saxutils.unescape(primary.get('content', '')),
          'html': render_content(primary, include_location=False,
                                 synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      # NOTE(review): unlike every other property here, latitude/longitude are
      # not wrapped in lists — downstream consumers appear to rely on this, but
      # confirm it isn't an oversight (mf2 property values are usually lists).
      'latitude': primary.get('latitude'),
      'longitude': primary.get('longitude'),
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
    },
    # note/article attachments become children; other attachments are
    # rendered inline by render_content()
    'children': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                 for c in primary.get('attachments', [])
                 if c.get('objectType') in ('note', 'article')],
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      cls = 'u-category h-card'
    elif tag.get('objectType') == 'hashtag':
      cls = 'u-category'
    else:
      continue
    ret['properties']['category'].append(object_to_json(tag, entry_class=cls))

  # rsvp
  if is_rsvp:
    # e.g. 'rsvp-yes' -> 'yes'
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = util.get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def send_webmentions(activity_wrapped, proxy=None, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Args:
    activity_wrapped: dict, AS1 activity
    proxy: boolean, whether to force using our proxy URL as the webmention
      source instead of the activity's own URL
    response_props: passed through to the newly created Responses
  """
  activity = redirect_unwrap(activity_wrapped)
  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    error(f'{verb} activities are not supported yet.')

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    # for creates/updates, the inner object is the real source
    if not source or verb in ('create', 'post', 'update'):
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))

  # mention tags that point back at us also count as targets
  tags = util.get_list(activity_wrapped, 'tags')
  obj_wrapped = activity_wrapped.get('object')
  if isinstance(obj_wrapped, dict):
    tags.extend(util.get_list(obj_wrapped, 'tags'))
  for tag in tags:
    if tag.get('objectType') == 'mention':
      url = tag.get('url')
      if url and url.startswith(request.host_url):
        targets.append(redirect_unwrap(url))

  if verb in ('follow', 'like', 'share'):
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error("Couldn't find original post URL")
  if not targets:
    error(
      "Couldn't find any target URLs in inReplyTo, object, or mention tags"
    )

  # send webmentions and store Responses
  errors = []  # stores (code, body) tuples
  for target in targets:
    if util.domain_from_link(target) == util.domain_from_link(source):
      logging.info(
        f'Skipping same-domain webmention from {source} to {target}')
      continue

    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()
    # likes/shares/follows don't have their own content, so use our proxy
    # page as the webmention source
    wm_source = (response.proxy_url()
                 if verb in ('follow', 'like', 'share') or proxy
                 else source)
    logging.info(f'Sending webmention from {wm_source} to {target}')

    try:
      endpoint = webmention.discover(target, headers=HEADERS).endpoint
      if endpoint:
        webmention.send(endpoint, wm_source, target, headers=HEADERS)
        response.status = 'complete'
        logging.info('Success!')
      else:
        response.status = 'ignored'
        logging.info('Ignoring.')
    # BaseException is deliberate: webmention helpers may raise non-Exception
    # HTTP errors. We collect them and report at the end.
    except BaseException as e:
      logging.warning(f'Webmention to {target} failed: {e}')
      errors.append(util.interpret_http_exception(e))
      # fix: previously the Response was persisted without any status on
      # failure; mark it as errored, matching the legacy implementation.
      response.status = 'error'

    response.put()

  if errors:
    msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
    error(msg, status=int(errors[0][0] or 502))
def postprocess_as2(activity, target=None, key=None):
  """Prepare an AS2 object to be served or sent via ActivityPub.

  Mutates activity in place (and may wrap it in a Create).

  Args:
    activity: dict, AS2 object or activity
    target: dict, AS2 object, optional. The target of activity's inReplyTo or
      Like/Announce/etc object, if any.
    key: :class:`models.MagicKey`, optional. populated into publicKey field
      if provided.

  Returns:
    dict, the processed (possibly wrapped) activity
  """
  type = activity.get('type')

  # actor objects
  if type == 'Person':
    postprocess_as2_actor(activity)
    if not activity.get('publicKey'):
      # underspecified, inferred from this issue and Mastodon's implementation:
      # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
      # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
      # NOTE(review): publicKey id is the bare preferredUsername, not a full
      # key URL; and key.public_pem() raises AttributeError if key is None —
      # confirm callers always pass key for Person objects.
      activity.update({
        'publicKey': {
          'id': activity.get('preferredUsername'),
          'publicKeyPem': key.public_pem().decode(),
        },
        '@context': (util.get_list(activity, '@context') +
                     ['https://w3id.org/security/v1']),
      })
    return activity

  for actor in (util.get_list(activity, 'attributedTo') +
                util.get_list(activity, 'actor')):
    postprocess_as2_actor(actor)

  # inReplyTo: singly valued, prefer id over url
  target_id = target.get('id') if target else None
  in_reply_to = activity.get('inReplyTo')
  if in_reply_to:
    if target_id:
      activity['inReplyTo'] = target_id
    elif isinstance(in_reply_to, list):
      if len(in_reply_to) > 1:
        logging.warning("AS2 doesn't support multiple inReplyTo URLs! "
                        'Only using the first: %s' % in_reply_to[0])
      activity['inReplyTo'] = in_reply_to[0]

    # Mastodon evidently requires a Mention tag for replies to generate a
    # notification to the original post's author. not required for likes,
    # reposts, etc. details:
    # https://github.com/snarfed/bridgy-fed/issues/34
    if target:
      for to in (util.get_list(target, 'attributedTo') +
                 util.get_list(target, 'actor')):
        if isinstance(to, dict):
          to = to.get('url') or to.get('id')
        if to:
          activity.setdefault('tag', []).append({
            'type': 'Mention',
            'href': to,
          })

  # activity objects (for Like, Announce, etc): prefer id over url
  obj = activity.get('object')
  if obj:
    if isinstance(obj, dict) and not obj.get('id'):
      obj['id'] = target_id or obj.get('url')
    elif target_id and obj != target_id:
      activity['object'] = target_id

  # id is required for most things. default to url if it's not set.
  if not activity.get('id'):
    activity['id'] = activity.get('url')

  # TODO: find a better way to check this, sometimes or always?
  # removed for now since it fires on posts without u-id or u-url, eg
  # https://chrisbeckstrom.com/2018/12/27/32551/
  # assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

  # route id/url through our redirect wrapper so fetches come back to us
  activity['id'] = redirect_wrap(activity.get('id'))
  activity['url'] = redirect_wrap(activity.get('url'))

  # copy image(s) into attachment(s). may be Mastodon-specific.
  # https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
  obj_or_activity = obj if isinstance(obj, dict) else activity
  obj_or_activity.setdefault('attachment', []).extend(
    obj_or_activity.get('image', []))

  # cc public and target's author(s) and recipients
  # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
  # https://w3c.github.io/activitypub/#delivery
  if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
    recips = [AS2_PUBLIC_AUDIENCE]
    if target:
      recips += itertools.chain(*(util.get_list(target, field)
                                  for field in ('actor', 'attributedTo', 'to', 'cc')))
    # string recipients pass through get_url(); dict recipients fall back to id
    activity['cc'] = util.dedupe_urls(util.get_url(recip) or recip.get('id')
                                      for recip in recips)

  # wrap articles and notes in a Create activity
  if type in ('Article', 'Note'):
    activity = {
      '@context': as2.CONTEXT,
      'type': 'Create',
      'id': f'{activity["id"]}#bridgy-fed-create',
      'object': activity,
    }

  return util.trim_nulls(activity)
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None):
  """Converts ActivityStreams activities to an Atom feed.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.

  Returns:
    unicode string with Atom XML
  """
  # Strip query params from URLs so that we don't include access tokens, etc
  host_url = (_remove_query_params(host_url) if host_url
              else 'https://github.com/snarfed/granary')
  if request_url is None:
    request_url = host_url

  for a in activities:
    # NOTE(review): this loop mutates the caller's activity dicts in place
    # (title, object.rendered_content, etc).
    act_type = source.object_type(a)
    if not act_type or act_type == 'post':
      primary = a.get('object', {})
    else:
      primary = a
    obj = a.setdefault('object', {})

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(microformats2.render_content(primary))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
      a['title'] = util.ellipsize(_encode_ampersands(
        a.get('displayName') or a.get('content') or obj.get('title') or
        obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    a['title'] = xml.sax.saxutils.escape(source.strip_html_tags(a['title']))

    # Normalize attachments.image to always be a list.
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
      att['image'] = util.get_list(att, 'image')

    obj['rendered_children'] = [
      _encode_ampersands(microformats2.render_content(att))
      for att in attachments if att.get('objectType') in ('note', 'article')]

  # Emulate Django template behavior that returns a special default value that
  # can continue to be referenced when an attribute or item lookup fails. Helps
  # avoid conditionals in the template itself.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      # recursively wrap nested dicts so missing lookups keep defaulting
      super(Defaulter, self).__init__(Defaulter, **{
        k: (Defaulter(**v) if isinstance(v, dict) else v)
        for k, v in kwargs.items()})

    def __unicode__(self):
      # NOTE(review): __unicode__ is a Python 2 protocol; under Python 3 this
      # method is never called by str() — confirm whether it's still needed.
      return super(Defaulter, self).__unicode__() if self else u''

  env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                           autoescape=True)
  if actor is None:
    actor = {}
  return env.get_template(ATOM_TEMPLATE_FILE).render(
    items=[Defaulter(**a) for a in activities],
    host_url=host_url,
    request_url=request_url,
    xml_base=xml_base,
    title=title or 'User feed for ' + source.Source.actor_name(actor),
    updated=activities[0]['object'].get('published', '') if activities else '',
    actor=Defaulter(**actor),
    rels=rels or {},
  )