def upload_media(self, media):
    """Uploads one or more images or videos from web URLs.

    https://docs.joinmastodon.org/api/rest/media/

    Each distinct URL is fetched as a stream and forwarded to the media
    endpoint. Objects without a URL, and URLs already uploaded during this
    call, are skipped. An object's displayName, if any, is sent as the
    media's description (alt text), ellipsized to MAX_ALT_LENGTH.

    Args:
      media: sequence of AS image or stream objects, eg:
        [{'url': 'http://picture', 'displayName': 'a thing'}, ...]

    Returns:
      list of string media ids for uploaded files

    Raises:
      whatever fetch.raise_for_status() raises if a media URL can't be fetched
    """
    uploaded = set()  # URLs uploaded so far; for de-duping
    ids = []

    for obj in media:
        url = util.get_url(obj, key='stream') or util.get_url(obj)
        if not url or url in uploaded:
            continue

        # form fields sent alongside the file itself
        data = {}
        alt = obj.get('displayName')
        if alt:
            data['description'] = util.ellipsize(alt, chars=MAX_ALT_LENGTH)

        # TODO: mime type check?
        with util.requests_get(url, stream=True) as fetch:
            fetch.raise_for_status()
            # data was previously built but never sent, which dropped alt text
            upload = self._post(API_MEDIA, files={'file': fetch.raw}, data=data)

        logging.info('Got: %s', upload)
        ids.append(upload['id'])
        uploaded.add(url)

    return ids
def send_webmentions(handler, activity, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    Stores one Response per target, then reports any failed sends through
    error().

    Args:
      handler: RequestHandler
      activity: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(handler, '%s activities are not supported yet.' % verb)

    is_reaction = verb in ('like', 'share')

    # work out the source URL and the candidate target URLs
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        source = source or obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))
    if is_reaction:
        targets.append(obj_url)

    targets = util.dedupe_urls([util.get_url(t) for t in targets])
    if not source:
        error(handler, "Couldn't find original post URL")
    if not targets:
        error(handler, "Couldn't find target URLs (inReplyTo or object)")

    # deliver one webmention per target, recording the outcome on its Response
    failures = []
    for target in targets:
        if target:
            response = Response(source=source, target=target, direction='in',
                                **response_props)
            response.put()

            # likes and shares are sent from the Response's proxy URL
            wm_source = source
            if is_reaction:
                wm_source = response.proxy_url()
            logging.info('Sending webmention from %s to %s', wm_source, target)

            wm = send.WebmentionSend(wm_source, target)
            succeeded = wm.send(headers=HEADERS)
            if not succeeded:
                logging.warning('Failed: %s', wm.error)
                failures.append(wm.error)
                response.status = 'error'
            else:
                logging.info('Success: %s', wm.response)
                response.status = 'complete'
            response.put()

    if failures:
        details = '\n'.join(json.dumps(failure, indent=2) for failure in failures)
        error(handler, 'Errors:\n' + details,
              status=failures[0].get('http_status'))
def _single_target(self):
    """Returns the one URL that the source object responds to.

    The first inReplyTo wins. Otherwise the first object is used, but only
    when the source's verb is in source.VERBS_WITH_OBJECT.

    Returns:
      string URL, the source's inReplyTo or object (if appropriate), or None
    """
    src = self.source_obj

    reply_to = util.get_first(src, 'inReplyTo')
    if reply_to:
        return util.get_url(reply_to)

    if src.get('verb') not in source.VERBS_WITH_OBJECT:
        return None

    return util.get_url(util.get_first(src, 'object'))
def base_object(self, obj):
    """Returns the 'base' Mastodon object that an object operates on.

    If the object is a reply, boost, or favorite of a Mastodon post - on any
    instance - this returns that post object. The id in the returned object is
    the id of that remote post *on the local instance*. (As a Mastodon style
    id, ie an integer in a string, *not* a tag URI.)

    Uses Mastodon's search API on the local instance to determine whether a URL
    is a Mastodon post, and if it is, to find or generate an id for it on the
    local instance.

    Discovered via https://mastodon.social/@jkreeftmeijer/101245063526942536

    Args:
      obj: ActivityStreams object

    Returns:
      dict, minimal ActivityStreams object. Usually has at least id; may also
      have url, author, etc. Empty dict if no base object could be found.
    """
    for field in ('inReplyTo', 'object', 'target'):
        for base in util.get_list(obj, field):
            url = util.get_url(base)
            if not url:
                # nothing to compare or search for, eg an object with no url.
                # (previously this crashed on url.startswith below.)
                continue

            # first, check if it's on local instance
            if url.startswith(self.instance):
                return self._postprocess_base_object(base)

            # nope; try mastodon's search API
            try:
                results = self._get(API_SEARCH,
                                    params={'q': url, 'resolve': True})
            except requests.RequestException as e:
                logging.info("%s URL %s doesn't look like Mastodon: %s",
                             field, url, e)
                continue

            for status in results.get('statuses', []):
                if url in (status.get('url'), status.get('uri')):
                    # found it! use the local instance's id for this status
                    base = self.status_to_object(status)
                    base['id'] = status['id']
                    return self._postprocess_base_object(base)

    return {}
def slap(acct):
    """Accepts POSTs to /[ACCT]/salmon and converts to outbound webmentions.

    Parses the request body as a magic envelope, converts the enclosed Atom to
    an AS1 activity, checks the verb and the author's magic signature, then
    hands the activity to common.send_webmentions.

    error() is expected to abort the request; the code after each call relies
    on that.

    Args:
      acct: string, the account from the URL path. Not otherwise used here.

    Returns:
      empty string, the response body
    """
    # TODO: unify with activitypub
    body = request.get_data(as_text=True)
    logging.info(f'Got: {body}')

    try:
        parsed = utils.parse_magic_envelope(body)
    except ParseError:
        error('Could not parse POST body as XML', exc_info=True)
    data = parsed['data']
    logging.info(f'Decoded: {data}')

    # check that we support this activity type
    try:
        activity = atom.atom_to_activity(data)
    except ParseError:
        error('Could not parse envelope data as XML', exc_info=True)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'Sorry, {verb} activities are not supported yet.', status=501)

    # verify author and signature
    author = util.get_url(activity.get('actor'))
    if not author:
        # without this, the ':' check below raised TypeError on None
        error('Could not find author URI in activity.')
    if ':' not in author:
        author = f'acct:{author}'
    elif not author.startswith('acct:'):
        error(f'Author URI {author} has unsupported scheme; expected acct:')

    logging.info(f'Fetching Salmon key for {author}')
    if not magicsigs.verify(data, parsed['sig'], author_uri=author):
        error('Could not verify magic signature.')
    logging.info('Verified magic signature.')

    # Verify that the timestamp is recent. Required by spec.
    # I get that this helps prevent spam, but in practice it's a bit silly,
    # and other major implementations don't (e.g. Mastodon), so forget it.
    #
    # updated = utils.parse_updated_from_atom(data)
    # if not utils.verify_timestamp(updated):
    #     error('Timestamp is more than 1h old.')

    # send webmentions to each target. activity was already parsed from data
    # above and hasn't been modified since, so it's reused rather than
    # converted a second time.
    common.send_webmentions(activity, protocol='ostatus', source_atom=data)
    return ''
def post(self, username, domain):
    """Handles an incoming Salmon slap and converts it to outbound webmentions.

    Parses the request body as a magic envelope, converts the enclosed Atom to
    an AS1 activity, checks the verb and the author's magic signature, then
    hands the activity to self.send_webmentions.

    self.error() is expected to abort the request; the code after each call
    relies on that.

    Args:
      username: string, from the URL path. Not otherwise used here.
      domain: string, from the URL path. Not otherwise used here.
    """
    logging.info('Got: %s', self.request.body)

    try:
        parsed = utils.parse_magic_envelope(self.request.body)
    except ParseError:
        self.error('Could not parse POST body as XML', exc_info=True)
    data = parsed['data']
    logging.info('Decoded: %s', data)

    # check that we support this activity type
    try:
        activity = atom.atom_to_activity(data)
    except ParseError:
        self.error('Could not parse envelope data as XML', exc_info=True)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        self.error('Sorry, %s activities are not supported yet.' % verb,
                   status=501)

    # verify author and signature
    author = util.get_url(activity.get('actor'))
    if not author:
        # without this, the ':' check below raised TypeError on None
        self.error('Could not find author URI in activity.')
    if ':' not in author:
        author = 'acct:%s' % author
    elif not author.startswith('acct:'):
        self.error('Author URI %s has unsupported scheme; expected acct:' % author)

    logging.info('Fetching Salmon key for %s', author)
    if not magicsigs.verify(author, data, parsed['sig']):
        self.error('Could not verify magic signature.')
    logging.info('Verified magic signature.')

    # Verify that the timestamp is recent. Required by spec.
    # I get that this helps prevent spam, but in practice it's a bit silly,
    # and other major implementations don't (e.g. Mastodon), so forget it.
    #
    # updated = utils.parse_updated_from_atom(data)
    # if not utils.verify_timestamp(updated):
    #     self.error('Timestamp is more than 1h old.')

    # send webmentions to each target. activity was already parsed from data
    # above and hasn't been modified since, so it's reused rather than
    # converted a second time.
    self.send_webmentions(activity, protocol='ostatus', source_atom=data)
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
    """Creates or previews a status (aka toot), reply, boost (aka reblog), or favorite.

    https://docs.joinmastodon.org/api/rest/statuses/

    Based on :meth:`Twitter._create`.

    Dispatches on the object's objectType and verb: likes are favorited,
    shares are reblogged, and notes, articles, replies, and RSVPs are posted
    as statuses, with up to MAX_MEDIA attached videos and images. Anything
    else returns a non-aborting error result.

    Args:
      obj: ActivityStreams object
      preview: boolean. Must be exactly True or False; the None default fails
        the assertion below.
      include_link: string, passed through to self.truncate
      ignore_formatting: boolean, passed through to self._content_for_create

    Returns:
      CreationResult. If preview is True, the content will be a unicode string
      HTML snippet. If False, it will be a dict with 'id' and 'url' keys
      for the newly created object.
    """
    assert preview in (False, True)
    type = obj.get('objectType')
    verb = obj.get('verb')

    # the post being replied to, boosted, or favorited, if any
    base_obj = self.base_object(obj)
    base_id = base_obj.get('id')
    base_url = base_obj.get('url')

    is_reply = type == 'comment' or obj.get('inReplyTo')
    is_rsvp = (verb and verb.startswith('rsvp-')) or verb == 'invite'
    atts = obj.get('attachments', [])
    images = util.dedupe_urls(
        util.get_list(obj, 'image') +
        [a for a in atts if a.get('objectType') == 'image'])
    # videos are de-duped on their stream URL, and the object itself is
    # included as a candidate
    videos = util.dedupe_urls(
        [obj] + [a for a in atts if a.get('objectType') == 'video'],
        key='stream')
    has_media = (images or videos) and (type in ('note', 'article') or is_reply)

    # prefer displayName over content for articles
    #
    # TODO: handle activities as well as objects? ie pull out ['object'] here if
    # necessary?
    # NOTE(review): type was already assigned from objectType above; this
    # second assignment looks redundant.
    type = obj.get('objectType')
    prefer_content = type == 'note' or (base_url and is_reply)
    preview_description = ''
    content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                       prefer_name=not prefer_content)

    if not content:
        if type == 'activity' and not is_rsvp:
            # eg a like or share with no text of its own
            content = verb
        elif has_media:
            # media-only post
            content = ''
        else:
            return source.creation_result(
                abort=False,  # keep looking for things to publish,
                error_plain='No content text found.',
                error_html='No content text found.')

    post_label = '%s %s' % (self.NAME, self.TYPE_LABELS['post'])
    if is_reply and not base_url:
        return source.creation_result(
            abort=True,
            error_plain='Could not find a %s to reply to.' % post_label,
            error_html=
            'Could not find a %s to <a href="http://indiewebcamp.com/reply">reply to</a>. Check that your post has the right <a href="http://indiewebcamp.com/comment">in-reply-to</a> link.'
            % post_label)

    # truncate and ellipsize content if necessary
    # TODO: don't count domains in remote mentions.
    # https://docs.joinmastodon.org/usage/basics/#text
    content = self.truncate(content, obj.get('url'), include_link, type)

    # linkify user mentions
    def linkify_mention(match):
        # group 1 is split on '@' into username and an optional instance
        # domain; mentions with no domain link to this instance
        split = match.group(1).split('@')
        username = split[0]
        instance = ('https://' + split[1]) if len(split) > 1 else self.instance
        url = urllib.parse.urljoin(instance, '/@' + username)
        return '<a href="%s">@%s</a>' % (url, username)

    # preview_content is only the HTML preview; the status text that gets
    # posted is content
    preview_content = MENTION_RE.sub(linkify_mention, content)

    # linkify (defaults to twitter's behavior)
    preview_content = util.linkify(preview_content, pretty=True,
                                   skip_bare_cc_tlds=True)
    tags_url = urllib.parse.urljoin(self.instance, '/tags')
    preview_content = HASHTAG_RE.sub(
        r'\1<a href="%s/\2">#\2</a>' % tags_url, preview_content)

    # switch on activity type
    if type == 'activity' and verb == 'like':
        if not base_url:
            return source.creation_result(
                abort=True,
                error_plain='Could not find a %s to %s.' %
                (post_label, self.TYPE_LABELS['like']),
                error_html=
                'Could not find a %s to <a href="http://indiewebcamp.com/like">%s</a>. Check that your post has the right <a href="http://indiewebcamp.com/like">u-like-of link</a>.'
                % (post_label, self.TYPE_LABELS['like']))

        if preview:
            preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
                self.TYPE_LABELS['like'], base_url, self.TYPE_LABELS['post'],
                self.embed_post(base_obj))
            return source.creation_result(description=preview_description)
        else:
            resp = self._post(API_FAVORITE % base_id)
            resp['type'] = 'like'

    elif type == 'activity' and verb == 'share':
        if not base_url:
            return source.creation_result(
                abort=True,
                error_plain='Could not find a %s to %s.' %
                (post_label, self.TYPE_LABELS['repost']),
                error_html=
                'Could not find a %s to <a href="http://indiewebcamp.com/repost">%s</a>. Check that your post has the right <a href="http://indiewebcamp.com/repost">repost-of</a> link.'
                % (post_label, self.TYPE_LABELS['repost']))

        if preview:
            preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
                self.TYPE_LABELS['repost'], base_url, self.TYPE_LABELS['post'],
                self.embed_post(base_obj))
            return source.creation_result(description=preview_description)
        else:
            resp = self._post(API_REBLOG % base_id)
            resp['type'] = 'repost'

    elif type in ('note', 'article') or is_reply or is_rsvp:
        # a post
        data = {'status': content}

        if is_reply:
            preview_description += 'add a <span class="verb">%s</span> to <a href="%s">this %s</a>: %s' % (
                self.TYPE_LABELS['comment'], base_url, self.TYPE_LABELS['post'],
                self.embed_post(base_obj))
            data['in_reply_to_id'] = base_id
        else:
            preview_description += '<span class="verb">%s</span>:' % self.TYPE_LABELS[
                'post']

        # cap total media at MAX_MEDIA. videos take priority; images fill
        # whatever slots are left.
        num_media = len(videos) + len(images)
        if num_media > MAX_MEDIA:
            videos = videos[:MAX_MEDIA]
            images = images[:max(MAX_MEDIA - len(videos), 0)]
            logging.warning('Found %d media! Only using the first %d: %r',
                            num_media, MAX_MEDIA, videos + images)

        if preview:
            media_previews = [
                '<video controls src="%s"><a href="%s">%s</a></video>' %
                (util.get_url(vid, key='stream'),
                 util.get_url(vid, key='stream'),
                 vid.get('displayName') or 'this video') for vid in videos
            ] + [
                '<img src="%s" alt="%s" />' %
                (util.get_url(img), img.get('displayName') or '')
                for img in images
            ]
            if media_previews:
                preview_content += '<br /><br />' + ' '.join(
                    media_previews)
            return source.creation_result(content=preview_content,
                                          description=preview_description)

        else:
            ids = self.upload_media(videos + images)
            if ids:
                data['media_ids'] = ids
            resp = self._post(API_STATUSES, json=data)

    else:
        return source.creation_result(
            abort=False,
            error_plain='Cannot publish type=%s, verb=%s to Mastodon' %
            (type, verb),
            error_html='Cannot publish type=%s, verb=%s to Mastodon' %
            (type, verb))

    # only non-preview paths reach here. fall back to the base post's URL if
    # the API response didn't include one.
    if 'url' not in resp:
        resp['url'] = base_url

    return source.creation_result(resp)
def try_activitypub(self):
    """Attempts to deliver the source post to its target via ActivityPub.

    Fetches the `source` query param URL, converts its first h-entry to AS1,
    fetches the post it responds to as AS2, finds that post's author's inbox,
    and POSTs the source there as a signed AS2 activity. Falls back to
    self.send_salmon when the target is served as HTML or no inbox can be
    found.

    Stores the Response entity in self.resp, and writes the inbox's status
    code and body to self.response.

    Returns:
      the result of self.send_salmon when falling back to it, otherwise None
    """
    source = util.get_required_param(self, 'source')

    # fetch source page, convert to ActivityStreams
    source_resp = common.requests_get(source)
    source_url = source_resp.url or source
    source_mf2 = mf2py.parse(source_resp.text, url=source_url)
    # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

    entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
    if not entry:
        # previously this crashed on entry.setdefault below
        common.error(self, 'No microformats2 found on %s' % source_url)
    logging.info('First entry: %s', json.dumps(entry, indent=2))

    # make sure it has url, since we use that for AS2 id, which is required
    # for ActivityPub.
    props = entry.setdefault('properties', {})
    if not props.get('url'):
        props['url'] = [source_url]

    source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
    logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

    # fetch target page as AS object. (target is first in-reply-to, like-of,
    # or repost-of, *not* target query param.)
    target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                          util.get_first(source_obj, 'object'))
    if not target:
        common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                     'found in %s' % source_url)

    try:
        target_resp = common.get_as2(target)
    except (requests.HTTPError, exc.HTTPBadGateway) as e:
        # a successful fetch that returned HTML instead of AS2: try Salmon
        if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
            self.resp = Response.get_or_create(
                source=source_url, target=e.response.url or target,
                direction='out', source_mf2=json.dumps(source_mf2))
            return self.send_salmon(source_obj, target_resp=e.response)
        raise

    target_url = target_resp.url or target
    self.resp = Response.get_or_create(
        source=source_url, target=target_url, direction='out',
        protocol='activitypub', source_mf2=json.dumps(source_mf2))

    # find actor's inbox
    target_obj = target_resp.json()
    inbox_url = target_obj.get('inbox')

    if not inbox_url:
        # TODO: test actor/attributedTo and not, with/without inbox
        actor = target_obj.get('actor') or target_obj.get('attributedTo')
        if isinstance(actor, dict):
            inbox_url = actor.get('inbox')
            # fall back to id so an embedded actor without url is still usable
            actor = actor.get('url') or actor.get('id')
        if not inbox_url and not actor:
            common.error(self, 'Target object has no actor or attributedTo URL')

    if not inbox_url:
        # fetch actor as AS object
        actor = common.get_as2(actor).json()
        inbox_url = actor.get('inbox')

    if not inbox_url:
        # TODO: probably need a way to save errors like this so that we can
        # return them if ostatus fails too.
        # common.error(self, 'Target actor has no inbox')
        return self.send_salmon(source_obj, target_resp=target_resp)

    # convert to AS2
    source_domain = urlparse.urlparse(source_url).netloc
    key = MagicKey.get_or_create(source_domain)
    source_activity = common.postprocess_as2(
        as2.from_as1(source_obj), target=target_obj, key=key)

    # a Response that's already complete is sent as an Update
    if self.resp.status == 'complete':
        source_activity['type'] = 'Update'

    # prepare HTTP Signature (required by Mastodon)
    # https://w3c.github.io/activitypub/#authorization-lds
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    acct = 'acct:%s@%s' % (source_domain, source_domain)
    auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                             algorithm='rsa-sha256')

    # deliver source object to target actor's inbox.
    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        # required for HTTP Signature
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
        'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
    }
    # inbox may be relative to the target URL
    inbox_url = urlparse.urljoin(target_url, inbox_url)
    resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                headers=headers)
    self.response.status_int = resp.status_code
    if resp.status_code == 202:
        self.response.write('202 response! If this is Mastodon 1.x, their '
                            'signature verification probably failed. :(\n')
    self.response.write(resp.text)
def postprocess_as2(activity, target=None, key=None):
    """Prepare an AS2 object to be served or sent via ActivityPub.

    Modifies activity in place in most cases. Person objects are returned
    right after actor postprocessing; Articles and Notes are wrapped in a new
    Create activity.

    Args:
      activity: dict, AS2 object or activity
      target: dict, AS2 object, optional. The target of activity's inReplyTo or
        Like/Announce/etc object, if any.
      key: :class:`models.MagicKey`, optional. populated into publicKey field
        if provided.

    Returns:
      dict, the postprocessed AS2 object or activity
    """
    type = activity.get('type')

    # actor objects
    if type == 'Person':
        postprocess_as2_actor(activity)
        # key is optional, so only add publicKey when there is one to add.
        # (previously a missing key crashed on key.public_pem().)
        if key is not None and not activity.get('publicKey'):
            # underspecified, inferred from this issue and Mastodon's implementation:
            # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
            # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
            activity.update({
                'publicKey': {
                    'id': activity.get('preferredUsername'),
                    'publicKeyPem': key.public_pem().decode(),
                },
                '@context': (util.get_list(activity, '@context') +
                             ['https://w3id.org/security/v1']),
            })
        return activity

    for actor in (util.get_list(activity, 'attributedTo') +
                  util.get_list(activity, 'actor')):
        postprocess_as2_actor(actor)

    # inReplyTo: singly valued, prefer id over url
    target_id = target.get('id') if target else None
    in_reply_to = activity.get('inReplyTo')
    if in_reply_to:
        if target_id:
            activity['inReplyTo'] = target_id
        elif isinstance(in_reply_to, list):
            if len(in_reply_to) > 1:
                logging.warning("AS2 doesn't support multiple inReplyTo URLs! "
                                'Only using the first: %s' % in_reply_to[0])
            activity['inReplyTo'] = in_reply_to[0]

        # Mastodon evidently requires a Mention tag for replies to generate a
        # notification to the original post's author. not required for likes,
        # reposts, etc. details:
        # https://github.com/snarfed/bridgy-fed/issues/34
        if target:
            for to in (util.get_list(target, 'attributedTo') +
                       util.get_list(target, 'actor')):
                if isinstance(to, dict):
                    to = to.get('url') or to.get('id')
                if to:
                    activity.setdefault('tag', []).append({
                        'type': 'Mention',
                        'href': to,
                    })

    # activity objects (for Like, Announce, etc): prefer id over url
    obj = activity.get('object')
    if obj:
        if isinstance(obj, dict) and not obj.get('id'):
            obj['id'] = target_id or obj.get('url')
        elif target_id and obj != target_id:
            activity['object'] = target_id

    # id is required for most things. default to url if it's not set.
    if not activity.get('id'):
        activity['id'] = activity.get('url')

    # TODO: find a better way to check this, sometimes or always?
    # removed for now since it fires on posts without u-id or u-url, eg
    # https://chrisbeckstrom.com/2018/12/27/32551/
    # assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

    activity['id'] = redirect_wrap(activity.get('id'))
    activity['url'] = redirect_wrap(activity.get('url'))

    # copy image(s) into attachment(s). may be Mastodon-specific.
    # https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
    #
    # image may be a single value rather than a list. get_list normalizes it
    # so that extend() doesn't iterate a dict's keys or a string's characters.
    obj_or_activity = obj if isinstance(obj, dict) else activity
    obj_or_activity.setdefault('attachment', []).extend(
        util.get_list(obj_or_activity, 'image'))

    # cc public and target's author(s) and recipients
    # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
    # https://w3c.github.io/activitypub/#delivery
    if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
        recips = [AS2_PUBLIC_AUDIENCE]
        if target:
            recips += itertools.chain(*(util.get_list(target, field) for field in
                                        ('actor', 'attributedTo', 'to', 'cc')))
        activity['cc'] = util.dedupe_urls(
            util.get_url(recip) or recip.get('id') for recip in recips)

    # wrap articles and notes in a Create activity
    if type in ('Article', 'Note'):
        activity = {
            '@context': as2.CONTEXT,
            'type': 'Create',
            'id': f'{activity["id"]}#bridgy-fed-create',
            'object': activity,
        }

    return util.trim_nulls(activity)
def send_webmentions(activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    Targets are collected from inReplyTo on the activity and its object, from
    mention tags whose URL is on this host, and from the object itself for
    follows, likes, and shares. Same-domain targets are skipped. One Response
    is stored per remaining target.

    Args:
      activity_wrapped: dict, AS1 activity
      proxy: if truthy, webmentions are sent from the Response's proxy URL
        instead of from the source URL. Follows, likes, and shares always use
        the proxy URL.
      response_props: passed through to the newly created Responses
    """
    activity = redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'{verb} activities are not supported yet.')

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        # for create/post/update, the object is the original post
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    # tags are read from the still-wrapped activity so that mention URLs can
    # be matched against this host before being unwrapped
    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(request.host_url):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error("Couldn't find original post URL")
    if not targets:
        error(
            "Couldn't find any target URLs in inReplyTo, object, or mention tags"
        )

    # send webmentions and store Responses
    errors = []  # stores (code, body) tuples
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info(
                f'Skipping same-domain webmention from {source} to {target}')
            continue

        response = Response(source=source, target=target, direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url()
                     if verb in ('follow', 'like', 'share') or proxy else source)
        logging.info(f'Sending webmention from {wm_source} to {target}')

        try:
            endpoint = webmention.discover(target, headers=HEADERS).endpoint
            if endpoint:
                webmention.send(endpoint, wm_source, target, headers=HEADERS)
                response.status = 'complete'
                logging.info('Success!')
            else:
                response.status = 'ignored'
                logging.info('Ignoring.')
        except Exception as e:
            # Exception rather than BaseException, so that KeyboardInterrupt
            # and SystemExit propagate instead of being reported as a failed
            # webmention
            logging.warning(f'Failed to send webmention to {target}',
                            exc_info=True)
            errors.append(util.interpret_http_exception(e))
        response.put()

    if errors:
        msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
        error(msg, status=int(errors[0][0] or 502))
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

    Each activity becomes one feed item, except those whose object is a
    person, which are skipped. At most one enclosure is attached per item.
    If any item has an enclosure, the podcast extension is loaded and feed
    level iTunes fields are populated.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed. Required, despite the None
        default; see the assert below.
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML

    Raises:
      TypeError: if activities isn't iterable, or is a dict or string
      AssertionError: if feed_url is empty
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, str)):
        raise TypeError('activities may not be a dict or string')

    fg = FeedGenerator()
    fg.id(feed_url)
    # NOTE(review): this check runs after feed_url has already been passed to
    # fg.id() above.
    assert feed_url
    fg.link(href=feed_url, rel='self')
    if home_page_url:
        fg.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    fg.language('en')
    fg.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    # feed image: the h-feed's photo if it has one, otherwise the actor's image
    image = (util.get_url(hfeed.get('properties', {}), 'photo') or
             util.get_url(actor, 'image'))
    if image:
        fg.image(image)

    props = hfeed.get('properties') or {}
    content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    desc = content or summary or '-'
    fg.description(desc)  # required
    fg.title(title or util.ellipsize(desc))  # required

    latest = None  # most recent item datetime seen; used for lastBuildDate
    feed_has_enclosure = False
    for activity in activities:
        # use the activity's object if it has one, otherwise the activity itself
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        item = fg.add_entry()
        url = obj.get('url')
        id = obj.get('id') or url
        item.id(id)
        item.link(href=url)
        item.guid(url, permalink=True)

        # title (required)
        # note that this rebinds the title parameter, which was already
        # consumed by fg.title() above
        title = (obj.get('title') or obj.get('displayName') or
                 util.ellipsize(obj.get('content', '-')))
        # strip HTML tags
        title = util.parse_html(title).get_text('').strip()
        item.title(title)

        content = microformats2.render_content(
            obj, include_location=True, render_attachments=True,
            render_image=True)
        if not content:
            content = obj.get('summary')
        if content:
            item.content(content, type='CDATA')

        # categories: named tags, excluding likes/reactions/shares and
        # article, person, and mention tags
        categories = [
            {
                'term': t['displayName']
            } for t in obj.get('tags', [])
            if t.get('displayName') and
            t.get('verb') not in ('like', 'react', 'share') and
            t.get('objectType') not in ('article', 'person', 'mention')
        ]
        item.category(categories)

        author = obj.get('author', {})
        author = {
            'name': author.get('displayName') or author.get('username'),
            'uri': author.get('url'),
            'email': author.get('email') or '-',
        }
        item.author(author)

        published = obj.get('published') or obj.get('updated')
        if published and isinstance(published, str):
            try:
                dt = mf2util.parse_datetime(published)
                # a bare date is promoted to a datetime at the start of that
                # day, and a naive datetime is treated as UTC
                if not isinstance(dt, datetime):
                    dt = datetime.combine(dt, time.min)
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        item_has_enclosure = False
        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            url = stream.get('url') or ''
            mime = mimetypes.guess_type(url)[0] or ''
            # an attachment is an enclosure if either its objectType or the
            # top level part of its guessed MIME type is in ENCLOSURE_TYPES
            if (att.get('objectType') in ENCLOSURE_TYPES or
                    mime and mime.split('/')[0] in ENCLOSURE_TYPES):
                if item_has_enclosure:
                    logging.info(
                        'Warning: item %s already has an RSS enclosure, skipping additional enclosure %s',
                        id, url)
                    continue

                item_has_enclosure = feed_has_enclosure = True
                item.enclosure(url=url, type=mime,
                               length=str(stream.get('size', '')))
                item.load_extension('podcast')
                duration = stream.get('duration')
                if duration:
                    item.podcast.itunes_duration(duration)

    if feed_has_enclosure:
        fg.load_extension('podcast')
        fg.podcast.itunes_author(
            actor.get('displayName') or actor.get('username'))
        if summary:
            fg.podcast.itunes_summary(summary)
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_block(False)
        # NOTE: author and categories here are whatever the last loop
        # iteration left behind, ie they come from the last item processed.
        name = author.get('name')
        if name:
            fg.podcast.itunes_author(name)
        if image:
            fg.podcast.itunes_image(image)
        fg.podcast.itunes_category(categories)

    if latest:
        fg.lastBuildDate(latest)

    return fg.rss_str(pretty=True).decode('utf-8')
def postprocess_as2(activity, target=None, key=None):
    """Prepare an AS2 object to be served or sent via ActivityPub.

    Modifies activity in place in most cases. Person objects are returned
    right after actor postprocessing; Articles and Notes are wrapped in a new
    Create activity.

    Args:
      activity: dict, AS2 object or activity
      target: dict, AS2 object, optional. The target of activity's inReplyTo or
        Like/Announce/etc object, if any.
      key: MagicKey, optional. populated into publicKey field if provided.

    Returns:
      dict, the postprocessed AS2 object or activity

    Raises:
      AssertionError: if neither the activity nor its object has an id or url
    """
    type = activity.get('type')

    # actor objects
    if type == 'Person':
        postprocess_as2_actor(activity)
        # key is optional, so only add publicKey when there is one to add.
        # (previously a missing key crashed on key.public_pem().)
        if key is not None and not activity.get('publicKey'):
            # underspecified, inferred from this issue and Mastodon's implementation:
            # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
            # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
            activity['publicKey'] = {
                'publicKeyPem': key.public_pem(),
            }
        return activity

    for actor in (util.get_list(activity, 'attributedTo') +
                  util.get_list(activity, 'actor')):
        postprocess_as2_actor(actor)

    # inReplyTo: singly valued, prefer id over url
    target_id = target.get('id') if target else None
    in_reply_to = activity.get('inReplyTo')
    if in_reply_to:
        if target_id:
            activity['inReplyTo'] = target_id
        elif isinstance(in_reply_to, list):
            if len(in_reply_to) > 1:
                # this used to reference an undefined name, in_reply_tos, and
                # so raised NameError instead of logging
                logging.warning(
                    "AS2 doesn't support multiple inReplyTo URLs! "
                    'Only using the first: %s' % in_reply_to[0])
            activity['inReplyTo'] = in_reply_to[0]

    # activity objects (for Like, Announce, etc): prefer id over url
    obj = activity.get('object', {})
    if obj:
        if isinstance(obj, dict) and not obj.get('id'):
            obj['id'] = target_id or obj.get('url')
        elif target_id and obj != target_id:
            # only replace the object when there's a target id to replace it
            # with. otherwise it was set to None and then dropped by
            # trim_nulls below.
            activity['object'] = target_id

    # id is required for most things. default to url if it's not set.
    if not activity.get('id'):
        activity['id'] = activity.get('url')

    assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

    # cc public and target's author(s) and recipients
    # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
    # https://w3c.github.io/activitypub/#delivery
    if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
        recips = [AS2_PUBLIC_AUDIENCE]
        if target:
            recips += itertools.chain(*(util.get_list(target, field) for field in
                                        ('actor', 'attributedTo', 'to', 'cc')))
        activity['cc'] = util.dedupe_urls(util.get_url(recip) for recip in recips)

    # wrap articles and notes in a Create activity
    if type in ('Article', 'Note'):
        activity = {
            '@context': as2.CONTEXT,
            'type': 'Create',
            'object': activity,
        }

    return util.trim_nulls(activity)
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False, render_image=False,
                   white_space_pre=True):
    """Renders the content of an ActivityStreams object as HTML.

    Includes tags, mentions, and non-note/article attachments. (Note/article
    attachments are converted to mf2 children in object_to_json and then rendered
    in json_to_html.)

    Note that the returned HTML is included in Atom as well as HTML documents,
    so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

    Args:
      obj: decoded JSON ActivityStreams object
      include_location: boolean, whether to render location, if provided
      synthesize_content: boolean, whether to generate synthetic content if the
        object doesn't have its own, e.g. 'likes this.' or 'shared this.'
      render_attachments: boolean, whether to render attachments, eg links,
        images, audio, and video
      render_image: boolean, whether to render the object's image(s)
      white_space_pre: boolean, whether to wrap in CSS white-space: pre. If False,
        newlines will be converted to <br> tags instead. Background:
        https://indiewebcamp.com/note#Indieweb_whitespace_thinking

    Returns:
      string, rendered HTML
    """
    content = obj.get('content', '')

    # extract tags. preserve order but de-dupe, ie don't include a tag more than
    # once.
    seen_ids = set()
    mentions = []
    tags = {}  # maps string objectType to list of tag objects
    for tag in obj.get('tags', []):
        tag_id = tag.get('id')
        if tag_id and tag_id in seen_ids:
            continue
        seen_ids.add(tag_id)

        # tags with a position and a URL are inline mentions; everything else
        # is grouped by object type and rendered after the content.
        if 'startIndex' in tag and 'length' in tag and 'url' in tag:
            mentions.append(tag)
        else:
            tags.setdefault(source.object_type(tag), []).append(tag)

    # linkify embedded mention tags inside content.
    if mentions:
        mentions.sort(key=lambda t: t['startIndex'])
        last_end = 0
        orig = util.WideUnicode(content)
        content = util.WideUnicode('')
        for tag in mentions:
            start = tag['startIndex']
            end = start + tag['length']
            content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
                content, orig[last_end:start], tag['url'], orig[start:end]))
            last_end = end

        content += orig[last_end:]

    # is whitespace in this content meaningful? standard heuristic: if there are
    # no HTML tags in it, and it has a newline, then assume yes.
    # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
    # https://github.com/snarfed/granary/issues/80
    if content and not obj.get('content_is_html') and '\n' in content:
        if white_space_pre:
            content = '<div style="white-space: pre">%s</div>' % content
        else:
            content = content.replace('\n', '<br />\n')

    # linkify embedded links. ignore the "mention" tags that we added ourselves.
    # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py, then
    # uncomment this.
    # if content:
    #   content = util.linkify(content)

    # the image field. may be multiply valued.
    rendered_urls = set()
    if render_image:
        urls = get_urls(obj, 'image')
        content += _render_attachments([{
            'objectType': 'image',
            'image': {'url': url},
        } for url in urls], obj)
        rendered_urls = set(urls)

    # attachments, e.g. links (aka articles)
    # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
    if render_attachments:
        # skip notes/articles and anything whose image was already rendered above
        atts = [a for a in obj.get('attachments', [])
                if a.get('objectType') not in ('note', 'article')
                and get_url(a, 'image') not in rendered_urls]
        content += _render_attachments(atts + tags.pop('article', []), obj)

    # generate share/like contexts if the activity does not have content
    # of its own
    obj_type = source.object_type(obj)
    for as_type, verb in (
            ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
        if (not synthesize_content or obj_type != as_type or
                'object' not in obj or 'content' in obj):
            continue

        targets = get_list(obj, 'object')
        if not targets:
            continue

        for target in targets:
            # sometimes likes don't have enough content to render anything
            # interesting
            if 'url' in target and set(target) <= {'url', 'objectType'}:
                content += '<a href="%s">%s this.</a>' % (
                    target.get('url'), verb.lower())

            else:
                author = target.get('author', target.get('actor', {}))
                # special case for twitter RT's
                if obj_type == 'share' and 'url' in obj and re.search(
                        r'^https?://(?:www\.|mobile\.)?twitter\.com/',
                        obj.get('url')):
                    content += 'RT <a href="%s">@%s</a> ' % (
                        target.get('url', '#'), author.get('username'))
                else:
                    # image looks bad in the simplified rendering
                    author = {k: v for k, v in author.items() if k != 'image'}
                    content += '%s <a href="%s">%s</a> by %s' % (
                        verb, target.get('url', '#'),
                        target.get('displayName', target.get('title', 'a post')),
                        hcard_to_html(object_to_json(
                            author, default_object_type='person')),
                    )
                # the nested render only passes these three options through,
                # so the target's attachments and image use the defaults.
                content += render_content(target,
                                          include_location=include_location,
                                          synthesize_content=synthesize_content,
                                          white_space_pre=white_space_pre)
            # only include the first context in the content (if there are
            # others, they'll be included as separate properties)
            break
        break

    if render_attachments and obj.get('verb') == 'share':
        atts = [att for att in itertools.chain.from_iterable(
                    o.get('attachments', []) for o in util.get_list(obj, 'object'))
                if att.get('objectType') not in ('note', 'article')]
        content += _render_attachments(atts, obj)

    # location
    loc = obj.get('location')
    if include_location and loc:
        content += '\n<p>%s</p>' % hcard_to_html(
            object_to_json(loc, default_object_type='place'),
            parent_props=['p-location'])

    # these are rendered manually in json_to_html()
    for tag_type in 'like', 'share', 'react', 'person':
        tags.pop(tag_type, None)

    # render the rest
    content += tags_to_html(tags.pop('hashtag', []), 'p-category')
    content += tags_to_html(tags.pop('mention', []), 'u-mention', visible=False)
    content += tags_to_html(sum(tags.values(), []), 'tag')

    return content
def send_webmentions(handler, activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    Targets are collected from the activity's and its object's inReplyTo, from
    mention tags whose URL starts with this app's host URL, and, for follows,
    likes and shares, from the object itself. A Response is stored for each
    webmention attempted. Targets on the same domain as the source are skipped.

    Args:
      handler: RequestHandler
      activity_wrapped: dict, AS1 activity
      proxy: optional. If truthy, webmentions are sent from each Response's
        proxy URL instead of the original source URL. Follows, likes and
        shares always use the proxy URL.
      response_props: passed through to the newly created Responses
    """
    proxied_verbs = ('follow', 'like', 'share')

    activity = common.redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(handler, '%s activities are not supported yet.' % verb)

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    # mention tags are read from the still-wrapped activity, since only URLs
    # that start with our own host URL are treated as targets.
    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(appengine_config.HOST_URL):
                # same qualified helper as the unwrap call at the top
                targets.append(common.redirect_unwrap(url))

    if verb in proxied_verbs:
        targets.append(obj_url)

    # drop empty targets explicitly, eg a like whose object has no URL, so they
    # are reported as "no targets" below instead of reaching domain_from_link.
    deduped = util.dedupe_urls(util.get_url(t) for t in targets)
    targets = [target for target in deduped if target]

    # NOTE(review): the checks below assume error() aborts the request - confirm.
    if not source:
        error(handler, "Couldn't find original post URL")
    if not targets:
        error(handler, "Couldn't find any target URLs in inReplyTo, object, or mention tags")

    # send webmentions and store Responses
    use_proxy = proxy or verb in proxied_verbs
    errors = []
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info('Skipping same-domain webmention from %s to %s',
                         source, target)
            continue

        # store the Response first; proxy_url() is called on the stored entity
        response = Response(source=source, target=target, direction='in',
                            **response_props)
        response.put()
        wm_source = response.proxy_url() if use_proxy else source
        logging.info('Sending webmention from %s to %s', wm_source, target)

        wm = send.WebmentionSend(wm_source, target)
        if wm.send(headers=HEADERS):
            logging.info('Success: %s', wm.response)
            response.status = 'complete'
        else:
            logging.warning('Failed: %s', wm.error)
            errors.append(wm.error)
            response.status = 'error'
        response.put()

    if errors:
        msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
        error(handler, msg, status=errors[0].get('http_status'))
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

    Person objects are skipped. Each remaining activity becomes one RSS item,
    with at most one enclosure, taken from its first audio/video-like
    attachment.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed. Required.
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML

    Raises:
      TypeError: if activities is not iterable, or is a dict or string
      AssertionError: if feed_url is empty
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, basestring)):
        raise TypeError('activities may not be a dict or string')

    # check before feed_url is used anywhere below
    assert feed_url

    fg = FeedGenerator()
    fg.id(feed_url)
    fg.link(href=feed_url, rel='self')
    if home_page_url:
        fg.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    fg.language('en')
    fg.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    image = util.get_url(hfeed, 'image') or util.get_url(actor, 'image')
    if image:
        fg.image(image)

    props = hfeed.get('properties') or {}
    content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    desc = content or summary or '-'
    fg.description(desc)  # required
    fg.title(title or util.ellipsize(desc))  # required

    latest = None  # most recent item datetime, used for lastBuildDate
    enclosures = False  # whether any item got an enclosure
    for activity in activities:
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        item = fg.add_entry()
        url = obj.get('url')
        item.id(obj.get('id') or url)
        item.link(href=url)
        item.guid(url, permalink=True)

        item.title(obj.get('title') or obj.get('displayName') or '-')  # required
        content = microformats2.render_content(
            obj, include_location=True, render_attachments=False) or obj.get('summary')
        if content:
            item.content(content, type='CDATA')

        item.category(
            [{'term': t['displayName']} for t in obj.get('tags', [])
             if t.get('displayName') and
             t.get('verb') not in ('like', 'react', 'share')])

        author = obj.get('author', {})
        item.author({
            'name': author.get('displayName') or author.get('username'),
            'uri': author.get('url'),
        })

        published = obj.get('published') or obj.get('updated')
        if published:
            try:
                dt = mf2util.parse_datetime(published)
                # promote bare dates to midnight, and naive datetimes to UTC
                if not isinstance(dt, datetime):
                    dt = datetime.combine(dt, time.min)
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            url = stream.get('url') or ''
            mime = mimetypes.guess_type(url)[0] or ''
            if (att.get('objectType') in ENCLOSURE_TYPES or
                    mime and mime.split('/')[0] in ENCLOSURE_TYPES):
                enclosures = True
                # the placeholder length attribute is stripped from the final
                # XML at the bottom of this function.
                item.enclosure(url=url, type=mime, length='REMOVEME')  # TODO: length (bytes)
                item.load_extension('podcast')
                duration = stream.get('duration')
                if duration:
                    item.podcast.itunes_duration(duration)
                # only the first enclosure is used: an entry holds a single
                # enclosure, and loading the podcast extension on the same
                # entry a second time raises ImportError in feedgen.
                break

    if enclosures:
        fg.load_extension('podcast')
        fg.podcast.itunes_author(actor.get('displayName') or actor.get('username'))
        if summary:
            fg.podcast.itunes_summary(summary)
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_block(False)

    if latest:
        fg.lastBuildDate(latest)

    return fg.rss_str(pretty=True).decode('utf-8').replace(' length="REMOVEME"', '')