def convert_mf2util(): def dates_to_string(json): if isinstance(json, dict): return {k: dates_to_string(v) for (k, v) in json.items()} if isinstance(json, list): return [dates_to_string(v) for v in json] if isinstance(json, datetime.date) or isinstance(json, datetime.datetime): return json.isoformat() return json url = request.args.get('url') as_feed = request.args.get('as-feed') op = request.args.get('op') if url: try: d = mf2py.parse(url=url) if op == 'post-type-discovery': entry = mf2util.find_first_entry(d, ['h-entry', 'h-event']) return jsonify({'type': mf2util.post_type_discovery(entry)}) if as_feed == 'true' or mf2util.find_first_entry(d, ['h-feed']): json = mf2util.interpret_feed(d, url) else: json = mf2util.interpret(d, url) return jsonify(dates_to_string(json)) except: current_app.logger.exception('running mf2util service') return jsonify({'error': str(sys.exc_info()[0])}) return """
def micropub2pelican(post, settings=None):
    """Convert a parsed Micropub post into Pelican HTML and metadata.

    Args:
        post: microformats2 item for the post. It is passed to mf2util for
            post type discovery and wrapped as ``{'items': [post]}`` for
            interpretation.
        settings: optional settings object forwarded unchanged to get_html()
            and get_metadata(). When omitted, a fresh empty dict is used for
            this call.

    Returns:
        Tuple ``(html, metadata)``: the results of get_html() and
        get_metadata() respectively.

    Raises:
        Exception: if the discovered post type is not in
            supported_post_types, or if mf2util returns nothing when
            interpreting the entry.
    """
    # A literal `{}` default is created once and shared by every call, so
    # anything a helper stored in it would leak into later conversions.
    if settings is None:
        settings = {}

    post_type = mf2util.post_type_discovery(post)
    if post_type not in supported_post_types:
        raise Exception(f'{post_type} not among supported post types')

    # The empty string is passed as the second (source URL) argument.
    entry = mf2util.interpret_entry({'items': [post]}, '')
    if not entry:
        raise Exception('Could not interpret parsed entry')

    return (get_html(settings, post, post_type),
            get_metadata(settings, entry, post, post_type))
def test_post_type_discovery():
    """Check mf2util.post_type_discovery against the JSON fixtures in tests/.

    Each case pairs a fixture name (path relative to ``tests/``, without the
    ``.json`` suffix) with the post type expected for the first h-entry or
    h-event found in that fixture.
    """
    cases = [
        ('interpret/hwc-event', 'event'),
        ('interpret/reply_h-cite', 'reply'),
        ('interpret/reply_u-in-reply-to', 'reply'),
        ('interpret/reply_rsvp', 'rsvp'),
        ('interpret/note_with_comment_and_like', 'note'),
        ('interpret/article_naive_datetime', 'article'),
        ('posttype/tantek_photo', 'photo'),
        ('posttype/only_html_content', 'note'),
        # TODO add more tests
    ]
    for test, implied_type in cases:
        # Use a context manager so each fixture file is closed as soon as it
        # has been parsed, instead of leaking one handle per case.
        with open('tests/' + test + '.json') as fixture:
            parsed = json.load(fixture)
        entry = mf2util.find_first_entry(parsed, ['h-entry', 'h-event'])
        assert implied_type == mf2util.post_type_discovery(entry)
def test_post_type_discovery():
    """Check mf2util.post_type_discovery against the JSON fixtures in tests/.

    Each case pairs a fixture name (path relative to ``tests/``, without the
    ``.json`` suffix) with the post type expected for the first h-entry or
    h-event found in that fixture. Includes a ``follow`` post fixture.
    """
    cases = [
        ('interpret/hwc-event', 'event'),
        ('interpret/reply_h-cite', 'reply'),
        ('interpret/reply_u-in-reply-to', 'reply'),
        ('interpret/reply_rsvp', 'rsvp'),
        ('interpret/note_with_comment_and_like', 'note'),
        ('interpret/article_naive_datetime', 'article'),
        ('interpret/follow', 'follow'),
        ('posttype/tantek_photo', 'photo'),
        ('posttype/only_html_content', 'note'),
        # TODO add more tests
    ]
    for test, implied_type in cases:
        # Use a context manager so each fixture file is closed as soon as it
        # has been parsed, instead of leaking one handle per case.
        with open('tests/' + test + '.json') as fixture:
            parsed = json.load(fixture)
        entry = mf2util.find_first_entry(parsed, ['h-entry', 'h-event'])
        assert implied_type == mf2util.post_type_discovery(entry)
def interpret(self):
    """Populate ``_type`` and ``_props`` from the raw mf2 entry, once.

    Does nothing if either ``_type`` or ``_props`` has already been set.
    Otherwise runs mf2util post type discovery and entry interpretation,
    then applies the local micropost detection and property fix-ups.
    """
    nothing_cached = self._type is None and self._props is None
    if not nothing_cached:
        return

    # Imported here rather than at module level, so the early return above
    # never needs mf2util.
    import mf2util

    self._type = mf2util.post_type_discovery(self._mf_entry)

    feed = self._owner_feed
    self._props = mf2util.interpret_entry(
        feed._mf_dict, feed.url, hentry=self._mf_entry)

    # Adds an `is_micropost` property.
    self._detect_micropost()

    # mf2util only picks up the first photo of a "photo"-type post, though
    # there may be several, and it apparently doesn't always carry over the
    # "category" info either, so both are patched up afterwards.
    self._fix_interpreted_props('photo', 'category')
def json_to_object(mf2):
    """Converts microformats2 JSON to an ActivityStreams object.

    Recurses into the author, location, comment and non-string category
    values, so nested microformats become nested ActivityStreams objects.

    Args:
      mf2: dict, decoded JSON microformats2 object

    Returns: dict, ActivityStreams object, as returned by util.trim_nulls().
      An empty dict if mf2 is falsy or not a dict.
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    props = mf2.get('properties', {})
    # prop is used below for single-valued lookups, props for the full lists.
    prop = first_props(props)
    rsvp = prop.get('rsvp')
    rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
    author = json_to_object(prop.get('author'))

    # maps mf2 type to ActivityStreams objectType and optional verb.
    mf2_type_to_as_type = {
        'rsvp': ('activity', rsvp_verb),
        'invite': ('activity', 'invite'),
        'repost': ('activity', 'share'),
        'like': ('activity', 'like'),
        'reply': ('comment', None),
        'person': ('person', None),
        'location': ('place', None),
        'note': ('note', None),
        'photo': ('note', None),
        'article': ('article', None),
    }

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        mf2_type = mf2util.post_type_discovery(mf2)
    # unknown mf2 types get neither an objectType nor a verb
    as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

    def absolute_urls(name):
        # `name` is the property to read; it deliberately doesn't reuse the
        # outer `prop` identifier.
        return [{'url': url} for url in get_string_urls(props.get(name, []))
                # filter out relative and invalid URLs (mf2py gives absolute urls)
                if urlparse.urlparse(url).netloc]

    urls = props.get('url') and get_string_urls(props.get('url'))

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        'image': absolute_urls('photo'),
        'stream': absolute_urls('video'),
        'location': json_to_object(prop.get('location')),
        'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
        # plain string categories become hashtags; anything else is converted
        # recursively
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
    }

    lat, lng = prop.get('latitude'), prop.get('longitude')
    if lat and lng:
        try:
            obj['latitude'], obj['longitude'] = float(lat), float(lng)
            # TODO fill in 'position', maybe using Source.postprocess_object?
        except ValueError:
            # logging.warn is a deprecated alias that newer Pythons dropped.
            logging.warning(
                'Could not convert latitude/longitude (%s, %s) to decimal',
                lat, lng)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, []) for field in (
                    'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
                    'invitee')):
            t = json_to_object(target) if isinstance(target, dict) else {'url': target}
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a lone target is stored bare, otherwise the whole list is kept
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in
                          get_string_urls(props.get('in-reply-to', []))],
            'author': author,
        })

    return util.trim_nulls(obj)
def json_to_object(mf2, actor=None):
    """Converts microformats2 JSON to an ActivityStreams object.

    Args:
      mf2: dict, decoded JSON microformats2 object
      actor: optional author AS actor object. usually comes from a rel="author"
        link. if mf2 has its own author, that will override this.

    Returns: dict, ActivityStreams object, as returned by util.trim_nulls().
      An empty dict if mf2 is falsy or not a dict.
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy, so that adding a missing 'properties' key below doesn't
    # modify the caller's dict.
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    # prop is used below for single-valued lookups, props for the full lists.
    prop = first_props(props)
    rsvp = prop.get('rsvp')
    rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
    author = json_to_object(prop['author']) if prop.get('author') else actor

    # maps mf2 type to ActivityStreams objectType and optional verb.
    mf2_type_to_as_type = {
        'rsvp': ('activity', rsvp_verb),
        'invite': ('activity', 'invite'),
        'repost': ('activity', 'share'),
        'like': ('activity', 'like'),
        'reply': ('comment', None),
        'person': ('person', None),
        'location': ('place', None),
        'note': ('note', None),
        'article': ('article', None),
    }

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    # unknown mf2 types get neither an objectType nor a verb
    as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

    def absolute_urls(name):
        # `name` is the property to read; it deliberately doesn't reuse the
        # outer `prop` identifier.
        return [{'url': url} for url in get_string_urls(props.get(name, []))
                # filter out relative and invalid URLs (mf2py gives absolute urls)
                if urlparse.urlparse(url).netloc]

    urls = props.get('url') and get_string_urls(props.get('url'))

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        'image': absolute_urls('photo'),
        'stream': absolute_urls('video'),
        'location': json_to_object(prop.get('location')),
        'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
        # plain string categories become hashtags; anything else is converted
        # recursively
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
    }

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            # obj['location'] is always a dict here: json_to_object returns {}
            # for a missing or non-dict location.
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    # parse both before storing either, so an unparseable
                    # longitude can't leave a latitude-only location behind.
                    coords = {'latitude': float(lat), 'longitude': float(lng)}
                    # TODO fill in 'position', maybe using Source.postprocess_object?
                except ValueError:
                    # logging.warn is a deprecated alias that newer Pythons
                    # dropped.
                    logging.warning(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)
                else:
                    obj['location'].update(coords)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, []) for field in (
                    'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
                    'invitee')):
            t = json_to_object(target) if isinstance(target, dict) else {'url': target}
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a lone target is stored bare, otherwise the whole list is kept
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in
                          get_string_urls(props.get('in-reply-to', []))],
            'author': author,
        })

    return util.trim_nulls(obj)
def json_to_object(mf2, actor=None, fetch_mf2=False):
    """Converts microformats2 JSON to an ActivityStreams object.

    Args:
      mf2: dict, decoded JSON microformats2 object
      actor: optional author AS actor object. usually comes from a rel="author"
        link. if mf2 has its own author, that will override this.
      fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
        e.g. to determine authorship: https://indieweb.org/authorship

    Returns: dict, ActivityStreams object, as returned by
      source.Source.postprocess_object(). An empty dict if mf2 is falsy or not
      a dict.
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy, so that adding a missing 'properties' key below doesn't
    # modify the caller's dict.
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    # prop is used below for single-valued lookups, props for the full lists.
    prop = first_props(props)
    rsvp = prop.get('rsvp')

    # convert author
    mf2_author = prop.get('author')
    if mf2_author and isinstance(mf2_author, dict):
        author = json_to_object(mf2_author)
    else:
        # the author h-card may be on another page. run full authorship algorithm:
        # https://indieweb.org/authorship
        def fetch(url):
            return mf2py.parse(util.requests_get(url).text, url=url)

        author = mf2util.find_author(
            {'items': [mf2]}, hentry=mf2,
            fetch_mf2_func=fetch if fetch_mf2 else None)
        if author:
            # reshape mf2util's author dict into an AS person object
            author = {
                'objectType': 'person',
                'url': author.get('url'),
                'displayName': author.get('name'),
                'image': [{'url': author.get('photo')}],
            }

    if not author:
        author = actor

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    # unknown mf2 types get neither an objectType nor a verb
    as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
    if rsvp:
        # an rsvp value overrides whatever verb the table supplied
        as_verb = 'rsvp-%s' % rsvp

    # special case GitHub issues that are in-reply-to the repo or its issues URL
    in_reply_tos = get_string_urls(props.get('in-reply-to', []))
    for url in in_reply_tos:
        if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
            as_type = 'issue'

    def absolute_urls(name):
        # `name` is the property to read; it deliberately doesn't reuse the
        # outer `prop` identifier.
        return [url for url in get_string_urls(props.get(name, []))
                # filter out relative and invalid URLs (mf2py gives absolute urls)
                if urllib.parse.urlparse(url).netloc]

    urls = props.get('url') and get_string_urls(props.get('url'))

    # quotations: https://indieweb.org/quotation#How_to_markup
    attachments = [
        json_to_object(quote)
        for quote in mf2.get('children', []) + props.get('quotation-of', [])
        if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

    # audio and video
    # (the loop variable is not called `type`, to leave the builtin usable)
    for media_type in 'audio', 'video':
        attachments.extend(
            {'objectType': media_type, 'stream': {'url': url}}
            for url in get_string_urls(props.get(media_type, [])))

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'startTime': prop.get('start'),
        'endTime': prop.get('end'),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        # photo and featured images are merged, photos first
        'image': [{'url': url} for url in dedupe_urls(
            absolute_urls('photo') + absolute_urls('featured'))],
        'stream': [{'url': url} for url in absolute_urls('video')],
        'location': json_to_object(prop.get('location')),
        'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
        # plain string categories become hashtags; anything else is converted
        # recursively
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
        'attachments': attachments,
    }

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            # obj['location'] is always a dict here: json_to_object returns {}
            # for a missing or non-dict location.
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    obj['location'].update({
                        'latitude': float(lat),
                        'longitude': float(lng),
                    })
                except ValueError:
                    # logging.warn is a deprecated alias that newer Pythons
                    # dropped.
                    logging.warning(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, []) for field in (
                    'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
                    'invitee')):
            t = json_to_object(target) if isinstance(target, dict) else {'url': target}
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a lone target is stored bare, otherwise the whole list is kept
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in in_reply_tos],
            'author': author,
        })

    return source.Source.postprocess_object(obj)
def json_to_object(mf2, actor=None, fetch_mf2=False):
    """Converts a single microformats2 JSON item to an ActivityStreams object.

    Supports h-entry, h-event, h-card, and other single item types. Does *not*
    yet support h-feed.

    Args:
      mf2: dict, decoded JSON microformats2 object
      actor: optional author AS actor object. usually comes from a rel="author"
        link. if mf2 has its own author, that will override this.
      fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
        e.g. to determine authorship: https://indieweb.org/authorship

    Returns: dict, ActivityStreams object, as returned by
      source.Source.postprocess_object(). An empty dict if mf2 is falsy or not
      a dict.

    Raises:
      NotImplementedError: for a tag-of post that also has other activity
        targets (e.g. in-reply-to).
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy, so that adding a missing 'properties' key below doesn't
    # modify the caller's dict.
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    # prop is used below for single-valued lookups, props for the full lists.
    prop = first_props(props)
    rsvp = prop.get('rsvp')

    # convert author
    mf2_author = prop.get('author')
    if mf2_author and isinstance(mf2_author, dict):
        author = json_to_object(mf2_author)
    else:
        # the author h-card may be on another page. run full authorship algorithm:
        # https://indieweb.org/authorship
        def fetch(url):
            return mf2py.parse(util.requests_get(url).text, url=url,
                               img_with_alt=True)

        author = mf2util.find_author(
            {'items': [mf2]}, hentry=mf2,
            fetch_mf2_func=fetch if fetch_mf2 else None)
        if author:
            # reshape mf2util's author dict into an AS person object
            author = {
                'objectType': 'person',
                'url': author.get('url'),
                'displayName': author.get('name'),
                'image': [{'url': author.get('photo')}],
            }

    if not author:
        author = actor

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    elif 'tag-of' in props:
        # TODO: remove once this is in mf2util
        # https://github.com/kylewm/mf2util/issues/18
        mf2_type = 'tag'
    elif 'follow-of' in props:
        # ditto
        mf2_type = 'follow'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    # unknown mf2 types get neither an objectType nor a verb
    as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
    if rsvp:
        # an rsvp value overrides whatever verb the table supplied
        as_verb = 'rsvp-%s' % rsvp

    # special case GitHub issues that are in-reply-to the repo or its issues URL
    in_reply_tos = get_string_urls(props.get('in-reply-to', []))
    for url in in_reply_tos:
        if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
            as_type = 'issue'

    def is_absolute(url):
        """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
        return urllib.parse.urlparse(url).netloc

    urls = props.get('url') and get_string_urls(props.get('url'))

    # quotations: https://indieweb.org/quotation#How_to_markup
    attachments = [
        json_to_object(quote)
        for quote in mf2.get('children', []) + props.get('quotation-of', [])
        if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

    # audio and video
    # (the loop variable is not called `type`, to leave the builtin usable)
    for media_type in 'audio', 'video':
        attachments.extend(
            {'objectType': media_type, 'stream': {'url': url}}
            for url in get_string_urls(props.get(media_type, [])))

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'startTime': prop.get('start'),
        'endTime': prop.get('end'),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        # image is special cased below, to handle alt
        'stream': [{'url': url} for url in get_string_urls(props.get('video', []))],
        'location': json_to_object(prop.get('location')),
        'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
        # plain string categories become hashtags; anything else is converted
        # recursively
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
        'attachments': attachments,
    }

    # images, including alt text
    photo_urls = set()  # URLs already emitted, to drop duplicates
    obj['image'] = []
    for photo in props.get('photo', []) + props.get('featured', []):
        # a photo is either a bare URL string or a dict carrying url/value
        # and alt, possibly nested under 'properties'.
        url = photo
        alt = None
        if isinstance(photo, dict):
            photo = photo.get('properties') or photo
            url = get_first(photo, 'value') or get_first(photo, 'url')
            alt = get_first(photo, 'alt')
        if url and url not in photo_urls and is_absolute(url):
            photo_urls.add(url)
            obj['image'].append({'url': url, 'displayName': alt})

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            # obj['location'] is always a dict here: json_to_object returns {}
            # for a missing or non-dict location.
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    obj['location'].update({
                        'latitude': float(lat),
                        'longitude': float(lng),
                    })
                except ValueError:
                    # logging.warn is a deprecated alias that newer Pythons
                    # dropped.
                    logging.warning(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, []) for field in (
                    'follow-of', 'like', 'like-of', 'repost', 'repost-of',
                    'in-reply-to', 'invitee')):
            t = json_to_object(target) if isinstance(target, dict) else {'url': target}
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a lone target is stored bare, otherwise the whole list is kept
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
        if as_verb == 'tag':
            # for a tag activity the tagged URL is the target and the tags
            # themselves become the object.
            obj['target'] = {'url': prop['tag-of']}
            if obj.get('object'):
                raise NotImplementedError(
                    'Combined in-reply-to and tag-of is not yet supported.')
            obj['object'] = obj.pop('tags')
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in in_reply_tos],
            'author': author,
        })

    return source.Source.postprocess_object(obj)
def json_to_object(mf2, actor=None):
    """Converts microformats2 JSON to an ActivityStreams object.

    Args:
      mf2: dict, decoded JSON microformats2 object
      actor: optional author AS actor object. usually comes from a rel="author"
        link. if mf2 has its own author, that will override this.

    Returns: dict, ActivityStreams object, as returned by util.trim_nulls().
      An empty dict if mf2 is falsy or not a dict.
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy, so that adding a missing 'properties' key below doesn't
    # modify the caller's dict.
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    # prop is used below for single-valued lookups, props for the full lists.
    prop = first_props(props)
    rsvp = prop.get('rsvp')
    rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
    author = json_to_object(prop['author']) if prop.get('author') else actor

    # maps mf2 type to ActivityStreams objectType and optional verb.
    mf2_type_to_as_type = {
        'rsvp': ('activity', rsvp_verb),
        'invite': ('activity', 'invite'),
        'repost': ('activity', 'share'),
        'like': ('activity', 'like'),
        'reply': ('comment', None),
        'person': ('person', None),
        'location': ('place', None),
        'note': ('note', None),
        'article': ('article', None),
    }

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    # unknown mf2 types get neither an objectType nor a verb
    as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

    def absolute_urls(name):
        # `name` is the property to read; it deliberately doesn't reuse the
        # outer `prop` identifier.
        return [{'url': url} for url in get_string_urls(props.get(name, []))
                # filter out relative and invalid URLs (mf2py gives absolute urls)
                if urlparse.urlparse(url).netloc]

    urls = props.get('url') and get_string_urls(props.get('url'))

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        'image': absolute_urls('photo'),
        'stream': absolute_urls('video'),
        'location': json_to_object(prop.get('location')),
        'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
        # plain string categories become hashtags; anything else is converted
        # recursively
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
    }

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            # obj['location'] is always a dict here: json_to_object returns {}
            # for a missing or non-dict location.
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    # parse both before storing either, so an unparseable
                    # longitude can't leave a latitude-only location behind.
                    coords = {'latitude': float(lat), 'longitude': float(lng)}
                    # TODO fill in 'position', maybe using Source.postprocess_object?
                except ValueError:
                    # logging.warn is a deprecated alias that newer Pythons
                    # dropped.
                    logging.warning(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)
                else:
                    obj['location'].update(coords)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, []) for field in (
                    'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
                    'invitee')):
            t = json_to_object(target) if isinstance(target, dict) else {'url': target}
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a lone target is stored bare, otherwise the whole list is kept
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in
                          get_string_urls(props.get('in-reply-to', []))],
            'author': author,
        })

    return util.trim_nulls(obj)
def fetch_post_type(parsed):
    """Return the post type of the first h-entry in a parsed mf2 document.

    Args:
        parsed: parsed microformats2 document, handed to
            mf2util.find_first_entry.

    Returns:
        The result of mf2util.post_type_discovery for the first h-entry, or
        ``'note'`` when no h-entry is found.
    """
    first_entry = mf2util.find_first_entry(parsed, ['h-entry'])
    if not first_entry:
        # no h-entry to inspect: fall back to the default type
        return 'note'
    return mf2util.post_type_discovery(first_entry)
def json_to_object(mf2):
    """Build an ActivityStreams object from a microformats2 JSON item.

    Args:
      mf2: dict, decoded JSON microformats2 object

    Returns: dict, ActivityStreams object; an empty dict when mf2 is falsy or
      is not a dict
    """
    if not (mf2 and isinstance(mf2, dict)):
        return {}

    all_props = mf2.get('properties', {})
    first = first_props(all_props)

    rsvp_value = first.get('rsvp')
    if rsvp_value:
        rsvp_verb = 'rsvp-%s' % rsvp_value
    else:
        rsvp_verb = None
    author_obj = json_to_object(first.get('author'))

    # mf2 class -> mf2 type, highest priority first. an explicit h-as-* class
    # (or h-card) wins over implicit post type discovery.
    class_overrides = (
        ('h-as-rsvp', 'rsvp'),
        ('h-as-share', 'repost'),
        ('h-as-like', 'like'),
        ('h-as-comment', 'reply'),
        ('h-as-reply', 'reply'),
        ('h-as-article', 'article'),
        ('h-as-note', 'note'),
        ('h-as-location', 'location'),
        ('h-card', 'person'),
    )

    # mf2 type -> (ActivityStreams objectType, optional verb)
    as_type_and_verb = {
        'rsvp': ('activity', rsvp_verb),
        'invite': ('activity', 'invite'),
        'repost': ('activity', 'share'),
        'like': ('activity', 'like'),
        'reply': ('comment', None),
        'person': ('person', None),
        'location': ('place', None),
        'note': ('note', None),
        'photo': ('note', None),
        'article': ('article', None),
    }

    # first override whose class is present, else fall back to discovery
    post_type = next(
        (kind for h_class, kind in class_overrides
         if h_class in mf2.get('type', [])),
        None)
    if post_type is None:
        post_type = mf2util.post_type_discovery(mf2)
    object_type, verb = as_type_and_verb.get(post_type, (None, None))

    # drop relative and invalid photo URLs (mf2py gives absolute urls)
    photo_urls = []
    for candidate in get_string_urls(all_props.get('photo', [])):
        if urlparse.urlparse(candidate).netloc:
            photo_urls.append(candidate)

    url_list = all_props.get('url') and get_string_urls(all_props.get('url'))

    result = {
        'id': first.get('uid'),
        'objectType': object_type,
        'verb': verb,
        'published': first.get('published', ''),
        'updated': first.get('updated', ''),
        'displayName': get_text(first.get('name')),
        'summary': get_text(first.get('summary')),
        'content': get_html(first.get('content')),
        'url': url_list[0] if url_list else None,
        # the full list is only included when there are several URLs
        'urls': ([{'value': u} for u in url_list]
                 if url_list and len(url_list) > 1 else None),
        'image': {'url': photo_urls[0] if photo_urls else None},
        'location': json_to_object(first.get('location')),
        'replies': {
            'items': [json_to_object(c) for c in all_props.get('comment', [])],
        },
        'tags': [json_to_object(cat) for cat in all_props.get('category', [])],
    }

    if object_type == 'activity':
        targets = []
        for field in ('like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
                      'invitee'):
            for target in all_props.get(field, []):
                if isinstance(target, dict):
                    converted = json_to_object(target)
                else:
                    converted = {'url': target}
                # redundant backcompat properties may repeat a target; keep
                # each one once
                if converted not in targets:
                    targets.append(converted)
        result['object'] = targets[0] if len(targets) == 1 else targets
        result['actor'] = author_obj
    else:
        result['inReplyTo'] = [
            {'url': url}
            for url in get_string_urls(all_props.get('in-reply-to', []))]
        result['author'] = author_obj

    return util.trim_nulls(result)
def json_to_object(mf2, actor=None, fetch_mf2=False):
    """Converts a single microformats2 JSON item to an ActivityStreams object.

    Supports h-entry, h-event, h-card, and other single item types. Does *not*
    yet support h-feed.

    Args:
      mf2: dict, decoded JSON microformats2 object
      actor: optional author AS actor object. usually comes from a rel="author"
        link. if mf2 has its own author, that will override this.
      fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
        e.g. to determine authorship: https://indieweb.org/authorship

    Returns: dict, ActivityStreams object, as returned by
      source.Source.postprocess_object(). An empty dict if mf2 is falsy or not
      a dict.

    Raises:
      NotImplementedError: for a tag-of post that also has other activity
        targets (e.g. in-reply-to).
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy, so that adding a missing 'properties' key below doesn't
    # modify the caller's dict.
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    # prop is used below for single-valued lookups, props for the full lists.
    prop = first_props(props)
    rsvp = prop.get('rsvp')

    # convert author
    mf2_author = prop.get('author')
    if mf2_author and isinstance(mf2_author, dict):
        author = json_to_object(mf2_author)
    else:
        # the author h-card may be on another page. run full authorship algorithm:
        # https://indieweb.org/authorship
        author = mf2util.find_author(
            {'items': [mf2]}, hentry=mf2,
            fetch_mf2_func=util.fetch_mf2 if fetch_mf2 else None)
        if author:
            # reshape mf2util's author dict into an AS person object
            author = {
                'objectType': 'person',
                'url': author.get('url'),
                'displayName': author.get('name'),
                'image': [{'url': author.get('photo')}],
            }

    if not author:
        author = actor

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    elif 'tag-of' in props:
        # TODO: remove once this is in mf2util
        # https://github.com/kylewm/mf2util/issues/18
        mf2_type = 'tag'
    elif 'follow-of' in props:
        # ditto
        mf2_type = 'follow'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    # unknown mf2 types get neither an objectType nor a verb
    as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
    if rsvp:
        # an rsvp value overrides whatever verb the table supplied
        as_verb = 'rsvp-%s' % rsvp

    # special case GitHub issues that are in-reply-to the repo or its issues URL
    in_reply_tos = get_string_urls(props.get('in-reply-to', []))
    for url in in_reply_tos:
        if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
            as_type = 'issue'

    def is_absolute(url):
        """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
        return urllib.parse.urlparse(url).netloc

    urls = props.get('url') and get_string_urls(props.get('url'))

    # quotations: https://indieweb.org/quotation#How_to_markup
    attachments = [
        json_to_object(quote)
        for quote in mf2.get('children', []) + props.get('quotation-of', [])
        if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

    # audio and video
    #
    # the duration mf2 property is still emerging. examples in the wild use both
    # integer seconds and ISO 8601 durations.
    # https://indieweb.org/duration
    # https://en.wikipedia.org/wiki/ISO_8601#Durations
    duration = prop.get('duration') or prop.get('length')
    if duration:
        if util.is_int(duration):
            duration = int(duration)
        else:
            parsed = util.parse_iso8601_duration(duration)
            if parsed:
                duration = int(parsed.total_seconds())
            else:
                logging.debug('Unknown format for length or duration %r', duration)
                duration = None

    stream = None
    # the locals below avoid the names `bytes` and `type`, which would shadow
    # builtins for the rest of the function.
    size_in_bytes = size_to_bytes(prop.get('size'))
    for media_type in 'audio', 'video':
        atts = [{
            'objectType': media_type,
            'stream': {
                'url': url,
                # integer seconds: http://activitystrea.ms/specs/json/1.0/#media-link
                'duration': duration,
                # file size in bytes. nonstandard, not in AS1 or AS2
                'size': size_in_bytes,
            },
        } for url in get_string_urls(props.get(media_type, []))]
        attachments.extend(atts)
        if atts:
            # video is processed after audio, so when both exist the first
            # video's stream is the one kept.
            stream = atts[0]['stream']

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'startTime': prop.get('start'),
        'endTime': prop.get('end'),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        # image is special cased below, to handle alt
        # NOTE(review): this is [None] when there is no audio or video;
        # presumably postprocess_object() strips it - confirm.
        'stream': [stream],
        'location': json_to_object(prop.get('location')),
        'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
        # plain string categories become hashtags; anything else is converted
        # recursively
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, str) else json_to_object(cat)
                 for cat in props.get('category', [])],
        'attachments': attachments,
    }

    # images, including alt text
    photo_urls = set()  # URLs already emitted, to drop duplicates
    obj['image'] = []
    for photo in props.get('photo', []) + props.get('featured', []):
        # a photo is either a bare URL string or a dict carrying url/value
        # and alt, possibly nested under 'properties'.
        url = photo
        alt = None
        if isinstance(photo, dict):
            photo = photo.get('properties') or photo
            url = get_first(photo, 'value') or get_first(photo, 'url')
            alt = get_first(photo, 'alt')
        if url and url not in photo_urls and is_absolute(url):
            photo_urls.add(url)
            obj['image'].append({'url': url, 'displayName': alt})

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            # obj['location'] is always a dict here: json_to_object returns {}
            # for a missing or non-dict location.
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    obj['location'].update({
                        'latitude': float(lat),
                        'longitude': float(lng),
                    })
                except ValueError:
                    logging.debug(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, []) for field in (
                    'follow-of', 'like', 'like-of', 'repost', 'repost-of',
                    'in-reply-to', 'invitee')):
            t = json_to_object(target) if isinstance(target, dict) else {'url': target}
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a lone target is stored bare, otherwise the whole list is kept
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
        if as_verb == 'tag':
            # for a tag activity the tagged URL is the target and the tags
            # themselves become the object.
            obj['target'] = {'url': prop['tag-of']}
            if obj.get('object'):
                raise NotImplementedError(
                    'Combined in-reply-to and tag-of is not yet supported.')
            obj['object'] = obj.pop('tags')
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in in_reply_tos],
            'author': author,
        })

    return source.Source.postprocess_object(obj)