def test_reply_h_cite():
    # reply with reply-context from aaronparecki.com
    parsed = load_test('reply_h-cite')
    result = mf2util.interpret_comment(
        parsed, 'http://aaronparecki.com/replies/2014/05/10/1/indieweb',
        ['http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/',
         'http://datahiveconsulting.com/2014/04/10'])
    assert result['type'] == 'entry'
    assert not result.get('name')
    assert "We're working on it ;-)" in result.get('content')
    assert result['published'].replace(tzinfo=None) \
        == datetime(2014, 5, 10, 14, 48, 33)
    assert result['published'].utcoffset() == timedelta(hours=-7)
    assert result['comment_type'] == ['reply']
    assert result['in-reply-to'] == [{
        'type': 'cite',
        'author': {
            'name': 'Lynne Baer',
            'photo': 'http://aaronparecki.com/images/nouns/user.svg',
            'url': 'http://datahiveconsulting.com/author/lynne/',
        },
        'content': "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ...",
        'content-plain': "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ...",
        'url': 'http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/',
        'syndication': [],
    }]
    assert result['syndication'] == ['https://twitter.com/aaronpk/status/465247041078034432']
def test_reply_invite():
    parsed = load_test('reply_invite')
    result = mf2util.interpret_comment(
        parsed, 'https://www.facebook.com/1565113317092307#10155109753190015',
        ['https://kylewm.com/2015/03/homebrew-website-club-2015-march-25'])
    assert result['name'] == 'invited'
    assert result['comment_type'] == ['invite', 'reply']
    assert result['invitees'] == [{
        'name': 'Silona Bonewald',
        'url': 'https://www.facebook.com/10155109753190015',
        'photo': 'https://graph.facebook.com/v2.2/10155109753190015/picture?type=large',
    }]
def parse_mention(doc, mf, source_url, target_url):
    """Parse the microformats received to generate a mention for target_url."""
    # interpret_comment expects a list of target URLs
    mention_dict = mf2util.interpret_comment(mf, source_url, [target_url])
    # if no h-entry, do a last-resort parse from the raw HTML
    # and insert safe default values
    if mention_dict:
        mention_dict.setdefault('comment_type', []).append('mention')
    return mention_dict
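# A hypothetical driver for parse_mention above: fetch the source page,
# parse its microformats with mf2py, then interpret the mention. The URLs
# and the use of requests here are illustrative assumptions, not part of
# the original snippet.
import requests
import mf2py

source_url = 'https://example.com/replies/1'    # hypothetical
target_url = 'https://example.org/posts/hello'  # hypothetical

resp = requests.get(source_url)
mf = mf2py.parse(doc=resp.text, url=source_url)
mention = parse_mention(resp.text, mf, source_url, target_url)
if mention:
    print(mention['comment_type'])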
def test_reply_rsvp():
    parsed = load_test('reply_rsvp')
    result = mf2util.interpret_comment(
        parsed, 'https://snarfed.org/2014-05-05_homebrew-website-club-3',
        ['http://werd.io/2014/homebrew-website-club-4'])
    assert result['type'] == 'entry'
    assert result['name'] == 'Homebrew Website Club'
    assert '<a class="u-in-reply-to"' in result.get('content')
    assert result['published'].replace(tzinfo=None) \
        == datetime(2014, 5, 5, 10, 10, 53)
    assert result['published'].utcoffset() == timedelta(hours=-7)
    assert result['comment_type'] == ['rsvp', 'reply']
    assert result['rsvp'] == 'yes'
def test_u_in_reply_to():
    # reply with simple u-in-reply-to link from snarfed.org
    parsed = load_test('reply_u-in-reply-to')
    result = mf2util.interpret_comment(
        parsed, 'https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile',
        ['https://willnorris.com/2014/03/display-likes-in-a-facepile'])
    assert result['type'] == 'entry'
    assert result['name'] == 'Re: Display likes in a facepile'
    assert 'oh man, so cool!' in result.get('content')
    assert result['published'].replace(tzinfo=None) \
        == datetime(2014, 3, 9, 22, 48, 22)
    assert result['published'].utcoffset() == timedelta(hours=-7)
    assert result['in-reply-to'] == [
        {'url': 'https://willnorris.com/2014/03/display-likes-in-a-facepile'}]
    assert result['comment_type'] == ['reply']
def create_mention(post, url, source_response):
    target_urls = []
    if post:
        base_target_urls = [post.permalink]
        for base_url in base_target_urls:
            target_urls.append(base_url)
            target_urls.append(
                base_url.replace('https://', 'http://')
                if base_url.startswith('https://')
                else base_url.replace('http://', 'https://'))

    blob = mf2py.Parser(doc=source_response.text, url=url).to_dict()
    if not blob:
        app.logger.debug('create_mention: no mf2 in source_response')
        return
    entry = mf2util.interpret_comment(blob, url, target_urls)
    if not entry:
        app.logger.debug('create_mention: mf2util found no comment entry')
        return

    comment_type = entry.get('comment_type')
    content = util.clean_foreign_html(entry.get('content', ''))
    content_plain = util.format_as_text(content)

    published = entry.get('published')
    if not published:
        published = datetime.datetime.utcnow()

    # update an existing mention
    mention = next((m for m in post.mentions if m.url == url), None)
    # or create a new one
    if not mention:
        mention = Mention()
    mention.url = url
    mention.permalink = entry.get('url') or url
    mention.reftype = comment_type[0] if comment_type else 'reference'
    mention.author_name = entry.get('author', {}).get('name', '')
    mention.author_url = entry.get('author', {}).get('url', '')
    mention.author_image = entry.get('author', {}).get('photo')
    mention.content = content
    mention.content_plain = content_plain
    mention.published = published
    mention.title = entry.get('name')
    mention.syndication = entry.get('syndication', [])
    mention.rsvp = entry.get('rsvp')
    return mention
def attach_webmentions(article, all_webmentions):
    wm_for_article = all_webmentions.get(f'/{article.url}', [])
    for wm in wm_for_article:
        comment = mf2util.interpret_comment(
            wm['parsedSource'], wm['sourceUrl'], [wm['targetUrl']])
        if comment['comment_type']:
            comment_type = comment['comment_type'][0]
            if comment_type == 'like':
                article.webmentions.likes.append(comment)
            elif comment_type == 'repost':
                article.webmentions.reposts.append(comment)
            elif comment_type == 'reply':
                article.webmentions.replies.append(comment)
            else:
                print(f'Unrecognized comment type: {comment_type}')
                article.webmentions.unclassified.append(comment)
        else:
            print('No comment type parsed')
            article.webmentions.unclassified.append(comment)
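# For reference, a sketch of the input shape attach_webmentions expects,
# inferred from the lookups above ('/' + article.url as the key, with
# 'parsedSource'/'sourceUrl'/'targetUrl' per webmention); the concrete
# values are placeholders, not data from the original project.
all_webmentions = {
    '/2019/04/some-post/': [
        {
            'sourceUrl': 'https://example.com/reply-1',             # hypothetical
            'targetUrl': 'https://example.org/2019/04/some-post/',  # hypothetical
            'parsedSource': {},  # mf2py-parsed dict for the source page
        },
    ],
}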
def create_mentions(post, url, source_response, is_person_mention):
    # utility function for mf2util
    cached_mf2 = {}

    def fetch_mf2(url):
        if url in cached_mf2:
            return cached_mf2[url]
        p = mf2py.parse(url=url)
        cached_mf2[url] = p
        return p

    target_urls = []
    if post:
        base_target_urls = [post.permalink]
        for base_url in base_target_urls:
            target_urls.append(base_url)
            target_urls.append(
                base_url.replace('https://', 'http://')
                if base_url.startswith('https://')
                else base_url.replace('http://', 'https://'))

    blob = mf2py.parse(doc=source_response.text, url=url)
    cached_mf2[url] = blob
    if not blob:
        current_app.logger.debug('create_mention: no mf2 in source_response')
        return
    entry = mf2util.interpret_comment(
        blob, url, target_urls, fetch_mf2_func=fetch_mf2)
    current_app.logger.debug('interpreted comment: %r', entry)
    if not entry:
        current_app.logger.debug(
            'create_mention: mf2util found no comment entry')
        return

    comment_type = entry.get('comment_type', [])
    to_process = [(entry, url)]
    # process 2nd level "downstream" comments
    if 'reply' in comment_type:
        downstream_cmts = entry.get('comment', [])
        current_app.logger.debug('adding in downstream comments:%d',
                                 len(downstream_cmts))
        for dc in downstream_cmts:
            if dc.get('url'):
                to_process.append((dc, dc.get('url')))

    results = []
    for entry, url in to_process:
        current_app.logger.debug('processing %s %r', url, entry)
        content = util.clean_foreign_html(entry.get('content', ''))
        content_plain = util.format_as_text(content)

        published = entry.get('published')
        if not published:
            published = datetime.datetime.utcnow()

        # update an existing mention
        mention = next((m for m in post.mentions if m.url == url), None) \
            if post else None
        # or create a new one
        if not mention:
            mention = Mention()
        mention.url = url
        mention.person_mention = is_person_mention
        mention.permalink = entry.get('url') or url
        mention.reftype = comment_type[0] if comment_type else 'reference'
        mention.author_name = entry.get('author', {}).get('name', '')
        mention.author_url = entry.get('author', {}).get('url', '')
        mention.author_image = entry.get('author', {}).get('photo')
        mention.content = content
        mention.content_plain = content_plain
        mention.published = published
        mention.title = entry.get('name')
        mention.syndication = entry.get('syndication', [])
        mention.rsvp = entry.get('rsvp')
        results.append(mention)
    return results
def create_dmention(post, url):
    target_urls = [
        post.permalink,
        post.permalink_without_slug,
        post.short_permalink,
        post.permalink.replace('https://', 'http://'),
        post.permalink_without_slug.replace('https://', 'http://'),
        post.short_permalink.replace('https://', 'http://'),
    ] if post else []

    try:
        blob = archiver.load_json_from_archive(url)
        if blob:
            entry = mf2util.interpret_comment(blob, url, target_urls)
            if entry:
                comment_type = entry.get('comment_type')
                content = entry.get('content', '')
                content_plain = format_as_text(content)
                content_words = jinja2.filters.do_wordcount(content_plain)

                author_name = bleach.clean(
                    entry.get('author', {}).get('name', ''))
                author_image = entry.get('author', {}).get('photo')
                if author_image:
                    author_image = local_mirror_resource(author_image)

                return DMention(
                    permalink=entry.get('url', ''),
                    reftype=comment_type and comment_type[0],
                    author_name=author_name,
                    author_url=entry.get('author', {}).get('url', ''),
                    author_image=author_image or url_for(
                        'static', filename=AUTHOR_PLACEHOLDER),
                    content=content,
                    content_plain=content_plain,
                    content_words=content_words,
                    pub_date=entry.get('published'),
                    pub_date_iso=isotime_filter(entry.get('published')),
                    pub_date_human=human_time(entry.get('published')),
                    title=entry.get('name'),
                    deleted=False,
                    syndication=[format_syndication_url(s, False)
                                 for s in entry.get('syndication', [])],
                    children=[]
                )
    except Exception:
        app.logger.exception('error interpreting %s', url)

    return DMention(
        permalink=url,
        reftype='reference',
        author_name=None,
        author_url=None,
        author_image=None,
        content=None,
        content_plain=None,
        content_words=0,
        pub_date=None,
        pub_date_iso=None,
        pub_date_human=None,
        title=None,
        deleted=False,
        syndication=[],
        children=[]
    )
def create_mentions(post, url, source_response):
    target_urls = []
    if post:
        base_target_urls = [post.permalink]
        for base_url in base_target_urls:
            target_urls.append(base_url)
            target_urls.append(
                base_url.replace("https://", "http://")
                if base_url.startswith("https://")
                else base_url.replace("http://", "https://")
            )

    blob = mf2py.parse(doc=source_response.text, url=url)
    if not blob:
        current_app.logger.debug("create_mention: no mf2 in source_response")
        return
    entry = mf2util.interpret_comment(blob, url, target_urls)
    current_app.logger.debug("interpreted comment: %r", entry)
    if not entry:
        current_app.logger.debug("create_mention: mf2util found no comment entry")
        return

    comment_type = entry.get("comment_type", [])
    to_process = [(entry, url)]
    # process 2nd level "downstream" comments
    if "reply" in comment_type:
        downstream_cmts = entry.get("comment", [])
        current_app.logger.debug("adding in downstream comments:%d",
                                 len(downstream_cmts))
        for dc in downstream_cmts:
            if dc.get("url"):
                to_process.append((dc, dc.get("url")))

    results = []
    for entry, url in to_process:
        current_app.logger.debug("processing %s %r", url, entry)
        content = util.clean_foreign_html(entry.get("content", ""))
        content_plain = util.format_as_text(content)

        published = entry.get("published")
        if not published:
            published = datetime.datetime.utcnow()

        # update an existing mention
        mention = next((m for m in post.mentions if m.url == url), None)
        # or create a new one
        if not mention:
            mention = Mention()
        mention.url = url
        mention.permalink = entry.get("url") or url
        mention.reftype = comment_type[0] if comment_type else "reference"
        mention.author_name = entry.get("author", {}).get("name", "")
        mention.author_url = entry.get("author", {}).get("url", "")
        mention.author_image = entry.get("author", {}).get("photo")
        mention.content = content
        mention.content_plain = content_plain
        mention.published = published
        mention.title = entry.get("name")
        mention.syndication = entry.get("syndication", [])
        mention.rsvp = entry.get("rsvp")
        results.append(mention)
    return results
import mf2py
import mf2util
import pprint

source_url = r'https://brid.gy/comment/twitter/desmondrivet/1117876830478852096/1118148721034891264'
target_url = r'https://desmondrivet.com/2019/04/15/20190415154611'

parsed = mf2py.Parser(url=source_url).to_dict()
comment = mf2util.interpret_comment(parsed, source_url, [target_url])
general = mf2util.interpret(parsed, source_url)

pprint.pprint(parsed)
print('-----\n')
pprint.pprint(comment)
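# The examples above show the keys interpret_comment may populate:
# 'type', 'name', 'content', 'published', 'author', 'comment_type',
# 'in-reply-to', 'syndication', 'rsvp', 'invitees'. Any of them can be
# absent, so defensive access is a safe pattern:
if comment:
    print(comment.get('comment_type'))            # e.g. ['reply']
    print(comment.get('author', {}).get('name'))
    print(comment.get('published'))               # timezone-aware datetime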
def webmention(username):
    user = User.query.filter_by(username=username).first()
    if not user:
        abort(404)

    source = request.form.get('source')
    if not source:
        return make_response('No source parameter', 400)

    target = request.form.get('target')
    if not target:
        return make_response('No target parameter', 400)

    if not url_matches_domain(target, user.domain):
        return make_response(
            '{} is not a child of user domain {}'.format(target, user.domain),
            400)

    target_resp = requests.get(target)
    if target_resp.status_code // 100 != 2:
        return make_response('Target does not exist', 400)

    canonical_target_url = target_resp.url
    alternate_target_urls = (target, canonical_target_url)

    # check whether the source links to the target or possibly the url
    # that target redirects to
    source_resp = requests.get(source)
    link_to_target = find_link_to_target(source, source_resp,
                                         alternate_target_urls)
    if not link_to_target:
        current_app.logger.warning(
            'Webmention source %s does not appear to link to target %s.',
            source, target)
        return make_response(
            'Could not find any links from source to target', 400)

    # get or create a Post based on the canonical target URL
    post = Post.query.filter_by(
        user=user, permalink=canonical_target_url).first()
    if not post:
        post = Post()
        post.user = user
        post.permalink = canonical_target_url
        db.session.add(post)

    # user owns the target, and source links to the target.
    # fall back to an empty dict if the source has no recognizable h-entry
    interp = mf2util.interpret_comment(
        mf2py.Parser(url=source, doc=source_resp.text).to_dict(),
        source, alternate_target_urls) or {}

    comment = Comment.query.filter_by(post=post, source=source).first()
    if not comment:
        comment = Comment()
        comment.post = post
        comment.recieved = datetime.datetime.now()
        db.session.add(comment)

    comment.source = source
    comment.permalink = interp.get('url')
    comment.published = interp.get('published')
    comment.author_name = interp.get('author', {}).get('name')
    comment.author_image = interp.get('author', {}).get('photo')
    comment.author_url = interp.get('author', {}).get('url')
    comment.title = interp.get('name')
    comment.content = interp.get('content')
    comment.rsvp = interp.get('rsvp')

    for known_type in ('reply', 'repost', 'like', 'rsvp'):
        if known_type in interp.get('comment_type', []):
            comment.type = known_type
            break
    else:
        comment.type = 'mention'

    db.session.commit()
    return 'received {} on {}'.format(comment.type, post.permalink)
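# A hedged sketch of exercising the handler above from a client, assuming
# it is routed at /webmention/<username> (the route decorator is not shown
# in the snippet); only the 'source' and 'target' form parameters are
# confirmed by the handler code.
import requests

resp = requests.post('https://example.com/webmention/alice', data={
    'source': 'https://example.org/reply-to-post',       # hypothetical
    'target': 'https://example.com/alice/2015/03/post',  # hypothetical
})
print(resp.status_code, resp.text)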