Пример #1
0
def test_reply_h_cite():
    """A reply whose reply-context is an embedded h-cite is fully interpreted.

    Uses the 'reply_h-cite' fixture (a reply published on aaronparecki.com)
    and checks both the entry itself and the cite it carries.
    """
    source_url = 'http://aaronparecki.com/replies/2014/05/10/1/indieweb'
    target_urls = [
        'http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/',
        'http://datahiveconsulting.com/2014/04/10',
    ]
    # The expected cite carries the same excerpt as 'content' and
    # 'content-plain'.
    excerpt = (
        "Last week, a friend asked me what I thought of IndieWebify.Me, a "
        "movement intended to allow people to publish on the web without "
        "relying on the tools and storage of the giant corporations that "
        "currently control the majority of the social web. I\u2019m the kind "
        "of person who gladly supports her local independent bookstores and "
        "farmers\u2019 markets and food purveyors, links to IndieBound.org "
        "instead of Amazon to buy books, and admires the ideals of Open "
        "Source Software. So, I\u2019m biased towards an ..."
    )
    expected_author = {
        'name': 'Lynne Baer',
        'photo': 'http://aaronparecki.com/images/nouns/user.svg',
        'url': 'http://datahiveconsulting.com/author/lynne/',
    }
    expected_cite = {
        'type': 'cite',
        'author': expected_author,
        'content': excerpt,
        'content-plain': excerpt,
        'url': 'http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/',
        'syndication': [],
    }

    result = mf2util.interpret_comment(
        load_test('reply_h-cite'), source_url, target_urls)

    assert result['type'] == 'entry'
    assert not result.get('name')
    assert "We're working on it ;-)" in result.get('content')

    # Compare wall-clock time and UTC offset separately.
    published = result['published']
    assert published.replace(tzinfo=None) == datetime(2014, 5, 10, 14, 48, 33)
    assert published.utcoffset() == timedelta(hours=-7)

    assert result['comment_type'] == ['reply']
    assert result['in-reply-to'] == [expected_cite]
    assert result['syndication'] == [
        'https://twitter.com/aaronpk/status/465247041078034432',
    ]
Пример #2
0
def test_reply_h_cite():
    """A reply whose reply-context is an embedded h-cite is fully interpreted.

    Uses the 'reply_h-cite' fixture (a reply published on aaronparecki.com)
    and checks both the entry itself and the cite it carries.
    """
    source_url = 'http://aaronparecki.com/replies/2014/05/10/1/indieweb'
    target_urls = [
        'http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/',
        'http://datahiveconsulting.com/2014/04/10',
    ]
    excerpt = (
        "Last week, a friend asked me what I thought of IndieWebify.Me, a "
        "movement intended to allow people to publish on the web without "
        "relying on the tools and storage of the giant corporations that "
        "currently control the majority of the social web. I\u2019m the kind "
        "of person who gladly supports her local independent bookstores and "
        "farmers\u2019 markets and food purveyors, links to IndieBound.org "
        "instead of Amazon to buy books, and admires the ideals of Open "
        "Source Software. So, I\u2019m biased towards an ..."
    )
    expected_author = {
        'name': 'Lynne Baer',
        'photo': 'http://aaronparecki.com/images/nouns/user.svg',
        'url': 'http://datahiveconsulting.com/author/lynne/',
    }
    expected_cite = {
        'type': 'cite',
        'author': expected_author,
        'content': excerpt,
        'url': 'http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/',
        'syndication': [],
    }

    result = mf2util.interpret_comment(
        load_test('reply_h-cite'), source_url, target_urls)

    assert result['type'] == 'entry'
    assert not result.get('name')
    assert "We're working on it ;-)" in result.get('content')

    # Compare wall-clock time and UTC offset separately.
    published = result['published']
    assert published.replace(tzinfo=None) == datetime(2014, 5, 10, 14, 48, 33)
    assert published.utcoffset() == timedelta(hours=-7)

    assert result['comment_type'] == ['reply']
    assert result['in-reply-to'] == [expected_cite]
    assert result['syndication'] == [
        'https://twitter.com/aaronpk/status/465247041078034432',
    ]
Пример #3
0
def test_reply_invite():
    """An invitation is classified as invite + reply and lists its invitees."""
    expected_invitee = {
        'name': 'Silona Bonewald',
        'url': 'https://www.facebook.com/10155109753190015',
        'photo': 'https://graph.facebook.com/v2.2/10155109753190015/picture?type=large',
    }

    result = mf2util.interpret_comment(
        load_test('reply_invite'),
        'https://www.facebook.com/1565113317092307#10155109753190015',
        ['https://kylewm.com/2015/03/homebrew-website-club-2015-march-25'])

    assert result['name'] == 'invited'
    assert result['comment_type'] == ['invite', 'reply']
    assert result['invitees'] == [expected_invitee]
Пример #4
0
def parse_mention(doc, mf, source_url, target_url):
    """Interpret parsed microformats as a mention of ``target_url``.

    Args:
        doc: not used by this function; kept so existing callers keep working.
        mf: parsed microformats document, handed to
            ``mf2util.interpret_comment``.
        source_url: URL of the document ``mf`` was parsed from.
        target_url: the mentioned URL, either a single string or a
            collection of equivalent URLs.

    Returns:
        The dict produced by ``mf2util.interpret_comment`` (an empty dict if
        it produced nothing), with ``'mention'`` appended to its
        ``'comment_type'`` list.
    """
    # interpret_comment works on a collection of target URLs; given a bare
    # string, its membership tests would degrade into substring searches.
    target_urls = [target_url] if isinstance(target_url, str) else target_url

    # Nothing comes back when the source has no h-entry; fall back to an
    # empty dict so the caller still gets a plain 'mention'.
    mention_dict = mf2util.interpret_comment(mf, source_url, target_urls) or {}

    # TODO: if no h-entry, do a last-resort parse from the html and insert
    # safe defaults.

    mention_dict.setdefault('comment_type', []).append('mention')

    return mention_dict
Пример #5
0
def test_reply_invite():
    """An invitation is classified as invite + reply and lists its invitees."""
    expected_invitee = {
        'name': 'Silona Bonewald',
        'url': 'https://www.facebook.com/10155109753190015',
        'photo': 'https://graph.facebook.com/v2.2/10155109753190015/picture?type=large',
    }

    result = mf2util.interpret_comment(
        load_test('reply_invite'),
        'https://www.facebook.com/1565113317092307#10155109753190015',
        ['https://kylewm.com/2015/03/homebrew-website-club-2015-march-25'])

    assert result['name'] == 'invited'
    assert result['comment_type'] == ['invite', 'reply']
    assert result['invitees'] == [expected_invitee]
Пример #6
0
def test_reply_rsvp():
    """An RSVP post is classified as rsvp + reply and exposes its rsvp value."""
    source_url = 'https://snarfed.org/2014-05-05_homebrew-website-club-3'
    target_urls = ['http://werd.io/2014/homebrew-website-club-4']

    result = mf2util.interpret_comment(
        load_test('reply_rsvp'), source_url, target_urls)

    assert result['type'] == 'entry'
    assert result['name'] == 'Homebrew Website Club'
    assert '<a class="u-in-reply-to"' in result.get('content')

    # Compare wall-clock time and UTC offset separately.
    published = result['published']
    assert published.replace(tzinfo=None) == datetime(2014, 5, 5, 10, 10, 53)
    assert published.utcoffset() == timedelta(hours=-7)

    assert result['comment_type'] == ['rsvp', 'reply']
    assert result['rsvp'] == 'yes'
Пример #7
0
def test_reply_rsvp():
    """An RSVP post is classified as rsvp + reply and exposes its rsvp value."""
    source_url = 'https://snarfed.org/2014-05-05_homebrew-website-club-3'
    target_urls = ['http://werd.io/2014/homebrew-website-club-4']

    result = mf2util.interpret_comment(
        load_test('reply_rsvp'), source_url, target_urls)

    assert result['type'] == 'entry'
    assert result['name'] == 'Homebrew Website Club'
    assert '<a class="u-in-reply-to"' in result.get('content')

    # Compare wall-clock time and UTC offset separately.
    published = result['published']
    assert published.replace(tzinfo=None) == datetime(2014, 5, 5, 10, 10, 53)
    assert published.utcoffset() == timedelta(hours=-7)

    assert result['comment_type'] == ['rsvp', 'reply']
    assert result['rsvp'] == 'yes'
Пример #8
0
def test_u_in_reply_to():
    """A reply marked up with a plain u-in-reply-to link (snarfed.org fixture)."""
    source_url = 'https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile'
    target_url = 'https://willnorris.com/2014/03/display-likes-in-a-facepile'

    result = mf2util.interpret_comment(
        load_test('reply_u-in-reply-to'), source_url, [target_url])

    assert result['type'] == 'entry'
    assert result['name'] == 'Re: Display likes in a facepile'
    assert 'oh man, so cool!' in result.get('content')

    # Compare wall-clock time and UTC offset separately.
    published = result['published']
    assert published.replace(tzinfo=None) == datetime(2014, 3, 9, 22, 48, 22)
    assert published.utcoffset() == timedelta(hours=-7)

    assert result['in-reply-to'] == [{'url': target_url}]
    assert result['comment_type'] == ['reply']
Пример #9
0
def test_u_in_reply_to():
    """A reply marked up with a plain u-in-reply-to link (snarfed.org fixture)."""
    source_url = 'https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile'
    target_url = 'https://willnorris.com/2014/03/display-likes-in-a-facepile'

    result = mf2util.interpret_comment(
        load_test('reply_u-in-reply-to'), source_url, [target_url])

    assert result['type'] == 'entry'
    assert result['name'] == 'Re: Display likes in a facepile'
    assert 'oh man, so cool!' in result.get('content')

    # Compare wall-clock time and UTC offset separately.
    published = result['published']
    assert published.replace(tzinfo=None) == datetime(2014, 3, 9, 22, 48, 22)
    assert published.utcoffset() == timedelta(hours=-7)

    assert result['in-reply-to'] == [{'url': target_url}]
    assert result['comment_type'] == ['reply']
Пример #10
0
def create_mention(post, url, source_response):
    """Build or refresh the Mention that describes ``url`` as a response to ``post``.

    Args:
        post: the mentioned post, or a falsy value when there is none. When
            given, its ``permalink`` (in both http and https form) is used
            as the comment target and its ``mentions`` are searched for an
            existing Mention with the same ``url``.
        url: URL of the mentioning document.
        source_response: fetched response for ``url``; its ``text`` is parsed
            for microformats.

    Returns:
        The new or updated Mention, or None when the source contains no mf2
        data or no comment entry could be interpreted from it.
    """
    target_urls = []
    if post:
        base_target_urls = [post.permalink]

        for base_url in base_target_urls:
            target_urls.append(base_url)
            # also accept the same permalink under the other scheme
            target_urls.append(base_url.replace('https://', 'http://')
                               if base_url.startswith('https://')
                               else base_url.replace('http://', 'https://'))

    blob = mf2py.Parser(doc=source_response.text, url=url).to_dict()
    if not blob:
        app.logger.debug('create_mention: no mf2 in source_response')
        return
    entry = mf2util.interpret_comment(blob, url, target_urls)
    if not entry:
        app.logger.debug('create_mention: mf2util found no comment entry')
        return
    comment_type = entry.get('comment_type')

    content = util.clean_foreign_html(entry.get('content', ''))
    content_plain = util.format_as_text(content)

    published = entry.get('published')
    if not published:
        published = datetime.datetime.utcnow()

    # update an existing mention; post is optional (see above), so only
    # look through its mentions when there is a post to look at
    mention = None
    if post:
        mention = next((m for m in post.mentions if m.url == url), None)
    # or create a new one
    if not mention:
        mention = Mention()

    author = entry.get('author', {})
    mention.url = url
    mention.permalink = entry.get('url') or url
    # the first classified type wins; unclassified links are plain references
    mention.reftype = comment_type[0] if comment_type else 'reference'
    mention.author_name = author.get('name', '')
    mention.author_url = author.get('url', '')
    mention.author_image = author.get('photo')
    mention.content = content
    mention.content_plain = content_plain
    mention.published = published
    mention.title = entry.get('name')
    mention.syndication = entry.get('syndication', [])
    mention.rsvp = entry.get('rsvp')
    return mention
Пример #11
0
def attach_webmentions(article, all_webmentions):
    """Sort the webmentions received for ``article`` into its webmention buckets.

    Args:
        article: object with a ``url`` and a ``webmentions`` attribute that
            exposes the lists ``likes``, ``reposts``, ``replies`` and
            ``unclassified``.
        all_webmentions: mapping from ``'/<article url>'`` to a list of
            webmention records; each record provides ``'parsedSource'``,
            ``'sourceUrl'`` and ``'targetUrl'``.

    Each record is interpreted with ``mf2util.interpret_comment`` and appended
    to the bucket matching its first comment type. Comments with no type, or
    with a type other than like/repost/reply, go to ``unclassified``. Records
    from which no comment can be interpreted are reported and skipped.
    """
    wm_for_article = all_webmentions.get(f'/{article.url}', [])
    for wm in wm_for_article:
        comment = mf2util.interpret_comment(wm['parsedSource'],
                                            wm['sourceUrl'],
                                            [wm['targetUrl']])
        if not comment:
            # Nothing usable came back (e.g. the source has no h-entry), so
            # there is no comment object to attach anywhere.
            print(f"No comment could be interpreted from {wm['sourceUrl']}")
            continue

        comment_types = comment.get('comment_type')
        if not comment_types:
            print('No comment type parsed')
            article.webmentions.unclassified.append(comment)
            continue

        # only the first (primary) type decides the bucket
        comment_type = comment_types[0]
        if comment_type == 'like':
            article.webmentions.likes.append(comment)
        elif comment_type == 'repost':
            article.webmentions.reposts.append(comment)
        elif comment_type == 'reply':
            article.webmentions.replies.append(comment)
        else:
            print(f'Unrecognized comment type: {comment_type}')
            article.webmentions.unclassified.append(comment)
Пример #12
0
def create_mentions(post, url, source_response, is_person_mention):
    """Turn a mentioning document into Mention objects.

    The document at ``url`` (already fetched as ``source_response``) is
    parsed for microformats and interpreted as a comment on ``post``. When it
    is a reply, each of its own comments that has a URL is processed as well.

    Args:
        post: the mentioned post, or a falsy value when there is none.
        url: URL of the mentioning document.
        source_response: fetched response for ``url``; its ``text`` is parsed.
        is_person_mention: stored on every resulting Mention as
            ``person_mention``.

    Returns:
        A list of new or updated Mention objects, or None when the source
        has no mf2 data or no comment entry could be interpreted.
    """
    # Parsed documents by URL, shared with the fetch callback given to mf2util.
    mf2_cache = {}

    def fetch_mf2(url):
        if url not in mf2_cache:
            mf2_cache[url] = mf2py.parse(url=url)
        return mf2_cache[url]

    # The post's permalink counts as a target under both http and https.
    if post:
        permalink = post.permalink
        if permalink.startswith('https://'):
            other_scheme = permalink.replace('https://', 'http://')
        else:
            other_scheme = permalink.replace('http://', 'https://')
        target_urls = [permalink, other_scheme]
    else:
        target_urls = []

    blob = mf2py.parse(doc=source_response.text, url=url)
    mf2_cache[url] = blob
    if not blob:
        current_app.logger.debug('create_mention: no mf2 in source_response')
        return

    entry = mf2util.interpret_comment(
        blob, url, target_urls, fetch_mf2_func=fetch_mf2)
    current_app.logger.debug('interpreted comment: %r', entry)
    if not entry:
        current_app.logger.debug(
            'create_mention: mf2util found no comment entry')
        return

    comment_type = entry.get('comment_type', [])

    # Start with the entry itself, then add second-level "downstream"
    # comments when the entry is a reply.
    pending = [(entry, url)]
    if 'reply' in comment_type:
        downstream = entry.get('comment', [])
        current_app.logger.debug('adding in downstream comments:%d',
                                 len(downstream))
        pending.extend(
            (child, child.get('url')) for child in downstream
            if child.get('url'))

    mentions = []
    for item, item_url in pending:
        current_app.logger.debug('processing %s %r', item_url, item)
        content = util.clean_foreign_html(item.get('content', ''))
        content_plain = util.format_as_text(content)
        published = item.get('published') or datetime.datetime.utcnow()

        # Reuse the post's existing mention for this URL, if any.
        existing = None
        if post:
            existing = next(
                (m for m in post.mentions if m.url == item_url), None)
        mention = existing or Mention()

        mention.url = item_url
        mention.person_mention = is_person_mention
        mention.permalink = item.get('url') or item_url
        # Every item gets the reftype of the top-level entry.
        mention.reftype = comment_type[0] if comment_type else 'reference'
        author = item.get('author', {})
        mention.author_name = author.get('name', '')
        mention.author_url = author.get('url', '')
        mention.author_image = author.get('photo')
        mention.content = content
        mention.content_plain = content_plain
        mention.published = published
        mention.title = item.get('name')
        mention.syndication = item.get('syndication', [])
        mention.rsvp = item.get('rsvp')
        mentions.append(mention)

    return mentions
Пример #13
0
def create_mentions(post, url, source_response, is_person_mention):
    """Turn a mentioning document into Mention objects.

    The document at ``url`` (already fetched as ``source_response``) is
    parsed for microformats and interpreted as a comment on ``post``. When it
    is a reply, each of its own comments that has a URL is processed as well.

    Args:
        post: the mentioned post, or a falsy value when there is none.
        url: URL of the mentioning document.
        source_response: fetched response for ``url``; its ``text`` is parsed.
        is_person_mention: stored on every resulting Mention as
            ``person_mention``.

    Returns:
        A list of new or updated Mention objects, or None when the source
        has no mf2 data or no comment entry could be interpreted.
    """
    # Parsed documents by URL, shared with the fetch callback given to mf2util.
    mf2_cache = {}

    def fetch_mf2(url):
        if url not in mf2_cache:
            mf2_cache[url] = mf2py.parse(url=url)
        return mf2_cache[url]

    # The post's permalink counts as a target under both http and https.
    if post:
        permalink = post.permalink
        if permalink.startswith('https://'):
            other_scheme = permalink.replace('https://', 'http://')
        else:
            other_scheme = permalink.replace('http://', 'https://')
        target_urls = [permalink, other_scheme]
    else:
        target_urls = []

    blob = mf2py.parse(doc=source_response.text, url=url)
    mf2_cache[url] = blob
    if not blob:
        current_app.logger.debug('create_mention: no mf2 in source_response')
        return

    entry = mf2util.interpret_comment(
        blob, url, target_urls, fetch_mf2_func=fetch_mf2)
    current_app.logger.debug('interpreted comment: %r', entry)
    if not entry:
        current_app.logger.debug(
            'create_mention: mf2util found no comment entry')
        return

    comment_type = entry.get('comment_type', [])

    # Start with the entry itself, then add second-level "downstream"
    # comments when the entry is a reply.
    pending = [(entry, url)]
    if 'reply' in comment_type:
        downstream = entry.get('comment', [])
        current_app.logger.debug('adding in downstream comments:%d',
                                 len(downstream))
        pending.extend(
            (child, child.get('url')) for child in downstream
            if child.get('url'))

    mentions = []
    for item, item_url in pending:
        current_app.logger.debug('processing %s %r', item_url, item)
        content = util.clean_foreign_html(item.get('content', ''))
        content_plain = util.format_as_text(content)
        published = item.get('published') or datetime.datetime.utcnow()

        # Reuse the post's existing mention for this URL, if any.
        existing = None
        if post:
            existing = next(
                (m for m in post.mentions if m.url == item_url), None)
        mention = existing or Mention()

        mention.url = item_url
        mention.person_mention = is_person_mention
        mention.permalink = item.get('url') or item_url
        # Every item gets the reftype of the top-level entry.
        mention.reftype = comment_type[0] if comment_type else 'reference'
        author = item.get('author', {})
        mention.author_name = author.get('name', '')
        mention.author_url = author.get('url', '')
        mention.author_image = author.get('photo')
        mention.content = content
        mention.content_plain = content_plain
        mention.published = published
        mention.title = item.get('name')
        mention.syndication = item.get('syndication', [])
        mention.rsvp = item.get('rsvp')
        mentions.append(mention)

    return mentions
Пример #14
0
def create_dmention(post, url):
    """Build a display-ready DMention for the archived mention at ``url``.

    Args:
        post: the mentioned post, or a falsy value. When given, its
            permalink, slug-less permalink and short permalink (each also in
            http form) are used as the comment targets.
        url: URL of the mentioning document; its parsed microformats are
            loaded from the archive.

    Returns:
        A DMention filled from the interpreted comment. When nothing is
        archived for ``url``, no comment can be interpreted, or interpreting
        fails, a bare DMention of reftype ``'reference'`` pointing at ``url``
        is returned instead.
    """
    target_urls = [
        post.permalink,
        post.permalink_without_slug,
        post.short_permalink,
        post.permalink.replace('https://', 'http://'),
        post.permalink_without_slug.replace('https://', 'http://'),
        post.short_permalink.replace('https://', 'http://'),
    ] if post else []

    try:
        blob = archiver.load_json_from_archive(url)
        if blob:
            entry = mf2util.interpret_comment(blob, url, target_urls)
            if entry:
                comment_type = entry.get('comment_type')

                content = entry.get('content', '')
                content_plain = format_as_text(content)
                content_words = jinja2.filters.do_wordcount(content_plain)

                author = entry.get('author', {})
                author_name = bleach.clean(author.get('name', ''))
                author_image = author.get('photo')
                if author_image:
                    author_image = local_mirror_resource(author_image)

                published = entry.get('published')

                return DMention(
                    permalink=entry.get('url', ''),
                    reftype=comment_type and comment_type[0],
                    author_name=author_name,
                    author_url=author.get('url', ''),
                    author_image=author_image or url_for(
                        'static', filename=AUTHOR_PLACEHOLDER),
                    content=content,
                    content_plain=content_plain,
                    content_words=content_words,
                    pub_date=published,
                    pub_date_iso=isotime_filter(published),
                    pub_date_human=human_time(published),
                    title=entry.get('name'),
                    deleted=False,
                    syndication=[format_syndication_url(s, False) for s
                                 in entry.get('syndication', [])],
                    children=[]
                )

    except Exception:
        # Best effort: any failure degrades to the plain reference below.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate. The logging API interpolates %-style placeholders.
        app.logger.exception('error interpreting %s', url)

    return DMention(
        permalink=url,
        reftype='reference',
        author_name=None,
        author_url=None,
        author_image=None,
        content=None,
        content_plain=None,
        content_words=0,
        pub_date=None,
        pub_date_iso=None,
        pub_date_human=None,
        title=None,
        deleted=False,
        syndication=[],
        children=[]
    )
Пример #15
0
def create_mentions(post, url, source_response):
    """Turn a mentioning document into Mention objects.

    The document at ``url`` (already fetched as ``source_response``) is
    parsed for microformats and interpreted as a comment on ``post``. When it
    is a reply, each of its own comments that has a URL is processed as well.

    Args:
        post: the mentioned post, or a falsy value when there is none.
        url: URL of the mentioning document.
        source_response: fetched response for ``url``; its ``text`` is parsed.

    Returns:
        A list of new or updated Mention objects, or None when the source
        has no mf2 data or no comment entry could be interpreted.
    """
    target_urls = []
    if post:
        base_target_urls = [post.permalink]

        for base_url in base_target_urls:
            target_urls.append(base_url)
            # also accept the same permalink under the other scheme
            target_urls.append(
                base_url.replace("https://", "http://")
                if base_url.startswith("https://")
                else base_url.replace("http://", "https://")
            )

    blob = mf2py.parse(doc=source_response.text, url=url)
    if not blob:
        current_app.logger.debug("create_mention: no mf2 in source_response")
        return
    entry = mf2util.interpret_comment(blob, url, target_urls)
    current_app.logger.debug("interpreted comment: %r", entry)

    if not entry:
        current_app.logger.debug("create_mention: mf2util found no comment entry")
        return
    comment_type = entry.get("comment_type", [])

    to_process = [(entry, url)]
    # process 2nd level "downstream" comments
    if "reply" in comment_type:
        downstream_cmts = entry.get("comment", [])
        current_app.logger.debug("adding in downstream comments:%d", len(downstream_cmts))
        for dc in downstream_cmts:
            if dc.get("url"):
                to_process.append((dc, dc.get("url")))

    results = []
    for item, item_url in to_process:
        current_app.logger.debug("processing %s %r", item_url, item)
        content = util.clean_foreign_html(item.get("content", ""))
        content_plain = util.format_as_text(content)

        published = item.get("published")
        if not published:
            published = datetime.datetime.utcnow()

        # update an existing mention; post is optional (see above), so only
        # look through its mentions when there is a post to look at
        mention = None
        if post:
            mention = next((m for m in post.mentions if m.url == item_url), None)
        # or create a new one
        if not mention:
            mention = Mention()

        author = item.get("author", {})
        mention.url = item_url
        mention.permalink = item.get("url") or item_url
        # every item gets the reftype of the top-level entry
        mention.reftype = comment_type[0] if comment_type else "reference"
        mention.author_name = author.get("name", "")
        mention.author_url = author.get("url", "")
        mention.author_image = author.get("photo")
        mention.content = content
        mention.content_plain = content_plain
        mention.published = published
        mention.title = item.get("name")
        mention.syndication = item.get("syndication", [])
        mention.rsvp = item.get("rsvp")
        results.append(mention)

    return results
Пример #16
0
import mf2py
import mf2util
import pprint

# Interpret the comment published at source_url as a response to target_url
# and dump both the raw parse and the interpreted comment.
source_url = 'https://brid.gy/comment/twitter/desmondrivet/1117876830478852096/1118148721034891264'
target_url = 'https://desmondrivet.com/2019/04/15/20190415154611'

# Parse the microformats of the source document into a plain dict.
parser = mf2py.Parser(url=source_url)
parsed = parser.to_dict()

# Comment-specific interpretation, relative to the single target URL.
comment = mf2util.interpret_comment(parsed, source_url, [target_url])
# Generic interpretation of the same document (computed but not printed).
general = mf2util.interpret(parsed, source_url)

pprint.pprint(parsed)
print('-----\n')
pprint.pprint(comment)
Пример #17
0
def webmention(username):
    """Receive a webmention sent to one of ``username``'s pages.

    Reads the ``source`` and ``target`` form fields of the current request.
    Checks that the target belongs to the user's domain and exists, and that
    the source links to it. Then stores (or refreshes) a Comment on the Post
    for the target's canonical URL.

    Args:
        username: name of the User the webmention is addressed to.

    Returns:
        A 400 response describing the problem when a parameter is missing,
        the target is outside the user's domain or cannot be fetched, or the
        source does not link to the target. Otherwise a short confirmation
        string. Aborts with 404 when no such user exists.
    """
    user = User.query.filter_by(username=username).first()
    if not user:
        abort(404)

    source = request.form.get('source')
    if not source:
        return make_response('No source parameter', 400)

    target = request.form.get('target')
    if not target:
        return make_response('No target parameter', 400)

    if not url_matches_domain(target, user.domain):
        return make_response(
            '{} is not a child of user domain {}'.format(target, user.domain),
            400)

    target_resp = requests.get(target)
    if target_resp.status_code // 100 != 2:
        return make_response('Target does not exist', 400)
    # the URL the target finally resolved to, after any redirects
    canonical_target_url = target_resp.url
    alternate_target_urls = (target, canonical_target_url)

    # check whether the source links to the target or possibly the url
    # that target redirects to
    source_resp = requests.get(source)
    link_to_target = find_link_to_target(source, source_resp,
                                         alternate_target_urls)
    if not link_to_target:
        # Logger.warn is a deprecated alias that newer Pythons no longer
        # provide; warning() is the supported spelling.
        current_app.logger.warning(
            'Webmention source %s does not appear to link to target %s.',
            source, target)
        return make_response(
            'Could not find any links from source to target', 400)

    # get or create a Post based on the canonical target URL
    post = Post.query.filter_by(
        user=user, permalink=canonical_target_url).first()
    if not post:
        post = Post()
        post.user = user
        post.permalink = canonical_target_url
        db.session.add(post)

    # user owns the target, and source links to the target.
    # A source without an h-entry yields nothing to interpret; fall back to
    # an empty dict so the link is still recorded as a bare 'mention'.
    interp = mf2util.interpret_comment(
        mf2py.Parser(url=source, doc=source_resp.text).to_dict(),
        source, alternate_target_urls) or {}

    comment = Comment.query.filter_by(post=post, source=source).first()
    if not comment:
        comment = Comment()
        comment.post = post
        # NOTE: 'recieved' (sic) is the attribute name used on Comment here
        comment.recieved = datetime.datetime.now()
        db.session.add(comment)

    author = interp.get('author', {})
    comment.source = source
    comment.permalink = interp.get('url')
    comment.published = interp.get('published')
    comment.author_name = author.get('name')
    comment.author_image = author.get('photo')
    comment.author_url = author.get('url')
    comment.title = interp.get('name')
    comment.content = interp.get('content')
    comment.rsvp = interp.get('rsvp')

    # the first known type, in this priority order, wins; anything else is
    # stored as a plain mention
    for known_type in ('reply', 'repost', 'like', 'rsvp'):
        if known_type in interp.get('comment_type', []):
            comment.type = known_type
            break
    else:
        comment.type = 'mention'

    db.session.commit()

    return 'received {} on {}'.format(
        comment.type, post.permalink)